Spaces:

jebin2
/

comic-panel-extractor

Running

File size: 8,171 Bytes

a7da787

import numpy as np
import os
import json
from text_detector import TextDetector, Config as CVP_Config
import cv2
import shutil

# ----------------------------------------------------------
# MASK TEXT REGIONS
# ----------------------------------------------------------

def mask_text_regions(image_path, bboxes, output_path=None, color=(0, 0, 0)):
	"""
	Paint over the text regions of an image with a solid color to reduce panel extraction noise.

	Args:
		image_path (str): Path to the input image.
		bboxes (list of list): Bounding boxes in [x1, y1, x2, y2] format. Coordinates
			may be ints or floats; floats are rounded to the nearest pixel.
		output_path (str, optional): Path to save the modified image. Nothing is
			written when it is omitted or empty.
		color (tuple): Fill color handed to cv2.rectangle (default (0, 0, 0), i.e. black).
	Returns:
		masked_image (numpy array): Image with masked text regions.
	Raises:
		Exception: If the image cannot be loaded, or if output_path is given and
			the masked image cannot be written there.
	"""
	image = cv2.imread(image_path)
	if image is None:
		raise Exception(f"Could not load image: {image_path}")

	for bbox in bboxes:
		# cv2.rectangle only accepts integer points, while boxes that went
		# through JSON may carry float coordinates.
		x1, y1, x2, y2 = (int(round(value)) for value in bbox)
		cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=-1)  # Fill rectangle

	if output_path:
		# imwrite signals failure through its return value; do not report
		# success unless the file was actually written.
		if not cv2.imwrite(output_path, image):
			raise Exception(f"Could not write image: {output_path}")
		print(f"✅ Text-masked image saved to: {output_path}")

	return image


# ----------------------------------------------------------
# PRE PROCESS METHOD
# ----------------------------------------------------------

def pre_process(image_path, output_dir):
	"""
	Write the grayscale, inverse-binary and dilated versions of an image.

	The three intermediate images are stored in output_dir as "2_gray.jpg",
	"3_binary.jpg" and "4_dilated.jpg". Nothing is returned.

	Args:
		image_path (str): Path to the input image.
		output_dir (str): Directory receiving the intermediate images; created
			when it does not exist yet.
	Raises:
		Exception: If the input image cannot be loaded.
	"""
	if not os.path.exists(output_dir):
		os.makedirs(output_dir)

	source = cv2.imread(image_path)
	if source is None:
		raise Exception(f"Could not load image: {image_path}")

	grayscale = cv2.cvtColor(source, cv2.COLOR_BGR2GRAY)
	# Inverse threshold: pixels brighter than 200 become 0, all others 255.
	inverted = cv2.threshold(grayscale, 200, 255, cv2.THRESH_BINARY_INV)[1]

	# Two passes of a 5x5 rectangular dilation strengthen borders and close small gaps.
	structuring = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
	thickened = cv2.dilate(inverted, structuring, iterations=2)

	stages = (
		("2_gray.jpg", grayscale),
		("3_binary.jpg", inverted),
		("4_dilated.jpg", thickened),
	)
	for file_name, stage_image in stages:
		cv2.imwrite(os.path.join(output_dir, file_name), stage_image)


# ----------------------------------------------------------
# CLEAN DILATED IMAGE
# ----------------------------------------------------------

def clean_dilated_with_row_priority(dilated_path, output_path, max_neighbors=2):
	"""
	Clean a dilated comic page by thinning thick black regions using Game-of-Life
	style neighbor counting, with preference to clean rows that have fewer black pixels.

	A black pixel (value exactly 0 in the input) is turned white when both hold:
	  * more than max_neighbors of its 8 surrounding pixels are black (positions
	    outside the image count as non-black), and
	  * its row holds strictly fewer black pixels than the row above and no more
	    than the row below (a missing row at the image edge imposes no constraint).

	Every decision is taken from the input image only; clearing one pixel never
	influences the decision for another, which is why the whole pass can be
	evaluated with array operations instead of a per-pixel Python loop.

	Args:
		dilated_path (str): Path of the image to clean; it is read as grayscale.
		output_path (str): Where the cleaned image is written (black = 0, white = 255).
		max_neighbors (int): Largest number of black neighbors a black pixel may
			have without becoming a candidate for removal.
	Returns:
		str: output_path, unchanged.
	Raises:
		Exception: If the dilated image cannot be loaded.
	"""
	dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
	if dilated is None:
		raise Exception("Could not load dilated image.")

	# NOTE(review): only exact zeros count as black. If the input was stored in a
	# lossy format, near-black pixels will not match -- confirm this is intended.
	binary = (dilated == 0).astype(np.uint8)
	height, width = binary.shape

	# Sum each pixel's 3x3 window by adding the nine shifted views of the
	# zero-padded mask; int32 avoids uint8 overflow while accumulating.
	padded = np.pad(binary, pad_width=1, mode="constant", constant_values=0).astype(np.int32)
	window_sum = np.zeros((height, width), dtype=np.int32)
	for dy in range(3):
		for dx in range(3):
			window_sum += padded[dy:dy + height, dx:dx + width]

	# For a black pixel the window contains the pixel itself, hence the "- 1".
	crowded = (binary == 1) & (window_sum - 1 > max_neighbors)

	# A row "wins" when it is the first minimum among (row above, itself, row below)
	# by black-pixel count: strictly below the previous row, not above the next one.
	row_black_counts = binary.sum(axis=1, dtype=np.int64)
	row_wins = np.ones(height, dtype=bool)
	row_wins[1:] &= row_black_counts[1:] < row_black_counts[:-1]
	row_wins[:-1] &= row_black_counts[:-1] <= row_black_counts[1:]

	cleaned = binary.copy()
	cleaned[crowded & row_wins[:, np.newaxis]] = 0

	# Map the 0/1 mask back to an image: black where the mask is still set.
	cleaned_img = (1 - cleaned) * 255
	cv2.imwrite(output_path, cleaned_img)
	print(f"✅ Cleaned dilated image saved to: {output_path}")
	return output_path


# ----------------------------------------------------------
# EXTRACT PANELS - BLACK PERCENTAGE METHOD
# ----------------------------------------------------------

def _find_panel_spans(black_percentage, thresh, length, min_size=10):
	"""
	Split one image axis into panel spans separated by gutters.

	A gutter is a maximal run of positions whose black percentage is >= thresh.
	The returned (start, end) spans are the stretches between recorded gutters
	(and the axis ends) that are more than min_size positions long.

	A gutter that is still open when the axis ends is not recorded, so the final
	span runs from the end of the last closed gutter to `length`.
	NOTE(review): this means a trailing margin stays inside the last span while a
	leading margin is trimmed -- confirm whether that asymmetry is intended.
	"""
	gutters = []
	gutter_start = None
	for position, percent_black in enumerate(black_percentage):
		if gutter_start is None:
			if percent_black >= thresh:
				gutter_start = position
		elif percent_black < thresh:
			gutters.append((gutter_start, position))
			gutter_start = None

	spans = []
	prev_end = 0
	for start, end in gutters:
		if start - prev_end > min_size:
			spans.append((prev_end, start))
		prev_end = end
	if length - prev_end > min_size:
		spans.append((prev_end, length))
	return spans


def extract_panels_by_black_percentage_fixed(
	dilated_path, original_image_path, output_dir,
	row_thresh=20, col_thresh=20,
	min_width_ratio=0.1, min_height_ratio=0.1
):
	"""
	Extract comic panels using black percentage scan with smart width & height filtering.

	The dilated image is scanned row by row to find horizontal gutters, then each
	resulting band is scanned column by column to find vertical gutters. Candidate
	rectangles smaller than 0.5% of the page area are dropped, and the survivors
	must be at least as wide/high as the larger of half the average candidate
	size and the given ratio of the page size. Accepted panels are cropped from
	the original image and written to output_dir as "panel_<n>.jpg", numbered
	from 1 in detection order.

	Coordinates are computed on the dilated image and applied unchanged to the
	original image, so both are expected to have the same pixel dimensions.

	Args:
		dilated_path (str): Path of the (cleaned) dilated image; read as grayscale.
			Only pixels equal to 0 count as black.
		original_image_path (str): Path of the image the panels are cropped from.
		output_dir (str): Directory for the panel images; created if missing.
		row_thresh (float): Minimum black percentage for a row to count as gutter.
		col_thresh (float): Minimum black percentage for a column to count as gutter.
		min_width_ratio (float): Minimum panel width as a fraction of the page width.
		min_height_ratio (float): Minimum panel height as a fraction of the page height.
	Returns:
		tuple: (output_dir, panel_images, panel_points) where panel_images is the
		list of cropped arrays and panel_points the matching list of dicts with
		keys "x_start", "y_start", "x_end" and "y_end".
	Raises:
		Exception: If either image cannot be loaded.
	"""
	os.makedirs(output_dir, exist_ok=True)

	dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
	original = cv2.imread(original_image_path)
	if dilated is None or original is None:
		raise Exception("Could not load dilated or original image.")

	height, width = dilated.shape
	is_black = dilated == 0

	# Horizontal bands between row gutters
	row_black_percentage = np.sum(is_black, axis=1) / width * 100
	panel_rows = _find_panel_spans(row_black_percentage, row_thresh, height)

	# Split every band at its column gutters; discard slivers by area
	min_area = (width * height) * 0.005
	all_panels = []
	for y1, y2 in panel_rows:
		col_black_percentage = np.sum(is_black[y1:y2, :], axis=0) / (y2 - y1) * 100
		for x1, x2 in _find_panel_spans(col_black_percentage, col_thresh, width):
			if (x2 - x1) * (y2 - y1) < min_area:
				continue
			all_panels.append((x1, y1, x2, y2))

	# Post-filter: limits are relative to both the average candidate and the page
	panel_widths = [x2 - x1 for x1, _, x2, _ in all_panels]
	panel_heights = [y2 - y1 for _, y1, _, y2 in all_panels]
	avg_width = np.mean(panel_widths) if panel_widths else 0
	avg_height = np.mean(panel_heights) if panel_heights else 0
	min_allowed_width = max(avg_width * 0.5, width * min_width_ratio)
	min_allowed_height = max(avg_height * 0.5, height * min_height_ratio)

	panel_count, panel_images, panel_points = 0, [], []
	for x1, y1, x2, y2 in all_panels:
		if x2 - x1 < min_allowed_width or y2 - y1 < min_allowed_height:
			continue
		panel = original[y1:y2, x1:x2]
		panel_count += 1
		panel_images.append(panel)
		panel_points.append({
			"x_start": x1, "y_start": y1, "x_end": x2, "y_end": y2
		})
		cv2.imwrite(os.path.join(output_dir, f"panel_{panel_count}.jpg"), panel)

	print(f"✅ Extracted {panel_count} panels after smart width & height filtering.")
	return output_dir, panel_images, panel_points


# ----------------------------------------------------------
# MAIN EXECUTION
# ----------------------------------------------------------
if __name__ == "__main__":
	source_image = "input.jpg"
	work_dir = "extracted_panels"

	# Start every run from an empty working directory.
	shutil.rmtree(work_dir, ignore_errors=True)
	os.makedirs(work_dir, exist_ok=True)

	# Step 1: detect text and paint it over so it does not disturb panel detection.
	detector_config = CVP_Config()
	detector_config.main_file_name = source_image
	detector_config.temp_folder = work_dir
	detector_config.comic_image = source_image
	detector_config.output_video = f"{work_dir}/test.mp4"

	with TextDetector(detector_config) as detector:
		bubbles_json = detector.detect_and_group_text(detector_config.comic_image)
	with open(bubbles_json, "r", encoding="utf-8") as handle:
		text_boxes = [entry["bbox"] for entry in json.load(handle)]

	text_removed_path = os.path.join(work_dir, "1_text_removed.jpg")
	mask_text_regions(source_image, text_boxes, output_path=text_removed_path)

	# Step 2: grayscale / binary / dilated intermediates of the masked page.
	pre_process(text_removed_path, work_dir)

	# Step 3: thin the dilated image.
	cleaned_path = os.path.join(work_dir, "5_dilated_cleaned.jpg")
	clean_dilated_with_row_priority(
		os.path.join(work_dir, "4_dilated.jpg"),
		cleaned_path,
		max_neighbors=2,
	)

	# Step 4: cut panels out of the untouched input image.
	extract_panels_by_black_percentage_fixed(
		cleaned_path,
		source_image,
		work_dir,
		min_width_ratio=0.1,  # Panels must be at least 10% of total width
	)