Spaces:

jebin2
/

comic-panel-extractor

Running

App Files Files Community

comic-panel-extractor / ComicPanelExtractor.py

jebin2

test workflow

a7da787 6 months ago

raw

history blame

8.17 kB

	import numpy as np
	import os
	import json
	from text_detector import TextDetector, Config as CVP_Config
	import cv2
	import shutil

	# ----------------------------------------------------------
	# MASK TEXT REGIONS
	# ----------------------------------------------------------

	def mask_text_regions(image_path, bboxes, output_path=None, color=(0, 0, 0)):
	    """
	    Paint filled rectangles over text regions to reduce panel extraction noise.

	    Args:
	        image_path (str): Path to the input image.
	        bboxes (list of list): List of bounding boxes in [x1, y1, x2, y2] format.
	            Coordinates are rounded to the nearest integer before drawing.
	        output_path (str, optional): Path to save the modified image.
	        color (tuple): Color used to fill the bounding boxes
	            (default (0, 0, 0), i.e. black).

	    Returns:
	        masked_image (numpy array): Image with masked text regions.

	    Raises:
	        Exception: If the image cannot be loaded, or if output_path is given
	            and the image cannot be written to it.
	    """
	    image = cv2.imread(image_path)
	    if image is None:
	        raise Exception(f"Could not load image: {image_path}")

	    for bbox in bboxes:
	        # cv2.rectangle only accepts integer points; boxes loaded from JSON
	        # may carry float coordinates, so coerce them first.
	        x1, y1, x2, y2 = (int(round(value)) for value in bbox)
	        cv2.rectangle(image, (x1, y1), (x2, y2), color, thickness=-1)  # Fill rectangle

	    if output_path:
	        # imwrite reports failure through its return value; do not claim
	        # success when nothing was written.
	        if not cv2.imwrite(output_path, image):
	            raise Exception(f"Could not write image: {output_path}")
	        print(f"✅ Text-masked image saved to: {output_path}")

	    return image


	# ----------------------------------------------------------
	# PRE PROCESS METHOD
	# ----------------------------------------------------------

	def pre_process(image_path, output_dir):
	    """
	    Write grayscale, binarised and dilated versions of an image to output_dir.

	    The three intermediate images are saved as "2_gray.jpg", "3_binary.jpg"
	    and "4_dilated.jpg" inside output_dir.

	    Args:
	        image_path (str): Path to the input image.
	        output_dir (str): Directory for the intermediate images; created if
	            it does not exist.

	    Raises:
	        Exception: If the input image cannot be loaded.
	    """
	    # exist_ok avoids the check-then-create race of a separate exists() test.
	    os.makedirs(output_dir, exist_ok=True)

	    # Load and preprocess image
	    image = cv2.imread(image_path)
	    if image is None:
	        raise Exception(f"Could not load image: {image_path}")

	    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
	    # Inverted threshold: pixels brighter than 200 become 0, the rest 255.
	    _, binary = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY_INV)

	    # Dilate to strengthen borders and fill small gaps
	    kernel = cv2.getStructuringElement(cv2.MORPH_RECT, (5, 5))
	    dilated = cv2.dilate(binary, kernel, iterations=2)

	    cv2.imwrite(os.path.join(output_dir, "2_gray.jpg"), gray)
	    cv2.imwrite(os.path.join(output_dir, "3_binary.jpg"), binary)
	    cv2.imwrite(os.path.join(output_dir, "4_dilated.jpg"), dilated)


	# ----------------------------------------------------------
	# CLEAN DILATED IMAGE
	# ----------------------------------------------------------

	def clean_dilated_with_row_priority(dilated_path, output_path, max_neighbors=2):
	    """
	    Thin the black regions of a dilated comic page, favouring sparse rows.

	    A black pixel (value 0 in the input) is turned white when both hold:
	      * more than max_neighbors of its 8 surrounding pixels are black
	        (positions outside the image count as not black), and
	      * its row has the fewest black pixels among itself and the rows
	        directly above and below; on a tie the upper row wins.
	    Every decision is taken from the unmodified input, so the result does
	    not depend on the order in which pixels are visited.

	    Args:
	        dilated_path (str): Path to the image to clean; read as grayscale.
	        output_path (str): Path the cleaned image is written to.
	        max_neighbors (int): Highest number of black neighbours a black
	            pixel may have without becoming a candidate for clearing.

	    Returns:
	        str: output_path.

	    Raises:
	        Exception: If the input cannot be loaded or the output cannot be
	            written.
	    """
	    dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
	    if dilated is None:
	        raise Exception("Could not load dilated image.")

	    # NOTE(review): only exact zeros count as black. If the input was stored
	    # with lossy compression, near-black pixels may be skipped - confirm
	    # this is intended.
	    black = dilated == 0
	    height, width = black.shape

	    # Sum of each 3x3 window (zero padded at the border), built from nine
	    # shifted views instead of a per-pixel Python loop.
	    padded = np.pad(black.astype(np.int32), pad_width=1, mode="constant", constant_values=0)
	    window_sum = np.zeros((height, width), dtype=np.int32)
	    for dy in range(3):
	        for dx in range(3):
	            window_sum += padded[dy:dy + height, dx:dx + width]
	    # Subtract the centre pixel itself to get the neighbour count.
	    crowded = black & (window_sum - 1 > max_neighbors)

	    # A row may be cleared only if it beats the row above strictly (the
	    # upper row wins ties) and is no worse than the row below.
	    row_black_counts = black.sum(axis=1, dtype=np.int64)
	    row_wins = np.ones(height, dtype=bool)
	    row_wins[1:] &= row_black_counts[1:] < row_black_counts[:-1]
	    row_wins[:-1] &= row_black_counts[:-1] <= row_black_counts[1:]

	    still_black = black & ~(crowded & row_wins[:, np.newaxis])

	    # Back to an 8-bit image: black stays 0, everything else becomes 255.
	    cleaned_img = (~still_black).astype(np.uint8) * 255
	    if not cv2.imwrite(output_path, cleaned_img):
	        raise Exception(f"Could not write image: {output_path}")
	    print(f"✅ Cleaned dilated image saved to: {output_path}")
	    return output_path


	# ----------------------------------------------------------
	# EXTRACT PANELS - BLACK PERCENTAGE METHOD
	# ----------------------------------------------------------

	def _split_on_gutters(black_percentage, threshold, extent, min_gap=10):
	    """Return the (start, end) spans that lie between gutter runs on one axis."""
	    gutters = []
	    gutter_start = None
	    for pos, percent_black in enumerate(black_percentage):
	        if gutter_start is None:
	            if percent_black >= threshold:
	                gutter_start = pos
	        elif percent_black < threshold:
	            gutters.append((gutter_start, pos))
	            gutter_start = None
	    # NOTE(review): a gutter that is still open at the far edge is not
	    # recorded, so the last span extends to `extent` and includes it.
	    # Kept as-is to preserve the existing output - confirm whether intended.

	    spans = []
	    prev_end = 0
	    for start, end in gutters:
	        if start - prev_end > min_gap:
	            spans.append((prev_end, start))
	        prev_end = end
	    if extent - prev_end > min_gap:
	        spans.append((prev_end, extent))
	    return spans


	def extract_panels_by_black_percentage_fixed(
	    dilated_path, original_image_path, output_dir,
	    row_thresh=20, col_thresh=20,
	    min_width_ratio=0.1, min_height_ratio=0.1,
	    debug_overlay_path=None,
	):
	    """
	    Extract comic panels using black percentage scan with smart width & height filtering.

	    Rows whose share of zero-valued pixels reaches row_thresh are treated as
	    gutters; the bands between them are split the same way along columns
	    using col_thresh. Surviving rectangles are cropped from the original
	    image and saved as "panel_<n>.jpg" in output_dir.

	    Args:
	        dilated_path (str): Path to the mask image that is scanned (read as
	            grayscale).
	        original_image_path (str): Path to the image panels are cropped from.
	        output_dir (str): Directory for the panel images; created if missing.
	        row_thresh (float): Percentage (0-100) of zero-valued pixels at or
	            above which a row counts as gutter.
	        col_thresh (float): Same as row_thresh, for columns within a band.
	        min_width_ratio (float): Minimum panel width as a fraction of the
	            page width.
	        min_height_ratio (float): Minimum panel height as a fraction of the
	            page height.
	        debug_overlay_path (str, optional): If given, a copy of the original
	            image with every kept panel outlined and numbered is written to
	            this path. Nothing is drawn or written when omitted.

	    Returns:
	        tuple: (output_dir, panel_images, panel_points) where panel_images is
	        a list of cropped arrays and panel_points a list of dicts with keys
	        "x_start", "y_start", "x_end" and "y_end".

	    Raises:
	        Exception: If either input image cannot be loaded.
	    """
	    os.makedirs(output_dir, exist_ok=True)

	    dilated = cv2.imread(dilated_path, cv2.IMREAD_GRAYSCALE)
	    original = cv2.imread(original_image_path)
	    if dilated is None or original is None:
	        raise Exception("Could not load dilated or original image.")

	    height, width = dilated.shape
	    black = dilated == 0

	    # Horizontal bands between row gutters
	    row_black_percentage = np.sum(black, axis=1) / width * 100
	    panel_rows = _split_on_gutters(row_black_percentage, row_thresh, height)

	    # Split every band into candidate panels along its column gutters
	    min_area = (width * height) * 0.005  # drop candidates under 0.5% of the page
	    all_panels = []
	    for y1, y2 in panel_rows:
	        col_black_percentage = np.sum(black[y1:y2, :], axis=0) / (y2 - y1) * 100
	        for x1, x2 in _split_on_gutters(col_black_percentage, col_thresh, width):
	            if (x2 - x1) * (y2 - y1) < min_area:
	                continue
	            all_panels.append((x1, y1, x2, y2))

	    # Post-filter: a panel must reach half the average candidate size and
	    # the requested fraction of the page, whichever is larger.
	    panel_widths = [x2 - x1 for x1, _, x2, _ in all_panels]
	    panel_heights = [y2 - y1 for _, y1, _, y2 in all_panels]
	    avg_width = np.mean(panel_widths) if panel_widths else 0
	    avg_height = np.mean(panel_heights) if panel_heights else 0
	    min_allowed_width = max(avg_width * 0.5, width * min_width_ratio)
	    min_allowed_height = max(avg_height * 0.5, height * min_height_ratio)

	    # The overlay is drawn on a copy so the cropped panels stay clean.
	    overlay = original.copy() if debug_overlay_path else None
	    panel_images, panel_points = [], []
	    for x1, y1, x2, y2 in all_panels:
	        if x2 - x1 < min_allowed_width or y2 - y1 < min_allowed_height:
	            continue
	        panel = original[y1:y2, x1:x2]
	        panel_images.append(panel)
	        panel_count = len(panel_images)
	        panel_points.append({
	            "x_start": x1, "y_start": y1, "x_end": x2, "y_end": y2
	        })
	        panel_path = os.path.join(output_dir, f"panel_{panel_count}.jpg")
	        cv2.imwrite(panel_path, panel)
	        if overlay is not None:
	            cv2.rectangle(overlay, (x1, y1), (x2, y2), (0, 255, 0), 2)
	            cv2.putText(overlay, f"#{panel_count}", (x1+5, y1+25),
	                        cv2.FONT_HERSHEY_SIMPLEX, 0.8, (0, 0, 255), 2)

	    if overlay is not None:
	        cv2.imwrite(debug_overlay_path, overlay)

	    print(f"✅ Extracted {len(panel_images)} panels after smart width & height filtering.")
	    return output_dir, panel_images, panel_points


	# ----------------------------------------------------------
	# MAIN EXECUTION
	# ----------------------------------------------------------
	def main():
	    """Run the whole pipeline on "input.jpg", writing into "extracted_panels"."""
	    image_path = "input.jpg"
	    output_dir = "extracted_panels"
	    # Start from an empty output directory on every run.
	    shutil.rmtree(output_dir, ignore_errors=True)
	    os.makedirs(output_dir, exist_ok=True)

	    # Detect and mask text regions
	    cvp_config = CVP_Config()
	    cvp_config.main_file_name = image_path
	    cvp_config.temp_folder = output_dir
	    cvp_config.comic_image = image_path
	    cvp_config.output_video = f"{output_dir}/test.mp4"

	    with TextDetector(cvp_config) as text_detector:
	        bubbles_path = text_detector.detect_and_group_text(cvp_config.comic_image)
	        # NOTE(review): read while the detector is still open, in case its
	        # exit handler cleans up the file - confirm against TextDetector.
	        with open(bubbles_path, "r", encoding="utf-8") as f:
	            bubbles = json.load(f)

	    output_path = os.path.join(output_dir, "1_text_removed.jpg")
	    mask_text_regions(image_path, [box["bbox"] for box in bubbles], output_path=output_path)

	    pre_process(output_path, output_dir)

	    # Clean dilated image
	    dilated_path = os.path.join(output_dir, "4_dilated.jpg")
	    cleaned_dilated_path = os.path.join(output_dir, "5_dilated_cleaned.jpg")
	    clean_dilated_with_row_priority(dilated_path, cleaned_dilated_path, max_neighbors=2)

	    # Extract panels - black percentage. Panels are cropped from the
	    # original image, not from the text-masked copy.
	    extract_panels_by_black_percentage_fixed(
	        cleaned_dilated_path,
	        image_path,
	        output_dir,
	        min_width_ratio=0.1,  # Panels must be at least 10% of total width
	    )


	if __name__ == "__main__":
	    main()