Spaces:

sujyo
/

kyozai-scan

Sleeping

App Files Files Community

kyozai-scan / problem_splitter.py

sujyo

Upload problem_splitter.py with huggingface_hub

2b8cefb verified 24 days ago

raw

history blame contribute delete

11.1 kB

	"""
	問題自動分割プロトタイプ v2.

	連結成分ベースで ☑ だけを抽出する.
	左端 x<200 の領域内で「サイズ ~30x30、矩形に近い」成分を ☑ とみなす.
	"""
	import os
	from typing import List, Optional, Tuple
	import numpy as np
	from PIL import Image
	import cv2

	# Directory that contains this module; the sample and debug folders sit beside it.
	HERE = os.path.dirname(os.path.abspath(__file__))
	# Input images used by the __main__ demo at the bottom of the file.
	SAMPLES_DIR = os.path.join(HERE, "samples")
	# Output folder for the annotated images written by process().
	DEBUG_DIR = os.path.join(HERE, "debug")
	# NOTE: runs at import time, so importing this module creates the debug folder.
	os.makedirs(DEBUG_DIR, exist_ok=True)


	def find_problem_boundaries_generic(
	    page_rgb: np.ndarray,
	    min_gap_rows: int = 10,
	    blank_ratio: float = 0.03,
	    dark_thr: int = -1,  # -1 = automatic (estimated from the background brightness)
	) -> List[int]:
	    """Generic boundary detection for pages without checkbox marks ([8][9]-style pages).

	    Returns the y coordinate at the middle of each horizontal blank band that
	    separates two problems.

	    Camera photos:
	        When ``dark_thr`` is negative, the 75th percentile of the grayscale
	        values is taken as the paper level and 70% of it (but at least 80)
	        is used as the ink threshold.
	    Binding shadow:
	        The leftmost 12% of the width (at least 10 px) is skipped when
	        counting dark pixels per row, so a binding shadow does not make a
	        blank band look non-blank.
	    Star-row filter:
	        A gap is kept only if one of the (up to) 30 rows right below it
	        looks like a problem-start marker row (the difficulty-star row):
	        dark pixels in the left strip and almost none to the right of it.
	        Other gaps are treated as figure padding or spacing between
	        sub-questions and dropped.

	    Args:
	        page_rgb: RGB page image.
	        min_gap_rows: minimum number of consecutive blank rows for a gap.
	        blank_ratio: a row is blank when its dark-pixel count is below
	            this fraction of the examined width.
	        dark_thr: grayscale value below which a pixel counts as ink.

	    Returns:
	        Sorted boundary y coordinates (empty list when none survive).
	    """
	    gray = cv2.cvtColor(page_rgb, cv2.COLOR_RGB2GRAY)
	    height, width = gray.shape

	    if dark_thr < 0:
	        # Paper level ~ 75th percentile; ink is anything below 70% of it.
	        paper_level = float(np.percentile(gray, 75))
	        dark_thr = max(80, int(paper_level * 0.70))

	    ink = gray < dark_thr

	    # Ignore the left 12%: binding shadows of camera photos concentrate there.
	    x_skip = max(10, int(width * 0.12))
	    body = ink[:, x_skip:]
	    blank_rows = body.sum(axis=1) < (body.shape[1] * blank_ratio)

	    # Collect runs of blank rows that are closed by a non-blank row.
	    gaps = []
	    gap_top = None
	    for y in range(height):
	        if blank_rows[y]:
	            if gap_top is None:
	                gap_top = y
	        elif gap_top is not None:
	            if y - gap_top >= min_gap_rows:
	                gaps.append((gap_top, y))
	            gap_top = None

	    # Star-row filter plus exclusion of the top/bottom 5% page margins.
	    star_end = min(x_skip + 200, int(width * 0.35))
	    right_width = width - star_end
	    margin = max(30, int(height * 0.05))
	    left_dark = ink[:, x_skip:star_end].sum(axis=1)   # per-row ink in the left strip
	    right_dark = ink[:, star_end:].sum(axis=1)        # per-row ink right of the strip

	    boundaries = []
	    for top, bottom in gaps:
	        mid = top + (bottom - top) // 2
	        if not (margin < mid < height - margin):
	            continue  # gaps inside the page margins are never boundaries
	        # Shadow guard: a star row must clearly exceed the left-strip ink
	        # level seen inside the gap itself (2x its mean, and at least 20 px).
	        inside_gap = left_dark[top:bottom]
	        gap_avg = float(np.mean(inside_gap)) if inside_gap.size else 0.0
	        ld_threshold = max(20, gap_avg * 2.0)
	        below = slice(bottom, min(bottom + 30, height))
	        star_rows = (left_dark[below] >= ld_threshold) & (
	            right_dark[below] < right_width * 0.025
	        )
	        if star_rows.any():
	            boundaries.append(mid)

	    # Merge segments that are too short (removes double detections of the
	    # star row and the [N] box). A problem must be at least 1/20 of the page
	    # height and at least 100 px tall.
	    if boundaries:
	        min_h = max(100, height // 20)
	        cuts = [0] + sorted(boundaries) + [height]
	        while True:
	            seg_lens = [hi - lo for lo, hi in zip(cuts, cuts[1:])]
	            shortest = min(seg_lens)
	            if shortest >= height or shortest >= min_h:
	                break
	            idx = seg_lens.index(shortest)  # first shortest segment
	            left_len = seg_lens[idx - 1] if idx > 0 else height
	            right_len = seg_lens[idx + 1] if idx + 1 < len(seg_lens) else height
	            # Drop the cut shared with the shorter neighbour (ties go left).
	            cuts.pop(idx if left_len <= right_len else idx + 1)
	        boundaries = cuts[1:-1]

	    return boundaries


	def derive_bboxes_from_boundaries(
	    page_shape: Tuple[int, int],
	    boundaries: List[int],
	    right_margin_ratio: float = 0.99,
	    left: int = 10,
	) -> List[Tuple[int, int, int, int]]:
	    """Turn a list of boundary y coordinates into (x0, y0, x1, y1) boxes.

	    The page is cut at 0, at every boundary and at the page height. Each
	    slice taller than 30 px becomes one box spanning ``left`` to
	    ``int(width * right_margin_ratio)``; thinner slices are dropped because
	    they hold almost no content.
	    """
	    height, width = page_shape[:2]
	    x_right = int(width * right_margin_ratio)
	    cuts = [0] + boundaries + [height]
	    return [
	        (left, top, x_right, bottom)
	        for top, bottom in zip(cuts, cuts[1:])
	        if bottom - top > 30
	    ]


	def extract_page(img_rgb: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
	    """Crop the white sheet of paper out of a gray background.

	    Pixels brighter than 200 are treated as paper. The bounding rectangle of
	    the largest bright contour, shrunk by 2 px on every side, is cropped.

	    Returns:
	        ``(page, (x0, y0, x1, y1))`` where the box is given in coordinates of
	        ``img_rgb``. When no bright region is found, or the largest one is
	        too small to survive the 2 px padding, the input image itself and
	        the full-image box ``(0, 0, width, height)`` are returned.
	    """
	    gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
	    img_h, img_w = gray.shape
	    full_box = (0, 0, img_w, img_h)

	    _, white = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
	    contours, _ = cv2.findContours(white, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
	    if not contours:
	        return img_rgb, full_box

	    x, y, w, h = cv2.boundingRect(max(contours, key=cv2.contourArea))
	    pad = 2
	    x0, y0 = x + pad, y + pad
	    x1, y1 = x + w - pad, y + h - pad
	    if x1 <= x0 or y1 <= y0:
	        # The largest bright blob is only a speck: cropping would yield an
	        # empty array and break every later step, so keep the whole image.
	        return img_rgb, full_box
	    return img_rgb[y0:y1, x0:x1].copy(), (x0, y0, x1, y1)


	def find_checkboxes_by_cc(page_rgb: np.ndarray,
	                          left_strip_x: int = 200,
	                          size_min: int = 22,
	                          size_max: int = 35,
	                          aspect_tol: float = 0.25,
	                          density_min: float = 0.10,
	                          density_max: float = 0.60) -> List[Tuple[int, int, int, int]]:
	    """Detect checkbox marks with connected-component analysis.

	    Only the strip ``x < left_strip_x`` is examined. It is binarised with
	    Otsu's threshold (ink becomes foreground) and every 8-connected component
	    is kept when it satisfies all of:

	    - width and height both within ``[size_min, size_max]``
	    - width/height ratio within ``1 +/- aspect_tol`` (roughly square)
	    - pixel area / bounding-box area within ``[density_min, density_max]``

	    The defaults follow the original author's measurements on a 1086 px wide
	    page: boxes are about 27x27, sub-question markers such as "(1)" are about
	    20 px wide (hence ``size_min=22``), and a frame plus check mark fills
	    roughly 0.15-0.30 of its box, unlike solid glyphs.

	    Returns:
	        ``(x, y, w, h)`` tuples in strip coordinates, sorted by ``y``.
	    """
	    gray = cv2.cvtColor(page_rgb, cv2.COLOR_RGB2GRAY)
	    margin = gray[:, :left_strip_x].copy()
	    _, ink_mask = cv2.threshold(margin, 0, 255,
	                                cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
	    count, _labels, stats, _centroids = cv2.connectedComponentsWithStats(
	        ink_mask, connectivity=8)

	    found = []
	    # Label 0 is the background component, so start at 1.
	    for comp in range(1, count):
	        x, y, w, h, area = stats[comp]
	        size_ok = size_min <= w <= size_max and size_min <= h <= size_max
	        if not size_ok:
	            continue
	        squareness = w / max(h, 1)
	        if not (1 - aspect_tol <= squareness <= 1 + aspect_tol):
	            continue
	        fill = area / (w * h)
	        if density_min <= fill <= density_max:
	            found.append((x, y, w, h))

	    return sorted(found, key=lambda box: box[1])


	def find_section_bands(page_rgb: np.ndarray,
	                       min_h: int = 25,
	                       mean_max: float = 248,
	                       std_min: float = 15,
	                       full_width_ratio: float = 0.35) -> List[Tuple[int, int]]:
	    """Detect the section header bands (A / B / advanced) as ``(y_start, y_end)``.

	    A row belongs to a band when all of the following hold:

	    - more than ``full_width_ratio`` of its pixels are mid-gray (100 < v < 250)
	    - its mean is below ``mean_max`` and its standard deviation is above
	      ``std_min`` (neither pure white nor pure black)

	    Runs of at least ``min_h`` consecutive band rows are reported; ``y_end``
	    is exclusive. A run that reaches the bottom of the page is included.
	    """
	    gray = cv2.cvtColor(page_rgb, cv2.COLOR_RGB2GRAY)
	    height, width = gray.shape

	    midgray_share = ((gray > 100) & (gray < 250)).sum(axis=1) / width
	    band_rows = (
	        (gray.mean(axis=1) < mean_max)
	        & (gray.std(axis=1) > std_min)
	        & (midgray_share > full_width_ratio)
	    )

	    bands: List[Tuple[int, int]] = []
	    run_top = None
	    for y in range(height):
	        if band_rows[y]:
	            if run_top is None:
	                run_top = y
	        elif run_top is not None:
	            if y - run_top >= min_h:
	                bands.append((run_top, y))
	            run_top = None
	    # Close a run that is still open at the last row.
	    if run_top is not None and height - run_top >= min_h:
	        bands.append((run_top, height))
	    return bands


	def derive_problem_bboxes(page_shape: Tuple[int, int],
	                          checkboxes: List[Tuple[int, int, int, int]],
	                          right_margin_ratio: float = 0.99,
	                          section_bands: Optional[List[Tuple[int, int]]] = None,
	                          ) -> List[Tuple[int, int, int, int]]:
	    """Build one problem bbox per checkbox.

	    Each box starts 5 px above its checkbox and ends 5 px above the next
	    checkbox, or 20 px above the page bottom for the last one. When
	    ``section_bands`` is given and a band starts between the checkbox and
	    that end, the box is cut 5 px above the nearest such band instead. This
	    also applies to the last checkbox, so a trailing section header is not
	    swallowed by the final problem.

	    Args:
	        page_shape: ``(height, width, ...)`` of the page image.
	        checkboxes: ``(x, y, w, h)`` tuples, expected in top-to-bottom order.
	        right_margin_ratio: right edge of every box as a fraction of width.
	        section_bands: optional ``(y_start, y_end)`` bands; order is free.

	    Returns:
	        ``(x0, y0, x1, y1)`` tuples, one per checkbox, with ``x0`` fixed at 50.
	    """
	    h, w = page_shape[:2]
	    right = int(w * right_margin_ratio)
	    left = 50
	    out = []
	    for i, (_cx, cy, _cw, _ch) in enumerate(checkboxes):
	        y0 = max(0, cy - 5)
	        if i + 1 < len(checkboxes):
	            y1 = checkboxes[i + 1][1] - 5
	        else:
	            y1 = h - 20
	        if section_bands:
	            # Band tops that fall strictly inside this problem's span.
	            band_tops = [band_y0 for band_y0, _band_y1 in section_bands
	                         if cy < band_y0 < y1]
	            if band_tops:
	                y1 = min(band_tops) - 5
	        out.append((left, y0, right, y1))
	    return out


	def visualize(page_rgb: np.ndarray,
	              checkboxes: List[Tuple[int, int, int, int]],
	              bboxes: List[Tuple[int, int, int, int]]) -> np.ndarray:
	    """Return a copy of the page with detections drawn on it.

	    Checkboxes ``(x, y, w, h)`` are outlined in green (2 px). Problem boxes
	    ``(x0, y0, x1, y1)`` are outlined in red (4 px) and labelled "Q1",
	    "Q2", ... near their top-left corner. The input image is not modified.
	    """
	    green = (0, 200, 0)
	    red = (220, 0, 0)
	    canvas = page_rgb.copy()

	    for x, y, w, h in checkboxes:
	        top_left, bottom_right = (x, y), (x + w, y + h)
	        cv2.rectangle(canvas, top_left, bottom_right, green, 2)

	    for i, (x0, y0, x1, y1) in enumerate(bboxes):
	        cv2.rectangle(canvas, (x0, y0), (x1, y1), red, 4)
	        label_origin = (x0 + 10, y0 + 35)
	        cv2.putText(canvas, f"Q{i+1}", label_origin,
	                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, red, 3)
	    return canvas


	def process(sample_path: str, name: str):
	    """Run the checkbox-based splitter on one image and save a debug overlay.

	    The image is loaded as RGB, the page is cropped with ``extract_page``,
	    checkboxes are detected and turned into problem boxes, and the annotated
	    page is written to ``DEBUG_DIR/<name>_detected.png``. Progress is printed
	    to stdout.

	    Returns:
	        ``(checkboxes, bboxes)`` as produced by ``find_checkboxes_by_cc`` and
	        ``derive_problem_bboxes``.
	    """
	    print(f"\n=== {name} ===")
	    pixels = np.array(Image.open(sample_path).convert("RGB"))
	    # The crop box in original-image coordinates is not needed here.
	    page, _page_box = extract_page(pixels)
	    print(f"page: {page.shape}")

	    cbs = find_checkboxes_by_cc(page)
	    print(f"checkboxes detected: {len(cbs)}")
	    for i, (x, y, w, h) in enumerate(cbs):
	        print(f" ☑{i+1}: x={x:4d} y={y:4d} w={w:2d} h={h:2d}")

	    bboxes = derive_problem_bboxes(page.shape, cbs)
	    out_path = os.path.join(DEBUG_DIR, f"{name}_detected.png")
	    Image.fromarray(visualize(page, cbs, bboxes)).save(out_path)
	    print(f"-> {out_path}")
	    return cbs, bboxes


	if __name__ == "__main__":
	    # Demo run over the bundled sample pages: (file name, short label).
	    for sample_file, label in (
	        ("sample02_p45.png", "p45"),
	        ("sample03_p47.png", "p47"),
	    ):
	        process(os.path.join(SAMPLES_DIR, sample_file), label)