"""
問題自動分割プロトタイプ v2.

連結成分ベースで ☑ だけを抽出する.
左端 x<200 の領域内で「サイズ ~30x30、矩形に近い」成分を ☑ とみなす.
"""
import os
from typing import List, Optional, Tuple
import numpy as np
from PIL import Image
import cv2

# Directory that contains this script; sample inputs and debug output live beside it.
HERE = os.path.split(os.path.abspath(__file__))[0]
SAMPLES_DIR, DEBUG_DIR = (os.path.join(HERE, sub) for sub in ("samples", "debug"))
# Create the debug output directory at import time (no error if it already exists).
os.makedirs(DEBUG_DIR, exist_ok=True)


def _blank_row_gaps(is_blank: np.ndarray, min_gap_rows: int) -> List[Tuple[int, int]]:
    """Return (start, end) spans of blank-row runs at least ``min_gap_rows`` tall.

    ``end`` is the first non-blank row after the run.  A run that is still
    open at the last row is not reported.
    """
    gaps: List[Tuple[int, int]] = []
    run_start = None
    for y, blank in enumerate(is_blank):
        if blank:
            if run_start is None:
                run_start = y
        elif run_start is not None:
            if y - run_start >= min_gap_rows:
                gaps.append((run_start, y))
            run_start = None
    return gaps


def _merge_short_segments(boundaries: List[int], page_h: int, min_h: int) -> List[int]:
    """Drop boundaries until no segment between consecutive cuts is shorter than ``min_h``.

    The shortest segment is merged into whichever neighbour is shorter by
    removing the cut they share; the page edges (0 and ``page_h``) are never
    removed.
    """
    ys = [0] + sorted(boundaries) + [page_h]
    while True:
        heights = [b - a for a, b in zip(ys, ys[1:])]
        # First index of the minimum, so ties resolve towards the top of the page.
        mi = min(range(len(heights)), key=heights.__getitem__)
        shortest = heights[mi]
        # A segment spanning the whole page means there is nothing left to merge.
        if shortest >= page_h or shortest >= min_h:
            break
        left_h = heights[mi - 1] if mi > 0 else page_h
        right_h = heights[mi + 1] if mi + 1 < len(heights) else page_h
        ys.pop(mi if left_h <= right_h else mi + 1)
    return ys[1:-1]


def find_problem_boundaries_generic(
        page_rgb: np.ndarray,
        min_gap_rows: int = 10,
        blank_ratio: float = 0.03,
        dark_thr: int = -1,   # -1 = auto (estimated from the page brightness)
) -> List[int]:
    """Generic boundary detection for pages without ☑ markers ([8][9] style pages).

    Returns the centre y coordinate of each horizontal blank band that
    separates two problems.

    Camera photos:
        With ``dark_thr=-1`` the ink threshold is derived from the image
        itself: the 75th percentile of the grayscale values is taken as the
        paper brightness and anything at or below 70% of it (but never below
        80) counts as ink.
    Binding shadow:
        The leftmost 12% of the page (at least 10 px) is skipped when counting
        dark pixels per row, so a binding shadow does not turn blank bands
        into "non-blank" rows.
    ★-row filter:
        A gap is kept only if one of the 30 rows right after it looks like a
        problem-start marker (★ difficulty row): dark pixels in the left strip
        but almost none to the right.  Other gaps are treated as figure
        margins or spacing between sub-questions and discarded.

    Args:
        page_rgb: Page image; converted with ``cv2.COLOR_RGB2GRAY``.
        min_gap_rows: Minimum height, in rows, of a blank run to be a candidate.
        blank_ratio: A row is blank when its dark-pixel count is below this
            fraction of the examined width.
        dark_thr: Grayscale value below which a pixel is dark; negative means
            estimate it automatically.

    Returns:
        Boundary y coordinates in ascending order (possibly empty).
    """
    gray = cv2.cvtColor(page_rgb, cv2.COLOR_RGB2GRAY)
    H, W = gray.shape

    if dark_thr < 0:
        bg = float(np.percentile(gray, 75))
        dark_thr = max(80, int(bg * 0.70))

    # One boolean ink mask for the whole page; every count below is a slice of it.
    dark = gray < dark_thr

    # Skip the binding-shadow strip on the left when deciding which rows are blank.
    x_skip = max(10, int(W * 0.12))
    region = dark[:, x_skip:]
    is_blank = region.sum(axis=1) < (region.shape[1] * blank_ratio)
    gap_candidates = _blank_row_gaps(is_blank, min_gap_rows)

    # Left strip where a ★ marker is expected, and the remainder of the row.
    star_end = min(x_skip + 200, int(W * 0.35))
    right_W = W - star_end
    edge = max(30, int(H * 0.05))   # top/bottom 5% is page margin, never a boundary

    # Per-row dark counts, computed once instead of once per gap and row.
    left_dark = dark[:, x_skip:star_end].sum(axis=1)
    right_dark = dark[:, star_end:].sum(axis=1)

    boundaries = []
    for gs, ge in gap_candidates:
        mid = gs + (ge - gs) // 2
        if mid <= edge or mid >= H - edge:
            continue
        # Binding-shadow guard: a ★ row must show clearly more left-strip ink
        # than the gap itself (at least twice its average, and at least 20 px).
        gap_ld_avg = float(left_dark[gs:ge].mean()) if ge > gs else 0.0
        ld_threshold = max(20, gap_ld_avg * 2.0)
        after = slice(ge, min(ge + 30, H))
        star_rows = (left_dark[after] >= ld_threshold) & (right_dark[after] < right_W * 0.025)
        if star_rows.any():
            boundaries.append(mid)

    # Merge segments that are too short (removes double detections of a ★ row
    # and the [N] box).  Minimum problem height: 1/20 of the page, at least 100 px.
    if boundaries:
        boundaries = _merge_short_segments(boundaries, H, max(100, H // 20))

    return boundaries


def derive_bboxes_from_boundaries(
        page_shape: Tuple[int, int],
        boundaries: List[int],
        right_margin_ratio: float = 0.99,
        left: int = 10,
) -> List[Tuple[int, int, int, int]]:
    """Turn a list of boundary y coordinates into one bbox per page slice.

    Args:
        page_shape: ``(height, width, ...)``; only the first two entries are used.
        boundaries: Cut positions (y coordinates) between slices.
        right_margin_ratio: Right edge of every bbox as a fraction of the width.
        left: Left edge of every bbox.

    Returns:
        ``(left, y0, right, y1)`` for every pair of consecutive cuts, with the
        page top (0) and bottom (height) added as outer cuts.  Slices of 30
        rows or fewer are dropped as having essentially no content.
    """
    page_h, page_w = page_shape[:2]
    x_right = int(page_w * right_margin_ratio)
    cuts = [0] + boundaries + [page_h]
    return [
        (left, top, x_right, bottom)
        for top, bottom in zip(cuts, cuts[1:])
        if bottom - top > 30
    ]


def extract_page(img_rgb: np.ndarray) -> Tuple[np.ndarray, Tuple[int, int, int, int]]:
    """Crop the white sheet of paper out of a gray background.

    Pixels brighter than 200 are treated as paper; the bounding rectangle of
    the largest such region, shrunk by 2 px on each side, is the page.

    Returns:
        ``(page, (x0, y0, x1, y1))`` where ``page`` is a copy of the cropped
        area and the box is given in coordinates of ``img_rgb``.  When no
        bright region is found, the input image itself and its full extent are
        returned.
    """
    gray = cv2.cvtColor(img_rgb, cv2.COLOR_RGB2GRAY)
    _, paper_mask = cv2.threshold(gray, 200, 255, cv2.THRESH_BINARY)
    contours, _ = cv2.findContours(paper_mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    if contours:
        largest = max(contours, key=cv2.contourArea)
        bx, by, bw, bh = cv2.boundingRect(largest)
        inset = 2  # trim the rectangle slightly on every side
        left, top = bx + inset, by + inset
        right, bottom = bx + bw - inset, by + bh - inset
        return img_rgb[top:bottom, left:right].copy(), (left, top, right, bottom)
    full_h, full_w = gray.shape
    return img_rgb, (0, 0, full_w, full_h)


def find_checkboxes_by_cc(page_rgb: np.ndarray,
                          left_strip_x: int = 200,
                          size_min: int = 22,
                          size_max: int = 35,
                          aspect_tol: float = 0.25,
                          density_min: float = 0.10,
                          density_max: float = 0.60) -> List[Tuple[int, int, int, int]]:
    """Detect ☑ markers in the left strip of the page via connected components.

    The strip ``x < left_strip_x`` is binarised with Otsu's threshold
    (inverted, so ink becomes foreground) and every 8-connected component is
    tested against the ☑ profile.

    ☑ profile (measured on a 1086 px wide page):
        - size about 27x27; ``size_min=22`` rejects sub-question markers such
          as "(1)" whose width is about 20
        - nearly square: width/height within ``1 ± aspect_tol``
        - fill density (component pixels / bounding-box area) measured at
          0.15-0.30, i.e. outline plus check mark only; the defaults accept the
          wider range 0.10-0.60 and still reject solid glyphs

    Args:
        page_rgb: Page image; converted with ``cv2.COLOR_RGB2GRAY``.
        left_strip_x: Width of the strip, from the left edge, that is searched.
        size_min: Minimum bounding-box width and height.
        size_max: Maximum bounding-box width and height.
        aspect_tol: Allowed deviation of width/height from 1.
        density_min: Minimum fill density.
        density_max: Maximum fill density.

    Returns:
        ``(x, y, w, h)`` boxes as plain Python ints, sorted from top to bottom.
    """
    gray = cv2.cvtColor(page_rgb, cv2.COLOR_RGB2GRAY)
    strip = gray[:, :left_strip_x].copy()
    _, binary = cv2.threshold(strip, 0, 255,
                              cv2.THRESH_BINARY_INV + cv2.THRESH_OTSU)
    n, _labels, stats, _centroids = cv2.connectedComponentsWithStats(binary, connectivity=8)

    boxes = []
    # Label 0 is the background.  tolist() yields plain ints so the returned
    # tuples match the annotation and stay serialisable.
    for x, y, w, h, area in stats[1:n].tolist():
        if not (size_min <= w <= size_max and size_min <= h <= size_max):
            continue
        ar = w / max(h, 1)
        if not (1 - aspect_tol <= ar <= 1 + aspect_tol):
            continue
        density = area / (w * h)
        if density < density_min or density > density_max:
            continue
        boxes.append((x, y, w, h))

    boxes.sort(key=lambda b: b[1])
    return boxes


def find_section_bands(page_rgb: np.ndarray,
                       min_h: int = 25,
                       mean_max: float = 248,
                       std_min: float = 15,
                       full_width_ratio: float = 0.35) -> List[Tuple[int, int]]:
    """Detect the section header bands (A / B / advanced) as ``(y_start, y_end)``.

    A row belongs to a band when all of the following hold:
        - more than ``full_width_ratio`` of its pixels are mid-gray
          (strictly between 100 and 250)
        - its mean is below ``mean_max`` and its standard deviation is above
          ``std_min`` (neither pure white nor pure black)

    Runs of such rows that are at least ``min_h`` rows tall are returned;
    ``y_end`` is exclusive and equals the page height for a run that reaches
    the bottom edge.
    """
    gray = cv2.cvtColor(page_rgb, cv2.COLOR_RGB2GRAY)
    H, W = gray.shape
    midgray_share = ((gray > 100) & (gray < 250)).sum(axis=1) / W
    is_band = (
        (gray.mean(axis=1) < mean_max)
        & (gray.std(axis=1) > std_min)
        & (midgray_share > full_width_ratio)
    )
    # Pad with False on both ends so every run has a rising and a falling edge,
    # including one that touches the first or last row.
    padded = np.concatenate(([False], is_band, [False])).astype(np.int8)
    steps = np.diff(padded)
    run_starts = np.flatnonzero(steps == 1)
    run_ends = np.flatnonzero(steps == -1)
    return [
        (int(top), int(bottom))
        for top, bottom in zip(run_starts, run_ends)
        if bottom - top >= min_h
    ]


def derive_problem_bboxes(page_shape: Tuple[int, int],
                          checkboxes: List[Tuple[int, int, int, int]],
                          right_margin_ratio: float = 0.99,
                          section_bands: Optional[List[Tuple[int, int]]] = None,
                          ) -> List[Tuple[int, int, int, int]]:
    """Build one bbox per problem, from each ☑ down to the next ☑ or section band.

    Args:
        page_shape: ``(height, width, ...)``; only the first two entries are used.
        checkboxes: ``(x, y, w, h)`` boxes, expected in top-to-bottom order.
        right_margin_ratio: Right edge of every bbox as a fraction of the width.
        section_bands: Optional ``(y_start, y_end)`` section header bands.  When
            given, a problem is cut 5 px above the nearest band that starts
            between its ☑ and its default lower edge.

    Returns:
        ``(left, y0, right, y1)`` per checkbox with ``left`` fixed at 50 and
        ``y0`` 5 px above the ☑ (clamped to 0).  The default lower edge is 5 px
        above the next ☑, or 20 px above the page bottom for the last problem.
    """
    page_h, page_w = page_shape[:2]
    right = int(page_w * right_margin_ratio)
    left = 50
    # Sorted band tops, so the first match below a checkbox is the nearest band
    # regardless of the order the caller supplied them in.
    band_tops = sorted(band[0] for band in section_bands) if section_bands else []

    out = []
    for i, (_cx, cy, _cw, _ch) in enumerate(checkboxes):
        y0 = max(0, cy - 5)
        if i + 1 < len(checkboxes):
            limit = checkboxes[i + 1][1] - 5
        else:
            limit = page_h - 20
        # The band cut applies to every problem, including the last one on the
        # page, so a trailing section header is not swallowed into its bbox.
        cut = next((top for top in band_tops if cy < top < limit), None)
        y1 = limit if cut is None else cut - 5
        out.append((left, y0, right, y1))
    return out


def visualize(page_rgb: np.ndarray,
              checkboxes: List[Tuple[int, int, int, int]],
              bboxes: List[Tuple[int, int, int, int]]) -> np.ndarray:
    """Draw detected checkboxes and problem bboxes on a copy of the page.

    Checkboxes are given as ``(x, y, w, h)`` and outlined with a thin
    ``(0, 200, 0)`` rectangle; problem boxes are given as ``(x0, y0, x1, y1)``,
    outlined with a thick ``(220, 0, 0)`` rectangle and labelled ``Q1``,
    ``Q2``, ... near their top-left corner.  The input image is not modified.
    """
    checkbox_color = (0, 200, 0)
    problem_color = (220, 0, 0)
    canvas = page_rgb.copy()
    for bx, by, bw, bh in checkboxes:
        cv2.rectangle(canvas, (bx, by), (bx + bw, by + bh), checkbox_color, 2)
    for i, box in enumerate(bboxes):
        x0, y0, x1, y1 = box
        cv2.rectangle(canvas, (x0, y0), (x1, y1), problem_color, 4)
        label_origin = (x0 + 10, y0 + 35)
        cv2.putText(canvas, f"Q{i+1}", label_origin,
                    cv2.FONT_HERSHEY_SIMPLEX, 1.2, problem_color, 3)
    return canvas


def process(sample_path: str, name: str) -> Tuple[
        List[Tuple[int, int, int, int]], List[Tuple[int, int, int, int]]]:
    """Run checkbox-based problem detection on one sample image.

    Loads the image, crops the page, detects ☑ markers, derives one bbox per
    problem and saves an annotated debug image as ``<name>_detected.png`` in
    ``DEBUG_DIR``.  Progress is printed to stdout.

    Args:
        sample_path: Path of the image file to load.
        name: Label used in the log output and in the debug file name.

    Returns:
        ``(checkboxes, bboxes)`` as produced by ``find_checkboxes_by_cc`` and
        ``derive_problem_bboxes``.
    """
    print(f"\n=== {name} ===")
    # Context manager closes the underlying file as soon as the pixels are copied.
    with Image.open(sample_path) as src:
        img = np.array(src.convert("RGB"))
    page, _page_box = extract_page(img)  # crop offset is not needed here
    print(f"page:     {page.shape}")

    cbs = find_checkboxes_by_cc(page)
    print(f"checkboxes detected: {len(cbs)}")
    for i, (x, y, w, h) in enumerate(cbs):
        print(f"  ☑{i+1}: x={x:4d} y={y:4d} w={w:2d} h={h:2d}")

    bboxes = derive_problem_bboxes(page.shape, cbs)
    vis = visualize(page, cbs, bboxes)
    out_path = os.path.join(DEBUG_DIR, f"{name}_detected.png")
    Image.fromarray(vis).save(out_path)
    print(f"-> {out_path}")
    return cbs, bboxes


if __name__ == "__main__":
    # Run the prototype on the bundled sample pages, in order.
    for sample_file, label in (("sample02_p45.png", "p45"),
                               ("sample03_p47.png", "p47")):
        process(os.path.join(SAMPLES_DIR, sample_file), label)