# -*- coding: utf-8 -*-
from typing import Tuple
import numpy as np
from skimage.filters import threshold_otsu
from skimage.morphology import remove_small_objects, binary_dilation, square
from skimage.measure import label, moments_hu, regionprops
from skimage.transform import resize
from skimage.feature import hog

# Side length of the square, normalised glyph image; used as the default
# ``out_size`` of crop_and_center() and by preprocess_and_features().
LO = 64

def binarize_from_gray01(gray01: np.ndarray, thr: float = 0.5) -> np.ndarray:
    """Threshold a grayscale image with a fixed cut-off.

    The input is converted to a float32 copy (the caller's array is never
    modified).  If its maximum exceeds 1 it is divided by 255 first.

    Args:
        gray01: Grayscale image array.
        thr: Cut-off applied after the optional rescaling.

    Returns:
        Boolean array, True where the (rescaled) value is strictly below
        ``thr``.
    """
    values = gray01.astype(np.float32)
    scaled = values / 255.0 if values.max() > 1 else values
    return scaled < thr


def binarize_otsu(
    gray: np.ndarray,
    min_size: int = 12,
    dilate_k: int = 2,
    keep: str = "largest",           # "largest" | "multi" | "smart"
    area_ratio: float = 0.08,        # loosened a little
    topk: int = 8,                   # keep a few more candidates
    horiz_keep_frac: float = 0.50,   # wide (horizontal) strokes are kept more easily
    vert_keep_frac:  float = 0.55,   # tall (vertical) strokes are kept more easily
    ar_keep: float = 3.2,            # slender components (long/short side >= ar_keep) are kept
    top_edge_frac: float = 0.15      # wide components starting near the top edge are kept
) -> np.ndarray:
    """Binarise a grayscale image with Otsu's threshold and filter components.

    Pixels at or below the Otsu threshold become foreground.  Connected
    components smaller than ``min_size`` are removed, then either only the
    largest component or a filtered set of components is retained, and the
    result is optionally dilated.

    Args:
        gray: 2-D grayscale image.  Values are divided by 255 when the maximum
            exceeds 1.
        min_size: Passed to ``remove_small_objects``.
        dilate_k: Side of the square dilation footprint; ``<= 0`` disables
            dilation.
        keep: ``"largest"`` keeps only the largest component.  Any other value
            (``"multi"`` and ``"smart"`` are treated identically here) keeps
            every component that satisfies at least one rule below.
        area_ratio: Keep components whose area is at least this fraction of
            the largest component's area.
        topk: Maximum number of components retained in multi-component mode.
            When more qualify, the largest ones (by area) win; ``<= 0`` keeps
            none.
        horiz_keep_frac: Keep components whose bbox width is at least this
            fraction of the widest bbox.
        vert_keep_frac: Keep components whose bbox height is at least this
            fraction of the tallest bbox.
        ar_keep: Keep components whose bbox long/short side ratio is at least
            this value.
        top_edge_frac: Components whose bbox starts within this fraction of the
            image height from the top are kept if they are also at least 0.45
            of the widest bbox.

    Returns:
        Boolean foreground mask with the same shape as ``gray``.
    """
    g = gray.astype(np.float32)
    if g.max() > 1:
        g /= 255.0
    bw = g <= threshold_otsu(g)

    bw = remove_small_objects(bw.astype(bool), min_size=min_size).astype(bool)
    lab = label(bw)

    if lab.max() > 0:
        areas = np.bincount(lab.ravel())
        areas[0] = 0  # label 0 is background; never let it win
        if keep == "largest":
            bw = lab == areas.argmax()
        else:
            props = regionprops(lab)
            img_h = bw.shape[0]
            max_area = areas.max()
            max_w = max((p.bbox[3] - p.bbox[1] for p in props), default=0)
            max_h = max((p.bbox[2] - p.bbox[0] for p in props), default=0)
            top_limit = int(img_h * top_edge_frac)

            candidates = []
            for p in props:
                y0, x0, y1, x1 = p.bbox
                w = x1 - x0
                h = y1 - y0
                aspect = max(w, h) / max(1, min(w, h))  # slenderness of the bbox

                cond_area = areas[p.label] >= max_area * area_ratio
                cond_long = (
                    (max_w > 0 and w >= max_w * horiz_keep_frac)
                    or (max_h > 0 and h >= max_h * vert_keep_frac)
                )
                cond_slim = aspect >= ar_keep
                cond_top = y0 <= top_limit and w >= 0.45 * max_w

                if cond_area or cond_long or cond_slim or cond_top:
                    candidates.append(p.label)

            # Rank by area before applying the cap: truncating in label
            # (raster-scan) order could discard the biggest components simply
            # because they appear later in the image.  The sort is stable, so
            # equal-area components keep their label order.
            candidates.sort(key=lambda k: areas[k], reverse=True)
            bw = np.isin(lab, candidates[:max(topk, 0)])

    if dilate_k > 0:
        bw = binary_dilation(bw, square(dilate_k))
    return bw
def crop_and_center(bw: np.ndarray, out_size: int = LO, margin_frac: float = 0.08) -> np.ndarray:
    """Crop a mask to its foreground, pad it to a centred square and resize.

    Args:
        bw: 2-D mask; non-zero entries are treated as foreground.
        out_size: Side length of the square output.
        margin_frac: Extra border added on every side, as a fraction of the
            longer side of the foreground bounding box.

    Returns:
        Boolean array of shape ``(out_size, out_size)``.  All False when the
        input has no foreground.
    """
    rows, cols = np.nonzero(bw)
    if rows.size == 0 or cols.size == 0:
        return np.zeros((out_size, out_size), dtype=bool)

    top, bottom = rows.min(), rows.max()
    left, right = cols.min(), cols.max()
    glyph = bw[top:bottom + 1, left:right + 1].astype(np.float32)

    height, width = glyph.shape
    side = max(height, width)
    border = int(side * margin_frac)

    # Split the slack needed to reach a square evenly; when it is odd the
    # extra pixel goes to the bottom / right.
    gap_y = side - height
    gap_x = side - width
    pad_spec = (
        (gap_y // 2 + border, gap_y - gap_y // 2 + border),
        (gap_x // 2 + border, gap_x - gap_x // 2 + border),
    )
    canvas = np.pad(glyph, pad_spec, mode='constant')

    scaled = resize(canvas, (out_size, out_size), order=1, anti_aliasing=True, preserve_range=True)
    # Interpolation produces fractional values; re-threshold to get a mask.
    return (scaled > 0.5).astype(bool)

def proj_features(bw: np.ndarray, m: int = 32) -> np.ndarray:
    """Return pooled, peak-normalised projection profiles of a 2-D mask.

    The column sums (``axis=0``) and row sums (``axis=1``) of ``bw`` are each
    divided by their own maximum (when positive) and average-pooled into
    ``m`` bins.

    Args:
        bw: 2-D mask or image.
        m: Number of pooling bins per profile.

    Returns:
        float32 vector of length ``2 * m``: pooled column profile followed by
        pooled row profile.  The values are always finite: a profile shorter
        than ``m`` is stretched by repeating samples, and an empty profile
        contributes zeros.
    """
    col_profile = bw.sum(axis=0).astype(np.float32)
    row_profile = bw.sum(axis=1).astype(np.float32)

    def normalise(profile):
        peak = profile.max() if profile.size else 0.0
        return profile / peak if peak > 0 else profile

    def pool(profile):
        n = len(profile)
        if n == 0:
            return np.zeros(max(m, 0), dtype=np.float32)
        edges = np.linspace(0, n, m + 1, endpoint=True).astype(int)
        # When n < m consecutive edges can coincide; widen such bins to one
        # sample so mean() never sees an empty slice (which would yield NaN).
        return np.array(
            [profile[lo:max(hi, lo + 1)].mean() for lo, hi in zip(edges[:-1], edges[1:])],
            dtype=np.float32,
        )

    pooled = [pool(normalise(col_profile)), pool(normalise(row_profile))]
    return np.concatenate(pooled).astype(np.float32)

def feat_vec(bw: np.ndarray) -> np.ndarray:
    """Build an L2-normalised feature vector from a binary glyph mask.

    The vector concatenates, in order: the 7 Hu moment invariants
    (log-compressed with sign preserved), a HOG descriptor (9 orientations,
    8x8 cells, 2x2 blocks, L2-Hys) and the pooled projection profiles from
    ``proj_features``.

    Args:
        bw: 2-D mask (converted to float32 for the moment and HOG steps).

    Returns:
        float32 feature vector scaled to (approximately) unit L2 norm.

    NOTE(review): the Hu invariants are computed from normalised central
    moments, which is what ``moments_hu`` expects.  Vectors produced by
    passing the raw image to ``moments_hu`` differ in their first 7
    components, so stored features/models built that way should be
    regenerated.
    """
    # Local import: these helpers are not among the module-level imports.
    from skimage.measure import moments_central, moments_normalized

    f = bw.astype(np.float32)

    if f.any():
        nu = moments_normalized(moments_central(f))
        # nan_to_num is a safety net for degenerate masks.
        hu = np.nan_to_num(moments_hu(nu)).astype(np.float32)
    else:
        # No foreground: central moments are undefined (division by zero).
        hu = np.zeros(7, dtype=np.float32)
    # Hu invariants span many orders of magnitude; compress, keeping the sign.
    hu = np.sign(hu) * np.log1p(np.abs(hu))

    hogv = hog(f, orientations=9, pixels_per_cell=(8, 8), cells_per_block=(2, 2),
               block_norm='L2-Hys', feature_vector=True).astype(np.float32)

    proj = proj_features(bw).astype(np.float32)
    v = np.concatenate([hu, hogv, proj]).astype(np.float32)
    # Epsilon keeps an all-zero vector from dividing by zero.
    n = float(np.linalg.norm(v) + 1e-8)
    return v / n

def preprocess_and_features(gray_or_uint8: np.ndarray) -> np.ndarray:
    """Run the full pipeline on a grayscale image and return its features.

    Steps: ``binarize_otsu`` with its default settings, ``crop_and_center``
    to an ``LO`` x ``LO`` mask with an 8% margin, then ``feat_vec``.
    """
    centred = crop_and_center(
        binarize_otsu(gray_or_uint8),
        out_size=LO,
        margin_frac=0.08,
    )
    return feat_vec(centred)