Spaces:

raycosine
/

Detangutify

Running

App Files Files Community

raycosine committed on Sep 5, 2025

Commit

f158b5d

1 Parent(s): 4fd08f0

new augmentation

Browse files

Files changed (4) hide show

app.py +31 -145
features.py +24 -2
features_preproc.py +57 -14
requirements.txt +0 -6

app.py CHANGED Viewed

@@ -2,41 +2,10 @@ import gradio as gr, numpy as np
 from PIL import Image, ImageOps, ImageDraw, ImageFont
 from pathlib import Path
 import os, requests
-from features import binarize, feat_vec, cosine_sim, stroke_normalize
-from features import _ensure_ink_true
 from features_preproc import crop_and_center as crop_ref, LO
-from skimage.morphology import binary_dilation, disk
-import torch
-from annoy import AnnoyIndex
 from huggingface_hub import hf_hub_download
 ASSET_REPO = "raycosine/detangutify-data"
-EMBED_DIM = 128
-EMBEDDER_PATH = hf_hub_download(repo_id=ASSET_REPO, repo_type="dataset",
-                                filename="tangut_embedder.torchscript")
-CPS_PATH  = hf_hub_download(repo_id=ASSET_REPO, repo_type="dataset",
-                            filename="tangut_cps.npy")
-EMBEDS_PATH  = hf_hub_download(repo_id=ASSET_REPO, repo_type="dataset",
-                            filename="tangut_embeds.npy")
-ANNOY_PATH    = hf_hub_download(repo_id=ASSET_REPO, repo_type="dataset",
-                            filename="tangut_index.ann")
-USE_CNN_EMB = os.path.exists(EMBEDDER_PATH) and os.path.exists(CPS_PATH) \
-              and os.path.exists(EMBEDS_PATH) and os.path.exists(ANNOY_PATH)
-if USE_CNN_EMB:
-    EMBEDDER = torch.jit.load(EMBEDDER_PATH, map_location="cpu").eval()
-    CPS      = np.load(CPS_PATH)
-    E_TEMPL  = np.load(EMBEDS_PATH)
-    ANN      = AnnoyIndex(EMBED_DIM, 'angular')
-    ANN.load(ANNOY_PATH)
-    def to_embed(bw_float01: np.ndarray) -> np.ndarray:
-        x = torch.from_numpy(bw_float01[None, None, ...]).float()
-        with torch.no_grad():
-            e = EMBEDDER(x).detach().cpu().numpy()[0]
-        return e.astype(np.float32)
 FONT_PATH = "data/NotoSerifTangut-Regular.ttf"
 URL = "https://notofonts.github.io/tangut/fonts/NotoSerifTangut/full/ttf/NotoSerifTangut-Regular.ttf"
@@ -45,10 +14,13 @@ if not os.path.exists(FONT_PATH):
     r = requests.get(URL)
     with open(FONT_PATH, "wb") as f:
         f.write(r.content)
-DATA = np.load("data/templates_aug.npz")
 X = DATA["X"]
 Y = DATA["y"]
 SIZE = 64
@@ -116,122 +88,36 @@ def infer(img):
     if arr.dtype != np.uint8:
         arr = np.clip(arr, 0, 255).astype(np.uint8)
-    bw0 = binarize(arr, keep_largest=False, min_size=3)
-    #bw0 = binary_dilation(bw0, disk(1))
-    bw0 = _ensure_ink_true(bw0)
     bw  = crop_ref(bw0, out_size=LO, margin_frac=0.08)
     bw  = stroke_normalize(bw, target_px=3)
     viz_img = Image.fromarray((bw*255).astype(np.uint8))
-    if USE_CNN_EMB:
-        e = to_embed(bw).astype(np.float32)
-        e = e / (np.linalg.norm(e) + 1e-8)
-        K = 800
-        idxs = ANN.get_nns_by_vector(e.tolist(), K, include_distances=False)
-        cnn_cos = (E_TEMPL[idxs] @ e).astype(np.float32)
-        from skimage.morphology import skeletonize
-        import numpy as _np
-        from scipy.signal import convolve2d
-        def preprocess_glyph_for_rank(cp:int) -> _np.ndarray:
-            gimg = render_glyph(cp)
-            garr = _np.array(gimg, dtype=_np.uint8)
-            gbw0 = binarize(garr, keep_largest=False, min_size=3)
-            #gbw0 = binary_dilation(gbw0, disk(1))
-            gbw0 = _ensure_ink_true(gbw0)
-            gbw  = crop_ref(gbw0, out_size=LO, margin_frac=0.08)
-            gbw  = stroke_normalize(gbw, target_px=3) > 0.5
-            return gbw
-        def skel_bool(bw_bool:_np.ndarray):
-            return skeletonize(bw_bool.astype(bool))
-        from skimage.morphology import skeletonize
-        from scipy.ndimage import distance_transform_edt
-        q_bool = (bw > 0.5)
-        q_skel = skeletonize(q_bool)
-        from features import feat_vec, cosine_sim
-        q_shape = feat_vec(q_bool.astype(np.float32))
-        def overlap_score(a, b):
-            inter = (a & b).sum()
-            denom = max(1, min(a.sum(), b.sum()))
-            return float(inter) / float(denom)
-        def block_occ(bw, m=8):
-            H, W = bw.shape
-            ys = np.array_split(np.arange(H), m)
-            xs = np.array_split(np.arange(W), m)
-            occ = []
-            for yy in ys:
-                for xx in xs:
-                    occ.append(bw[np.ix_(yy, xx)].any())
-            return np.asarray(occ, dtype=np.uint8)
-        def iou_bool(a, b):
-            inter = (a & b).sum()
-            union = (a | b).sum()
-            return float(inter) / max(1, union)
-        def chamfer_sim(a_bool, b_bool, gamma=0.5):
-            if a_bool.sum() == 0 or b_bool.sum() == 0:
-                return 0.0
-            da = distance_transform_edt(~a_bool)
-            db = distance_transform_edt(~b_bool)
-            s1 = np.exp(-gamma * float(db[a_bool].mean()))
-            s2 = np.exp(-gamma * float(da[b_bool].mean()))
-            return 0.5 * (s1 + s2)
-        q_occ = block_occ(q_bool, m=8)
-        K = 800
-        idxs = ANN.get_nns_by_vector(e.tolist(), K, include_distances=False)
-        cnn_cos = (E_TEMPL[idxs] @ e).astype(np.float32)
-        rank_scores = []
-        for i, cos_sc in zip(idxs, cnn_cos):
-            cp_i  = int(CPS[i])
-            gbw   = preprocess_glyph_for_rank(cp_i)
-            gskel = skeletonize(gbw)
-            ov    = overlap_score(q_skel, gskel)
-            g_shape = feat_vec(gbw.astype(np.float32))
-            sh_cos  = float(cosine_sim(q_shape, np.expand_dims(g_shape,0))[0])
-            occ_iou = iou_bool(q_occ, block_occ(gbw, m=8))
-            chf     = chamfer_sim(q_skel, gskel, gamma=0.6)
-            # 权重：降低 CNN，提升几何一致性；Chamfer 占 0.25
-            final = 0.30*float(cos_sc) + 0.20*ov + 0.15*sh_cos + 0.10*occ_iou + 0.25*chf
-            rank_scores.append(final)
-        order = np.argsort(-np.asarray(rank_scores))[:10]
-        idxs  = [idxs[i] for i in order]
-        sims  = [float(np.asarray(rank_scores)[i]) for i in order]
-        gallery_items, results_json = [], []
-        for idx, sc in zip(idxs, sims):
-            cp = int(CPS[idx])
-            glyph_img = render_glyph(cp)
-            caption = f"U+{cp:05X} {chr(cp)}\nScore: {float(sc):.6f}"
-            gallery_items.append((glyph_img, caption))
-            results_json.append({"cp": cp, "char": chr(cp), "score": float(sc)})
-        return gallery_items, viz_img, results_json
-    else:
-        q = feat_vec(bw)
-        #q = pca_transform(q).astype(np.float32)
-        s = cosine_sim(q, X).astype(np.float32)
-        idxs = np.argsort(-s)[:10]
-        gallery_items = []
-        results_json = []
-        for idx in idxs:
-            cp = int(Y[idx]); sc = float(s[idx])
-            glyph_img = render_glyph(cp)
-            caption = f"U+{cp:05X} {chr(cp)}\nScore: {sc:.6f}"
-            gallery_items.append((glyph_img, caption))
-            results_json.append({"cp": cp, "char": chr(cp), "score": sc})
-        return gallery_items, viz_img, results_json
 with gr.Blocks() as demo:
     gr.Markdown("### Detangutify (Tangut Character classifier)")

 from PIL import Image, ImageOps, ImageDraw, ImageFont
 from pathlib import Path
 import os, requests
+from features import binarize, feat_vec, cosine_sim, stroke_normalize, _ensure_ink_true
 from features_preproc import crop_and_center as crop_ref, LO
 from huggingface_hub import hf_hub_download
 ASSET_REPO = "raycosine/detangutify-data"
 FONT_PATH = "data/NotoSerifTangut-Regular.ttf"
 URL = "https://notofonts.github.io/tangut/fonts/NotoSerifTangut/full/ttf/NotoSerifTangut-Regular.ttf"
     r = requests.get(URL)
     with open(FONT_PATH, "wb") as f:
         f.write(r.content)
+DATA_PATH = hf_hub_download(repo_id=ASSET_REPO, repo_type="dataset",
+                                filename="templates_aug.npz")
+DATA = np.load(DATA_PATH)
 X = DATA["X"]
 Y = DATA["y"]
+MEAN = DATA.get("mean", None)
+STD = DATA.get("std", None)
 SIZE = 64
     if arr.dtype != np.uint8:
         arr = np.clip(arr, 0, 255).astype(np.uint8)
+    #pil = Image.fromarray(arr, mode="L").resize((SIZE, SIZE), Image.BILINEAR)
+    #bw = binarize(np.array(pil, dtype=np.uint8))
+    #bw = crop_and_center(bw, SIZE)
+    #bw = stroke_normalize(bw, target_px=3)
+    #bw = crop_ref(bw, out_size=LO, margin_frac=0.08)  # 用训练同款
+    #bw = stroke_normalize(bw, target_px=2)
+    bw0 = binarize(arr)
+    bw0 = _ensure_ink_true(bw0)
     bw  = crop_ref(bw0, out_size=LO, margin_frac=0.08)
     bw  = stroke_normalize(bw, target_px=3)
     viz_img = Image.fromarray((bw*255).astype(np.uint8))
+    q = feat_vec(bw)
+    if MEAN is not None and STD is not None:
+        q = (q - MEAN.ravel()) / STD.ravel()
+    s = cosine_sim(q, X)
+    idxs = np.argsort(-s)[:10]
+    top, sec = float(s[idxs[0]]), float(s[idxs[1]]) if len(idxs)>1 else (float(s[idxs[0]]), -1)
+    low_conf = (top < 0.58) or (top - sec < 0.05)
+    gallery_items = []
+    results_json = []
+    for idx in idxs:
+        cp = int(Y[idx]); sc = float(s[idx])
+        glyph_img = render_glyph(cp)
+        #caption = f"U+{cp:05X} {chr(cp)}\nScore: {sc:.6f}"
+        caption = f"U+{cp:05X} {chr(cp)}\nScore: {sc:.6f}" + ("  ⚠️" if low_conf and idx==idxs[0] else "")
+        gallery_items.append((glyph_img, caption))
+        results_json.append({"cp": cp, "char": chr(cp), "score": sc})
+    return gallery_items, viz_img, results_json
 with gr.Blocks() as demo:
     gr.Markdown("### Detangutify (Tangut Character classifier)")

features.py CHANGED Viewed

@@ -4,7 +4,24 @@ from skimage.morphology import remove_small_objects
 from skimage.feature import hog
 from skimage.measure import moments_hu, label
 from skimage.morphology import skeletonize, binary_dilation, disk
 def _ensure_ink_true(bw_bool: np.ndarray) -> np.ndarray:
     bw = bw_bool.astype(bool)
     if bw.mean() > 0.5:
@@ -15,12 +32,17 @@ def stroke_normalize(bw: np.ndarray, target_px: int = 2) -> np.ndarray:
         bw = (bw > 0)
     if bw.mean() > 0.5:
         bw = ~bw
     skel = skeletonize(bw)
     if target_px <= 1:
         return skel.astype(np.float32)
     rad   = max(1, int(round(target_px/2)))
     thick = binary_dilation(skel, disk(rad))
-    return (thick).astype(np.float32)
 def to_64_gray(imgPIL):
     """Return ``imgPIL`` converted to a NumPy array of dtype ``uint8``.

     NOTE(review): despite the name, this function performs no resizing and
     no grayscale conversion itself; presumably the caller passes an image
     that is already 64x64 and single-channel -- confirm against call sites.
     """
     return np.array(imgPIL, dtype=np.uint8)

 from skimage.feature import hog
 from skimage.measure import moments_hu, label
 from skimage.morphology import skeletonize, binary_dilation, disk
+from scipy.ndimage import convolve
+from skimage.morphology import binary_opening
def _prune_spurs(skel: np.ndarray, iters: int = 2) -> np.ndarray:
    """Iteratively trim short end-point branches (spurs) from a skeleton.

    Each pass removes every end point, i.e. every foreground pixel that has
    exactly one foreground pixel among its 8 neighbours. Junction pixels,
    trunk pixels and isolated single pixels are left alone.

    A component that has shrunk to exactly two mutually adjacent end points
    is kept as is: removing both in the same pass would erase the whole
    stroke (for example a short dot) rather than trim a branch.

    Args:
        skel: 2-D skeleton mask; any dtype, non-zero means foreground.
        iters: Maximum number of pixels to trim inward from each end point.
            Values of 1 to 3 are recommended.

    Returns:
        A new boolean array of the same shape; ``skel`` is not modified.
    """
    s = np.asarray(skel).astype(bool)  # astype() already returns a copy
    # 3x3 neighbourhood sum with the centre weighted 10 and neighbours 1:
    # a response of exactly 11 (10 + 1) marks a foreground pixel with a
    # single neighbour, i.e. an end point.
    K = np.array([[1, 1, 1],
                  [1, 10, 1],
                  [1, 1, 1]], dtype=np.uint8)
    # Same neighbourhood without the centre, used to look for end points
    # adjacent to another end point.
    ring = np.array([[1, 1, 1],
                     [1, 0, 1],
                     [1, 1, 1]], dtype=np.uint8)
    for _ in range(iters):
        nb = convolve(s.astype(np.uint8), K, mode="constant", cval=0)
        endpoints = (nb == 11)
        # An end point whose only neighbour is itself an end point belongs
        # to a two-pixel component; keep it so the stroke does not vanish.
        paired = convolve(endpoints.astype(np.uint8), ring,
                          mode="constant", cval=0) > 0
        removable = endpoints & ~paired
        if not removable.any():
            break  # nothing left to trim; further passes would be no-ops
        # Only end points are cleared; junctions and trunk are untouched.
        s &= ~removable
    return s
 def _ensure_ink_true(bw_bool: np.ndarray) -> np.ndarray:
     bw = bw_bool.astype(bool)
     if bw.mean() > 0.5:
         bw = (bw > 0)
     if bw.mean() > 0.5:
         bw = ~bw
     skel = skeletonize(bw)
+    skel = _prune_spurs(skel, iters=2)      # ← 新增：剪短刺，去笔锋小尖
     if target_px <= 1:
         return skel.astype(np.float32)
     rad   = max(1, int(round(target_px/2)))
     thick = binary_dilation(skel, disk(rad))
+    #thick = binary_opening(thick, disk(1))#optional
+    return (thick & bw).astype(np.float32)
 def to_64_gray(imgPIL):
     """Return ``imgPIL`` converted to a NumPy array of dtype ``uint8``.

     NOTE(review): despite the name, this function performs no resizing and
     no grayscale conversion itself; presumably the caller passes an image
     that is already 64x64 and single-channel -- confirm against call sites.
     """
     return np.array(imgPIL, dtype=np.uint8)

features_preproc.py CHANGED Viewed

@@ -3,7 +3,7 @@ from typing import Tuple
 import numpy as np
 from skimage.filters import threshold_otsu
 from skimage.morphology import remove_small_objects, binary_dilation, square
-from skimage.measure import label, moments_hu
 from skimage.transform import resize
 from skimage.feature import hog
@@ -15,22 +15,65 @@ def binarize_from_gray01(gray01: np.ndarray, thr: float = 0.5) -> np.ndarray:
         g /= 255.0
     return (g < thr)
-def binarize_otsu(gray: np.ndarray) -> np.ndarray:
     g = gray.astype(np.float32)
-    if g.max() > 1:
-        g /= 255.0
-    t = threshold_otsu(g)
-    bw = g <= t
     lab = label(bw)
     if lab.max() > 0:
-        areas = np.bincount(lab.ravel())
-        areas[0] = 0
-        keep = areas.argmax()
-        bw = (lab == keep)
-    bw = remove_small_objects(bw.astype(bool), min_size=4).astype(bool)
-    #bw = binary_dilation(bw, np.ones((2, 2), dtype=bool))
-    return bw
 def crop_and_center(bw: np.ndarray, out_size: int = LO, margin_frac: float = 0.08) -> np.ndarray:
     ys, xs = np.where(bw)
     if len(xs) == 0 or len(ys) == 0:
@@ -46,7 +89,7 @@ def crop_and_center(bw: np.ndarray, out_size: int = LO, margin_frac: float = 0.0
     pad_x_lft = (side - w) // 2 + margin
     pad_x_rgt = side - w - (side - w) // 2 + margin
     sq = np.pad(crop, ((pad_y_top, pad_y_bot), (pad_x_lft, pad_x_rgt)), mode='constant')
-    sq = resize(sq, (out_size, out_size), order=0, anti_aliasing=True, preserve_range=True)
     return (sq > 0.5).astype(bool)
 def proj_features(bw: np.ndarray, m: int = 32) -> np.ndarray:

 import numpy as np
 from skimage.filters import threshold_otsu
 from skimage.morphology import remove_small_objects, binary_dilation, square
+from skimage.measure import label, moments_hu, regionprops
 from skimage.transform import resize
 from skimage.feature import hog
         g /= 255.0
     return (g < thr)
def binarize_otsu(
    gray: np.ndarray,
    min_size: int = 12,
    dilate_k: int = 2,
    keep: str = "largest",           # "largest" | "multi" | "smart"
    area_ratio: float = 0.08,        # slightly relaxed
    topk: int = 8,                   # keep a few more candidates
    horiz_keep_frac: float = 0.50,   # lower => thin horizontal strokes survive more easily
    vert_keep_frac:  float = 0.55,   # lower => thin vertical strokes survive more easily
    ar_keep: float = 3.2,            # elongated components (long/short >= ar_keep) are kept
    top_edge_frac: float = 0.15      # wide components starting near the top edge are kept
) -> np.ndarray:
    """Binarize a grayscale glyph image with Otsu's threshold.

    Pixels at or below the Otsu threshold are treated as foreground (ink).
    Components smaller than ``min_size`` are removed, the remaining
    connected components are filtered according to ``keep``, and the result
    is optionally dilated.

    Args:
        gray: 2-D grayscale image. Values above 1 are assumed to be on a
            0-255 scale and are divided by 255.
        min_size: Minimum component size passed to ``remove_small_objects``.
        dilate_k: Side of the square footprint used for the final dilation;
            ``0`` (or less) disables dilation.
        keep: ``"largest"`` keeps only the largest component. Any other
            value (e.g. ``"multi"`` or ``"smart"``, which are currently
            handled identically) keeps every component that satisfies at
            least one of the criteria below, up to ``topk`` components.
        area_ratio: Keep a component whose area is at least this fraction
            of the largest component's area.
        topk: Maximum number of components kept in multi-component mode.
        horiz_keep_frac: Keep a component whose bounding-box width is at
            least this fraction of the widest component's width.
        vert_keep_frac: Keep a component whose bounding-box height is at
            least this fraction of the tallest component's height.
        ar_keep: Keep a component whose bounding-box aspect ratio
            (long side / short side) is at least this value.
        top_edge_frac: A component whose bounding box starts within this
            fraction of the image height from the top is kept when it is
            also at least 45% as wide as the widest component.

    Returns:
        Boolean mask with ``True`` for foreground, same shape as ``gray``.
    """
    g = gray.astype(np.float32)
    if g.max() > 1:
        g /= 255.0
    t = threshold_otsu(g)
    bw = (g <= t)
    bw = remove_small_objects(bw.astype(bool), min_size=min_size).astype(bool)

    lab = label(bw)
    if lab.max() > 0:
        areas = np.bincount(lab.ravel())
        areas[0] = 0  # label 0 is background; never a candidate
        if keep == "largest":
            bw = (lab == areas.argmax())
        else:
            props = regionprops(lab)
            H = bw.shape[0]
            max_area = areas.max()
            max_w = max((p.bbox[3] - p.bbox[1] for p in props), default=0)
            max_h = max((p.bbox[2] - p.bbox[0] for p in props), default=0)

            keep_labels = []
            # Visit components from largest to smallest so that the `topk`
            # cut-off can never discard the main body of the glyph in favour
            # of small components that merely come first in scan order.
            for p in sorted(props, key=lambda r: areas[r.label], reverse=True):
                k = p.label
                y0, x0, y1, x1 = p.bbox
                w = x1 - x0
                h = y1 - y0
                aspect = max(w, h) / max(1, min(w, h))  # elongation
                near_top = (y0 <= int(H * top_edge_frac))

                cond_area = (areas[k] >= max_area * area_ratio)
                cond_long = (max_w > 0 and w >= max_w * horiz_keep_frac) or \
                            (max_h > 0 and h >= max_h * vert_keep_frac)
                cond_slim = (aspect >= ar_keep)              # thin slanted strokes
                cond_top = near_top and (w >= 0.45 * max_w)  # wide stroke at the top edge

                if cond_area or cond_long or cond_slim or cond_top:
                    keep_labels.append(k)
                if len(keep_labels) >= topk:
                    break

            mask = np.zeros_like(bw, dtype=bool)
            for k in keep_labels:
                mask |= (lab == k)
            bw = mask

    if dilate_k > 0:
        bw = binary_dilation(bw, square(dilate_k))
    return bw
 def crop_and_center(bw: np.ndarray, out_size: int = LO, margin_frac: float = 0.08) -> np.ndarray:
     ys, xs = np.where(bw)
     if len(xs) == 0 or len(ys) == 0:
     pad_x_lft = (side - w) // 2 + margin
     pad_x_rgt = side - w - (side - w) // 2 + margin
     sq = np.pad(crop, ((pad_y_top, pad_y_bot), (pad_x_lft, pad_x_rgt)), mode='constant')
+    sq = resize(sq, (out_size, out_size), order=1, anti_aliasing=True, preserve_range=True)
     return (sq > 0.5).astype(bool)
 def proj_features(bw: np.ndarray, m: int = 32) -> np.ndarray:

requirements.txt CHANGED Viewed

@@ -2,9 +2,3 @@ gradio>=4.0.0
 numpy
 Pillow
 scikit-image
-torch
-torchvision
-tqdm
-annoy
-huggingface_hub

 numpy
 Pillow
 scikit-image