"""Debug detector output to understand word segmentation.

Runs the first ``model_00_*`` ONNX model found under
``oneocr_extracted/onnx_models`` on ``image.png`` and prints diagnostics for
the ``scores_hori_fpn2`` / ``link_scores_hori_fpn2`` outputs: the score
range, the bounding region of above-threshold pixels, a per-column dump of
the middle text row, and connected-component counts at several thresholds.
"""
from pathlib import Path

import numpy as np
import onnxruntime as ort
from PIL import Image

# scipy is optional: without it the threshold sweep only reports pixel counts.
# The import must live inside the ``try`` for the fallback to be reachable.
try:
    from scipy import ndimage
except ImportError:
    ndimage = None

# Score above which a pixel is reported as text.
TEXT_THRESHOLD = 0.6
# Score above which a link to the neighbouring pixel is reported as connected.
LINK_THRESHOLD = 0.5
# Columns scoring at or below this are omitted from the link dump.
MIN_REPORT_SCORE = 0.3
# Index into the link-score output used for the east neighbour (matches the
# "(East neighbor)" label printed below).
# NOTE(review): channel layout is taken from that label only - confirm
# against the model.
EAST_LINK_CHANNEL = 2

models_dir = Path("oneocr_extracted/onnx_models")

# Close the file handle as soon as the pixels have been converted.
with Image.open("image.png") as src:
    img = src.convert("RGB")
w, h = img.size

# Fail with a descriptive error instead of a bare StopIteration when the
# model file is missing.
model_path = next(models_dir.glob("model_00_*"), None)
if model_path is None:
    raise FileNotFoundError(f"No file matching 'model_00_*' in {models_dir}")
sess = ort.InferenceSession(str(model_path),
                            providers=["CPUExecutionProvider"])

# Scale the longer side to 800 px, then round each side up to a multiple of 32.
scale = 800 / max(h, w)
dh = (int(h * scale) + 31) // 32 * 32
dw = (int(w * scale) + 31) // 32 * 32
img_d = np.array(img.resize((dw, dh), Image.LANCZOS), dtype=np.float32)
# Reverse the channel axis (RGB -> BGR) and subtract per-channel offsets.
img_d = img_d[:, :, ::-1] - np.array([102.9801, 115.9465, 122.7717], dtype=np.float32)
# HWC -> CHW with a leading batch axis.
data = img_d.transpose(2, 0, 1)[np.newaxis].astype(np.float32)
im_info = np.array([[dh, dw, scale]], dtype=np.float32)

outputs = sess.run(None, {"data": data, "im_info": im_info})
output_names = [o.name for o in sess.get_outputs()]
out_dict = dict(zip(output_names, outputs))

# First batch item: a 2-D score map and a channel-first stack of link maps.
pixel_scores = out_dict["scores_hori_fpn2"][0, 0]
link_scores = out_dict["link_scores_hori_fpn2"][0]

print(f"FPN2 shape: {pixel_scores.shape}")
print(f"Pixel scores: min={pixel_scores.min():.4f} max={pixel_scores.max():.4f}")

text_mask = pixel_scores > TEXT_THRESHOLD
print(f"Text pixels (>{TEXT_THRESHOLD}): {text_mask.sum()}")

# Everything below needs at least one text pixel: min()/max() of an empty
# array raises, and row_mid would be undefined.
ys, xs = np.where(text_mask)
if ys.size > 0:
    row_min, row_max = ys.min(), ys.max()
    col_min, col_max = xs.min(), xs.max()
    print(f"Text region: rows [{row_min}-{row_max}], cols [{col_min}-{col_max}]")

    # Inspect the east-neighbour link scores along the middle row of the
    # text region to see where adjacent columns are (not) linked.
    row_mid = (row_min + row_max) // 2
    print(f"\nHorizontal link scores at row {row_mid} (East neighbor):")
    link_east = link_scores[EAST_LINK_CHANNEL, row_mid, :]
    for x in range(col_min, col_max + 1):
        ps = pixel_scores[row_mid, x]
        if ps <= MIN_REPORT_SCORE:
            continue
        le = link_east[x]
        marker = "TEXT" if ps > TEXT_THRESHOLD else " "
        link_marker = "LINK" if le > LINK_THRESHOLD else "gap "
        print(f" col={x:3d}: pixel={ps:.3f} [{marker}] east_link={le:.3f} [{link_marker}]")

    # Bar chart of the raw pixel scores on the same row, padded by two
    # columns on each side and clamped to the map width.
    print(f"\nPixel scores along row {row_mid}:")
    first_col = max(0, col_min - 2)
    last_col = min(pixel_scores.shape[1], col_max + 3)
    for x in range(first_col, last_col):
        ps = pixel_scores[row_mid, x]
        bar = "█" * int(ps * 40)
        print(f" col={x:3d}: {ps:.3f} {bar}")

# Sweep thresholds to see how the mask splits into connected components.
for thresh in [0.5, 0.6, 0.7, 0.8, 0.9]:
    mask = pixel_scores > thresh
    n = mask.sum()

    if ndimage is None:
        # Fallback when scipy is unavailable: pixel count only.
        print(f"Threshold {thresh}: {n} pixels")
        continue

    labels, n_comps = ndimage.label(mask)
    print(f"\nThreshold {thresh}: {n} pixels, {n_comps} components")
    # Report at most the first nine components to keep the output short.
    for c in range(1, min(n_comps + 1, 10)):
        comp_mask = labels == c
        area = comp_mask.sum()
        xs_c = np.where(comp_mask)[1]
        print(f" Component {c}: area={area}, cols=[{xs_c.min()}-{xs_c.max()}]")