|
|
"""Debug detector output to understand word segmentation.""" |
|
|
import numpy as np |
|
|
import onnxruntime as ort |
|
|
from PIL import Image |
|
|
from pathlib import Path |
|
|
|
|
|
# Inputs for this debug run: the directory holding the extracted ONNX models
# and the image whose detector output is being inspected.
models_dir = Path("oneocr_extracted") / "onnx_models"
img = Image.open("image.png").convert("RGB")

# PIL reports size as (width, height).
w, h = img.size
|
|
|
|
|
|
|
|
# Load the detector: the first file in models_dir whose name matches
# "model_00_*".  Sorting makes the pick deterministic if several files match,
# and the explicit check replaces the bare StopIteration that next() would
# raise on an empty glob with an error that names what is missing.
detector_candidates = sorted(models_dir.glob("model_00_*"))
if not detector_candidates:
    raise FileNotFoundError(
        f"No detector model matching 'model_00_*' found in {models_dir}"
    )
sess = ort.InferenceSession(
    str(detector_candidates[0]),
    providers=["CPUExecutionProvider"],
)
|
|
|
|
|
# Detector input size: scale the longer image side to 800 px, then round both
# sides up to the next multiple of 32.
scale = 800 / max(h, w)
dh, dw = (((int(side * scale) + 31) // 32) * 32 for side in (h, w))

# PIL's resize takes (width, height); the image was converted to RGB, so the
# resulting array is (dh, dw, 3).
img_d = np.array(img.resize((dw, dh), Image.LANCZOS), dtype=np.float32)
# Reverse the channel axis (RGB -> BGR) and subtract the per-channel means.
img_d = img_d[..., ::-1] - np.array([102.9801, 115.9465, 122.7717], dtype=np.float32)

# Channels first, with a leading axis of length 1: (1, 3, dh, dw).
data = img_d.transpose(2, 0, 1)[None].astype(np.float32)
# A single row holding (height, width, scale) of the resized input.
im_info = np.array([[dh, dw, scale]], dtype=np.float32)
|
|
|
|
|
# Run the detector once (None requests every output) and key each returned
# array by the name of the output it came from.
outputs = sess.run(None, {"data": data, "im_info": im_info})
output_names = [node.name for node in sess.get_outputs()]
out_dict = {name: value for name, value in zip(output_names, outputs)}
|
|
|
|
|
|
|
|
# Pick the "hori_fpn2" outputs.  Index 0 is taken on the two leading axes of
# the pixel scores and on the leading axis of the link scores
# (presumably batch / channel axes -- confirm against the model).
pixel_scores = out_dict["scores_hori_fpn2"][0][0]
link_scores = out_dict["link_scores_hori_fpn2"][0]

print(f"FPN2 shape: {pixel_scores.shape}")
print(f"Pixel scores: min={np.min(pixel_scores):.4f} max={np.max(pixel_scores):.4f}")

# Boolean map of pixels whose score exceeds the 0.6 text threshold.
text_mask = np.greater(pixel_scores, 0.6)
print(f"Text pixels (>0.6): {np.sum(text_mask)}")
|
|
|
|
|
|
|
|
# Inspect the detected text region along its middle row: first the east-link
# scores (do low links separate words?), then the raw pixel scores as a bar
# chart.  Everything that needs the region bounds lives under the non-empty
# branch, because min()/max() on an empty coordinate array raises ValueError.
ys, xs = np.where(text_mask)
if ys.size == 0:
    print("No text pixels above threshold; skipping row analysis.")
else:
    # Bounding box of all text pixels, computed once and reused below.
    row_min, row_max = ys.min(), ys.max()
    col_min, col_max = xs.min(), xs.max()
    print(f"Text region: rows [{row_min}-{row_max}], cols [{col_min}-{col_max}]")

    # Use the vertical middle of the text region as the representative row.
    row_mid = (row_min + row_max) // 2

    print(f"\nHorizontal link scores at row {row_mid} (East neighbor):")
    # Link channel 2 is treated as the east (right-hand) neighbour link.
    link_east = link_scores[2, row_mid, :]
    for x in range(col_min, col_max + 1):
        ps = pixel_scores[row_mid, x]
        # Only report columns with a non-negligible text score.
        if ps > 0.3:
            le = link_east[x]
            marker = "TEXT" if ps > 0.6 else " "
            link_marker = "LINK" if le > 0.5 else "gap "
            print(f" col={x:3d}: pixel={ps:.3f} [{marker}] east_link={le:.3f} [{link_marker}]")

    # Bar chart of pixel scores along the same row, padded by two columns on
    # each side and clamped to the width of the score map.
    print(f"\nPixel scores along row {row_mid}:")
    for x in range(max(0, col_min - 2), min(pixel_scores.shape[1], col_max + 3)):
        ps = pixel_scores[row_mid, x]
        bar = "█" * int(ps * 40)
        print(f" col={x:3d}: {ps:.3f} {bar}")
|
|
|
|
|
|
|
|
# Sweep several score thresholds and report how the text mask breaks into
# connected components at each one.
#
# scipy is optional.  It is imported once, up front, inside the try block so
# that the ImportError fallback is actually reachable; importing it outside
# the try made the except clause dead code and crashed the script when scipy
# was not installed.
try:
    from scipy import ndimage
except ImportError:
    ndimage = None

for thresh in [0.5, 0.6, 0.7, 0.8, 0.9]:
    mask = pixel_scores > thresh
    n = mask.sum()

    if ndimage is None:
        # Fallback without scipy: only the pixel count can be reported.
        print(f"Threshold {thresh}: {n} pixels")
        continue

    labels, n_comps = ndimage.label(mask)
    print(f"\nThreshold {thresh}: {n} pixels, {n_comps} components")
    # Labels start at 1 (0 is background); at most the first nine components
    # are listed to keep the output short.
    for c in range(1, min(n_comps + 1, 10)):
        comp_mask = labels == c
        area = comp_mask.sum()
        # Column indices of this component's pixels.
        xs_c = np.where(comp_mask)[1]
        print(f" Component {c}: area={area}, cols=[{xs_c.min()}-{xs_c.max()}]")
|
|
|