oneocr / _archive /temp /brain_storm /benchmark_models.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""Benchmark all ONNX models — timing, outputs, DLL vs pure ONNX comparison."""
import time
import numpy as np
import onnxruntime as ort
import onnx
from PIL import Image
from pathlib import Path
# Directory that is globbed for the "*.onnx" model files to benchmark.
MODELS_DIR = Path("oneocr_extracted/onnx_models")
# Directory the char2ind text files (see CHAR_MAPS) are resolved against.
CONFIG_DIR = Path("oneocr_extracted/config_data")
def load_char_map(path):
    """Read a char2ind text file into an index-to-character table.

    Every useful line has the form ``<token> <index>``; the split is made at
    the last space of the line. Empty lines, lines without a space, and lines
    whose only usable space is the first character are skipped.

    Args:
        path: Location of the UTF-8 encoded mapping file.

    Returns:
        A ``(idx2char, blank_idx)`` tuple. ``idx2char`` maps integer indices
        to characters, with the ``<space>`` token stored as ``" "``.
        ``blank_idx`` is the index given for the ``<blank>`` token, or 0 when
        the file has no such line; ``<blank>`` is not stored in ``idx2char``.

    Raises:
        ValueError: If the text after the last space is not an integer.
    """
    table = {}
    blank_position = 0
    with open(path, "r", encoding="utf-8") as handle:
        for raw in handle:
            entry = raw.rstrip("\n")
            token, _, number = entry.rpartition(" ")
            if not token:
                # Nothing precedes the last space (or there is no space at all).
                continue
            index = int(number)
            if token == "<blank>":
                blank_position = index
            elif token == "<space>":
                table[index] = " "
            else:
                table[index] = token
    return table, blank_position
def ctc_decode(lp, idx2char, blank):
    """Greedy CTC decoding of a score array into a string.

    Args:
        lp: Score array. A 3-D array is first reduced to 2-D: when its second
            axis has length 1 that axis is dropped, otherwise the first entry
            along axis 0 is taken.
        idx2char: Mapping from class index to output character.
        blank: Index of the CTC blank class.

    Returns:
        The decoded text. Consecutive repeats of the best index collapse into
        one and blanks are dropped; an index missing from ``idx2char`` is
        rendered as ``[<index>]``.
    """
    if lp.ndim == 3:
        if lp.shape[1] == 1:
            lp = lp[:, 0, :]
        else:
            lp = lp[0]

    best_path = np.argmax(lp, axis=-1)

    pieces = []
    last = -1
    for step in best_path:
        repeated = step == last
        # The previous index is always updated, so a blank between two equal
        # indices lets the second one be emitted again.
        last = step
        if repeated or step == blank:
            continue
        pieces.append(idx2char.get(int(step), f"[{step}]"))
    return "".join(pieces)
print("=" * 100)
print(" ONEOCR FULL MODEL BENCHMARK")
print("=" * 100)

# Open the test image inside a context manager so the file handle for
# image.png is released as soon as the pixels have been converted; convert()
# returns a separate image object that stays usable after the file is closed.
with Image.open("image.png") as _src:
    img = _src.convert("RGB")
w, h = img.size
print(f"\n Test image: {w}x{h}\n")
# ─── Prepare inputs ─────────────────────────────────────────────────────────
# Detector input: longest side scaled to 800 px, both sides rounded up to a
# multiple of 32, channel order reversed (RGB -> BGR), per-channel means
# subtracted, laid out as a (1, C, H, W) float32 array.
scale_det = 800 / max(h, w)
dh, dw = (
    (int(side * scale_det) + 31) // 32 * 32
    for side in (h, w)
)
img_det = img.resize((dw, dh), Image.LANCZOS)
det_bgr = np.array(img_det, dtype=np.float32)[:, :, ::-1]
# Subtracting a plain Python list promotes the result to float64; the cast
# back to float32 happens when det_data is built.
arr_det = det_bgr - [102.9801, 115.9465, 122.7717]
det_data = arr_det.transpose(2, 0, 1)[np.newaxis].astype(np.float32)
det_iminfo = np.array([[dh, dw, scale_det]], dtype=np.float32)

# Recognizer input: height fixed at 60 px, width at least 32 px and rounded
# up to a multiple of 4, RGB values divided by 255, laid out as (1, C, H, W).
target_h = 60
scale_rec = target_h / h
new_w = (max(int(w * scale_rec), 32) + 3) // 4 * 4
img_rec = img.resize((new_w, target_h), Image.LANCZOS)
rec_pixels = np.array(img_rec, dtype=np.float32) / 255.0
rec_data = rec_pixels.transpose(2, 0, 1)[np.newaxis]
# Sequence length fed to the recognizers: one step per 4 pixels of width.
rec_seq = np.array([new_w // 4], dtype=np.int32)
# char maps
# Model index -> char2ind file name for that model.
CHAR_MAPS = {
    2: "chunk_37_char2ind.char2ind.txt",
    3: "chunk_40_char2ind.char2ind.txt",
    4: "chunk_43_char2ind.char2ind.txt",
    5: "chunk_47_char2ind.char2ind.txt",
    6: "chunk_50_char2ind.char2ind.txt",
    7: "chunk_53_char2ind.char2ind.txt",
    8: "chunk_57_char2ind.char2ind.txt",
    9: "chunk_61_char2ind.char2ind.txt",
    10: "chunk_64_char2ind.char2ind.txt",
}

# Model index -> display name. Indices 0-10 are listed by position, 11-21 and
# 22-32 are generated in two numbered families, and 33 is added last.
SCRIPT_NAMES = dict(enumerate((
    "Detector",
    "ScriptID",
    "Latin",
    "CJK",
    "Arabic",
    "Cyrillic",
    "Devanagari",
    "Greek",
    "Hebrew",
    "Tamil",
    "Thai",
)))
SCRIPT_NAMES.update({n: f"LangSm_{n - 11}" for n in range(11, 22)})
SCRIPT_NAMES.update({n: f"LangMd_{n - 22}" for n in range(22, 33)})
SCRIPT_NAMES[33] = "LineLayout"
# ─── Benchmark ───────────────────────────────────────────────────────────────
# onnxruntime reports each input's element type as a string such as
# "tensor(int64)"; a feed whose numpy dtype does not match is rejected when
# the session runs, so dummy inputs must use the matching dtype.
_ORT_TO_NUMPY = {
    "tensor(float)": np.float32,
    "tensor(float16)": np.float16,
    "tensor(double)": np.float64,
    "tensor(int8)": np.int8,
    "tensor(int16)": np.int16,
    "tensor(int32)": np.int32,
    "tensor(int64)": np.int64,
    "tensor(uint8)": np.uint8,
    "tensor(uint16)": np.uint16,
    "tensor(uint32)": np.uint32,
    "tensor(uint64)": np.uint64,
    "tensor(bool)": np.bool_,
}


def _dummy_feed(inp):
    """Build a placeholder array for one session input.

    Dimensions that are not positive integers (symbolic or unknown) become 1.
    Floating-point inputs get random normal values, every other type gets
    ones, and the array dtype follows the input's declared element type.
    """
    shape = tuple(d if isinstance(d, int) and d > 0 else 1 for d in inp.shape)
    dtype = _ORT_TO_NUMPY.get(inp.type)
    if dtype is None:
        # Unrecognised type string: fall back to the coarse int/float guess.
        dtype = np.int32 if "int" in inp.type else np.float32
    if np.dtype(dtype).kind == "f":
        # randn() returns a plain float for a rank-0 shape; asarray keeps
        # the .astype call valid in that case too.
        return np.asarray(np.random.randn(*shape)).astype(dtype)
    return np.ones(shape, dtype=dtype)


print(f"{'#':>3} {'Name':15s} {'KB':>8} {'Load ms':>8} {'Run ms':>8} {'Runs':>5} {'Output Shape':30s} {'RT':>6} {'Text'}")
print("-" * 100)
total_load = 0
total_run = 0
n_runs = 5  # timed runs per model, after one untimed warmup run
for f in sorted(MODELS_DIR.glob("*.onnx")):
    # The model index is the second "_"-separated field of the file name.
    idx = int(f.name.split("_")[1])
    size_kb = f.stat().st_size // 1024
    name = SCRIPT_NAMES.get(idx, f"model_{idx}")

    # Load timing
    t0 = time.perf_counter()
    try:
        sess = ort.InferenceSession(str(f), providers=["CPUExecutionProvider"])
        t_load = (time.perf_counter() - t0) * 1000
        rt = "OK"
    except Exception:
        # Any failure to create the session is reported as a custom-op model
        # and excluded from the totals.
        t_load = (time.perf_counter() - t0) * 1000
        rt = "CUSTOM"
        print(f"{idx:>3} {name:15s} {size_kb:>8} {t_load:>8.1f} {'---':>8} {'---':>5} {'N/A (custom ops)':30s} {rt:>6}")
        continue
    total_load += t_load

    # Run timing
    input_names = [i.name for i in sess.get_inputs()]
    if idx == 0:  # detector
        feeds = {"data": det_data, "im_info": det_iminfo}
    elif idx == 1:  # script ID
        feeds = {"data": rec_data}
        if "seq_lengths" in input_names:
            feeds["seq_lengths"] = rec_seq
    elif idx <= 10:  # recognizers
        feeds = {"data": rec_data, "seq_lengths": rec_seq}
    else:
        # Remaining models: small placeholder inputs of the declared type.
        feeds = {inp.name: _dummy_feed(inp) for inp in sess.get_inputs()}

    # Warmup (also detects models that cannot run with these feeds)
    try:
        sess.run(None, feeds)
    except Exception as e:
        print(f"{idx:>3} {name:15s} {size_kb:>8} {t_load:>8.1f} {'ERR':>8} {'---':>5} {str(e)[:30]:30s} {rt:>6}")
        continue

    # Benchmark: mean wall-clock time over n_runs runs, in milliseconds
    t0 = time.perf_counter()
    for _ in range(n_runs):
        outputs = sess.run(None, feeds)
    t_run = (time.perf_counter() - t0) / n_runs * 1000
    total_run += t_run
    out_shape = str(outputs[0].shape)

    # Decode text for recognizers whose char map file is present
    text = ""
    if 2 <= idx <= 10 and idx in CHAR_MAPS:
        cm_path = CONFIG_DIR / CHAR_MAPS[idx]
        if cm_path.exists():
            idx2char, blank_idx = load_char_map(str(cm_path))
            text = ctc_decode(outputs[0], idx2char, blank_idx)

    print(f"{idx:>3} {name:15s} {size_kb:>8} {t_load:>8.1f} {t_run:>8.1f} {n_runs:>5} {out_shape:30s} {rt:>6} {text}")

print("-" * 100)
print(f" {'TOTAL':15s} {'':>8} {total_load:>8.1f} {total_run:>8.1f}")
# ─── DLL comparison ──────────────────────────────────────────────────────────
# Static, hand-written summary table: every figure in it is literal text and
# none of it is computed from the benchmark results printed by this script.
print(f"\n{'=' * 100}")
print(" DLL vs ONNX COMPARISON")
print("=" * 100)
print("""
┌─────────────────────────────────────────────────────────────────────────────┐
│ Feature │ DLL (oneocr.dll) │ Pure ONNX │
├───────────────────────────┼────────────────────────┼────────────────────────┤
│ Platform │ Windows only │ Any (Linux/Mac/Win) │
│ Text detection (boxes) │ YES (4-point quads) │ Raw FPN maps (need PP) │
│ Text recognition │ YES (full pipeline) │ YES (CTC decode) │
│ Word confidence │ YES (per-word float) │ YES (from logprobs) │
│ Line bounding boxes │ YES (quadrilateral) │ NO (need PixelLink PP) │
│ Word bounding boxes │ YES (quadrilateral) │ NO (need PixelLink PP) │
│ Image angle/rotation │ YES (GetImageAngle) │ NO (not in models) │
│ Script detection │ YES (automatic) │ model_01 (standalone) │
│ Language models (LM) │ YES (built-in custom) │ NO (custom ops needed) │
│ Multi-script support │ YES (auto-switch) │ Manual script select │
│ Dependencies │ oneocr.dll + ort.dll │ onnxruntime + numpy │
│ Size │ ~100MB (DLL+model) │ ~45MB (12 ONNX models) │
│ Latency (typical) │ ~50-100ms │ ~30-80ms (recog only) │
│ Custom ops needed │ NO (built-in) │ 23/34 models blocked │
└─────────────────────────────────────────────────────────────────────────────┘
""")
# ─── What each model outputs ────────────────────────────────────────────────
print("=" * 100)
print(" DETAILED MODEL OUTPUT ANALYSIS")
print("=" * 100)

# ONNX tensor element-type codes -> readable names; unlisted codes are shown
# as "type<code>". Defined once before the loop so it is available even for a
# graph that declares no inputs.
type_map = {1: "float32", 6: "int32", 7: "int64", 10: "float16"}


def _describe_value(label, value):
    """Return the report line for one graph input or output.

    Args:
        label: Leading tag of the line ("IN:" or "OUT:").
        value: A graph input/output entry from the loaded ONNX model.

    Returns:
        The name, the comma-joined dimensions and the element type. A
        dimension with no fixed value is shown by its symbolic name, or "?"
        when it has neither.
    """
    tensor_type = value.type.tensor_type
    dims = []
    if tensor_type.HasField("shape"):
        for d in tensor_type.shape.dim:
            dims.append(str(d.dim_value) if d.dim_value else d.dim_param or "?")
    elem_type = tensor_type.elem_type
    return f" {label} {value.name:20s} [{','.join(dims):20s}] {type_map.get(elem_type, f'type{elem_type}')}"


for f in sorted(MODELS_DIR.glob("*.onnx")):
    # The model index is the second "_"-separated field of the file name.
    idx = int(f.name.split("_")[1])
    m = onnx.load(str(f))
    name = SCRIPT_NAMES.get(idx, f"model_{idx}")
    print(f"\n model_{idx:02d} ({name}):")

    # Inputs
    for inp in m.graph.input:
        print(_describe_value("IN:", inp))

    # Outputs
    for out in m.graph.output:
        print(_describe_value("OUT:", out))

    # Custom domains: every opset import with a non-empty domain name
    domains = [o.domain for o in m.opset_import if o.domain]
    if domains:
        print(f" CUSTOM OPS: {', '.join(domains)}")

    # Op counts are only reported for models 0, 1 and 33, so only count there
    if idx <= 1 or idx == 33:
        op_counts = {}
        for node in m.graph.node:
            key = f"{node.domain}::{node.op_type}" if node.domain else node.op_type
            op_counts[key] = op_counts.get(key, 0) + 1
        # Eight most frequent ops, most frequent first
        top_ops = sorted(op_counts.items(), key=lambda x: -x[1])[:8]
        print(f" TOP OPS: {', '.join(f'{op}({n})' for op, n in top_ops)}")