# oneocr/_archive/analyze_models.py — branch "OneOCR Dev", commit ce847d4
# "OneOCR - reverse engineering complete, ONNX pipeline 53% match rate"
"""Analyze all extracted ONNX models — inputs, outputs, ops, runtime compatibility."""
import onnx
import onnxruntime as ort
from pathlib import Path
# Directory holding the ONNX models dumped out of oneocr.dll.
models_dir = Path("oneocr_extracted/onnx_models")

# Column header for the per-model report table printed by the loop below.
_RULE = "=" * 120
print(_RULE)
print(
    f"{'#':>3} {'Name':40s} {'KB':>7} {'IR':>3} {'Producer':15s} {'Nodes':>5} "
    f"{'Inputs':35s} {'Outputs':25s} {'RT':10s} Custom Ops"
)
print(_RULE)
def _fmt_value_infos(value_infos):
    """Format graph inputs/outputs as 'name[d0,d1,...]' strings.

    Static dims render as their integer value; symbolic/unknown dims render
    as their dim_param name, falling back to '?'.
    """
    specs = []
    for vi in value_infos:
        dims = []
        if vi.type.tensor_type.HasField("shape"):
            for d in vi.type.tensor_type.shape.dim:
                dims.append(str(d.dim_value) if d.dim_value else d.dim_param or "?")
        specs.append(f"{vi.name}[{','.join(dims)}]")
    return specs


# One report row per extracted model: size, IR version, producer, node count,
# I/O shapes, runtime loadability, and any custom op domains.
for f in sorted(models_dir.glob("*.onnx")):
    m = onnx.load(str(f))
    # Files are named like "model_NN_....onnx"; NN is the pipeline index.
    idx = f.name.split("_")[1]
    ir = m.ir_version
    prod = (m.producer_name or "?")[:15]
    nodes = len(m.graph.node)
    # Input / output shape summaries, truncated to fit the table columns.
    inp_str = "; ".join(_fmt_value_infos(m.graph.input))[:35]
    out_str = "; ".join(_fmt_value_infos(m.graph.output))[:25]
    # Non-empty opset domains mark non-standard (custom) operator sets.
    opsets = [o.domain for o in m.opset_import if o.domain]
    custom = ", ".join(opsets) if opsets else "-"
    # Runtime check: session creation failing on CPU almost always means the
    # model needs custom ops that stock onnxruntime does not implement.
    try:
        ort.InferenceSession(str(f), providers=["CPUExecutionProvider"])
        rt = "OK"
    except Exception:
        rt = "CUSTOM"
    size_kb = f.stat().st_size // 1024
    print(f"{idx:>3} {f.stem:40s} {size_kb:>7} {ir:>3} {prod:15s} {nodes:>5} {inp_str:35s} {out_str:25s} {rt:10s} {custom}")
# Free-form architecture notes gathered from the reverse-engineering pass.
_PIPELINE_NOTES = """
OneOCR uses a MULTI-MODEL pipeline (not a single model):
1. DETECTOR (model_03, 13MB) — text detection in image
- Input: image tensor → Output: bounding boxes of text regions
2. CHARACTER RECOGNIZERS (model_00..10, 33) — per-script recognition
- Each script (Latin, Arabic, CJK, Cyrillic, etc.) has its own recognizer
- Input: cropped text region → Output: character probabilities
- Accompanied by: rnn.info, char2ind.txt, char2inschar.txt files
3. SMALL LANGUAGE MODELS (model_11..32, 26-28KB each)
- Post-processing character-level language models
- One per supported script/language
Problem for cross-platform:
- 23 models use custom op domain 'com.microsoft.oneocr'
- Custom ops like OneOCRFeatureExtract, DynamicQuantizeLSTM
- These are ONLY implemented in oneocr.dll (Windows)
- To run on Linux: need to reimplement these custom ops or find alternatives
"""

print("\n=== OCR Pipeline Architecture ===")
print(_PIPELINE_NOTES)
# Show config structure: each recognizer ships a text config next to its model.
print("=== Config Files (per-recognizer) ===")
config_dir = Path("oneocr_extracted/config_data")
config_path = config_dir / "chunk_66_ocr_config.config.txt"
# Explicit UTF-8 avoids locale-dependent decoding (the bare read_text default);
# errors="replace" tolerates the binary residue these extracted chunks can hold.
config = config_path.read_text(encoding="utf-8", errors="replace")
print(config[:500])  # first 500 chars is enough to show the structure