|
|
"""Analyze all extracted ONNX models — inputs, outputs, ops, runtime compatibility.""" |
|
|
import onnx |
|
|
import onnxruntime as ort |
|
|
from pathlib import Path |
|
|
|
|
|
def _format_value_infos(value_infos):
    """Render each graph input/output as 'name[d0,d1,...]'.

    Static dims print as integers; symbolic dims print their dim_param,
    falling back to '?' when neither is set. Value infos without a shape
    field render as 'name[]'.
    """
    rendered = []
    for vi in value_infos:
        dims = []
        if vi.type.tensor_type.HasField("shape"):
            for d in vi.type.tensor_type.shape.dim:
                dims.append(str(d.dim_value) if d.dim_value else d.dim_param or "?")
        rendered.append(f"{vi.name}[{','.join(dims)}]")
    return rendered


models_dir = Path("oneocr_extracted/onnx_models")

print("=" * 120)
print(f"{'#':>3} {'Name':40s} {'KB':>7} {'IR':>3} {'Producer':15s} {'Nodes':>5} {'Inputs':35s} {'Outputs':25s} {'RT':10s} Custom Ops")
print("=" * 120)

for f in sorted(models_dir.glob("*.onnx")):
    m = onnx.load(str(f))

    # Files are expected to be named like "model_NN_....onnx"; fall back to
    # the stem so an oddly-named file doesn't crash the whole report.
    name_parts = f.name.split("_")
    idx = name_parts[1] if len(name_parts) > 1 else f.stem

    ir = m.ir_version
    prod = (m.producer_name or "?")[:15]
    nodes = len(m.graph.node)

    # Columns are truncated to their header widths to keep the table aligned.
    inp_str = "; ".join(_format_value_infos(m.graph.input))[:35]
    out_str = "; ".join(_format_value_infos(m.graph.output))[:25]

    # Non-empty opset domains mark custom (non-standard) operator sets,
    # e.g. 'com.microsoft.oneocr'.
    opsets = [o.domain for o in m.opset_import if o.domain]
    custom = ", ".join(opsets) if opsets else "-"

    # Probe whether stock onnxruntime can load the model on CPU. Models that
    # rely on OneOCR's custom ops fail to create a session, which we report
    # as "CUSTOM" (best-effort: any load failure lands in this bucket).
    try:
        ort.InferenceSession(str(f), providers=["CPUExecutionProvider"])
        rt = "OK"
    except Exception:
        rt = "CUSTOM"

    size_kb = f.stat().st_size // 1024
    print(f"{idx:>3} {f.stem:40s} {size_kb:>7} {ir:>3} {prod:15s} {nodes:>5} {inp_str:35s} {out_str:25s} {rt:10s} {custom}")
|
|
|
|
|
|
|
|
# Static report text: summarizes how the extracted models fit together as a
# pipeline and why they are Windows-bound. The counts/sizes quoted below are
# observations about the extracted files, not derived by this script.
print("\n=== OCR Pipeline Architecture ===")


print("""


OneOCR uses a MULTI-MODEL pipeline (not a single model):





1. DETECTOR (model_03, 13MB) — text detection in image


- Input: image tensor → Output: bounding boxes of text regions





2. CHARACTER RECOGNIZERS (model_00..10, 33) — per-script recognition


- Each script (Latin, Arabic, CJK, Cyrillic, etc.) has its own recognizer


- Input: cropped text region → Output: character probabilities


- Accompanied by: rnn.info, char2ind.txt, char2inschar.txt files





3. SMALL LANGUAGE MODELS (model_11..32, 26-28KB each)


- Post-processing character-level language models


- One per supported script/language





Problem for cross-platform:


- 23 models use custom op domain 'com.microsoft.oneocr'


- Custom ops like OneOCRFeatureExtract, DynamicQuantizeLSTM


- These are ONLY implemented in oneocr.dll (Windows)


- To run on Linux: need to reimplement these custom ops or find alternatives


""")
|
|
|
|
|
|
|
|
print("=== Config Files (per-recognizer) ===")
config_dir = Path("oneocr_extracted/config_data")
config_path = config_dir / "chunk_66_ocr_config.config.txt"
try:
    # errors="replace" because the extracted config dump may contain bytes
    # that are not valid UTF-8; we only want a readable sample, not fidelity.
    config = config_path.read_text(errors="replace")
except FileNotFoundError:
    # Don't let a missing sample file kill the report with a traceback.
    print(f"(config file not found: {config_path})")
else:
    # Print only the first 500 characters as a representative excerpt.
    print(config[:500])
|
|
|