File size: 2,956 Bytes
ce847d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
"""Analyze all extracted ONNX models — inputs, outputs, ops, runtime compatibility."""
import onnx
import onnxruntime as ort
from pathlib import Path

# Directory produced by the extraction step; every *.onnx file in it is analyzed.
models_dir = Path("oneocr_extracted/onnx_models")

# Column header for the per-model report table.
separator = "=" * 120
print(separator)
print(f"{'#':>3} {'Name':40s} {'KB':>7} {'IR':>3} {'Producer':15s} {'Nodes':>5} {'Inputs':35s} {'Outputs':25s} {'RT':10s} Custom Ops")
print(separator)

def _fmt_value_infos(value_infos):
    """Format graph inputs/outputs as 'name[d0,d1,...]' strings.

    Unset dims fall back to the symbolic dim_param, or '?' if neither is set.
    """
    formatted = []
    for vi in value_infos:
        dims = []
        if vi.type.tensor_type.HasField("shape"):
            for d in vi.type.tensor_type.shape.dim:
                dims.append(str(d.dim_value) if d.dim_value else d.dim_param or "?")
        formatted.append(f"{vi.name}[{','.join(dims)}]")
    return formatted


for f in sorted(models_dir.glob("*.onnx")):
    m = onnx.load(str(f))
    # Index token from names like "model_03_....onnx"; fall back to the stem
    # so an unconventional filename doesn't raise IndexError.
    name_parts = f.name.split("_")
    idx = name_parts[1] if len(name_parts) > 1 else f.stem
    ir = m.ir_version
    prod = (m.producer_name or "?")[:15]
    nodes = len(m.graph.node)

    # Input / output shapes (truncated to fit the table columns).
    inp_str = "; ".join(_fmt_value_infos(m.graph.input))[:35]
    out_str = "; ".join(_fmt_value_infos(m.graph.output))[:25]

    # Non-empty opset domains => model depends on custom (non-standard) ops.
    opsets = [o.domain for o in m.opset_import if o.domain]
    custom = ", ".join(opsets) if opsets else "-"

    # Runtime check: try to create a CPU session. A failure on a model that
    # declares custom op domains means "needs custom ops"; any other failure
    # is a genuine load error and must not be mislabeled as CUSTOM.
    try:
        ort.InferenceSession(str(f), providers=["CPUExecutionProvider"])
        rt = "OK"
    except Exception:
        rt = "CUSTOM" if opsets else "FAIL"

    size_kb = f.stat().st_size // 1024
    print(f"{idx:>3} {f.stem:40s} {size_kb:>7} {ir:>3} {prod:15s} {nodes:>5} {inp_str:35s} {out_str:25s} {rt:10s} {custom}")

# Summary of the pipeline topology inferred from the model inventory above.
print("\n=== OCR Pipeline Architecture ===")
architecture_notes = """
OneOCR uses a MULTI-MODEL pipeline (not a single model):

1. DETECTOR (model_03, 13MB) — text detection in image
   - Input: image tensor → Output: bounding boxes of text regions

2. CHARACTER RECOGNIZERS (model_00..10, 33) — per-script recognition  
   - Each script (Latin, Arabic, CJK, Cyrillic, etc.) has its own recognizer
   - Input: cropped text region → Output: character probabilities
   - Accompanied by: rnn.info, char2ind.txt, char2inschar.txt files

3. SMALL LANGUAGE MODELS (model_11..32, 26-28KB each)
   - Post-processing character-level language models
   - One per supported script/language

Problem for cross-platform:
  - 23 models use custom op domain 'com.microsoft.oneocr'
  - Custom ops like OneOCRFeatureExtract, DynamicQuantizeLSTM
  - These are ONLY implemented in oneocr.dll (Windows)
  - To run on Linux: need to reimplement these custom ops or find alternatives
"""
print(architecture_notes)

# Show config structure: dump the first 500 chars of one representative
# per-recognizer config file.
print("=== Config Files (per-recognizer) ===")
config_dir = Path("oneocr_extracted/config_data")
sample_path = config_dir / "chunk_66_ocr_config.config.txt"
try:
    # Pin utf-8 so the dump is identical across platforms; errors="replace"
    # keeps this best-effort (config chunks may contain binary residue).
    config = sample_path.read_text(encoding="utf-8", errors="replace")
except FileNotFoundError:
    # Don't let a missing chunk kill the whole report.
    print(f"(config sample not found: {sample_path})")
else:
    print(config[:500])