|
|
"""Analyze all extracted ONNX models — inputs, outputs, ops, runtime compatibility.""" |
|
|
import onnx |
|
|
import onnxruntime as ort |
|
|
from pathlib import Path |
|
|
|
|
|
def _format_value_infos(value_infos):
    """Render each graph input/output as 'name[d0,d1,...]'.

    Static dims print as integers; symbolic dims print their dim_param,
    falling back to '?' when neither is set. Value infos without a shape
    field render as 'name[]'.
    """
    rendered = []
    for vi in value_infos:
        dims = []
        if vi.type.tensor_type.HasField("shape"):
            for d in vi.type.tensor_type.shape.dim:
                dims.append(str(d.dim_value) if d.dim_value else d.dim_param or "?")
        rendered.append(f"{vi.name}[{','.join(dims)}]")
    return rendered


models_dir = Path("oneocr_extracted/onnx_models")

print("=" * 120)
print(f"{'#':>3} {'Name':40s} {'KB':>7} {'IR':>3} {'Producer':15s} {'Nodes':>5} {'Inputs':35s} {'Outputs':25s} {'RT':10s} Custom Ops")
print("=" * 120)

for f in sorted(models_dir.glob("*.onnx")):
    m = onnx.load(str(f))

    # Files are expected to be named like "model_NN_....onnx"; fall back to
    # the stem so an oddly-named file doesn't crash the whole report.
    name_parts = f.name.split("_")
    idx = name_parts[1] if len(name_parts) > 1 else f.stem

    ir = m.ir_version
    prod = (m.producer_name or "?")[:15]
    nodes = len(m.graph.node)

    # Columns are truncated to their header widths to keep the table aligned.
    inp_str = "; ".join(_format_value_infos(m.graph.input))[:35]
    out_str = "; ".join(_format_value_infos(m.graph.output))[:25]

    # Non-empty opset domains mark custom (non-standard) operator sets,
    # e.g. 'com.microsoft.oneocr'.
    opsets = [o.domain for o in m.opset_import if o.domain]
    custom = ", ".join(opsets) if opsets else "-"

    # Probe whether stock onnxruntime can load the model on CPU. Models that
    # rely on OneOCR's custom ops fail to create a session, which we report
    # as "CUSTOM" (best-effort: any load failure lands in this bucket).
    try:
        ort.InferenceSession(str(f), providers=["CPUExecutionProvider"])
        rt = "OK"
    except Exception:
        rt = "CUSTOM"

    size_kb = f.stat().st_size // 1024
    print(f"{idx:>3} {f.stem:40s} {size_kb:>7} {ir:>3} {prod:15s} {nodes:>5} {inp_str:35s} {out_str:25s} {rt:10s} {custom}")
|
|
|
|
|
|
|
|
# Static report text: summarizes how the extracted models fit together as a
# pipeline and why they are Windows-bound. The counts/sizes quoted below are
# observations about the extracted files, not derived by this script.
print("\n=== OCR Pipeline Architecture ===")


print("""


OneOCR uses a MULTI-MODEL pipeline (not a single model):





1. DETECTOR (model_03, 13MB) — text detection in image


- Input: image tensor → Output: bounding boxes of text regions





2. CHARACTER RECOGNIZERS (model_00..10, 33) — per-script recognition


- Each script (Latin, Arabic, CJK, Cyrillic, etc.) has its own recognizer


- Input: cropped text region → Output: character probabilities


- Accompanied by: rnn.info, char2ind.txt, char2inschar.txt files





3. SMALL LANGUAGE MODELS (model_11..32, 26-28KB each)


- Post-processing character-level language models


- One per supported script/language





Problem for cross-platform:


- 23 models use custom op domain 'com.microsoft.oneocr'


- Custom ops like OneOCRFeatureExtract, DynamicQuantizeLSTM


- These are ONLY implemented in oneocr.dll (Windows)


- To run on Linux: need to reimplement these custom ops or find alternatives


""")
|
|
|
|
|
|
|
|
print("=== Config Files (per-recognizer) ===")
config_dir = Path("oneocr_extracted/config_data")
config_path = config_dir / "chunk_66_ocr_config.config.txt"
try:
    # errors="replace" because the extracted config dump may contain bytes
    # that are not valid UTF-8; we only want a readable sample, not fidelity.
    config = config_path.read_text(errors="replace")
except FileNotFoundError:
    # Don't let a missing sample file kill the report with a traceback.
    print(f"(config file not found: {config_path})")
else:
    # Print only the first 500 characters as a representative excerpt.
    print(config[:500])
|
|
|