# oneocr / _archive / analyze_pipeline.py  (OneOCR Dev, commit ce847d4)
# OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
"""Full analysis of detector and scriptID models."""
import onnx
import numpy as np
from pathlib import Path
def print_io(model_path, label):
    """Load an ONNX model and print a summary of its graph.

    Prints the node count, each graph input/output with its shape and
    element type, and any custom (non-default-domain) ops the graph uses.

    Args:
        model_path: Path (str or Path) to the .onnx file.
        label: Human-readable label for the section header.

    Returns:
        The loaded onnx.ModelProto, so callers can inspect it further.
    """
    def _dims(value_info):
        # Static dims print as their value; symbolic dims as their param
        # name; fully unknown dims as '?'.
        shape = value_info.type.tensor_type.shape
        return ", ".join(
            str(d.dim_value) if d.dim_value else d.dim_param or '?'
            for d in shape.dim
        )

    m = onnx.load(str(model_path))
    print(f'\n=== {label} ({Path(model_path).name}) ===')
    print(f'Nodes: {len(m.graph.node)}')
    print('Inputs:')
    for i in m.graph.input:
        print(f' {i.name}: [{_dims(i)}] dtype={i.type.tensor_type.elem_type}')
    print('Outputs:')
    for o in m.graph.output:
        print(f' {o.name}: [{_dims(o)}] dtype={o.type.tensor_type.elem_type}')
    # A non-empty domain marks an op outside the standard ONNX opset.
    custom = {(n.domain, n.op_type) for n in m.graph.node if n.domain}
    if custom:
        print(f'Custom ops: {custom}')
    else:
        print('Custom ops: none')
    return m
# Directory produced by the earlier extraction step; models appear to be
# named model_NN_<suffix>.onnx and next(glob(...)) takes the first match
# per index -- assumes exactly one file per index; TODO confirm.
models_dir = Path('oneocr_extracted/onnx_models')
# Detector
m0 = print_io(next(models_dir.glob('model_00_*')), 'DETECTOR')
# ScriptID
m1 = print_io(next(models_dir.glob('model_01_*')), 'SCRIPT ID')
# A recognizer (Latin)
m2 = print_io(next(models_dir.glob('model_02_*')), 'RECOGNIZER Latin')
# Try running detector to see actual output shapes
import onnxruntime as ort
from PIL import Image

img = Image.open('image.png').convert('RGB')
w, h = img.size
sess = ort.InferenceSession(str(next(models_dir.glob('model_00_*'))),
                            providers=['CPUExecutionProvider'])
# Scale so the longer side becomes 800 px, then round each side UP to a
# multiple of 32 -- presumably because the detector backbone requires
# /32-divisible input; confirm against the model's declared input shape.
scale = 800 / max(h, w)
dh = (int(h * scale) + 31) // 32 * 32
dw = (int(w * scale) + 31) // 32 * 32
img_d = img.resize((dw, dh), Image.LANCZOS)
arr_d = np.array(img_d, dtype=np.float32)
# [:, :, ::-1] reverses the channel axis (RGB -> BGR), then subtracts
# per-channel means. The constants look like Caffe/Detectron ImageNet BGR
# means -- assumed to match the detector's training; TODO confirm.
arr_d = arr_d[:, :, ::-1] - [102.9801, 115.9465, 122.7717]
# HWC -> CHW, then add a leading batch dimension (NCHW).
data_d = arr_d.transpose(2, 0, 1)[np.newaxis].astype(np.float32)
# Second model input: [resized height, resized width, scale]. NOTE(review):
# this is the Faster R-CNN-style im_info convention -- verify the model
# actually expects the *resized* dims here rather than the originals.
im_info = np.array([[dh, dw, scale]], dtype=np.float32)
outputs = sess.run(None, {"data": data_d, "im_info": im_info})
print(f'\n=== DETECTOR OUTPUT SHAPES (image {w}x{h} -> {dw}x{dh}) ===')
output_names = [o.name for o in sess.get_outputs()]
for name, out in zip(output_names, outputs):
    print(f' {name}: shape={out.shape} dtype={out.dtype} min={out.min():.4f} max={out.max():.4f}')
# Specifically analyze pixel_link outputs
# PixelLink has: pixel scores (text/non-text) + link scores (8 neighbors)
# FPN produces 3 scales
print('\n=== DETECTOR OUTPUT ANALYSIS ===')
for i, (name, out) in enumerate(zip(output_names, outputs)):
    # Numerically stable sigmoid: the naive 1/(1+exp(-x)) overflows (with
    # a RuntimeWarning) for large-magnitude negative logits. Computing
    # exp(-|x|) keeps the exponent <= 0 and gives identical results.
    z = np.exp(-np.abs(out))
    scores = np.where(out >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    # Count of cells the detector considers "on" at the 0.5 threshold.
    hot = (scores > 0.5).sum()
    print(f' [{i:2d}] {name:25s} shape={str(out.shape):20s} sigmoid_max={scores.max():.4f} hot_pixels(>0.5)={hot}')