# oneocr / _archive / analyze_pipeline.py  (OneOCR Dev, commit ce847d4)
# OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
"""Full analysis of detector and scriptID models."""
import onnx
import numpy as np
from pathlib import Path
def print_io(model_path, label):
    """Load an ONNX model and print a summary of its graph.

    Prints the node count, each graph input/output with its shape and
    element type, and any custom (non-default-domain) ops the graph uses.

    Args:
        model_path: Path (str or Path) to the .onnx file.
        label: Human-readable label for the section header.

    Returns:
        The loaded onnx.ModelProto, so callers can inspect it further.
    """
    def _dims(value_info):
        # Static dims print as their value; symbolic dims as their param
        # name; fully unknown dims as '?'.
        shape = value_info.type.tensor_type.shape
        return ", ".join(
            str(d.dim_value) if d.dim_value else d.dim_param or '?'
            for d in shape.dim
        )

    m = onnx.load(str(model_path))
    print(f'\n=== {label} ({Path(model_path).name}) ===')
    print(f'Nodes: {len(m.graph.node)}')
    print('Inputs:')
    for i in m.graph.input:
        print(f' {i.name}: [{_dims(i)}] dtype={i.type.tensor_type.elem_type}')
    print('Outputs:')
    for o in m.graph.output:
        print(f' {o.name}: [{_dims(o)}] dtype={o.type.tensor_type.elem_type}')
    # A non-empty domain marks an op outside the standard ONNX opset.
    custom = {(n.domain, n.op_type) for n in m.graph.node if n.domain}
    if custom:
        print(f'Custom ops: {custom}')
    else:
        print('Custom ops: none')
    return m
# Directory produced by the earlier extraction step; models appear to be
# named model_NN_<suffix>.onnx and next(glob(...)) takes the first match
# per index -- assumes exactly one file per index; TODO confirm.
models_dir = Path('oneocr_extracted/onnx_models')
# Detector
m0 = print_io(next(models_dir.glob('model_00_*')), 'DETECTOR')
# ScriptID
m1 = print_io(next(models_dir.glob('model_01_*')), 'SCRIPT ID')
# A recognizer (Latin)
m2 = print_io(next(models_dir.glob('model_02_*')), 'RECOGNIZER Latin')
# Try running detector to see actual output shapes
import onnxruntime as ort
from PIL import Image

img = Image.open('image.png').convert('RGB')
w, h = img.size
sess = ort.InferenceSession(str(next(models_dir.glob('model_00_*'))),
                            providers=['CPUExecutionProvider'])
# Scale so the longer side becomes 800 px, then round each side UP to a
# multiple of 32 -- presumably because the detector backbone requires
# /32-divisible input; confirm against the model's declared input shape.
scale = 800 / max(h, w)
dh = (int(h * scale) + 31) // 32 * 32
dw = (int(w * scale) + 31) // 32 * 32
img_d = img.resize((dw, dh), Image.LANCZOS)
arr_d = np.array(img_d, dtype=np.float32)
# [:, :, ::-1] reverses the channel axis (RGB -> BGR), then subtracts
# per-channel means. The constants look like Caffe/Detectron ImageNet BGR
# means -- assumed to match the detector's training; TODO confirm.
arr_d = arr_d[:, :, ::-1] - [102.9801, 115.9465, 122.7717]
# HWC -> CHW, then add a leading batch dimension (NCHW).
data_d = arr_d.transpose(2, 0, 1)[np.newaxis].astype(np.float32)
# Second model input: [resized height, resized width, scale]. NOTE(review):
# this is the Faster R-CNN-style im_info convention -- verify the model
# actually expects the *resized* dims here rather than the originals.
im_info = np.array([[dh, dw, scale]], dtype=np.float32)
outputs = sess.run(None, {"data": data_d, "im_info": im_info})
print(f'\n=== DETECTOR OUTPUT SHAPES (image {w}x{h} -> {dw}x{dh}) ===')
output_names = [o.name for o in sess.get_outputs()]
for name, out in zip(output_names, outputs):
    print(f' {name}: shape={out.shape} dtype={out.dtype} min={out.min():.4f} max={out.max():.4f}')
# Specifically analyze pixel_link outputs
# PixelLink has: pixel scores (text/non-text) + link scores (8 neighbors)
# FPN produces 3 scales
print('\n=== DETECTOR OUTPUT ANALYSIS ===')
for i, (name, out) in enumerate(zip(output_names, outputs)):
    # Numerically stable sigmoid: the naive 1/(1+exp(-x)) overflows (with
    # a RuntimeWarning) for large-magnitude negative logits. Computing
    # exp(-|x|) keeps the exponent <= 0 and gives identical results.
    z = np.exp(-np.abs(out))
    scores = np.where(out >= 0, 1.0 / (1.0 + z), z / (1.0 + z))
    # Count of cells the detector considers "on" at the 0.5 threshold.
    hot = (scores > 0.5).sum()
    print(f' [{i:2d}] {name:25s} shape={str(out.shape):20s} sigmoid_max={scores.max():.4f} hot_pixels(>0.5)={hot}')