"""Full analysis of detector and scriptID models.""" import onnx import numpy as np from pathlib import Path def print_io(model_path, label): m = onnx.load(str(model_path)) print(f'\n=== {label} ({Path(model_path).name}) ===') print(f'Nodes: {len(m.graph.node)}') print('Inputs:') for i in m.graph.input: dims = [] for d in i.type.tensor_type.shape.dim: dims.append(str(d.dim_value) if d.dim_value else d.dim_param or '?') print(f' {i.name}: [{", ".join(dims)}] dtype={i.type.tensor_type.elem_type}') print('Outputs:') for o in m.graph.output: dims = [] for d in o.type.tensor_type.shape.dim: dims.append(str(d.dim_value) if d.dim_value else d.dim_param or '?') print(f' {o.name}: [{", ".join(dims)}] dtype={o.type.tensor_type.elem_type}') custom = set() for n in m.graph.node: if n.domain: custom.add((n.domain, n.op_type)) if custom: print(f'Custom ops: {custom}') else: print('Custom ops: none') return m models_dir = Path('oneocr_extracted/onnx_models') # Detector m0 = print_io(next(models_dir.glob('model_00_*')), 'DETECTOR') # ScriptID m1 = print_io(next(models_dir.glob('model_01_*')), 'SCRIPT ID') # A recognizer (Latin) m2 = print_io(next(models_dir.glob('model_02_*')), 'RECOGNIZER Latin') # Try running detector to see actual output shapes import onnxruntime as ort from PIL import Image img = Image.open('image.png').convert('RGB') w, h = img.size sess = ort.InferenceSession(str(next(models_dir.glob('model_00_*'))), providers=['CPUExecutionProvider']) scale = 800 / max(h, w) dh = (int(h * scale) + 31) // 32 * 32 dw = (int(w * scale) + 31) // 32 * 32 img_d = img.resize((dw, dh), Image.LANCZOS) arr_d = np.array(img_d, dtype=np.float32) arr_d = arr_d[:, :, ::-1] - [102.9801, 115.9465, 122.7717] data_d = arr_d.transpose(2, 0, 1)[np.newaxis].astype(np.float32) im_info = np.array([[dh, dw, scale]], dtype=np.float32) outputs = sess.run(None, {"data": data_d, "im_info": im_info}) print(f'\n=== DETECTOR OUTPUT SHAPES (image {w}x{h} -> {dw}x{dh}) ===') output_names = [o.name for o in sess.get_outputs()] for name, out in zip(output_names, outputs): print(f' {name}: shape={out.shape} dtype={out.dtype} min={out.min():.4f} max={out.max():.4f}') # Specifically analyze pixel_link outputs # PixelLink has: pixel scores (text/non-text) + link scores (8 neighbors) # FPN produces 3 scales print('\n=== DETECTOR OUTPUT ANALYSIS ===') for i, (name, out) in enumerate(zip(output_names, outputs)): scores = 1.0 / (1.0 + np.exp(-out)) # sigmoid hot = (scores > 0.5).sum() print(f' [{i:2d}] {name:25s} shape={str(out.shape):20s} sigmoid_max={scores.max():.4f} hot_pixels(>0.5)={hot}')