"""Deep analysis of model_33 (LineLayout) to understand its OneOCRFeatureExtract usage.""" import onnx from onnx import numpy_helper import numpy as np from pathlib import Path models_dir = Path("oneocr_extracted/onnx_models") model_path = list(models_dir.glob("model_33_*"))[0] model = onnx.load(str(model_path)) print(f"Model: {model_path.name}") print(f"IR version: {model.ir_version}") print(f"Opsets: {[(o.domain, o.version) for o in model.opset_import]}") # Show all inputs/outputs print(f"\nGraph inputs:") for inp in model.graph.input: shape = [d.dim_value or d.dim_param for d in inp.type.tensor_type.shape.dim] print(f" {inp.name}: {shape}") print(f"\nGraph outputs:") for out in model.graph.output: shape = [d.dim_value or d.dim_param for d in out.type.tensor_type.shape.dim] print(f" {out.name}: {shape}") # All initializers print(f"\nInitializers ({len(model.graph.initializer)}):") for init in model.graph.initializer: if init.data_type == 8: # STRING raw = bytes(init.string_data[0] if init.string_data else init.raw_data) print(f" {init.name}: STRING, {len(raw)} bytes") else: data = numpy_helper.to_array(init) print(f" {init.name}: {data.shape} {data.dtype} [{data.min():.4f}, {data.max():.4f}]") # Find the OneOCRFeatureExtract node and its context print(f"\nNodes ({len(model.graph.node)}):") fe_found = False for i, node in enumerate(model.graph.node): if node.op_type == "OneOCRFeatureExtract" or fe_found: domain_str = f" [{node.domain}]" if node.domain else "" print(f" [{i}] {node.op_type}{domain_str}: {list(node.input)} → {list(node.output)}") if node.op_type == "OneOCRFeatureExtract": fe_found = True for attr in node.attribute: if attr.type == 2: print(f" {attr.name} = {attr.i}") elif attr.type == 1: print(f" {attr.name} = {attr.f}") elif attr.type == 7: print(f" {attr.name} = {list(attr.ints)}") elif i < 5 or (i > len(model.graph.node) - 5): # Show first/last few nodes for context domain_str = f" [{node.domain}]" if node.domain else "" print(f" [{i}] {node.op_type}{domain_str}: {list(node.input)} → {list(node.output)}") # Show nodes BEFORE OneOCRFeatureExtract (the conv backbone) print(f"\nConv backbone structure (last 5 nodes before FE):") fe_idx = None for i, node in enumerate(model.graph.node): if node.op_type == "OneOCRFeatureExtract": fe_idx = i break if fe_idx: for i in range(max(0, fe_idx - 5), fe_idx + 1): node = model.graph.node[i] domain_str = f" [{node.domain}]" if node.domain else "" print(f" [{i}] {node.op_type}{domain_str}: {list(node.input)} → {list(node.output)}") # Analyze the feature/config blob for init in model.graph.initializer: if init.name == "feature/config": raw = bytes(init.string_data[0] if init.string_data else init.raw_data) print(f"\nfeature/config blob: {len(raw)} bytes") # Try big-endian float32 be = np.frombuffer(raw, dtype='>f4').copy() print(f" Big-endian float32: {len(be)} values") print(f" Finite: {np.isfinite(be).sum()}") in_range = np.sum(np.abs(be[np.isfinite(be)]) < 10) print(f" In [-10,10]: {in_range} ({100*in_range/len(be):.1f}%)") print(f" First 20: {be[:20]}") print(f" Last 20: {be[-20:]}") # Try to find dimension markers for i, v in enumerate(be): if v in [128.0, 256.0, 512.0] or (v > 0 and v == int(v) and 10 < v < 10000): print(f" Potential dim at [{i}]: {v}")