"""Deep-dive into model_11 and model_22 graph structure — handle binary config.""" import onnx import numpy as np from pathlib import Path models_dir = Path("oneocr_extracted/onnx_models") for idx in [11, 22]: matches = list(models_dir.glob(f"model_{idx:02d}_*")) model = onnx.load(str(matches[0])) print(f"\n{'='*70}") print(f"FULL GRAPH: model_{idx:02d}") print(f"{'='*70}") # All initializers (weights) print(f"\n Initializers ({len(model.graph.initializer)}):") for init in model.graph.initializer: if init.data_type == 8: # STRING raw = init.string_data[0] if init.string_data else init.raw_data print(f" {init.name}: STRING, {len(raw)} bytes (binary)") else: data = onnx.numpy_helper.to_array(init) print(f" {init.name}: shape={data.shape}, dtype={data.dtype}, " f"range=[{data.min():.4f}, {data.max():.4f}]") # All nodes print(f"\n Nodes ({len(model.graph.node)}):") for i, node in enumerate(model.graph.node): domain_str = f" [{node.domain}]" if node.domain else "" print(f" [{i}] {node.op_type}{domain_str}: {list(node.input)} → {list(node.output)}") for attr in node.attribute: if attr.type == 2: print(f" {attr.name} = {attr.i}") elif attr.type == 1: print(f" {attr.name} = {attr.f}") elif attr.type == 7: print(f" {attr.name} = {list(attr.ints)}") # Analyze feature/config blob for init in model.graph.initializer: if "config" in init.name.lower(): raw = init.string_data[0] if init.string_data else init.raw_data blob = bytes(raw) print(f"\n ── feature/config analysis ──") print(f" Total bytes: {len(blob)}") print(f" First 32 bytes hex: {blob[:32].hex()}") # Hypothesis: header + weight_matrix(input_dim × output_dim) + bias(output_dim) # If input=21, output=50: 21*50=1050 floats = 4200 bytes, bias=50 floats = 200 bytes # Total weights = 4400 bytes, header = 4492-4400 = 92 bytes # Try reading first few uint32 as header header_u32 = [int.from_bytes(blob[i:i+4], 'little') for i in range(0, min(96, len(blob)), 4)] print(f" First 24 uint32 LE values: {header_u32}") # Try float32 interpretation after various offsets for offset in [0, 4, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 68, 72, 76, 80, 84, 88, 92]: remaining = len(blob) - offset n_floats = remaining // 4 if n_floats == 0: continue arr = np.frombuffer(blob[offset:offset + n_floats*4], dtype=np.float32) valid = np.isfinite(arr).sum() reasonable = np.sum((np.abs(arr) < 10) & np.isfinite(arr)) if reasonable > n_floats * 0.7: # >70% reasonable values print(f" *** offset={offset}: {n_floats} floats, {valid} finite, " f"{reasonable} in [-10,10] ({100*reasonable/n_floats:.0f}%)") print(f" First 10: {arr[:10]}") print(f" Stats: mean={arr.mean():.4f}, std={arr.std():.4f}") # Check if it could be weight matrix 21×50 if n_floats >= 1050 + 50: W = arr[:1050].reshape(21, 50) b = arr[1050:1100] print(f" As 21×50 weight: W_range=[{W.min():.4f},{W.max():.4f}], " f"b_range=[{b.min():.4f},{b.max():.4f}]") # Test with random input x = np.random.randn(1, 21).astype(np.float32) y = x @ W + b print(f" Test: input(21) → output(50), y_range=[{y.min():.4f},{y.max():.4f}]")