File size: 4,055 Bytes

ce847d4

"""Deep-dive into model_11 and model_22 graph structure — handle binary config."""
import onnx
import numpy as np
from pathlib import Path

models_dir = Path("oneocr_extracted/onnx_models")

# Model indices whose graphs this script dumps.
MODEL_INDICES = (11, 22)

# Size of the leading region treated as a possible header: it is dumped as
# uint32 words, and every 4-byte-aligned offset inside it is tried as the
# start of float32 data.
HEADER_SCAN_BYTES = 96

# Hypothesised dense layer stored in the config blob: W is (21, 50), b is (50,).
DENSE_IN_DIM = 21
DENSE_OUT_DIM = 50


def initializer_payload(init):
    """Return the first ``string_data`` element of *init*, or ``raw_data`` if it has none."""
    return init.string_data[0] if init.string_data else init.raw_data


def format_array_summary(name, data):
    """Return the one-line shape/dtype/range summary printed for a tensor.

    Args:
        name: Initializer name shown at the start of the line.
        data: NumPy array holding the initializer values.

    Returns:
        The formatted line. Zero-size arrays report ``range=[empty]`` because
        ``min()``/``max()`` raise ``ValueError`` on them.
    """
    prefix = f"    {name}: shape={data.shape}, dtype={data.dtype}"
    if data.size == 0:
        return f"{prefix}, range=[empty]"
    return f"{prefix}, range=[{data.min():.4f}, {data.max():.4f}]"


def print_initializers(graph):
    """Print one summary line per initializer (weight) of *graph*."""
    print(f"\n  Initializers ({len(graph.initializer)}):")
    for init in graph.initializer:
        if init.data_type == onnx.TensorProto.STRING:
            raw = initializer_payload(init)
            print(f"    {init.name}: STRING, {len(raw)} bytes (binary)")
        else:
            print(format_array_summary(init.name, onnx.numpy_helper.to_array(init)))


def print_nodes(graph):
    """Print every node of *graph* with its INT, FLOAT and INTS attributes.

    Attributes of any other type are not printed.
    """
    print(f"\n  Nodes ({len(graph.node)}):")
    for i, node in enumerate(graph.node):
        domain_str = f" [{node.domain}]" if node.domain else ""
        print(f"    [{i}] {node.op_type}{domain_str}: {list(node.input)} → {list(node.output)}")
        for attr in node.attribute:
            if attr.type == onnx.AttributeProto.INT:
                print(f"        {attr.name} = {attr.i}")
            elif attr.type == onnx.AttributeProto.FLOAT:
                print(f"        {attr.name} = {attr.f}")
            elif attr.type == onnx.AttributeProto.INTS:
                print(f"        {attr.name} = {list(attr.ints)}")


def header_words(blob, limit=HEADER_SCAN_BYTES):
    """Decode the leading bytes of *blob* as little-endian uint32 words.

    Args:
        blob: Raw bytes to decode.
        limit: Maximum number of leading bytes to consider.

    Returns:
        A list of ints. Only complete 4-byte words are decoded, so a trailing
        partial word in a short blob is ignored rather than misreported.
    """
    usable = min(limit, len(blob))
    usable -= usable % 4
    return [int.from_bytes(blob[i:i + 4], "little") for i in range(0, usable, 4)]


def scan_float_candidates(blob, offsets=None, max_abs=10, min_fraction=0.7):
    """Find offsets at which the rest of *blob* looks like float32 data.

    Args:
        blob: Raw bytes to probe.
        offsets: Byte offsets to try; defaults to every 4-byte-aligned offset
            below ``HEADER_SCAN_BYTES``.
        max_abs: A value counts as "reasonable" when it is finite and its
            magnitude is strictly below this bound.
        min_fraction: An offset is reported only when more than this fraction
            of the decoded values is reasonable.

    Returns:
        A list of ``(offset, arr, n_finite, n_reasonable)`` tuples in probing
        order, where ``arr`` is a little-endian float32 view of the bytes from
        ``offset`` onward (truncated to whole floats).
    """
    if offsets is None:
        offsets = range(0, HEADER_SCAN_BYTES, 4)

    candidates = []
    for offset in offsets:
        n_floats = (len(blob) - offset) // 4
        # ``<=`` rather than ``==``: an offset past the end of the blob gives
        # a negative count, which must be skipped just like an empty tail.
        if n_floats <= 0:
            continue
        # Explicit little-endian dtype keeps this consistent with the
        # little-endian header decoding, independent of the host byte order.
        arr = np.frombuffer(blob, dtype="<f4", count=n_floats, offset=offset)
        finite = np.isfinite(arr)
        n_finite = finite.sum()
        n_reasonable = np.sum((np.abs(arr) < max_abs) & finite)
        if n_reasonable > n_floats * min_fraction:
            candidates.append((offset, arr, n_finite, n_reasonable))
    return candidates


def split_dense_hypothesis(arr, in_dim=DENSE_IN_DIM, out_dim=DENSE_OUT_DIM):
    """Interpret the start of *arr* as a dense layer's weight matrix and bias.

    Args:
        arr: 1-D float array.
        in_dim: Hypothesised input width (rows of W).
        out_dim: Hypothesised output width (columns of W, length of b).

    Returns:
        ``(W, b)`` with ``W`` of shape ``(in_dim, out_dim)`` taken from the
        first ``in_dim * out_dim`` values and ``b`` the next ``out_dim``
        values, or ``None`` when *arr* is too short to hold both.
    """
    n_weights = in_dim * out_dim
    if arr.size < n_weights + out_dim:
        return None
    return arr[:n_weights].reshape(in_dim, out_dim), arr[n_weights:n_weights + out_dim]


def analyze_config_blob(blob):
    """Print a heuristic analysis of a binary feature/config blob.

    Dumps the size, a hex prefix and the leading uint32 words, then reports
    every probed offset whose tail decodes to mostly small float32 values and
    tries the dense-layer hypothesis on it.
    """
    print(f"\n  ── feature/config analysis ──")
    print(f"  Total bytes: {len(blob)}")
    print(f"  First 32 bytes hex: {blob[:32].hex()}")

    # Hypothesis: header + weight_matrix(input_dim × output_dim) + bias(output_dim)
    # If input=21, output=50: 21*50=1050 floats = 4200 bytes, bias=50 floats = 200 bytes
    # Total weights = 4400 bytes, header = 4492-4400 = 92 bytes

    # Try reading the first few uint32 as a header.
    header_u32 = header_words(blob)
    print(f"  First {len(header_u32)} uint32 LE values: {header_u32}")

    # Fixed seed so the probe output is comparable across offsets and runs.
    rng = np.random.default_rng(0)

    # Try a float32 interpretation after various offsets.
    for offset, arr, valid, reasonable in scan_float_candidates(blob):
        n_floats = arr.size
        print(f"  *** offset={offset}: {n_floats} floats, {valid} finite, "
              f"{reasonable} in [-10,10] ({100*reasonable/n_floats:.0f}%)")
        print(f"      First 10: {arr[:10]}")
        print(f"      Stats: mean={arr.mean():.4f}, std={arr.std():.4f}")

        # Check whether the data could be a weight matrix plus bias.
        layer = split_dense_hypothesis(arr)
        if layer is None:
            continue
        W, b = layer
        print(f"      As {DENSE_IN_DIM}×{DENSE_OUT_DIM} weight: "
              f"W_range=[{W.min():.4f},{W.max():.4f}], "
              f"b_range=[{b.min():.4f},{b.max():.4f}]")
        # Test with a random input.
        x = rng.standard_normal((1, DENSE_IN_DIM), dtype=np.float32)
        y = x @ W + b
        print(f"      Test: input({DENSE_IN_DIM}) → output({DENSE_OUT_DIM}), "
              f"y_range=[{y.min():.4f},{y.max():.4f}]")


for idx in MODEL_INDICES:
    # Sorted so the chosen file is deterministic if several names match.
    matches = sorted(models_dir.glob(f"model_{idx:02d}_*"))
    if not matches:
        # Report the missing model instead of failing with a bare IndexError.
        print(f"\nmodel_{idx:02d}: no file matching 'model_{idx:02d}_*' "
              f"in {models_dir} (skipped)")
        continue
    model = onnx.load(str(matches[0]))

    print(f"\n{'='*70}")
    print(f"FULL GRAPH: model_{idx:02d}")
    print(f"{'='*70}")

    # All initializers (weights)
    print_initializers(model.graph)

    # All nodes
    print_nodes(model.graph)

    # Analyze feature/config blob
    for init in model.graph.initializer:
        if "config" in init.name.lower():
            analyze_config_blob(bytes(initializer_payload(init)))