|
|
"""Deep-dive into model_11 and model_22 graph structure — handle binary config.""" |
|
|
import onnx |
|
|
import numpy as np |
|
|
from pathlib import Path |
|
|
|
|
|
models_dir = Path("oneocr_extracted/onnx_models")

# Byte offsets at which a float32 payload is tried (header size is unknown,
# so every 4-byte-aligned start within the first 96 bytes is probed).
_FLOAT_SCAN_OFFSETS = range(0, 96, 4)
# Number of leading bytes decoded as little-endian uint32 words for display.
_HEADER_BYTES = 96
# Candidate layout tried on each plausible float payload: a weight matrix of
# _FC_IN x _FC_OUT values followed by _FC_OUT bias values.
_FC_IN, _FC_OUT = 21, 50
# Fraction of values that must lie strictly inside (-10, 10) for an offset to
# be reported as a plausible float payload.
_REASONABLE_FRACTION = 0.7


def _print_initializers(graph):
    """Print one summary line per initializer of *graph*.

    STRING initializers are reported by payload size only; every other
    initializer is converted to a NumPy array and reported with its shape,
    dtype and value range.
    """
    print(f"\n Initializers ({len(graph.initializer)}):")
    for init in graph.initializer:
        if init.data_type == onnx.TensorProto.STRING:
            raw = init.string_data[0] if init.string_data else init.raw_data
            print(f" {init.name}: STRING, {len(raw)} bytes (binary)")
            continue

        data = onnx.numpy_helper.to_array(init)
        if data.size == 0:
            # min()/max() raise ValueError on zero-size arrays.
            print(f" {init.name}: shape={data.shape}, dtype={data.dtype}, empty")
            continue
        print(f" {init.name}: shape={data.shape}, dtype={data.dtype}, "
              f"range=[{data.min():.4f}, {data.max():.4f}]")


def _print_nodes(graph):
    """Print every node of *graph* with its int, float and int-list attributes.

    Attributes of any other type are not printed.
    """
    print(f"\n Nodes ({len(graph.node)}):")
    for i, node in enumerate(graph.node):
        domain_str = f" [{node.domain}]" if node.domain else ""
        print(f" [{i}] {node.op_type}{domain_str}: {list(node.input)} → {list(node.output)}")
        for attr in node.attribute:
            if attr.type == onnx.AttributeProto.INT:
                print(f" {attr.name} = {attr.i}")
            elif attr.type == onnx.AttributeProto.FLOAT:
                print(f" {attr.name} = {attr.f}")
            elif attr.type == onnx.AttributeProto.INTS:
                print(f" {attr.name} = {list(attr.ints)}")


def _analyze_config_blob(blob):
    """Print a heuristic analysis of the binary config payload *blob* (bytes).

    Dumps the size, a hex prefix and the leading little-endian uint32 words,
    then reinterprets the data as little-endian float32 starting at each
    probe offset and reports the offsets where more than 70% of the values
    have magnitude below 10. For those offsets it also tries the
    weights-then-bias layout described by _FC_IN / _FC_OUT.
    """
    print("\n ── feature/config analysis ──")
    print(f" Total bytes: {len(blob)}")
    print(f" First 32 bytes hex: {blob[:32].hex()}")

    # Decode whole 4-byte words only; a trailing partial word is not a uint32.
    header_end = min(_HEADER_BYTES, len(blob) - len(blob) % 4)
    header_u32 = [int.from_bytes(blob[i:i + 4], "little")
                  for i in range(0, header_end, 4)]
    print(f" First {len(header_u32)} uint32 LE values: {header_u32}")

    n_weights = _FC_IN * _FC_OUT
    for offset in _FLOAT_SCAN_OFFSETS:
        n_floats = (len(blob) - offset) // 4
        if n_floats <= 0:
            # Also covers offsets past the end of a short blob, where the
            # floor division yields a negative count.
            continue

        # Explicit little-endian view, matching the uint32 header decoding;
        # count/offset avoid copying the slice.
        arr = np.frombuffer(blob, dtype=np.dtype("<f4"), count=n_floats, offset=offset)
        finite = np.isfinite(arr)
        valid = finite.sum()
        reasonable = np.sum((np.abs(arr) < 10) & finite)
        if reasonable <= n_floats * _REASONABLE_FRACTION:
            continue

        print(f" *** offset={offset}: {n_floats} floats, {valid} finite, "
              f"{reasonable} in [-10,10] ({100*reasonable/n_floats:.0f}%)")
        print(f" First 10: {arr[:10]}")
        print(f" Stats: mean={arr.mean():.4f}, std={arr.std():.4f}")

        if n_floats >= n_weights + _FC_OUT:
            W = arr[:n_weights].reshape(_FC_IN, _FC_OUT)
            b = arr[n_weights:n_weights + _FC_OUT]
            print(f" As {_FC_IN}×{_FC_OUT} weight: W_range=[{W.min():.4f},{W.max():.4f}], "
                  f"b_range=[{b.min():.4f},{b.max():.4f}]")

            # Smoke-test the candidate layout with a random input vector.
            x = np.random.randn(1, _FC_IN).astype(np.float32)
            y = x @ W + b
            print(f" Test: input({_FC_IN}) → output({_FC_OUT}), "
                  f"y_range=[{y.min():.4f},{y.max():.4f}]")


for idx in [11, 22]:
    pattern = f"model_{idx:02d}_*"
    # Sorted so the chosen file is deterministic when several match.
    matches = sorted(models_dir.glob(pattern))
    if not matches:
        print(f"\nmodel_{idx:02d}: no file matching {pattern!r} in {models_dir} — skipping")
        continue
    model = onnx.load(str(matches[0]))

    print(f"\n{'='*70}")
    print(f"FULL GRAPH: model_{idx:02d}")
    print(f"{'='*70}")

    _print_initializers(model.graph)
    _print_nodes(model.graph)

    # Any initializer whose name mentions "config" is treated as the binary blob.
    for init in model.graph.initializer:
        if "config" in init.name.lower():
            raw = init.string_data[0] if init.string_data else init.raw_data
            _analyze_config_blob(bytes(raw))
|
|
|