|
|
"""Test big-endian float32 interpretation of OneOCRFeatureExtract config blob.""" |
|
|
import onnx |
|
|
import numpy as np |
|
|
from pathlib import Path |
|
|
|
|
|
# Locate the extracted ONNX model whose file name starts with "model_11_".
models_dir = Path("oneocr_extracted/onnx_models")

# Sort the matches so the same file is chosen on every run (glob order depends
# on the filesystem), and fail with a readable message when nothing matches
# instead of an IndexError on an empty list.
model_paths = sorted(models_dir.glob("model_11_*"))
if not model_paths:
    raise FileNotFoundError(
        f"No file matching 'model_11_*' found in {models_dir}"
    )
model = onnx.load(str(model_paths[0]))

# Pull the raw bytes of the "feature/config" initializer out of the graph.
# The for/else makes a missing initializer an explicit error rather than a
# NameError on `blob` further down the script.
for init in model.graph.initializer:
    if init.name == "feature/config":
        if not init.string_data:
            raise ValueError(
                "Initializer 'feature/config' has no string_data entries"
            )
        blob = bytes(init.string_data[0])
        break
else:
    raise LookupError(
        f"Initializer 'feature/config' not found in {model_paths[0]}"
    )

print(f"Blob: {len(blob)} bytes = {len(blob) // 4} float32s")
|
|
|
|
|
|
|
|
def _pct(count, total):
    """Return *count* as a percentage of *total*, or 0.0 when *total* is 0."""
    return 100 * count / total if total else 0.0


# Decode the same bytes as float32 in both byte orders.  Passing an explicit
# count drops any trailing bytes that do not form a whole float32; without it
# np.frombuffer raises ValueError when len(blob) is not a multiple of 4.
n_floats = len(blob) // 4
be_arr = np.frombuffer(blob, dtype='>f4', count=n_floats)
le_arr = np.frombuffer(blob, dtype='<f4', count=n_floats)

# --- Big-endian interpretation ---
be_finite_mask = np.isfinite(be_arr)
be_finite = be_arr[be_finite_mask]

print("\nBig-endian float32:")
print(f" Finite: {be_finite_mask.sum()} / {len(be_arr)}")
in_range = np.sum(np.abs(be_finite) < 10)
print(f" In [-10,10]: {in_range} ({_pct(in_range, len(be_arr)):.1f}%)")
if be_finite.size:
    print(f" Mean: {be_finite.mean():.4f}, Std: {be_finite.std():.4f}")
    print(f" Range: [{be_finite.min():.4f}, {be_finite.max():.4f}]")
else:
    # min()/max() raise on an empty array, so report the absence instead.
    print(" Mean/Std/Range: n/a (no finite values)")
print(f" First 20: {be_arr[:20]}")

# --- Little-endian interpretation (for comparison) ---
le_finite_mask = np.isfinite(le_arr)

print("\nLittle-endian float32:")
print(f" Finite: {le_finite_mask.sum()} / {len(le_arr)}")
in_range_le = np.sum(np.abs(le_arr[le_finite_mask]) < 10)
print(f" In [-10,10]: {in_range_le} ({_pct(in_range_le, len(le_arr)):.1f}%)")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Brute-force search for a layout of the form
#   [header floats] + W (in_d x out_d) + b (out_d)
# that accounts for every float in be_arr.  A candidate is reported only when
# more than 70% of its weight entries are finite with magnitude below 10.
print("\n--- Dimension search for big-endian ---")

total_floats = len(be_arr)
for header in range(40):
    payload = total_floats - header
    for in_d in (20, 21, 22):
        for out_d in (48, 49, 50, 51, 52):
            n_weights = in_d * out_d

            # The weights plus one bias per output must fill the payload exactly.
            if payload != n_weights + out_d:
                continue

            weights_end = header + n_weights
            weights = be_arr[header:weights_end].reshape(in_d, out_d)
            bias = be_arr[weights_end:]

            # Compute the finite mask once and reuse it for every statistic.
            finite_mask = np.isfinite(weights)
            finite_weights = weights[finite_mask]
            w_finite = finite_mask.sum()
            w_reasonable = np.sum(np.abs(finite_weights) < 10)

            if w_reasonable <= n_weights * 0.7:
                continue

            finite_bias = bias[np.isfinite(bias)]
            print(f" *** header={header} + W[{in_d}×{out_d}] + b[{out_d}]")
            print(f" W finite={w_finite}, reasonable={w_reasonable}")
            print(f" W range: [{finite_weights.min():.4f}, {finite_weights.max():.4f}]")
            print(f" b range: [{finite_bias.min():.4f}, {finite_bias.max():.4f}]")
|
|
|
|
|
|
|
|
|
|
|
# Slide a 21-float (84-byte) window over the first bytes of the blob, in
# 4-byte steps, looking for the first offset at which the big-endian float32
# values look like plausible weights (finite, magnitude below 10).
print("\n--- Finding good float32 regions (big-endian) ---")

window_floats = 21
window_bytes = window_floats * 4

# Only visit offsets where a complete window fits inside the blob.  A window
# running past the end would be silently truncated: an empty one would be
# reported as "ALL 21 values reasonable", and a ragged tail would make
# np.frombuffer raise ValueError.
scan_stop = min(100, len(blob) - window_bytes + 1)

for start_byte in range(0, scan_stop, 4):
    # Read the window in place rather than copying a slice of the blob.
    chunk = np.frombuffer(
        blob, dtype='>f4', count=window_floats, offset=start_byte
    )
    reasonable = np.isfinite(chunk) & (np.abs(chunk) < 10)
    decent = int(reasonable.sum())

    if decent == window_floats:
        print(f" offset={start_byte}: ALL {window_floats} values reasonable: {chunk}")
        break

    if decent >= 18:
        print(f" offset={start_byte}: {decent}/{window_floats} reasonable: {chunk}")
|
|
|