|
|
"""Understand what the 21-dim input features are for LM models 11-32.

These models take data[1,21,1,1] → softmax[1,2] (binary classifier).

We need to figure out what 21 features to compute from the recognizer output.
"""
|
|
import onnx |
|
|
from onnx import numpy_helper |
|
|
import numpy as np |
|
|
from pathlib import Path |
|
|
import onnxruntime as ort |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Directory the two language-model sessions below are loaded from.
models_dir = Path("oneocr_extracted/onnx_models_unlocked")


def _load_session(directory: Path, pattern: str) -> "ort.InferenceSession":
    """Open an ONNX Runtime session for the first file matching *pattern*.

    Matches are sorted so the choice is deterministic when several files
    share the prefix (``Path.glob`` yields entries in arbitrary order).

    Raises:
        FileNotFoundError: if nothing in *directory* matches *pattern*.
            This replaces the uninformative ``IndexError`` that indexing an
            empty glob result would produce.
    """
    matches = sorted(directory.glob(pattern))
    if not matches:
        raise FileNotFoundError(f"no file matching {pattern!r} in {directory}")
    return ort.InferenceSession(str(matches[0]))


def _describe(values) -> list:
    """Return ``(name, shape, type)`` for each session input/output entry."""
    return [(v.name, v.shape, v.type) for v in values]


# "LangSm" / "LangMd" are the labels this script prints for model_11 / model_22.
sess_sm = _load_session(models_dir, "model_11_*")
sess_md = _load_session(models_dir, "model_22_*")

# Dump the declared I/O signature of both models for comparison.
print("LangSm (model_11) inputs:", _describe(sess_sm.get_inputs()))
print("LangSm (model_11) outputs:", _describe(sess_sm.get_outputs()))
print()
print("LangMd (model_22) inputs:", _describe(sess_md.get_inputs()))
print("LangMd (model_22) outputs:", _describe(sess_md.get_outputs()))
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Load the model_11 graph itself (not an inference session) so its Constant
# nodes can be inspected.
# NOTE(review): this reads from "oneocr_extracted/onnx_models", whereas the
# inference sessions are created from "onnx_models_unlocked" — confirm that
# the difference is intentional.
model_11_matches = sorted(Path("oneocr_extracted/onnx_models").glob("model_11_*"))
if not model_11_matches:
    # Fail with a clear message instead of an IndexError on an empty list.
    raise FileNotFoundError(
        "no file matching 'model_11_*' in oneocr_extracted/onnx_models"
    )
model_11 = onnx.load(str(model_11_matches[0]))

# Walk the graph and print the tensors held by the Constant nodes whose
# output is named '26' or '28'. The labels encode this script's reading of
# them: '26' as the operand of an Add (i.e. -mean) and '28' as the operand of
# a Div (i.e. std).
for node in model_11.graph.node:
    if node.op_type != "Constant":
        continue
    name = node.output[0]
    if name not in ('26', '28'):
        continue
    for attr in node.attribute:
        # Only tensor-valued attributes carry data in `attr.t`
        # (AttributeProto.TENSOR is the enum value 4).
        if attr.type != onnx.AttributeProto.TENSOR:
            continue
        data = numpy_helper.to_array(attr.t)
        label = "Add (=-mean)" if name == '26' else "Div (=std)"
        print(f"\n{label}: {data.flatten()}")

        if name == '26':
            # Negate the additive constant to recover the means it implies.
            means = -data.flatten()
            print(f" Implied means: {means}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Locate and open the recognizer (model_02). Sorting makes the pick
# deterministic, and an explicit error replaces the IndexError that an empty
# glob result would otherwise cause.
rec_matches = sorted(Path("oneocr_extracted/onnx_models").glob("model_02_*"))
if not rec_matches:
    raise FileNotFoundError(
        "no file matching 'model_02_*' in oneocr_extracted/onnx_models"
    )
rec_path = rec_matches[0]
rec_sess = ort.InferenceSession(str(rec_path))
print("\nRecognizer (model_02) outputs:")
for o in rec_sess.get_outputs():
    print(f" {o.name}: {o.shape}")

# Run the recognizer once on small-amplitude random noise (unseeded, so the
# printed numbers vary between runs) to see what its output looks like.
test_data = np.random.randn(1, 3, 60, 200).astype(np.float32) * 0.1
seq_lengths = np.array([50], dtype=np.int32)
result = rec_sess.run(None, {"data": test_data, "seq_lengths": seq_lengths})
logprobs = result[0]
print(f"\nRecognizer output: {logprobs.shape}")
print(f" Log-prob range: [{logprobs.min():.4f}, {logprobs.max():.4f}]")

# Take index 0 along axis 1.
# NOTE(review): assumes the output is laid out as (frames, batch, classes)
# and holds log-probabilities — confirm against the model.
lp = logprobs[:, 0, :]
frame_max_lp = lp.max(axis=-1)  # computed once, reused by both statistics
best_probs = np.exp(frame_max_lp)
mean_best = best_probs.mean()
print(f"\n Mean best prob per frame: {mean_best:.4f}")
print(f" Mean log-prob max: {frame_max_lp.mean():.4f}")

# Entropy -sum(p * log p). A class with log p == -inf has p == 0 and should
# contribute 0, but 0 * -inf evaluates to NaN, so zero those log-probs first.
finite_lp = np.where(np.isneginf(lp), 0.0, lp)
frame_entropy = -(np.exp(lp) * finite_lp).sum(axis=-1)
print(f" Entropy per frame: {frame_entropy.mean():.4f}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
print("\n--- Testing LM models with various inputs ---")

# Hand-written 21-value probe vectors, each paired with the label printed
# next to the model outputs.
lm_probe_cases = [
    ("all_zeros", np.zeros(21)),
    (
        "high_conf",
        np.array([
            0.0, 0.5, 0.9, 0.9, 0.9, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5,
            0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 0.01, 1.0,
        ]),
    ),
    (
        "low_conf",
        np.array([
            3.0, -0.5, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1, 0.1,
            0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.1,
        ]),
    ),
    (
        "typical",
        np.array([
            1.2, -0.4, 0.1, 0.15, 0.08, 0.35, 0.47, 0.43, 0.35, 0.58, 0.31,
            0.04, 0.05, 0.03, 0.03, 0.02, 0.04, 0.04, 0.03, 0.03, 0.7,
        ]),
    ),
]

for name, features in lm_probe_cases:
    # Both models are fed the same float32 tensor of shape (1, 21, 1, 1)
    # under the input name "data".
    data = np.asarray(features, dtype=np.float32).reshape(1, 21, 1, 1)
    feed = {"data": data}
    sm_out = sess_sm.run(None, feed)[0]
    md_out = sess_md.run(None, feed)[0]
    print(f" {name:12s}: LangSm={sm_out.flatten()}, LangMd={md_out.flatten()}")
|
|
|