|
|
"""Unlock model_33 (LineLayout) — replace OneOCRFeatureExtract with Conv 1x1. |
|
|
|
|
|
Model 33 architecture:
    data[1,3,60,W] → CNN(conv1-conv5, 256ch) → OneOCRFeatureExtract(256→16ch)
    → Transpose → Reshape → MLP(64→32→2) → line_layout predictions
|
|
|
|
|
The config blob (16548 bytes = 4137 big-endian float32 values) is laid out as:
    W[256×16]    = 4096 floats (order is either [in_ch, out_ch] or [out_ch, in_ch];
                   both are tested below, the final model assumes [in_ch, out_ch])
    b[16]        = 16 floats
    metadata[25] = the remaining 25 floats
|
|
""" |
|
|
import onnx |
|
|
from onnx import numpy_helper, helper, TensorProto |
|
|
import numpy as np |
|
|
from pathlib import Path |
|
|
import copy |
|
|
import onnxruntime as ort |
|
|
|
|
|
# Locations of the extracted (locked) models and of the unlocked output.
models_dir = Path("oneocr_extracted/onnx_models")
output_dir = Path("oneocr_extracted/onnx_models_unlocked")

# glob() order is filesystem-dependent, so sort for a deterministic pick and
# fail with a clear message instead of a bare IndexError when nothing matches.
candidates = sorted(models_dir.glob("model_33_*"))
if not candidates:
    raise FileNotFoundError(
        f"No file matching 'model_33_*' found in {models_dir.resolve()}"
    )
model_path = candidates[0]

# parents=True so the script also works when the output tree is missing.
output_dir.mkdir(parents=True, exist_ok=True)

model = onnx.load(str(model_path))
|
|
|
|
|
|
|
|
# Pull the raw bytes of the "feature/config" initializer. The payload is read
# from string_data when that field is populated, otherwise from raw_data.
for init in model.graph.initializer:
    if init.name == "feature/config":
        blob = bytes(init.string_data[0] if init.string_data else init.raw_data)
        break
else:
    # Without this guard a missing initializer would surface later as a
    # confusing NameError on `blob`.
    raise ValueError(
        f"Initializer 'feature/config' not found in {model_path}"
    )

# Interpret the blob as big-endian float32; copy() detaches the array from the
# read-only bytes buffer.
be_arr = np.frombuffer(blob, dtype=">f4").copy()
print(f"Config blob: {len(be_arr)} big-endian float32 values")
|
|
|
|
|
|
|
|
|
|
|
# Channel counts of the projection being reconstructed (256 in, 16 out).
in_ch = 256
out_ch = 16

# The blob is consumed front to back as weights, then bias, then whatever is
# left over; name the two boundaries once instead of repeating the arithmetic.
weights_end = in_ch * out_ch
bias_end = weights_end + out_ch

W_flat = be_arr[:weights_end]
b = be_arr[weights_end:bias_end]
metadata = be_arr[bias_end:]

# Quick sanity dump of what was sliced out.
print(f"W: {in_ch*out_ch} floats, range=[{W_flat.min():.4f}, {W_flat.max():.4f}]")
print(f"b: {out_ch} floats, range=[{b.min():.4f}, {b.max():.4f}]")
print(f"Metadata: {len(metadata)} floats")
print(f"Metadata: {metadata}")

# Call out metadata entries whose magnitude exceeds 10.
for i, v in enumerate(metadata):
    if abs(v) > 10:
        print(f" metadata[{i}] = {v}")
|
|
|
|
|
|
|
|
|
|
|
# Try both plausible memory layouts of the weight matrix. Each candidate
# carries its own file tag: deriving the file name from the label (text before
# the first "[") yielded "W" for both, so the second model overwrote the first.
for arrangement_name, file_tag, W_conv in [
    (
        "W[out,in]=[16,256]",
        "W_out_in",
        W_flat.reshape(out_ch, in_ch, 1, 1),
    ),
    (
        "W[in,out]=[256,16]→T",
        "W_in_out_T",
        W_flat.reshape(in_ch, out_ch).T.reshape(out_ch, in_ch, 1, 1),
    ),
]:
    print(f"\n--- Testing {arrangement_name} ---")

    # Work on a copy so the loaded model stays pristine for the next candidate.
    new_model = copy.deepcopy(model)

    # Swap the opaque config blob for ordinary Conv weight/bias initializers.
    new_inits = [
        init for init in new_model.graph.initializer
        if init.name != "feature/config"
    ]
    new_inits.append(
        numpy_helper.from_array(W_conv.astype(np.float32), name="fe_conv_weight")
    )
    new_inits.append(
        numpy_helper.from_array(b.astype(np.float32), name="fe_conv_bias")
    )
    del new_model.graph.initializer[:]
    new_model.graph.initializer.extend(new_inits)

    # Locate the custom op; fail loudly rather than with an AttributeError on
    # None if the model does not contain it.
    fe_node = next(
        (n for n in new_model.graph.node if n.op_type == "OneOCRFeatureExtract"),
        None,
    )
    if fe_node is None:
        raise RuntimeError(
            "No OneOCRFeatureExtract node found in the model; nothing to replace"
        )
    fe_input = fe_node.input[0]
    fe_output = fe_node.output[0]

    # A 1x1 Conv wired to the same input/output tensors as the custom op.
    conv_node = helper.make_node(
        "Conv",
        inputs=[fe_input, "fe_conv_weight", "fe_conv_bias"],
        outputs=[fe_output],
        kernel_shape=[1, 1],
        strides=[1, 1],
        pads=[0, 0, 0, 0],
    )
    new_nodes = [
        conv_node if node.op_type == "OneOCRFeatureExtract" else node
        for node in new_model.graph.node
    ]
    del new_model.graph.node[:]
    new_model.graph.node.extend(new_nodes)

    # The config blob must no longer be listed as a graph input.
    new_inputs = [
        inp for inp in new_model.graph.input if inp.name != "feature/config"
    ]
    del new_model.graph.input[:]
    new_model.graph.input.extend(new_inputs)

    # Drop the custom-op domain from the opset imports.
    new_opsets = [
        op for op in new_model.opset_import
        if op.domain != "com.microsoft.oneocr"
    ]
    del new_model.opset_import[:]
    new_model.opset_import.extend(new_opsets)

    out_name = f"model_33_{file_tag}.onnx"
    out_path = output_dir / out_name
    onnx.save(new_model, str(out_path))

    # Smoke test: the model must load and run at two different input widths.
    # Failures are reported but do not abort, so the other layout still runs.
    try:
        sess = ort.InferenceSession(str(out_path))

        test_data = np.random.randn(1, 3, 60, 100).astype(np.float32) * 0.1

        outputs = sess.run(None, {"data": test_data})
        output_names = [o.name for o in sess.get_outputs()]

        for name, out in zip(output_names, outputs):
            print(f" {name}: shape={out.shape}, range=[{out.min():.4f}, {out.max():.4f}]")

        test_data2 = np.random.randn(1, 3, 60, 200).astype(np.float32) * 0.1
        outputs2 = sess.run(None, {"data": test_data2})
        for name, out in zip(output_names, outputs2):
            print(f" (w=200) {name}: shape={out.shape}")

    except Exception as e:
        print(f" FAILED: {e}")
        import traceback

        traceback.print_exc()
|
|
|
|
|
|
|
|
print("\n--- Saving official unlocked model ---")

# The official model treats the blob as [in_ch, out_ch] and transposes it into
# the [out_ch, in_ch, 1, 1] shape used for the Conv weight.
W_conv = W_flat.reshape(in_ch, out_ch).T.reshape(out_ch, in_ch, 1, 1)
new_model = copy.deepcopy(model)

# Swap the opaque config blob for ordinary Conv weight/bias initializers.
new_inits = [
    init for init in new_model.graph.initializer
    if init.name != "feature/config"
]
new_inits.append(
    numpy_helper.from_array(W_conv.astype(np.float32), name="fe_conv_weight")
)
new_inits.append(
    numpy_helper.from_array(b.astype(np.float32), name="fe_conv_bias")
)
del new_model.graph.initializer[:]
new_model.graph.initializer.extend(new_inits)

# Replace the custom op with a 1x1 Conv. Tensor names are read from the node
# itself rather than hard-coded, so the graph stays connected whatever the
# surrounding tensors are called.
new_nodes = []
replaced = 0
for node in new_model.graph.node:
    if node.op_type == "OneOCRFeatureExtract":
        conv_node = helper.make_node(
            "Conv",
            inputs=[node.input[0], "fe_conv_weight", "fe_conv_bias"],
            outputs=[node.output[0]],
            kernel_shape=[1, 1],
            strides=[1, 1],
            pads=[0, 0, 0, 0],
        )
        new_nodes.append(conv_node)
        replaced += 1
    else:
        new_nodes.append(node)
if not replaced:
    # Saving an "unlocked" model that still contains the custom op would be
    # misleading, so stop here.
    raise RuntimeError(
        "No OneOCRFeatureExtract node found in the model; nothing to replace"
    )
del new_model.graph.node[:]
new_model.graph.node.extend(new_nodes)

# The config blob must no longer be listed as a graph input.
new_inputs = [
    inp for inp in new_model.graph.input if inp.name != "feature/config"
]
del new_model.graph.input[:]
new_model.graph.input.extend(new_inputs)

# Drop the custom-op domain from the opset imports.
new_opsets = [
    op for op in new_model.opset_import
    if op.domain != "com.microsoft.oneocr"
]
del new_model.opset_import[:]
new_model.opset_import.extend(new_opsets)

final_path = output_dir / "model_33_ir7_unknown_857KB_unlocked.onnx"
onnx.save(new_model, str(final_path))
print(f"Saved: {final_path}")

# Best-effort verification: load the saved model and run one random input.
try:
    sess = ort.InferenceSession(str(final_path))
    test = np.random.randn(1, 3, 60, 100).astype(np.float32) * 0.1
    outs = sess.run(None, {"data": test})
    for name, out in zip([o.name for o in sess.get_outputs()], outs):
        print(f" ✓ {name}: {out.shape}")
except Exception as e:
    print(f" ✗ {e}")
|
|
|