File size: 6,716 Bytes
ce847d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
"""Unlock model_33 (LineLayout) — replace OneOCRFeatureExtract with Conv 1x1.

Model 33 architecture:
  data[1,3,60,W] → CNN(conv1-conv5, 256ch) → OneOCRFeatureExtract(256→16ch) 
  → Transpose → Reshape → MLP(64→32→2) → line_layout predictions

The config blob (16548 bytes) is decoded as big-endian float32, i.e.
16548 / 4 = 4137 values = 4096 + 16 + 25:
  W[256×16] = 4096 floats (stored as [in_ch, out_ch] or [out_ch, in_ch])
  b[16] = 16 floats
  metadata[25] = remaining floats
"""
import onnx
from onnx import numpy_helper, helper, TensorProto
import numpy as np
from pathlib import Path
import copy
import onnxruntime as ort

# Where the extracted ONNX models live and where the rewritten copies are written.
models_dir = Path("oneocr_extracted/onnx_models")
output_dir = Path("oneocr_extracted/onnx_models_unlocked")
output_dir.mkdir(exist_ok=True)

# Locate the model_33 file. Sorting makes the pick deterministic if more than
# one file matches, and an empty match is reported explicitly instead of
# surfacing as a bare IndexError.
candidates = sorted(models_dir.glob("model_33_*"))
if not candidates:
    raise FileNotFoundError(f"No model_33_* file found in {models_dir}")
model_path = candidates[0]
model = onnx.load(str(model_path))

# Extract config blob: the "feature/config" initializer carries the raw bytes
# either as string_data[0] or as raw_data.
blob = None
for init in model.graph.initializer:
    if init.name == "feature/config":
        blob = bytes(init.string_data[0] if init.string_data else init.raw_data)
        break
if blob is None:
    # Without this check the next statement would fail with a NameError on `blob`.
    raise ValueError(f"Initializer 'feature/config' not found in {model_path}")

# Decode as big-endian float32; .copy() detaches the array from the
# read-only bytes buffer.
be_arr = np.frombuffer(blob, dtype='>f4').copy()
print(f"Config blob: {len(be_arr)} big-endian float32 values")

# Split the decoded blob into Conv weights, bias and trailing metadata.
# Assumed layout: the first in_ch * out_ch values are W, the next out_ch
# values are b, and everything after that is metadata.
in_ch = 256
out_ch = 16

w_end = in_ch * out_ch
b_end = w_end + out_ch

W_flat = be_arr[:w_end]
b = be_arr[w_end:b_end]
metadata = be_arr[b_end:]

print(f"W: {w_end} floats, range=[{W_flat.min():.4f}, {W_flat.max():.4f}]")
print(f"b: {out_ch} floats, range=[{b.min():.4f}, {b.max():.4f}]")
print(f"Metadata: {len(metadata)} floats")
print(f"Metadata: {metadata}")

# Report metadata entries whose magnitude exceeds 10 (possible dimension markers).
for idx in np.flatnonzero((metadata > 10) | (metadata < -10)):
    print(f"  metadata[{idx}] = {metadata[idx]}")

# Try both weight arrangements with Conv 1x1
# Conv weight shape in ONNX: [out_ch, in_ch, kH, kW]
#
# Each entry is (log label, file-name tag, candidate weight tensor). The
# explicit tag keeps the two test models in separate files: taking the label
# text before the first "[" yields "W" for both labels, so the second save
# would overwrite the first.
for arrangement_name, file_tag, W_conv in [
    ("W[out,in]=[16,256]", "W_out_in", W_flat.reshape(out_ch, in_ch, 1, 1)),
    (
        "W[in,out]=[256,16]→T",
        "W_in_out_T",
        W_flat.reshape(in_ch, out_ch).T.reshape(out_ch, in_ch, 1, 1),
    ),
]:
    print(f"\n--- Testing {arrangement_name} ---")

    # Work on a copy so the loaded model stays untouched for the next pass.
    new_model = copy.deepcopy(model)

    # Replace initializers: drop the opaque config blob, add Conv weight/bias.
    new_inits = [init for init in new_model.graph.initializer if init.name != "feature/config"]
    new_inits.append(numpy_helper.from_array(W_conv.astype(np.float32), name="fe_conv_weight"))
    new_inits.append(numpy_helper.from_array(b.astype(np.float32), name="fe_conv_bias"))
    del new_model.graph.initializer[:]
    new_model.graph.initializer.extend(new_inits)

    # Replace OneOCRFeatureExtract with Conv, wiring each Conv to the data
    # input and output of the node it replaces.
    new_nodes = []
    replaced = 0
    for node in new_model.graph.node:
        if node.op_type == "OneOCRFeatureExtract":
            conv_node = helper.make_node(
                "Conv",
                inputs=[node.input[0], "fe_conv_weight", "fe_conv_bias"],
                outputs=[node.output[0]],
                kernel_shape=[1, 1],
                strides=[1, 1],
                pads=[0, 0, 0, 0],
            )
            new_nodes.append(conv_node)
            replaced += 1
        else:
            new_nodes.append(node)
    if not replaced:
        # Fail with a clear message rather than an AttributeError on None.
        raise ValueError("No OneOCRFeatureExtract node found in the model graph")
    del new_model.graph.node[:]
    new_model.graph.node.extend(new_nodes)

    # Clean up: the config blob is no longer a graph input, and the custom
    # operator domain is no longer referenced.
    new_inputs = [inp for inp in new_model.graph.input if inp.name != "feature/config"]
    del new_model.graph.input[:]
    new_model.graph.input.extend(new_inputs)

    new_opsets = [op for op in new_model.opset_import if op.domain != "com.microsoft.oneocr"]
    del new_model.opset_import[:]
    new_model.opset_import.extend(new_opsets)

    # Save and test
    out_path = output_dir / f"model_33_{file_tag}.onnx"
    onnx.save(new_model, str(out_path))

    try:
        sess = ort.InferenceSession(str(out_path))
        # Test with 60×100 image (width=100)
        test_data = np.random.randn(1, 3, 60, 100).astype(np.float32) * 0.1

        outputs = sess.run(None, {"data": test_data})
        output_names = [o.name for o in sess.get_outputs()]

        for name, out in zip(output_names, outputs):
            print(f"  {name}: shape={out.shape}, range=[{out.min():.4f}, {out.max():.4f}]")

        # Also test with width=200
        test_data2 = np.random.randn(1, 3, 60, 200).astype(np.float32) * 0.1
        outputs2 = sess.run(None, {"data": test_data2})
        for name, out in zip(output_names, outputs2):
            print(f"  (w=200) {name}: shape={out.shape}")

    except Exception as e:
        # Best-effort smoke test: report the failure and move on to the next
        # arrangement instead of aborting the script.
        print(f"  FAILED: {e}")
        import traceback
        traceback.print_exc()

# Save the best working version as the official unlocked model
print("\n--- Saving official unlocked model ---")
# Use W[in,out]=[256,16]→T arrangement (gives reasonable output ranges)
W_conv = W_flat.reshape(in_ch, out_ch).T.reshape(out_ch, in_ch, 1, 1)
new_model = copy.deepcopy(model)

# Drop the opaque config blob and add explicit Conv weight/bias initializers.
new_inits = [init for init in new_model.graph.initializer if init.name != "feature/config"]
new_inits.append(numpy_helper.from_array(W_conv.astype(np.float32), name="fe_conv_weight"))
new_inits.append(numpy_helper.from_array(b.astype(np.float32), name="fe_conv_bias"))
del new_model.graph.initializer[:]
new_model.graph.initializer.extend(new_inits)

# Replace each OneOCRFeatureExtract node with a 1x1 Conv. Tensor names are
# taken from the node being replaced rather than hard-coded, and strides/pads
# are spelled out to match the variant exercised in the arrangement test.
new_nodes = []
replaced = 0
for node in new_model.graph.node:
    if node.op_type == "OneOCRFeatureExtract":
        conv_node = helper.make_node(
            "Conv",
            inputs=[node.input[0], "fe_conv_weight", "fe_conv_bias"],
            outputs=[node.output[0]],
            kernel_shape=[1, 1],
            strides=[1, 1],
            pads=[0, 0, 0, 0],
        )
        new_nodes.append(conv_node)
        replaced += 1
    else:
        new_nodes.append(node)
if not replaced:
    # Otherwise a model with nothing replaced would be saved as "unlocked".
    raise ValueError("No OneOCRFeatureExtract node found in the model graph")
del new_model.graph.node[:]
new_model.graph.node.extend(new_nodes)

# The config blob is no longer a graph input.
new_inputs = [inp for inp in new_model.graph.input if inp.name != "feature/config"]
del new_model.graph.input[:]
new_model.graph.input.extend(new_inputs)

# The custom operator domain is no longer referenced by any node.
new_opsets = [op for op in new_model.opset_import if op.domain != "com.microsoft.oneocr"]
del new_model.opset_import[:]
new_model.opset_import.extend(new_opsets)

final_path = output_dir / "model_33_ir7_unknown_857KB_unlocked.onnx"
onnx.save(new_model, str(final_path))
print(f"Saved: {final_path}")

# Smoke-test the saved file with a random 60x100 input; failures are reported
# but do not abort, since the file has already been written.
try:
    sess = ort.InferenceSession(str(final_path))
    test = np.random.randn(1, 3, 60, 100).astype(np.float32) * 0.1
    outs = sess.run(None, {"data": test})
    for name, out in zip([o.name for o in sess.get_outputs()], outs):
        print(f"  ✓ {name}: {out.shape}")
except Exception as e:
    print(f"  ✗ {e}")