File size: 4,748 Bytes
ce847d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
"""Validate the cracked OneOCRFeatureExtract structure by replacing the op with standard Gemm."""
import onnx
from onnx import numpy_helper, TensorProto
import numpy as np
from pathlib import Path
import onnxruntime as ort

# Directory holding the ONNX models extracted from the OneOCR bundle.
models_dir = Path("oneocr_extracted/onnx_models")

# Load model_11. sorted() makes the selection deterministic — Path.glob
# yields files in filesystem-dependent order — and we fail with a clear
# message instead of a bare IndexError when nothing matches.
matches = sorted(models_dir.glob("model_11_*"))
if not matches:
    raise FileNotFoundError(f"no model_11_* file found in {models_dir}")
model_path = matches[0]
model = onnx.load(str(model_path))

# Extract the opaque "feature/config" blob (stored as a string tensor).
# The payload is a flat array of big-endian float32 values.
blob = None
for init in model.graph.initializer:
    if init.name == "feature/config":
        blob = bytes(init.string_data[0])
        break
# Guard: without this, a missing initializer surfaces later as a
# confusing NameError on `blob`.
if blob is None:
    raise RuntimeError("initializer 'feature/config' not found in model graph")

be_arr = np.frombuffer(blob, dtype='>f4').copy()  # big-endian → native byte order
print(f"Config blob: {len(be_arr)} floats total")
print(f"First 30 values: {be_arr[:30]}")

# The custom OneOCRFeatureExtract op maps a 21-dim input to a 50-dim
# output, so the blob plausibly encodes a 21→50 affine transform.

# Hypothesis 1: no header — blob is W[21×50] ++ b[50] ++ leftover.
# 21*50 = 1050 weights, +50 bias = 1100, leaving 23 trailing floats.
n_w = 21 * 50
W_0 = be_arr[:n_w].reshape(21, 50)
b_0 = be_arr[n_w:n_w + 50]
tail = be_arr[n_w + 50:]
print(f"\n--- No header: W[21×50] + b[50] + tail[{len(tail)}] ---")
print(f"  W: range=[{W_0.min():.4f}, {W_0.max():.4f}], mean={W_0.mean():.4f}, std={W_0.std():.4f}")
print(f"  b: range=[{b_0.min():.4f}, {b_0.max():.4f}], mean={b_0.mean():.4f}")
print(f"  tail: {tail}")

# Hypothesis 2: same layout but the matrix stored transposed,
# i.e. W[50×21] row-major (output-major ordering), bias unchanged.
W_t = be_arr[:1050].reshape(50, 21)
b_t = be_arr[1050:1100]
print(f"\n--- No header: W[50×21] + b[50] + tail[{len(tail)}] ---")
w_lo, w_hi = W_t.min(), W_t.max()
print(f"  W: range=[{w_lo:.4f}, {w_hi:.4f}], mean={W_t.mean():.4f}, std={W_t.std():.4f}")
print(f"  b: range=[{b_t.min():.4f}, {b_t.max():.4f}], mean={b_t.mean():.4f}")

# Hypothesis 3: the 23 extra floats are a *header*, followed by
# W[21×50] ++ b[50].
header, rest = be_arr[:23], be_arr[23:]
W_h = rest[:1050].reshape(21, 50)
b_h = rest[1050:]
print(f"\n--- Header=23: W[21×50] + b[50] ---")
print(f"  Header: {header}")
print(f"  W: range=[{W_h.min():.4f}, {W_h.max():.4f}], mean={W_h.mean():.4f}, std={W_h.std():.4f}")
print(f"  b: range=[{b_h.min():.4f}, {b_h.max():.4f}], mean={b_h.mean():.4f}")

# Report outliers: entries with |value| > 10 stand out among otherwise
# small weight-like values and may mark embedded metadata.
# Vectorized with flatnonzero instead of a Python-level enumerate loop.
print(f"\n--- Values > 10 ---")
for i in np.flatnonzero(np.abs(be_arr) > 10):
    print(f"  [{i}] = {be_arr[i]}")

# Check if tail/header might be something meaningful
# 23 values: could be normalization params (21 dim + 2 extras?)
# Or dimensions metadata

# Now try to build a replacement model.
# The original graph:
# data[1,21,1,1] → Reshape → Slice[0:21] → Add(offset) → Div(scale) → OneOCRFeatureExtract → [50]
# → Gemm(50,50) → Relu → Gemm(50,50) → Relu → Gemm(50,2) → Softmax

# We'll replace OneOCRFeatureExtract with a standard Gemm and try all
# three weight interpretations.

# Pull the normalization constants out of the Constant nodes feeding the
# Add/Div preprocessing (their outputs are named '26' and '28').
add_const = None
div_const = None
for node in model.graph.node:
    if node.op_type != "Constant":
        continue
    name = node.output[0]
    for attr in node.attribute:
        # Symbolic enum instead of the magic number 4.
        if attr.type == onnx.AttributeProto.TENSOR:
            data = numpy_helper.to_array(attr.t)
            if name == '26':    # Add constant (offset)
                add_const = data
            elif name == '28':  # Div constant (scale)
                div_const = data

# Guard: the unconditional .shape access below would otherwise raise a
# bare AttributeError on None if either node is absent.
if add_const is None or div_const is None:
    raise RuntimeError("normalization Constant nodes '26'/'28' not found in graph")
print(f"\nNormalization: add={add_const.shape}, div={div_const.shape}")

# Drive the reconstructed pipeline with a random probe input shaped
# like the graph's declared input: [1, 21, 1, 1].
test_input = np.random.randn(1, 21, 1, 1).astype(np.float32)

# Replay the graph's preprocessing: Reshape → Slice[0:21] → Add → Div.
x = test_input.reshape(1, 21)[:, :21]
x = (x + add_const) / div_const

# Load the downstream MLP weights ONCE — they are identical for every
# candidate interpretation, so the lookup is hoisted out of the loop
# (the original re-scanned all initializers on each iteration).
wanted = ("learned_2", "learned_3",
          "1.layers.5.weight", "1.layers.5.bias",
          "1.layers.7.weight", "1.layers.7.bias")
mlp = {init.name: numpy_helper.to_array(init)
       for init in model.graph.initializer if init.name in wanted}
W2, b2 = mlp["learned_2"], mlp["learned_3"]
W5, b5 = mlp["1.layers.5.weight"], mlp["1.layers.5.bias"]
W7, b7 = mlp["1.layers.7.weight"], mlp["1.layers.7.bias"]

# Apply each candidate feature-extraction layout, then run the rest of
# the network (Gemm/Relu ×2, Gemm, Softmax) in numpy.
for name, W, b in [("no_header_21x50", W_0, b_0),
                     ("no_header_50x21_T", W_t.T, b_t),
                     ("header23_21x50", W_h, b_h)]:
    feat = x @ W + b  # candidate OneOCRFeatureExtract output, shape [1, 50]
    print(f"\n{name}:")
    print(f"  Feature output: range=[{feat.min():.4f}, {feat.max():.4f}], mean={feat.mean():.4f}")

    h1 = np.maximum(0, feat @ W2.T + b2)  # Gemm + Relu
    h2 = np.maximum(0, h1 @ W5.T + b5)    # Gemm + Relu
    logits = h2 @ W7.T + b7               # Gemm
    # Numerically stable softmax: subtract the row max before exp so
    # large logits cannot overflow to inf (the original exp'd raw logits).
    z = np.exp(logits - logits.max(axis=1, keepdims=True))
    probs = z / z.sum(axis=1, keepdims=True)
    print(f"  Final softmax: {probs}")