File size: 3,723 Bytes
ce847d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
"""Deep analysis of model_33 (LineLayout) to understand its OneOCRFeatureExtract usage."""
import onnx
from onnx import numpy_helper
import numpy as np
from pathlib import Path

# Locate and load the model_33 ONNX file from the extraction directory.
models_dir = Path("oneocr_extracted/onnx_models")
# Sort the matches so the chosen file is deterministic when several match,
# and fail with a descriptive error instead of a bare IndexError when none do.
model_candidates = sorted(models_dir.glob("model_33_*"))
if not model_candidates:
    raise FileNotFoundError(
        f"No file matching 'model_33_*' found in {models_dir}"
    )
model_path = model_candidates[0]
model = onnx.load(str(model_path))

# Basic model metadata: file name, IR version and imported opsets.
print(f"Model: {model_path.name}")
print(f"IR version: {model.ir_version}")
print(f"Opsets: {[(o.domain, o.version) for o in model.opset_import]}")

# Dump the declared graph inputs, then the graph outputs. Each dimension is
# shown as its numeric dim_value, falling back to dim_param when that is 0.
for heading, value_infos in (
    ("\nGraph inputs:", model.graph.input),
    ("\nGraph outputs:", model.graph.output),
):
    print(heading)
    for value_info in value_infos:
        dims = value_info.type.tensor_type.shape.dim
        shape = [d.dim_value or d.dim_param for d in dims]
        print(f"  {value_info.name}: {shape}")

# All initializers: string tensors are reported by byte length, everything
# else by shape, dtype and value range.
print(f"\nInitializers ({len(model.graph.initializer)}):")
for init in model.graph.initializer:
    if init.data_type == onnx.TensorProto.STRING:
        # Prefer the first string_data entry; fall back to raw_data when the
        # string_data field is empty.
        raw = bytes(init.string_data[0] if init.string_data else init.raw_data)
        print(f"  {init.name}: STRING, {len(raw)} bytes")
        continue

    data = numpy_helper.to_array(init)
    if data.size == 0:
        # min()/max() raise ValueError on zero-size arrays, so report the
        # tensor without a value range instead of aborting the whole dump.
        print(f"  {init.name}: {data.shape} {data.dtype} [empty]")
    else:
        print(f"  {init.name}: {data.shape} {data.dtype} [{data.min():.4f}, {data.max():.4f}]")

# List graph nodes. Before the first OneOCRFeatureExtract node only the
# first/last few nodes are shown (without attributes); from that node onward
# every node is shown together with its type-1, type-2 and type-7 attributes.
node_total = len(model.graph.node)
print(f"\nNodes ({node_total}):")
fe_found = False
for i, node in enumerate(model.graph.node):
    if node.op_type == "OneOCRFeatureExtract":
        fe_found = True
    near_edge = i < 5 or i > node_total - 5
    if not (fe_found or near_edge):
        continue

    domain_str = f" [{node.domain}]" if node.domain else ""
    print(f"  [{i}] {node.op_type}{domain_str}: {list(node.input)}{list(node.output)}")

    if not fe_found:
        # Context-only node: no attribute dump.
        continue
    for attr in node.attribute:
        if attr.type == 1:
            value = attr.f
        elif attr.type == 2:
            value = attr.i
        elif attr.type == 7:
            value = list(attr.ints)
        else:
            # Other attribute kinds are not printed.
            continue
        print(f"      {attr.name} = {value}")

# Show nodes BEFORE OneOCRFeatureExtract (the conv backbone)
print("\nConv backbone structure (last 5 nodes before FE):")
# Index of the first OneOCRFeatureExtract node, or None when the graph has none.
fe_idx = next(
    (
        idx
        for idx, candidate in enumerate(model.graph.node)
        if candidate.op_type == "OneOCRFeatureExtract"
    ),
    None,
)

# Compare against None explicitly: index 0 is a valid position, and a plain
# truthiness test would wrongly treat it as "not found".
if fe_idx is not None:
    # Up to five preceding nodes plus the FE node itself.
    for i in range(max(0, fe_idx - 5), fe_idx + 1):
        node = model.graph.node[i]
        domain_str = f" [{node.domain}]" if node.domain else ""
        print(f"  [{i}] {node.op_type}{domain_str}: {list(node.input)}{list(node.output)}")
else:
    print("  (no OneOCRFeatureExtract node found)")

# Analyze the feature/config blob: reinterpret its bytes as big-endian
# float32 and print summary statistics plus values that look like dimensions.
for init in model.graph.initializer:
    if init.name != "feature/config":
        continue
    raw = bytes(init.string_data[0] if init.string_data else init.raw_data)
    print(f"\nfeature/config blob: {len(raw)} bytes")

    # Try big-endian float32. np.frombuffer rejects buffers whose size is not
    # a multiple of the item size, so drop any trailing partial element.
    usable = len(raw) - len(raw) % 4
    if usable != len(raw):
        print(f"  Ignoring {len(raw) - usable} trailing byte(s) that do not fill a float32")
    be = np.frombuffer(raw[:usable], dtype='>f4').copy()
    finite = np.isfinite(be)
    print(f"  Big-endian float32: {len(be)} values")
    print(f"  Finite: {finite.sum()}")
    in_range = np.sum(np.abs(be[finite]) < 10)
    # Guard the percentage against an empty value array.
    pct = 100 * in_range / len(be) if len(be) else 0.0
    print(f"  In [-10,10]: {in_range} ({pct:.1f}%)")
    print(f"  First 20: {be[:20]}")
    print(f"  Last 20: {be[-20:]}")

    # Try to find dimension markers: integer-valued floats strictly between
    # 10 and 10000 (this range already covers 128, 256 and 512). Only finite
    # values are tested, so an arbitrary bit pattern that decodes to inf can
    # no longer abort the scan during integer conversion.
    finite_idx = np.flatnonzero(finite)
    finite_vals = be[finite_idx]
    is_dim = (
        (finite_vals > 10)
        & (finite_vals < 10000)
        & (finite_vals == np.trunc(finite_vals))
    )
    for i in finite_idx[is_dim]:
        print(f"  Potential dim at [{i}]: {be[i]}")