File size: 2,403 Bytes
ce847d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
"""Analyze oneocr.onemodel file format."""
import os
import struct

# Model blob under inspection (Windows-style relative path).
MODEL_PATH = r"ocr_data\oneocr.onemodel"

# Read the entire file up front; every probe below works on this buffer.
with open(MODEL_PATH, "rb") as fh:
    data = fh.read()

# First facts a format sleuth wants: total size and the leading bytes
# interpreted a few different ways (raw hex, little-endian 32/64-bit ints).
print(f"Total size: {len(data)} bytes = {len(data)/1024/1024:.2f} MB")
print(f"First 8 bytes (hex): {data[:8].hex()}")
print(f"First 4 bytes as uint32 LE: {struct.unpack_from('<I', data)[0]}")
print(f"First 8 bytes as uint64 LE: {struct.unpack_from('<Q', data)[0]}")
print()

# Scan the blob for signature strings that would identify a known
# container format (raw ONNX protobuf, ONNX Runtime .ort, etc.).
patterns = [b"onnx", b"ai.onnx", b"ONNX", b"ort_", b"onnxruntime", 
            b"ir_version", b"ORTM", b"FORT", b"ORT ", b"model",
            b"graph", b"Conv", b"Relu", b"Softmax", b"tensor",
            b"float", b"int64", b"opset", b"producer"]

for sig in patterns:
    pos = data.find(sig)
    if pos < 0:
        continue  # signature absent — nothing to report
    # Show 8 bytes of context on each side of the hit.
    lo = max(0, pos - 8)
    hi = min(len(data), pos + len(sig) + 8)
    print(f"Found '{sig.decode(errors='replace')}' at offset {pos} (0x{pos:x})")
    print(f"  Context hex: {data[lo:hi].hex()}")

print()

# Check entropy by sections
import collections
def entropy_score(chunk):
    """Return the count of distinct byte values in *chunk* (0-256).

    Not true Shannon entropy — just a crude uniqueness count used as a
    proxy: near-256 suggests compressed/encrypted data, low counts
    suggest structured/sparse data.
    """
    return len(set(chunk))

print("Entropy analysis (unique byte values per 4KB block):")
# Walk the first 64 KB (or the whole file if smaller) in 4 KB steps and
# label each block by its byte diversity.
limit = min(len(data), 64*1024)
for off in range(0, limit, 4096):
    uniq = entropy_score(data[off:off + 4096])
    if uniq > 240:
        label = "(encrypted/compressed)"
    elif uniq < 100:
        label = "(structured)"
    else:
        label = ""
    print(f"  Offset 0x{off:06x}: {uniq}/256 unique bytes", label)

# Hypothesis 1: the leading uint32 is a header length; show what the
# payload would look like at that offset.
hdr_size = struct.unpack_from('<I', data)[0]
print(f"\nFirst uint32 = {hdr_size} (0x{hdr_size:x})")
print(f"If header size, data starts at offset {hdr_size}")
if hdr_size < len(data):
    print(f"Data at offset {hdr_size}: {data[hdr_size:hdr_size+32].hex()}")

# Hypothesis 2: a fixed 8-byte header; peek at offset 8 and measure the
# byte diversity of the 4 KB that follows.
print(f"\nBytes 8-16: {data[8:16].hex()}")
print(f"If offset 8 is data: unique bytes = {entropy_score(data[8:8+4096])}/256")

# Hypothesis 3: the blob is obfuscated with a single-byte XOR key.
print("\nXOR key analysis (checking if XOR of first bytes gives ONNX protobuf header):")
# An ONNX protobuf model starts with 0x08 (varint tag, field 1 =
# ir_version), so the key that would produce it is data[0] ^ 0x08.
key = data[0] ^ 0x08
print(f"  If first byte should be 0x08: XOR key = 0x{key:02x}")
# Apply the candidate key to the first 16 bytes and eyeball the result.
decoded = bytes(v ^ key for v in data[:16])
print(f"  XOR'd first 16 bytes: {decoded.hex()}")