|
|
"""Analyze oneocr.onemodel file format.""" |
|
|
import os |
|
|
import struct |
|
|
|
|
|
# Location of the model file, relative to the current working directory.
# Built with os.path.join so the script also works where "\" is not a path
# separator; on Windows the resulting string is the same as before.
MODEL_PATH = os.path.join("ocr_data", "oneocr.onemodel")

# Read the whole file into memory; every later step slices this buffer.
with open(MODEL_PATH, "rb") as f:
    data = f.read()

print(f"Total size: {len(data)} bytes = {len(data)/1024/1024:.2f} MB")
print(f"First 8 bytes (hex): {data[:8].hex()}")

# Decode the leading bytes as little-endian integers. struct raises
# struct.error on a buffer that is too short, so check the length first and
# report the gap instead of aborting the whole analysis.
for label, fmt in (
    ("First 4 bytes as uint32 LE", "<I"),
    ("First 8 bytes as uint64 LE", "<Q"),
):
    if len(data) >= struct.calcsize(fmt):
        print(f"{label}: {struct.unpack_from(fmt, data)[0]}")
    else:
        print(f"{label}: n/a (file too short)")
print()
|
|
|
|
|
|
|
|
# ASCII markers to look for in the raw bytes: ONNX / ONNX Runtime names plus
# common operator and dtype names.
patterns = [
    b"onnx", b"ai.onnx", b"ONNX", b"ort_", b"onnxruntime",
    b"ir_version", b"ORTM", b"FORT", b"ORT ", b"model",
    b"graph", b"Conv", b"Relu", b"Softmax", b"tensor",
    b"float", b"int64", b"opset", b"producer",
]

# Report only the first occurrence of each marker, with up to 8 bytes of
# context on either side. Markers that never occur print nothing.
for needle in patterns:
    hit = data.find(needle)
    if hit < 0:
        continue
    lo = max(0, hit - 8)
    hi = min(len(data), hit + len(needle) + 8)
    print(f"Found '{needle.decode(errors='replace')}' at offset {hit} (0x{hit:x})")
    print(f" Context hex: {data[lo:hi].hex()}")

print()
|
|
|
|
|
|
|
|
import collections |
|
|
def entropy_score(chunk):
    """Return the number of distinct values in *chunk*.

    Despite the name, this is not Shannon entropy: it is a cheap diversity
    measure. For a ``bytes`` chunk the result lies between 0 and 256.

    Args:
        chunk: Iterable of hashable items, typically a slice of ``bytes``.

    Returns:
        The count of distinct items; 0 for an empty chunk.
    """
    # Only membership matters here, so a set suffices; per-value counts
    # were never used.
    return len(set(chunk))
|
|
|
|
|
print("Entropy analysis (unique byte values per 4KB block):")

# Sample at most the first 64 KiB of the file, one 4 KiB block at a time.
scan_limit = min(len(data), 64 * 1024)
for offset in range(0, scan_limit, 4096):
    distinct = entropy_score(data[offset:offset + 4096])
    # Heuristic labels: nearly every byte value present vs. few present.
    if distinct > 240:
        verdict = "(encrypted/compressed)"
    elif distinct < 100:
        verdict = "(structured)"
    else:
        verdict = ""
    print(f" Offset 0x{offset:06x}: {distinct}/256 unique bytes", verdict)
|
|
|
|
|
|
|
|
# Hypothesis: the leading little-endian uint32 is a header length, so the
# payload would start at that offset. A file shorter than 4 bytes cannot be
# decoded (struct would raise struct.error), so report that instead.
if len(data) >= 4:
    (hdr_size,) = struct.unpack_from("<I", data)
    print(f"\nFirst uint32 = {hdr_size} (0x{hdr_size:x})")
    print(f"If header size, data starts at offset {hdr_size}")
    # Only peek at the candidate payload when the offset lies inside the file.
    if hdr_size < len(data):
        print(f"Data at offset {hdr_size}: {data[hdr_size:hdr_size+32].hex()}")
else:
    hdr_size = None
    print("\nFirst uint32 unavailable: file is shorter than 4 bytes")
|
|
|
|
|
|
|
|
# Alternative layout guess: the payload begins right after an 8-byte prefix.
print(f"\nBytes 8-16: {data[8:16].hex()}")
after_prefix = data[8:8 + 4096]
print(f"If offset 8 is data: unique bytes = {entropy_score(after_prefix)}/256")
|
|
|
|
|
|
|
|
print("\nXOR key analysis (checking if XOR of first bytes gives ONNX protobuf header):")

# An empty file has no first byte; data[0] would raise IndexError.
if data:
    # Derive the single-byte key that would turn the first byte into 0x08.
    # NOTE(review): 0x08 is the protobuf tag for field 1 / varint, presumably
    # ModelProto.ir_version -- confirm against the ONNX schema.
    xor_key_byte0 = data[0] ^ 0x08
    print(f" If first byte should be 0x08: XOR key = 0x{xor_key_byte0:02x}")

    # Apply that same key to the first 16 bytes to eyeball the result.
    test = bytes(b ^ xor_key_byte0 for b in data[:16])
    print(f" XOR'd first 16 bytes: {test.hex()}")
else:
    print(" File is empty; nothing to XOR")
|
|
|