# oneocr/_archive/analysis/walk_payload.py
# OneOCR Dev
# OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
# ce847d4
"""Walk ALL payload chunks in the .onemodel file and decrypt them statically.
Full cross-platform static decryptor - no DLL or Windows APIs needed.
"""
import struct
import hashlib
from Crypto.Cipher import AES
# Fixed secret; SHA-256(MASTER_KEY + file_hash) is used as the DX index key.
MASTER_KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
# IV passed to every AES-CFB decryption in this script (DX index, config, chunks).
IV = b"Copyright @ OneO"
# Expected first 8 bytes of the decrypted config and of each decrypted chunk.
CONTAINER_MAGIC = bytes.fromhex("4a1a082b25000000")
def aes_cfb128_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
    """Decrypt *data* with AES in CFB mode using a 128-bit segment size.

    Args:
        key: AES key (callers in this script pass 32-byte SHA-256 digests).
        iv: Initialization vector handed to the cipher.
        data: Ciphertext to decrypt.

    Returns:
        The decrypted bytes.
    """
    # A fresh cipher object is built per call, so each decryption starts from `iv`.
    return AES.new(key, AES.MODE_CFB, iv=iv, segment_size=128).decrypt(data)
# Read the whole model file into memory; every later step slices this buffer.
with open("ocr_data/oneocr.onemodel", "rb") as model_file:
    fdata = model_file.read()

# Parse file header: a little-endian u64 (H) at offset 0, then the 16 bytes
# at [8:24] that are mixed into the DX key.
(H,) = struct.unpack_from("<Q", fdata)
file_hash = fdata[8:24]

print(f"File size: {len(fdata):,} bytes")
print(f"Header value H: {H}")
print(f"DX encrypted size: {H-12}")
print(f"Payload start: {H+16}")
# Decrypt DX index
# Key = SHA-256 over MASTER_KEY immediately followed by the header hash bytes.
dx_hasher = hashlib.sha256(MASTER_KEY)
dx_hasher.update(file_hash)
dx_key = dx_hasher.digest()

# The encrypted DX region runs from byte 24 up to (not including) H + 12.
dx_enc = fdata[24:H + 12]
dx = aes_cfb128_decrypt(dx_key, IV, dx_enc)

# A little-endian u64 at offset 8 of the decrypted index is reported as its valid size.
(valid_size,) = struct.unpack_from("<Q", dx, 8)
dx_magic = dx[:8]
print(f"DX magic: {dx_magic}")
print(f"DX valid size: {valid_size}")
# Decrypt config from DX
# Key material: the 16 bytes at dx[48:64] followed by the 16 bytes at dx[32:48].
config_sha_input = b"".join((dx[48:64], dx[32:48]))  # sizes + checksum
config_key = hashlib.sha256(config_sha_input).digest()

# The encrypted config starts at offset 64 and spans config_s1 + 8 bytes.
(config_s1,) = struct.unpack_from("<Q", dx, 48)
config_start = 64
config_enc = dx[config_start:config_start + config_s1 + 8]
config_dec = aes_cfb128_decrypt(config_key, IV, config_enc)

config_magic_ok = config_dec[:8] == CONTAINER_MAGIC
print(f"Config decrypted: {len(config_dec)} bytes, magic match: {config_magic_ok}")
# Walk payload chunks
# Each chunk is a 32-byte header (16 checksum bytes, then two little-endian
# u64 sizes) followed by s1 + 8 encrypted bytes. The walk stops at the first
# header that fails the sanity check or whose data would run past the file end.
off = H + 16
chunk_idx = 0
chunks = []
total_len = len(fdata)

while off + 32 <= total_len:
    chk = fdata[off:off+16]
    size_fields = fdata[off+16:off+32]
    s1, s2 = struct.unpack("<QQ", size_fields)

    # A plausible header has s2 == s1 + 24 and a non-zero s1 no larger than the file.
    header_ok = s2 == s1 + 24 and s1 != 0 and s1 <= total_len
    if not header_ok:
        break

    enc_size = s1 + 8
    data_off = off + 32
    data_end = data_off + enc_size
    if data_end > total_len:
        print(f"WARNING: chunk#{chunk_idx} extends past file end!")
        break

    # Derive per-chunk key: SHA-256 over the size fields followed by the checksum.
    sha_input = size_fields + chk  # sizes + checksum
    chunk_key = hashlib.sha256(sha_input).digest()

    # Decrypt, then split the 8-byte container header from the payload.
    dec_data = aes_cfb128_decrypt(chunk_key, IV, fdata[data_off:data_end])
    magic_ok = dec_data[:8] == CONTAINER_MAGIC
    payload = dec_data[8:]  # strip container header

    entry = {
        "idx": chunk_idx,
        "file_offset": off,
        "data_offset": data_off,
        "size1": s1,
        "enc_size": enc_size,
        "magic_ok": magic_ok,
        "payload": payload,
    }
    chunks.append(entry)
    print(f" chunk#{chunk_idx:02d}: off={off:>10} s1={s1:>10} magic={'OK' if magic_ok else 'FAIL'} payload_start={payload[:8].hex()}")

    # The next header begins right after this chunk's encrypted data.
    off = data_end
    chunk_idx += 1

print(f"\nTotal chunks: {chunk_idx}")
print(f"File bytes remaining: {len(fdata) - off}")
print(f"All magic OK: {all(c['magic_ok'] for c in chunks)}")
# Identify ONNX models (start with protobuf field tags typical for ONNX ModelProto)
print("\n=== ONNX model identification ===")
onnx_count = 0
for c in chunks:
    payload = c["payload"]
    # Heuristic: payload longer than 100 bytes whose first byte is 0x08
    # (protobuf field 1, varint - ir_version in ModelProto) and whose second
    # byte is 6 or 7 (ir_version 6 or 7).
    is_onnx = (
        len(payload) > 100
        and payload[0] == 0x08
        and payload[1] in (0x06, 0x07)
    )
    if not is_onnx:
        continue
    onnx_count += 1
    print(f" chunk#{c['idx']:02d}: ONNX model, size={len(payload):,} bytes")

print(f"\nTotal ONNX models found: {onnx_count}")
print(f"Total non-ONNX chunks: {chunk_idx - onnx_count}")
# Show what non-ONNX chunks look like
print("\n=== Non-ONNX chunk types ===")
for c in chunks:
    payload = c["payload"]
    # Use exactly the same predicate as the ONNX identification pass so every
    # chunk counted there as non-ONNX is listed here (the previous `< 100`
    # test silently dropped 100-byte payloads that start with 08 06 / 08 07).
    looks_like_onnx = (
        len(payload) > 100
        and payload[0] == 0x08
        and payload[1] in (0x06, 0x07)
    )
    if looks_like_onnx:
        continue

    # Try ASCII: the chunk is "text" when its first 40 bytes decode as ASCII
    # and contain only printable characters or newline/CR/tab.
    try:
        s = payload[:40].decode('ascii')
    except UnicodeDecodeError:
        # Only a decode failure means "binary"; other exceptions
        # (e.g. KeyboardInterrupt) must propagate.
        readable = False
    else:
        readable = all(ch.isprintable() or ch in '\n\r\t' for ch in s)

    if readable:
        # Bytes 40..59 were not validated above, hence errors='replace'.
        preview = payload[:60].decode('ascii', errors='replace').replace('\n', '\\n')
    else:
        preview = payload[:32].hex()
    print(f" chunk#{c['idx']:02d}: size={len(payload):>8,} type={'text' if readable else 'binary'} preview={preview}")