oneocr / _archive /analysis /walk_payload.py

OneOCR Dev

OneOCR - reverse engineering complete, ONNX pipeline 53% match rate

ce847d4 1 day ago

4.33 kB

	"""Walk ALL payload chunks in the .onemodel file and decrypt them statically.
	Full cross-platform static decryptor - no DLL or Windows APIs needed.
	"""
	import struct
	import hashlib
	from Crypto.Cipher import AES

	# 32-byte master secret. It is concatenated with the 16-byte file hash and
	# run through SHA-256 to derive the key for the DX index.
	MASTER_KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
	# 16-byte IV reused for every AES-CFB decryption in this script
	# (DX index, config, and each payload chunk).
	IV = b"Copyright @ OneO"
	# Expected first 8 bytes of a decrypted container; compared against the
	# decrypted config and every decrypted payload chunk.
	CONTAINER_MAGIC = bytes.fromhex("4a1a082b25000000")

	def aes_cfb128_decrypt(key: bytes, iv: bytes, data: bytes) -> bytes:
	    """Decrypt ``data`` with AES in CFB mode using a 128-bit segment size.

	    Args:
	        key: AES key, passed straight to ``AES.new``.
	        iv: Initialisation vector for the CFB cipher.
	        data: Ciphertext to decrypt.

	    Returns:
	        The bytes produced by the cipher's ``decrypt`` call.
	    """
	    decryptor = AES.new(key, AES.MODE_CFB, iv=iv, segment_size=128)
	    plaintext = decryptor.decrypt(data)
	    return plaintext

	# Load the whole model file into memory.
	with open("ocr_data/oneocr.onemodel", "rb") as model_file:
	    fdata = model_file.read()

	# File header: a little-endian u64 (H) at offset 0, followed by the 16 bytes
	# this script uses as the file hash.
	(H,) = struct.unpack_from("<Q", fdata)
	file_hash = fdata[8:24]
	print(f"File size: {len(fdata):,} bytes")
	print(f"Header value H: {H}")
	print(f"DX encrypted size: {H-12}")
	print(f"Payload start: {H+16}")

	# DX index: key is SHA-256(MASTER_KEY || file_hash); the ciphertext spans
	# file bytes 24 up to (but excluding) H + 12.
	dx_hasher = hashlib.sha256()
	dx_hasher.update(MASTER_KEY)
	dx_hasher.update(file_hash)
	dx_key = dx_hasher.digest()
	dx_enc = fdata[24:H + 12]
	dx = aes_cfb128_decrypt(dx_key, IV, dx_enc)

	(valid_size,) = struct.unpack_from("<Q", dx, 8)
	print(f"DX magic: {dx[:8]}")
	print(f"DX valid size: {valid_size}")

	# Config blob embedded in the DX index: the key is SHA-256 over the 16 size
	# bytes (dx[48:64]) followed by the 16 checksum bytes (dx[32:48]).
	config_checksum = dx[32:48]
	config_sizes = dx[48:64]
	config_sha_input = config_sizes + config_checksum
	config_key = hashlib.sha256(config_sha_input).digest()
	(config_s1,) = struct.unpack_from("<Q", dx, 48)
	# Encrypted config starts at offset 64 and is size1 + 8 bytes long.
	config_enc = dx[64:64 + config_s1 + 8]
	config_dec = aes_cfb128_decrypt(config_key, IV, config_enc)
	print(f"Config decrypted: {len(config_dec)} bytes, magic match: {config_dec[:8] == CONTAINER_MAGIC}")

	# Walk the payload chunks that follow the DX index, decrypting each one.
	off = H + 16
	chunk_idx = 0
	chunks = []
	file_len = len(fdata)

	while off + 32 <= file_len:
	    # 32-byte chunk header: 16 checksum bytes, then two little-endian u64
	    # size fields.
	    checksum = fdata[off:off + 16]
	    size_fields = fdata[off + 16:off + 32]
	    s1, s2 = struct.unpack("<QQ", size_fields)

	    # A plausible header has s2 == s1 + 24 and a non-zero s1 no larger than
	    # the file; anything else ends the walk.
	    header_ok = s2 == s1 + 24 and s1 != 0 and s1 <= file_len
	    if not header_ok:
	        break

	    enc_size = s1 + 8
	    data_off = off + 32
	    next_off = data_off + enc_size

	    if next_off > file_len:
	        print(f"WARNING: chunk#{chunk_idx} extends past file end!")
	        break

	    # Per-chunk key: SHA-256 over the size fields followed by the checksum.
	    chunk_key = hashlib.sha256(size_fields + checksum).digest()

	    dec_data = aes_cfb128_decrypt(chunk_key, IV, fdata[data_off:next_off])

	    # The first 8 decrypted bytes are the container header; the rest is the
	    # payload proper.
	    magic_ok = dec_data[:8] == CONTAINER_MAGIC
	    payload = dec_data[8:]

	    chunks.append(dict(
	        idx=chunk_idx,
	        file_offset=off,
	        data_offset=data_off,
	        size1=s1,
	        enc_size=enc_size,
	        magic_ok=magic_ok,
	        payload=payload,
	    ))

	    print(f" chunk#{chunk_idx:02d}: off={off:>10} s1={s1:>10} magic={'OK' if magic_ok else 'FAIL'} payload_start={payload[:8].hex()}")

	    off = next_off
	    chunk_idx += 1

	print(f"\nTotal chunks: {chunk_idx}")
	print(f"File bytes remaining: {len(fdata) - off}")
	print(f"All magic OK: {all(c['magic_ok'] for c in chunks)}")

	# Count and list the chunks whose payload looks like an ONNX ModelProto.
	print("\n=== ONNX model identification ===")
	onnx_count = 0
	for c in chunks:
	    payload = c["payload"]
	    # Payloads of 100 bytes or fewer are never treated as models.
	    if len(payload) <= 100:
	        continue
	    # Heuristic: protobuf field 1 (ir_version) varint tag 0x08, followed by
	    # an ir_version value of 6 or 7.
	    if payload[0] != 0x08 or payload[1] not in (0x06, 0x07):
	        continue

	    onnx_count += 1
	    print(f" chunk#{c['idx']:02d}: ONNX model, size={len(payload):,} bytes")

	print(f"\nTotal ONNX models found: {onnx_count}")
	print(f"Total non-ONNX chunks: {chunk_idx - onnx_count}")

	# Show what non-ONNX chunks look like: one line per chunk with its size, a
	# text/binary guess, and a short preview.
	print("\n=== Non-ONNX chunk types ===")
	for c in chunks:
	    payload = c["payload"]
	    # Skip ONNX models. This is the exact complement of the test used by the
	    # ONNX identification pass (more than 100 bytes, 0x08 tag, ir_version 6
	    # or 7), so every chunk counted as non-ONNX there is listed here --
	    # including a payload of exactly 100 bytes.
	    looks_like_onnx = (
	        len(payload) > 100
	        and payload[0] == 0x08
	        and payload[1] in (0x06, 0x07)
	    )
	    if looks_like_onnx:
	        continue

	    # Treat the chunk as text when its first 40 bytes are pure ASCII and
	    # printable (newline, carriage return and tab allowed). Only a decode
	    # failure means "binary"; any other exception should propagate.
	    try:
	        s = payload[:40].decode('ascii')
	    except UnicodeDecodeError:
	        readable = False
	    else:
	        readable = all(ch.isprintable() or ch in '\n\r\t' for ch in s)

	    if readable:
	        # Bytes 40-59 were not validated above, hence errors='replace'.
	        preview = payload[:60].decode('ascii', errors='replace').replace('\n', '\\n')
	    else:
	        preview = payload[:32].hex()
	    print(f" chunk#{c['idx']:02d}: size={len(payload):>8,} type={'text' if readable else 'binary'} preview={preview}")