oneocr / _archive /attempts /verify_key_derivation.py

OneOCR Dev

OneOCR - reverse engineering complete, ONNX pipeline 53% match rate

ce847d4 17 days ago

3.36 kB

	"""
	Verify key derivation and analyze DX index structure.
	Proven scheme:
	DX key = SHA256(master_key_32 + file[8:24])
	AES-256-CFB128, IV = "Copyright @ OneO"
	"""
	import hashlib
	import struct
	from pathlib import Path
	from Crypto.Cipher import AES

	# Fixed inputs to the scheme described in the module docstring: the master
	# key and the IV later handed to the AES cipher.
	KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
	IV = b"Copyright @ OneO"

	# Load the whole model container into memory; everything below slices it.
	model_path = Path("ocr_data/oneocr.onemodel")
	file_data = model_path.read_bytes()

	# Step 1: Derive DX key
	# The key is SHA-256 over the master key immediately followed by file
	# bytes 8..23; feeding the two parts separately hashes the same byte stream
	# as hashing their concatenation.
	header_hash = file_data[8:24]
	key_digest = hashlib.sha256()
	key_digest.update(KEY)
	key_digest.update(header_hash)
	derived_key = key_digest.digest()
	print(f"DX derived key: {derived_key.hex()}")

	# Step 2: Decrypt DX index
	# The encrypted index is taken as a fixed-size span of the file starting at
	# byte 24 and decrypted with AES in CFB mode (128-bit segments) using the
	# derived key and the fixed IV.
	DX_OFFSET = 24
	DX_ENCRYPTED_SIZE = 22624
	encrypted_dx = file_data[DX_OFFSET:DX_OFFSET + DX_ENCRYPTED_SIZE]
	cipher = AES.new(derived_key, AES.MODE_CFB, iv=IV, segment_size=128)
	dx = cipher.decrypt(encrypted_dx)

	# Validate with explicit raises instead of `assert`: assertions are removed
	# when Python runs with -O, which would let a failed decryption (wrong key,
	# wrong or truncated file) flow silently into the analysis steps below.
	if dx[:2] != b"DX":
	    raise ValueError("DX header mismatch!")
	if len(dx) < 16:
	    raise ValueError(
	        f"DX index too short to hold the size field: {len(dx)} bytes"
	    )

	# Bytes 8..15 of the plaintext hold a little-endian uint64 size value.
	valid_size = struct.unpack('<Q', dx[8:16])[0]
	print(f"DX valid size: {valid_size}, starts with DX: OK")

	# Step 3: Hex dump
	# 16 bytes per row over at most the first 512 bytes of the decrypted index:
	# row offset, space-separated hex bytes, then an ASCII column.
	print(f"\nDX hex dump (first 512 bytes):")

	# Translation table for the ASCII column: bytes 32..126 map to themselves,
	# every other byte value maps to '.'.
	printable = bytes(b if 32 <= b < 127 else ord('.') for b in range(256))

	dump_end = min(512, len(dx))
	for i in range(0, dump_end, 16):
	    row = dx[i:i + 16]
	    hex_str = ' '.join(map('{:02x}'.format, row))
	    ascii_str = row.translate(printable).decode('ascii')
	    print(f" {i:04x}: {hex_str:<48s} {ascii_str}")

	# Step 4: Search for known hash inputs from hook data
	# Each entry maps a label to the hex encoding of a byte sequence that is
	# looked up verbatim in the decrypted DX index.
	print(f"\n--- Searching for hash input patterns in DX ---")
	patterns = {
	    "Chunk1(config)": "7f2e000000000000972e0000000000003fe51f12a6d7432577c9b6b367b1ff4d",
	    "Chunk2(encrypt)": "78000000000000009000000000000000",
	    "Chunk3(bigONNX)": "7f4bb00000000000974bb00000000000165e6ebce48ad4c5b45554019f6cefe8",
	    "Chunk4(ONNX)": "5c000000000000007400000000000000",
	    "Chunk5(ONNX2)": "63000000000000007b00000000000000",
	    "Chunk6(ONNX3)": "69bf34000000000081bf340000000000c7ed80dc84ea4fc4a891feae316ccc8e",
	}

	for name, hex_pat in patterns.items():
	    target = bytes.fromhex(hex_pat)
	    pos = dx.find(target)
	    # bytes.find returns -1 on a miss, so a negative position means the
	    # pattern is absent; only the first occurrence is reported.
	    report = (
	        f" {name}: NOT found in DX (len={len(target)})"
	        if pos < 0
	        else f" {name}: found at DX offset {pos} ({pos:#x})"
	    )
	    print(report)

	# Step 5: Analyze DX structure around container header magic
	# List every occurrence of the 8-byte magic, each with up to 40 bytes of
	# data starting at the match.
	magic = bytes.fromhex("4a1a082b25000000")
	print(f"\nContainer magic 4a1a082b25000000 locations:")
	pos = dx.find(magic)
	while pos >= 0:
	    # Read surrounding context
	    ctx = dx[pos:pos+40]
	    print(f" offset {pos} ({pos:#x}): {ctx.hex()}")
	    # Resume one byte past this hit so overlapping matches are not skipped.
	    pos = dx.find(magic, pos + 1)

	# Step 6: Parse DX as record-based structure
	# Layout noted so far (values as observed by the original author):
	#   Offset 0-7:   "DX\x00\x00\x00\x00\x00\x00"
	#   Offset 8-15:  valid_size (uint64) = 22620
	#   Offset 16-23: container magic = 4a1a082b25000000
	#   Offset 24-31: uint64 = 0x2ea7 = 11943
	# What follows offset 32 is still to be worked out.

	print(f"\n--- DX parsed fields ---")

	# Raw 8-byte magic, printed as a bytes repr.
	off = 0
	magic_field = dx[off:off+8]
	print(f" [{off}] Magic: {magic_field}")

	# Little-endian uint64 size field.
	off = 8
	(valid_size_field,) = struct.unpack('<Q', dx[off:off+8])
	print(f" [{off}] ValidSize: {valid_size_field}")

	# Container magic, shown as hex.
	off = 16
	container_magic_hex = dx[off:off+8].hex()
	print(f" [{off}] ContainerMagic: {container_magic_hex}")

	# Little-endian uint64 whose meaning is not established here.
	off = 24
	(value_field,) = struct.unpack('<Q', dx[off:off+8])
	print(f" [{off}] Value: {value_field}")

	# Leave the cursor at the first byte not parsed above.
	off = 32

	# Notes for follow-up analysis (no scan is implemented here):
	#   - The 16-byte hash-input patterns are two little-endian uint64 values.
	#   - The 32-byte patterns are two little-endian uint64 values followed by
	#     a 16-byte hash.
	#   - Next idea: scan DX for all uint64 pairs to expose the record layout.

	# Save DX for manual analysis
	out_dir = Path("temp")
	out_dir.mkdir(exist_ok=True)
	out_file = Path("temp/dx_index_decrypted.bin")
	out_file.write_bytes(dx)
	print(f"\nSaved DX to temp/dx_index_decrypted.bin ({len(dx)} bytes)")