oneocr / _archive /attempts /verify_key_derivation.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""
Verify key derivation and analyze DX index structure.
Proven scheme:
DX key = SHA256(master_key_32 + file[8:24])
AES-256-CFB128, IV = "Copyright @ OneO"
"""
import hashlib
import struct
from pathlib import Path
from Crypto.Cipher import AES
# Master key and fixed IV used by the scheme described in the module docstring.
KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
IV = b"Copyright @ OneO"
file_data = Path("ocr_data/oneocr.onemodel").read_bytes()

# Step 1: Derive DX key
# The 16 bytes at file offsets 8..23 are appended to the master key and the
# SHA-256 digest of the result is used as the AES key.
header_hash = file_data[8:24]
derived_key = hashlib.sha256(KEY + header_hash).digest()
print(f"DX derived key: {derived_key.hex()}")

# Step 2: Decrypt DX index
# The encrypted region starts right after the 24-byte file header; its length
# is hard-coded here.
encrypted_dx = file_data[24:24 + 22624]
cipher = AES.new(derived_key, AES.MODE_CFB, iv=IV, segment_size=128)
dx = cipher.decrypt(encrypted_dx)

# Validate with an explicit check instead of `assert`: assert statements are
# removed under `python -O`, which would let a bad key/file slip through and
# make every later step operate on garbage.
if dx[:2] != b"DX":
    raise ValueError("DX header mismatch!")

# Little-endian uint64 stored at DX offset 8.
valid_size = struct.unpack('<Q', dx[8:16])[0]
print(f"DX valid size: {valid_size}, starts with DX: OK")
# Step 3: Hex dump
# Print at most the first 512 bytes of the decrypted index, 16 bytes per row:
# row offset, space-separated hex bytes, then the printable-ASCII rendering
# (anything outside 0x20..0x7e is shown as '.').
print("\nDX hex dump (first 512 bytes):")
dump_limit = min(512, len(dx))
for row_start in range(0, dump_limit, 16):
    row = dx[row_start:row_start + 16]
    hex_cols = ' '.join(format(byte, '02x') for byte in row)
    printable = ''.join(
        '.' if byte < 32 or byte >= 127 else chr(byte) for byte in row
    )
    print(f" {row_start:04x}: {hex_cols:<48s} {printable}")
# Step 4: Search for known hash inputs from hook data
# Each entry maps a label to a hex-encoded byte sequence; report the first
# offset at which it occurs in the decrypted DX, or that it is absent.
print("\n--- Searching for hash input patterns in DX ---")
patterns = {
    "Chunk1(config)": "7f2e000000000000972e0000000000003fe51f12a6d7432577c9b6b367b1ff4d",
    "Chunk2(encrypt)": "78000000000000009000000000000000",
    "Chunk3(bigONNX)": "7f4bb00000000000974bb00000000000165e6ebce48ad4c5b45554019f6cefe8",
    "Chunk4(ONNX)": "5c000000000000007400000000000000",
    "Chunk5(ONNX2)": "63000000000000007b00000000000000",
    "Chunk6(ONNX3)": "69bf34000000000081bf340000000000c7ed80dc84ea4fc4a891feae316ccc8e",
}
for label, needle_hex in patterns.items():
    needle = bytes.fromhex(needle_hex)
    hit = dx.find(needle)
    if hit < 0:
        print(f" {label}: NOT found in DX (len={len(needle)})")
        continue
    print(f" {label}: found at DX offset {hit} ({hit:#x})")
# Step 5: Analyze DX structure around container header magic
# List every occurrence of the 8-byte magic (overlapping matches included)
# together with the 40 bytes starting at each hit.
magic = bytes.fromhex("4a1a082b25000000")
print("\nContainer magic 4a1a082b25000000 locations:")
pos = dx.find(magic)
while pos >= 0:
    print(f" offset {pos} ({pos:#x}): {dx[pos:pos + 40].hex()}")
    # Resume one byte past the current hit so overlapping matches are seen.
    pos = dx.find(magic, pos + 1)
# Step 6: Parse DX as record-based structure
# Header layout observed so far:
#   Offset 0-7:   "DX\x00\x00\x00\x00\x00\x00"
#   Offset 8-15:  valid_size (uint64) = 22620
#   Offset 16-23: container magic = 4a1a082b25000000
#   Offset 24-31: uint64 = 0x2ea7 = 11943
# What follows offset 32 has not been decoded yet.


def _u64_le(raw):
    """Decode 8 bytes as an unsigned little-endian 64-bit integer."""
    return struct.unpack('<Q', raw)[0]


# (offset, label, how to render the 8 bytes found at that offset)
_DX_HEADER_FIELDS = (
    (0, "Magic", lambda raw: raw),
    (8, "ValidSize", _u64_le),
    (16, "ContainerMagic", lambda raw: raw.hex()),
    (24, "Value", _u64_le),
)

print("\n--- DX parsed fields ---")
for off, label, decode in _DX_HEADER_FIELDS:
    print(f" [{off}] {label}: {decode(dx[off:off + 8])}")
# First offset past the fields printed above.
off = 32

# Next step (not implemented): look for uint64 pairs that were hash inputs.
# The 16-byte patterns are two uint64 LE values; the 32-byte patterns are two
# uint64 LE values followed by a 16-byte hash. Scanning DX for all such
# uint64 pairs should reveal the record structure.
# Save DX for manual analysis
# Write the decrypted index under ./temp, creating the directory if needed.
out_dir = Path("temp")
out_dir.mkdir(exist_ok=True)
(out_dir / "dx_index_decrypted.bin").write_bytes(dx)
print(f"\nSaved DX to temp/dx_index_decrypted.bin ({len(dx)} bytes)")