"""Analyze DX index structure to understand chunk record format.

The script decrypts the DX index stored in ``ocr_data/oneocr.onemodel``,
loads a log of crypto operations from ``temp/crypto_log.json`` and reports
where each distinct SHA256 input from that log occurs inside the decrypted
index, followed by hex dumps of selected regions of the index.

NOTE(review): the copy of this file that was reviewed had lost its line
breaks and three spans of text (each starting at a ``struct.unpack('<``
format string and ending at a later ``>``).  Every place where code had to
be reconstructed is marked with ``NOTE(review)`` below; confirm those spots
against version control before trusting the output.
"""

import hashlib
import json
import struct
from pathlib import Path

from Crypto.Cipher import AES

KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
IV = b"Copyright @ OneO"


def _print_hex_rows(data, start, length, highlight=None):
    """Print ``data[start:start + length]`` as 16-byte hex/ASCII rows.

    Args:
        data: Bytes to dump.
        start: Offset of the first row.
        length: Number of bytes to cover; rows advance in steps of 16.
        highlight: Optional offset; the row starting exactly there gets a
            trailing `` <<<`` marker.

    Dumping stops before the first row that would extend past the end of
    ``data``, so a partial trailing row is never printed.
    """
    for pos in range(start, start + length, 16):
        if pos + 16 > len(data):
            break
        row = data[pos:pos + 16]
        hex_str = ' '.join(f'{b:02x}' for b in row)
        ascii_str = ''.join(chr(b) if 32 <= b < 127 else '.' for b in row)
        marker = " <<<" if pos == highlight else ""
        print(f" {pos:5d} ({pos:#06x}): {hex_str:<48s} {ascii_str}{marker}")


file_data = Path("ocr_data/oneocr.onemodel").read_bytes()

# Step 1: Decrypt DX
# The AES key is SHA256(KEY + 16 bytes taken from file offset 8); the
# encrypted index is the 22624 bytes that start at file offset 24.
header_hash = file_data[8:24]
dx_key = hashlib.sha256(KEY + header_hash).digest()
encrypted_dx = file_data[24:24 + 22624]
cipher = AES.new(dx_key, AES.MODE_CFB, iv=IV, segment_size=128)
dx = cipher.decrypt(encrypted_dx)
# Explicit check instead of ``assert`` so it still runs under ``python -O``.
if dx[:2] != b"DX":
    raise ValueError(
        f"decrypted index does not start with b'DX' (got {dx[:2]!r})"
    )

# Load crypto log (context manager so the file handle is closed promptly)
with open("temp/crypto_log.json", encoding="utf-8") as log_file:
    crypto_log = json.load(log_file)

# Get unique SHA256 inputs in order (first occurrence of each input wins)
sha_ops = [x for x in crypto_log if x['op'] == 'sha256']
seen = set()
unique_sha = []
for s in sha_ops:
    if s['input'] not in seen:
        seen.add(s['input'])
        unique_sha.append(s)

# Get decrypt ops
# NOTE(review): not read by any surviving code below; kept in case one of
# the lost spans used it.
dec_ops = [x for x in crypto_log if x['op'] == 'decrypt']

# For each SHA256 input, find its position in DX
print("=" * 80)
print("DX Index Structure Analysis")
print("=" * 80)
# NOTE(review): everything after ``struct.unpack('`` in this print was lost.
# Reading a little-endian uint64 at dx[8:16] is a guess - confirm the field.
print(f"DX size: {len(dx)} bytes, valid: {struct.unpack('<Q', dx[8:16])[0]}")

# NOTE(review): the loop header and the definitions of ``inp``, ``pos``,
# ``first_uint64`` and ``pos_partial`` were lost; they are rebuilt from how
# the surviving branches use those names.  Decoding ``s['input']`` as a hex
# string is an assumption about the log format - verify.
for i, s in enumerate(unique_sha):
    inp = bytes.fromhex(s['input'])
    first_uint64 = inp[:8]
    pos = dx.find(inp)
    pos_partial = dx.find(first_uint64)

    if pos >= 0:
        print(
            f"SHA256 #{i:3d}: len={s['input_len']:2d} "
            f"found at DX offset {pos:5d} (0x{pos:04x})"
        )
    elif pos_partial >= 0:
        # The input might be rearranged from DX
        # NOTE(review): a ``size1 = struct.unpack('<...`` assignment (and
        # possibly more) was lost here; no surviving code reads ``size1``,
        # so it is not reconstructed.
        checksum = inp[16:] if len(inp) > 16 else b""

        # Check if sizes and checksum are nearby but in different order
        pos_sizes = dx.find(inp[:16])
        pos_check = dx.find(checksum) if checksum else -1

        if pos_sizes >= 0:
            print(
                f"SHA256 #{i:3d}: len={s['input_len']:2d} "
                f"sizes at DX offset {pos_sizes:5d}, checksum at {pos_check}"
            )
        else:
            # Sizes might be in different order or interleaved
            pos_s1 = dx.find(first_uint64)
            print(
                f"SHA256 #{i:3d}: len={s['input_len']:2d} "
                f"first_uint64 at DX offset {pos_s1:5d} (rearranged?)"
            )
    # NOTE(review): the text following the "(rearranged?)" print was lost,
    # starting at another ``size1 = struct.unpack('<...``.  It may have held
    # further per-input handling (e.g. a "not found" report); none of that
    # can be recovered from the damaged copy.

# NOTE(review): the header of this dump loop was lost - only its body
# survived.  The range (first 256 bytes of DX) is a placeholder; restore
# the original bounds from version control.
_print_hex_rows(dx, 0, 256)

# Look at the area around found patterns
for name, dx_off in [
    ("Chunk2(encrypt) 0x2ed7", 0x2ed7),
    ("Chunk4(ONNX) 0x2f80", 0x2f80),
    ("Chunk5(ONNX2) 0x4692", 0x4692),
]:
    print(f"\n--- Area around {name} ---")
    # Start 48 bytes (three rows) early so the row that begins exactly at
    # dx_off is the one that gets the " <<<" marker.
    start = max(0, dx_off - 48)
    _print_hex_rows(dx, start, 128, highlight=dx_off)