File size: 4,867 Bytes
ce847d4 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 |
"""Analyze DX index structure to understand chunk record format."""
import hashlib
import struct
import json
from pathlib import Path
from Crypto.Cipher import AES
# Static key material, reproduced verbatim. KEY is the master key; the
# per-file DX key is derived from it together with bytes 8..24 of the model
# file (see the SHA-256 step below).
KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
IV = b"Copyright @ OneO"

# Where this script expects the encrypted DX index inside the model file.
DX_OFFSET = 24
DX_ENCRYPTED_SIZE = 22624

file_data = Path("ocr_data/oneocr.onemodel").read_bytes()

# Step 1: Decrypt DX
# The 16 bytes at file offset 8 are appended to the master key and hashed to
# obtain the AES key used for the DX index.
header_hash = file_data[8:24]
dx_key = hashlib.sha256(KEY + header_hash).digest()
encrypted_dx = file_data[DX_OFFSET:DX_OFFSET + DX_ENCRYPTED_SIZE]
cipher = AES.new(dx_key, AES.MODE_CFB, iv=IV, segment_size=128)
dx = cipher.decrypt(encrypted_dx)

# Validate the magic with a real exception: an `assert` would be removed
# under `python -O`, letting undecryptable data reach the analysis below.
if dx[:2] != b"DX":
    raise ValueError(
        f"DX decryption failed: expected magic b'DX', got {dx[:2]!r}"
    )
# Load crypto log
# A context manager closes the file handle deterministically instead of
# leaving it to the garbage collector; JSON text is read as UTF-8.
with open("temp/crypto_log.json", encoding="utf-8") as log_file:
    crypto_log = json.load(log_file)

# Get unique SHA256 inputs in order
# Only the first occurrence of each distinct 'input' value is kept, so
# unique_sha preserves the order in which inputs first appear in the log.
sha_ops = [entry for entry in crypto_log if entry['op'] == 'sha256']
seen = set()
unique_sha = []
for entry in sha_ops:
    hex_input = entry['input']
    if hex_input in seen:
        continue
    seen.add(hex_input)
    unique_sha.append(entry)

# Get decrypt ops
dec_ops = [entry for entry in crypto_log if entry['op'] == 'decrypt']
# For each SHA256 input, find its position in DX
print("=" * 80)
print("DX Index Structure Analysis")
print("=" * 80)
print(f"DX size: {len(dx)} bytes, valid: {struct.unpack('<Q', dx[8:16])[0]}")
print()

# Skip first SHA256 (DX key derivation uses master_key + file_header, not DX data)
print("SHA256 input #0: DX key = SHA256(master_key + file[8:24]) [special case]")
print()

for i, s in enumerate(unique_sha[1:], 1):
    inp = bytes.fromhex(s['input'])
    label = f"SHA256 #{i:3d}: len={s['input_len']:2d}"

    # Best case: the whole hash input appears verbatim inside DX.
    pos = dx.find(inp)
    if pos >= 0:
        print(f"{label} found at DX offset {pos:5d} (0x{pos:04x})")
        continue

    # Everything below interprets the first 16 bytes as two little-endian
    # uint64 values. Shorter inputs cannot be unpacked that way, so report
    # them instead of letting struct.error abort the whole analysis.
    if len(inp) < 16:
        print(f"{label} NOT FOUND (input shorter than 16 bytes, no size fields)")
        continue
    size1, size2 = struct.unpack('<QQ', inp[:16])

    # Also try finding parts of the input
    pos_partial = dx.find(inp[:8])
    if pos_partial < 0:
        print(f"{label} NOT FOUND (size1={size1} size2={size2})")
        continue

    # The input might be rearranged from DX.
    # Check if sizes and checksum are nearby but in different order.
    checksum = inp[16:]
    pos_sizes = dx.find(inp[:16])
    pos_check = dx.find(checksum) if checksum else -1
    if pos_sizes >= 0:
        print(f"{label} sizes at DX offset {pos_sizes:5d}, checksum at {pos_check}")
    else:
        # Sizes might be in different order or interleaved
        # NOTE(review): the original indentation was lost; the size dump is
        # assumed to belong to this "rearranged?" branch only - confirm.
        print(f"{label} first_uint64 at DX offset {pos_partial:5d} (rearranged?)")
        print(f" size1={size1} size2={size2} diff={size2-size1}")
# Now let's dump DX structure around the first few records.
# `off` is a running cursor: each field is printed with its own offset and
# the cursor is then advanced by that field's width.
print()
print("=" * 80)
print("DX Record Structure (first 128 bytes)")
print("=" * 80)

off = 0
magic = dx[off:off + 8]
print(f"[{off:4d}] DX Magic: {magic!r}")

off += 8
valid_size = struct.unpack_from('<Q', dx, off)[0]
print(f"[{off:4d}] Valid Size: {valid_size}")

off += 8
container_hex = dx[off:off + 8].hex()
print(f"[{off:4d}] Container: {container_hex}")

off += 8
val = struct.unpack_from('<Q', dx, off)[0]
print(f"[{off:4d}] Value: {val} (0x{val:x})")

off += 8
checksum_hex = dx[off:off + 16].hex()
print(f"[{off:4d}] Checksum: {checksum_hex}")

off += 16
# Two consecutive little-endian uint64 values, read in one call.
s1, s2 = struct.unpack_from('<QQ', dx, off)
print(f"[{off:4d}] Sizes: {s1}, {s2} (diff={s2-s1})")

off += 16
enc_preview = dx[off:off + 32].hex()
print(f"[{off:4d}] Enc data starts: {enc_preview}")

# The config chunk data is here, 11920 bytes
config_enc_size = 11920
config_end = off + config_enc_size
print(f" Config encrypted data: offset {off} to {config_end} ({config_enc_size} bytes)")
def _dump_rows(data, start, length, mark=None):
    """Print *length* bytes of *data* from *start* as 16-byte hex/ASCII rows.

    Each row shows the offset in decimal and hex, the bytes in hex, and the
    printable-ASCII rendering (non-printable bytes shown as '.').

    Args:
        data: Bytes-like object to dump.
        start: Offset of the first row.
        length: Number of bytes to cover; rows advance in steps of 16.
        mark: Optional offset; the row starting exactly there gets " <<<".

    A row is printed only when all 16 of its bytes lie inside *data*; the
    dump stops at the first row that would run past the end.
    """
    for pos in range(start, start + length, 16):
        if pos + 16 > len(data):
            break
        row = data[pos:pos + 16]
        hex_str = ' '.join(f'{b:02x}' for b in row)
        ascii_str = ''.join(chr(b) if 32 <= b < 127 else '.' for b in row)
        marker = " <<<" if pos == mark else ""
        print(f" {pos:5d} ({pos:#06x}): {hex_str:<48s} {ascii_str}{marker}")


# What's after the config?
print(f"\n--- After config chunk ({config_end}) ---")
_dump_rows(dx, config_end, 80)

# Look at the area around found patterns.
# Each window starts 48 bytes before the offset of interest (clamped to 0)
# so the marked row sits on the fourth line of the dump.
for name, dx_off in [("Chunk2(encrypt) 0x2ed7", 0x2ed7),
                     ("Chunk4(ONNX) 0x2f80", 0x2f80),
                     ("Chunk5(ONNX2) 0x4692", 0x4692)]:
    print(f"\n--- Area around {name} ---")
    _dump_rows(dx, max(0, dx_off - 48), 128, mark=dx_off)
|