oneocr / _archive /analysis /analyze_crypto_log.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""Analyze crypto_log.json to understand decrypt sequence and chunk mapping."""
import json
import struct
# Load the recorded crypto operations, then bucket them by their "op" field.
with open("temp/crypto_log.json") as log_file:
    log = json.load(log_file)


def _ops_named(ops, kind):
    """Return the entries of *ops* whose "op" field equals *kind*, in order."""
    return [entry for entry in ops if entry["op"] == kind]


decrypts = _ops_named(log, "decrypt")
sha256s = _ops_named(log, "sha256")
encrypts = _ops_named(log, "encrypt")
print(f"Total ops: {len(log)} (sha256={len(sha256s)}, decrypt={len(decrypts)}, encrypt={len(encrypts)})")
# Index the SHA256 records by their result: output_hex -> input_hex.
# If two records share an output, the one later in the log wins.
sha_map = {record["output"]: record["input"] for record in sha256s}
# Pair each decrypt with the SHA256 input whose digest equals its AES key
def _describe_key_material(material):
    """Build the one-line label for a SHA256 input, chosen by its byte length.

    48 bytes -> fixed "DX_KEY" label.
    32 bytes -> two little-endian uint64 values plus a truncated hex dump of
                the trailing 16 bytes.
    16 bytes -> two little-endian uint64 values only.
    Anything else (including the empty placeholder) -> just the length.
    """
    length = len(material)
    if length == 48:
        return "DX_KEY (master+file[8:24])"
    if length in (16, 32):
        size_a, size_b = struct.unpack_from("<QQ", material, 0)
        if length == 16:
            return f"NOCHK sizes=({size_a},{size_b})"
        # Only the first 16 hex digits (8 bytes) of the trailing half are shown.
        chk_preview = material[16:32].hex()[:16] + "..."
        return f"CHK sizes=({size_a},{size_b}) chk={chk_preview}"
    return f"len={length}"


print("\n=== Decrypt operations with key derivation ===")
for idx, dec in enumerate(decrypts):
    # "UNKNOWN" marks a key with no matching sha256 record; it is reported
    # with empty key material, i.e. as "len=0".
    source_hex = sha_map.get(dec["aes_key"], "UNKNOWN")
    if source_hex == "UNKNOWN":
        material = b""
    else:
        material = bytes.fromhex(source_hex)
    summary = _describe_key_material(material)
    preview = dec["first_bytes"][:32]
    print(f" dec#{idx:02d}: size={dec['input_size']:>8}B {summary:50s} out={preview}")
# Search the decrypted DX index for the logged plaintext prefix of every
# decrypt, to separate chunks embedded in the DX from chunks in the payload.
#
# Author's layout notes (the ciphertext itself is not present in the log):
#   - DX encrypted data is at file[24:24+22624], so a chunk embedded in the DX
#     has its ciphertext at file offset 24 + dx_offset
#   - payload data is after file[22684]
with open("temp/dx_index_decrypted.bin", "rb") as dx_file:
    dx = dx_file.read()
# NOTE(review): fdata is not referenced anywhere in the visible code; it is
# still loaded so the name keeps existing for anything that relies on it.
with open("ocr_data/oneocr.onemodel", "rb") as model_file:
    fdata = model_file.read()

print("\n=== Locating encrypted data ===")
for i, d in enumerate(decrypts):
    size = d["input_size"]
    # At most the first 16 plaintext bytes (32 hex digits) are used as needle.
    first = bytes.fromhex(d["first_bytes"][:32])
    # bytes.find(b"") returns 0, so an empty needle must not count as a match.
    dx_pos = dx.find(first) if first else -1
    if i == 0:
        loc = "DX index itself at file[24:]"
    elif dx_pos >= 0:
        loc = f"embedded in DX at dx_offset={dx_pos} (file_off={24+dx_pos})"
    elif not first:
        loc = "unknown (no plaintext bytes logged)"
    else:
        loc = "payload (after file[22684])"
    print(f" dec#{i:02d}: size={size:>8}B {loc}")
# Scan DX at every byte offset for two consecutive little-endian uint64
# values (s1, s2) with s2 == s1 + 24 and 10 < s1 < 100_000_000.
print("\n=== All size-pair patterns in DX (s2 = s1 + 24) ===")
_SIZE_PAIR = struct.Struct("<QQ")
pairs = []
# The last offset with 16 readable bytes is len(dx) - 16, so the range must
# end at len(dx) - 15; a buffer shorter than 16 bytes yields an empty range.
for off in range(len(dx) - _SIZE_PAIR.size + 1):
    s1, s2 = _SIZE_PAIR.unpack_from(dx, off)
    if s2 == s1 + 24 and 10 < s1 < 100_000_000:
        pairs.append((off, s1, s2))
print(f"Found {len(pairs)} size pairs")
# Drop overlapping hits: a pair is kept only when it starts at least 16 bytes
# after the previously kept pair (the very first pair is always kept).
filtered = []
last_kept_off = None
for candidate in pairs:
    if last_kept_off is not None and candidate[0] < last_kept_off + 16:
        continue
    filtered.append(candidate)
    last_kept_off = candidate[0]
print(f"After dedup: {len(filtered)} pairs")
for pair_off, size_a, size_b in filtered:
    # Heuristic: the 16 bytes right before the pair are flagged as a possible
    # checksum when more than 8 of them are non-zero. A pair that starts
    # within the first 16 bytes has no room for one and is reported as "no".
    preceding = dx[pair_off - 16:pair_off] if pair_off >= 16 else b""
    nonzero_count = sum(map(bool, preceding))
    marker = "YES" if nonzero_count > 8 else "no"
    print(f" offset={pair_off:>5} (0x{pair_off:04x}): sizes=({size_a}, {size_b}) chk_before={marker}")