oneocr / _archive /analysis /analyze_crypto_log.py

OneOCR Dev

OneOCR - reverse engineering complete, ONNX pipeline 53% match rate

ce847d4 about 21 hours ago

3.68 kB

	"""Analyze crypto_log.json to understand decrypt sequence and chunk mapping."""
	import json
	import struct

	# ---------------------------------------------------------------------------
	# Layout notes carried over from the original analysis:
	#   - The decrypt log holds no ciphertext, only the first plaintext bytes of
	#     each decrypt, so chunk locations are inferred from where that plaintext
	#     prefix shows up in the already-decrypted DX index.
	#   - For chunks embedded in the DX index, the ciphertext is at file offset
	#     24 + dx_offset.
	#   - The encrypted DX data is at file[24:24+22624]; payload data comes after
	#     file[22684].
	# NOTE(review): 24 + 22624 is 22648, not 22684 -- confirm which figure is
	# right; the printed message below keeps the original 22684.
	# ---------------------------------------------------------------------------
	CRYPTO_LOG_PATH = "temp/crypto_log.json"
	DX_DECRYPTED_PATH = "temp/dx_index_decrypted.bin"
	DX_FILE_OFFSET = 24  # added to an offset inside the DX blob to get a file offset
	SIZE_PAIR_DELTA = 24  # a candidate size pair satisfies s2 == s1 + 24
	SIZE_PAIR = struct.Struct("<QQ")  # two consecutive little-endian uint64 values
	CHECKSUM_LEN = 16  # number of bytes inspected in front of a size pair


	def describe_key_input(sha_input: bytes) -> str:
	    """Return a short label for a SHA-256 input, chosen by its length.

	    48 bytes -> the fixed "DX_KEY" label; 32 bytes -> two uint64 sizes plus a
	    hex preview of bytes 16..31; 16 bytes -> two uint64 sizes only; anything
	    else (including the empty input used when no matching hash was logged)
	    -> ``len=<n>``.
	    """
	    length = len(sha_input)
	    if length == 48:
	        return "DX_KEY (master+file[8:24])"
	    if length == 32:
	        s1, s2 = SIZE_PAIR.unpack_from(sha_input, 0)
	        # Only the first 8 bytes (16 hex digits) of the trailing 16 are shown.
	        chk = sha_input[16:32].hex()[:16] + "..."
	        return f"CHK sizes=({s1},{s2}) chk={chk}"
	    if length == 16:
	        s1, s2 = SIZE_PAIR.unpack_from(sha_input, 0)
	        return f"NOCHK sizes=({s1},{s2})"
	    return f"len={length}"


	def locate_chunk(index: int, first: bytes, dx: bytes) -> str:
	    """Describe where the ciphertext of decrypt number *index* is expected.

	    *first* is the logged plaintext prefix of the decrypt and *dx* the
	    decrypted DX index. Decrypt 0 is always reported as the DX index itself;
	    otherwise the prefix is searched for inside *dx*.
	    """
	    if index == 0:
	        return "DX index itself at file[24:]"
	    if not first:
	        # bytes.find(b"") returns 0, which would be reported as a bogus match
	        # at dx_offset=0; without a prefix nothing can be said.
	        return "unknown (no plaintext prefix in log)"
	    dx_pos = dx.find(first)
	    if dx_pos >= 0:
	        return f"embedded in DX at dx_offset={dx_pos} (file_off={DX_FILE_OFFSET + dx_pos})"
	    return "payload (after file[22684])"


	def find_size_pairs(dx: bytes) -> list:
	    """Return ``(offset, s1, s2)`` for every uint64 pair with s2 == s1 + 24.

	    Every byte offset is tested, including the last one at ``len(dx) - 16``.
	    Only pairs with ``10 < s1 < 100_000_000`` are kept. Inputs shorter than
	    16 bytes yield an empty list.
	    """
	    pairs = []
	    # len(dx) - 15 offsets leave room for a full 16-byte pair; range() is empty
	    # when that count is zero or negative.
	    for off in range(len(dx) - 15):
	        s1, s2 = SIZE_PAIR.unpack_from(dx, off)
	        if s2 == s1 + SIZE_PAIR_DELTA and 10 < s1 < 100_000_000:
	            pairs.append((off, s1, s2))
	    return pairs


	def drop_overlapping_pairs(pairs: list) -> list:
	    """Keep a pair only if it starts at least 16 bytes after the last kept one.

	    *pairs* must be ordered by offset, as produced by ``find_size_pairs``.
	    """
	    kept = []
	    for pair in pairs:
	        if not kept or pair[0] >= kept[-1][0] + SIZE_PAIR.size:
	            kept.append(pair)
	    return kept


	def has_checksum_before(dx: bytes, off: int) -> bool:
	    """Report whether the 16 bytes in front of *off* look like a checksum.

	    The heuristic is "more than 8 of the 16 bytes are non-zero". Offsets
	    below 16 have no full window in front of them and return False.
	    """
	    if off < CHECKSUM_LEN:
	        return False
	    window = dx[off - CHECKSUM_LEN:off]
	    return len(window) - window.count(0) > 8


	def main() -> None:
	    """Print the decrypt/key-derivation report, chunk locations and size pairs.

	    Reads ``temp/crypto_log.json`` (a list of operations, each with an "op"
	    field) and ``temp/dx_index_decrypted.bin`` relative to the working
	    directory and writes the report to stdout.
	    """
	    with open(CRYPTO_LOG_PATH, encoding="utf-8") as f:
	        log = json.load(f)

	    decrypts = [op for op in log if op["op"] == "decrypt"]
	    sha256s = [op for op in log if op["op"] == "sha256"]
	    encrypts = [op for op in log if op["op"] == "encrypt"]

	    print(f"Total ops: {len(log)} (sha256={len(sha256s)}, decrypt={len(decrypts)}, encrypt={len(encrypts)})")

	    # SHA-256 output hex -> input hex; a decrypt's AES key is looked up here to
	    # recover the data it was derived from.
	    sha_map = {s["output"]: s["input"] for s in sha256s}

	    print("\n=== Decrypt operations with key derivation ===")
	    for i, d in enumerate(decrypts):
	        sha_input_hex = sha_map.get(d["aes_key"])
	        # A key with no logged hash is described via the empty input ("len=0").
	        sha_input = bytes.fromhex(sha_input_hex) if sha_input_hex is not None else b""
	        desc = describe_key_input(sha_input)
	        first = d["first_bytes"][:32]
	        print(f" dec#{i:02d}: size={d['input_size']:>8}B {desc:50s} out={first}")

	    with open(DX_DECRYPTED_PATH, "rb") as f:
	        dx = f.read()

	    print("\n=== Locating encrypted data ===")
	    for i, d in enumerate(decrypts):
	        size = d["input_size"]
	        # First 32 hex digits = first 16 plaintext bytes of this decrypt.
	        first = bytes.fromhex(d["first_bytes"][:32])
	        loc = locate_chunk(i, first, dx)
	        print(f" dec#{i:02d}: size={size:>8}B {loc}")

	    print("\n=== All size-pair patterns in DX (s2 = s1 + 24) ===")
	    pairs = find_size_pairs(dx)
	    print(f"Found {len(pairs)} size pairs")
	    filtered = drop_overlapping_pairs(pairs)
	    print(f"After dedup: {len(filtered)} pairs")
	    for off, s1, s2 in filtered:
	        has_chk = has_checksum_before(dx, off)
	        print(f" offset={off:>5} (0x{off:04x}): sizes=({s1}, {s2}) chk_before={'YES' if has_chk else 'no'}")


	if __name__ == "__main__":
	    main()