oneocr / _archive /analysis /analyze_decrypt.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""
OneOCR .onemodel file analysis and decryption attempt.
Known facts:
- AES-256-CFB via Windows BCrypt CNG API
- SHA256 used somewhere in the process
- Key: kj)TGtrK>f]b[Piow.gU+nC@s\"\"\"\"\"\"4 (32 ASCII bytes = 256 bits; the six double quotes are escaped so they do not terminate this docstring)
- After decryption → decompression (zlib/lz4/etc.)
- Error on wrong key: meta->magic_number == MAGIC_NUMBER (0 vs. 1)
"""
import struct
import hashlib
import zlib
import os
from collections import Counter
from typing import Optional
# ── Try to import crypto libraries ──
try:
from Crypto.Cipher import AES as PyCryptoAES
HAS_PYCRYPTODOME = True
except ImportError:
HAS_PYCRYPTODOME = False
print("[WARN] PyCryptodome not available, install with: pip install pycryptodome")
try:
from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
from cryptography.hazmat.backends import default_backend
HAS_CRYPTOGRAPHY = True
except ImportError:
HAS_CRYPTOGRAPHY = False
print("[WARN] cryptography not available, install with: pip install cryptography")
# ═══════════════════════════════════════════════════════════════
# CONFIGURATION
# ═══════════════════════════════════════════════════════════════
# Location of the encrypted model file. The default is the original author's
# path; set the ONEOCR_MODEL_PATH environment variable to analyze a file that
# lives somewhere else without editing this script.
MODEL_PATH = os.environ.get(
    "ONEOCR_MODEL_PATH",
    r"c:\Users\MattyMroz\Desktop\PROJECTS\ONEOCR\ocr_data\oneocr.onemodel",
)
# The key as raw bytes (32 bytes = 256 bits for AES-256)
KEY_RAW = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
# Explicit check instead of `assert`, which is stripped under `python -O`.
if len(KEY_RAW) != 32:
    raise ValueError(f"Key must be 32 bytes, got {len(KEY_RAW)}")
# SHA256 of the key (another possible key derivation)
KEY_SHA256 = hashlib.sha256(KEY_RAW).digest()
# ═══════════════════════════════════════════════════════════════
# HELPER FUNCTIONS
# ═══════════════════════════════════════════════════════════════
def hex_dump(data: bytes, offset: int = 0, max_lines: int = 32) -> str:
    """Render *data* as a hex dump, 16 bytes per row.

    Each row shows the address (``offset`` plus the row's position in
    ``data``), the bytes in lowercase hexadecimal, and an ASCII column in
    which bytes outside the printable range 32..126 are shown as ``.``.
    At most ``max_lines`` rows are produced; rows are joined with newlines.
    """
    row_width = 16
    limit = min(len(data), max_lines * row_width)
    rows = []
    for start in range(0, limit, row_width):
        row = data[start:start + row_width]
        hex_cells = " ".join(format(byte, "02x") for byte in row)
        text_cells = "".join(
            chr(byte) if 32 <= byte < 127 else "." for byte in row
        )
        rows.append(f" {offset + start:08x}: {hex_cells:<48s} {text_cells}")
    return "\n".join(rows)
def entropy(data: bytes) -> float:
    """Return the Shannon entropy of *data* in bits per byte.

    The result ranges from 0.0 (a single repeated value) to 8.0 (all 256
    byte values equally frequent). Empty input yields 0.0.
    """
    if not data:
        return 0.0
    import math

    size = len(data)
    # Relative frequency of every byte value that actually occurs.
    probabilities = [count / size for count in Counter(data).values()]
    return -sum(p * math.log2(p) for p in probabilities)
def unique_byte_ratio(data: bytes) -> str:
    """Report how many distinct byte values occur in *data*, as ``"N/256"``."""
    distinct = len(frozenset(data))
    return f"{distinct}/256"
def check_known_headers(data: bytes) -> list[str]:
    """Check whether *data* starts with a known file/compression magic number.

    Returns a list of human-readable finding strings, one per matching
    signature, followed by extra findings when the first four bytes decode
    to the integer 1 as a little- or big-endian uint32 (the expected
    ``magic_number``). Inputs shorter than four bytes are not inspected and
    yield an empty list.
    """
    findings = []
    if len(data) < 4:
        return findings
    # Prefix -> description. Several entries may match the same input (for
    # example b"\x08" and b"\x08\x01"); every match is reported.
    magics = {
        b"\x08": "Protobuf varint field tag (field 1, wire type 0)",
        b"\x0a": "Protobuf length-delimited field tag (field 1, wire type 2)",
        # Zlib CMF/FLG pairs for a 32K window; the label follows the FLEVEL
        # bits of the FLG byte as defined by RFC 1950.
        b"\x78\x01": "Zlib (fastest/no compression)",
        b"\x78\x5e": "Zlib (fast compression)",
        b"\x78\x9c": "Zlib (default compression)",
        b"\x78\xda": "Zlib (best compression)",
        b"\x1f\x8b": "Gzip",
        b"\x04\x22\x4d\x18": "LZ4 frame",
        b"\x28\xb5\x2f\xfd": "Zstandard",
        b"\xfd\x37\x7a\x58\x5a\x00": "XZ",
        b"\x42\x5a\x68": "Bzip2",
        b"PK": "ZIP archive",
        b"\x89PNG": "PNG image",
        b"ONNX": "ONNX text",
        b"\x08\x00": "Protobuf: field 1, varint = 0",
        b"\x08\x01": "Protobuf: field 1, varint = 1 (could be magic_number=1!)",
        b"\x08\x02": "Protobuf: field 1, varint = 2",
        b"\x08\x03": "Protobuf: field 1, varint = 3",
        b"\x08\x04": "Protobuf: field 1, varint = 4",
        b"\x50\x42": "Possible PB (protobuf) marker",
        b"\x01\x00\x00\x00": "uint32 LE = 1 (possible magic_number=1)",
        b"\x00\x00\x00\x01": "uint32 BE = 1 (possible magic_number=1)",
    }
    for magic, desc in magics.items():
        if data.startswith(magic):
            findings.append(f" ★ MATCH: {desc} ({magic.hex()})")
    # Check first uint32 LE/BE
    u32_le = struct.unpack_from("<I", data, 0)[0]
    u32_be = struct.unpack_from(">I", data, 0)[0]
    if u32_le == 1:
        findings.append(" ★ uint32_LE at offset 0 = 1 (MAGIC_NUMBER match!)")
    if u32_be == 1:
        findings.append(" ★ uint32_BE at offset 0 = 1 (MAGIC_NUMBER match!)")
    return findings
def try_decompress(data: bytes, label: str = "") -> Optional[bytes]:
    """Try several decompression methods on *data* and report every success.

    Attempts zlib (standard, raw deflate and gzip framing), LZ4 frame, LZ4
    block (with a few guessed output sizes) and Zstandard. The LZ4 and
    Zstandard attempts are skipped when the respective package is not
    installed. Each success is printed together with basic statistics,
    prefixed by ``label``.

    Returns the output of the first method that succeeded, or ``None`` when
    no method could decode the input.
    """
    results = []
    # Zlib (with and without header)
    for wbits in (15, -15, 31):  # standard, raw deflate, gzip
        try:
            dec = zlib.decompress(data, wbits)
        except zlib.error:
            continue
        results.append(("zlib" + (f" wbits={wbits}" if wbits != 15 else ""), dec))

    # LZ4 frame format (optional dependency)
    try:
        import lz4.frame
    except ImportError:
        pass
    else:
        try:
            results.append(("lz4.frame", lz4.frame.decompress(data)))
        except Exception:
            # Best effort: third-party decoder errors differ between
            # versions, so any decode failure just means "not this format".
            pass

    # LZ4 block format has no size header; guess the output buffer size.
    try:
        import lz4.block
    except ImportError:
        pass
    else:
        for size in (1 << 20, 1 << 22, 1 << 24):
            try:
                dec = lz4.block.decompress(data, uncompressed_size=size)
            except Exception:
                continue
            results.append((f"lz4.block (uncompressed_size={size})", dec))
            break

    # Zstandard (optional dependency)
    try:
        import zstandard as zstd
    except ImportError:
        pass
    else:
        try:
            dctx = zstd.ZstdDecompressor()
            dec = dctx.decompress(data, max_output_size=len(data) * 10)
            results.append(("zstandard", dec))
        except Exception:
            pass

    if not results:
        return None
    for method, dec in results:
        print(f" ✓ {label} Decompression SUCCESS with {method}: {len(dec)} bytes")
        print(f" First 64 bytes: {dec[:64].hex()}")
        print(f" Entropy: {entropy(dec[:4096]):.3f}, unique: {unique_byte_ratio(dec[:4096])}")
        for h in check_known_headers(dec):
            print(f" {h}")
    return results[0][1]
def decrypt_aes_cfb(data: bytes, key: bytes, iv: bytes, segment_size: int = 8) -> Optional[bytes]:
    """Decrypt *data* with AES in CFB mode using whichever backend is available.

    PyCryptodome is preferred when ``HAS_PYCRYPTODOME`` is set; otherwise the
    ``cryptography`` package is used, which here supports only segment sizes
    8 (CFB8) and 128 (CFB). Any failure, an unsupported segment size, or the
    absence of both backends results in ``None`` rather than an exception,
    so callers can brute-force parameter combinations.
    """
    if HAS_PYCRYPTODOME:
        # When PyCryptodome is present its result is final: on failure we
        # return None and do not fall through to the other backend.
        try:
            return PyCryptoAES.new(
                key, PyCryptoAES.MODE_CFB, iv=iv, segment_size=segment_size
            ).decrypt(data)
        except Exception:
            return None

    if not HAS_CRYPTOGRAPHY:
        return None

    try:
        mode_factory = {128: modes.CFB, 8: modes.CFB8}.get(segment_size)
        if mode_factory is None:
            return None
        context = Cipher(
            algorithms.AES(key), mode_factory(iv), backend=default_backend()
        ).decryptor()
        return context.update(data) + context.finalize()
    except Exception:
        return None
def analyze_decrypted(data: bytes, label: str) -> bool:
    """Decide whether decrypted output looks like real data and report it.

    Output is considered promising when the entropy of its first 4 KiB is
    below 7.5 bits/byte, when it starts with any signature known to
    ``check_known_headers``, or when it begins with an encoding of
    ``magic_number = 1`` (uint32 LE/BE or protobuf ``08 01``). Promising
    output is printed under ``label`` and passed to ``try_decompress``, both
    as-is and with 4 to 20 leading bytes skipped.

    Returns ``True`` for promising data, ``False`` otherwise (including when
    ``data`` is ``None``).
    """
    if data is None:
        return False

    sample = data[:4096]
    ent = entropy(sample)
    unique = unique_byte_ratio(sample)
    headers = check_known_headers(data)

    magic_one = (b"\x01\x00\x00\x00", b"\x00\x00\x00\x01")  # uint32 1, LE and BE
    looks_structured = (
        ent < 7.5
        or bool(headers)
        or data[:4] in magic_one
        or data[:2] == b"\x08\x01"  # protobuf field 1, varint = 1
    )
    if not looks_structured:
        return False

    print(f" ★★★ PROMISING: {label}")
    print(f" Entropy: {ent:.3f}, Unique bytes: {unique}")
    print(f" First 128 bytes:")
    print(hex_dump(data[:128]))
    for finding in headers:
        print(f" {finding}")
    # Try decompression on promising results
    try_decompress(data, label)
    # A small fixed-size prefix may precede the compressed stream, so also
    # retry after skipping a few bytes.
    for skip in (4, 8, 12, 16, 20):
        if len(data) > skip + 10:
            try_decompress(data[skip:], f"{label} [skip {skip} bytes]")
    return True
# ═══════════════════════════════════════════════════════════════
# MAIN ANALYSIS
# ═══════════════════════════════════════════════════════════════
def _print_section(title: str) -> None:
    """Print *title* framed by two 80-column rules, preceded by a blank line."""
    print("\n" + "═" * 80)
    print(title)
    print("═" * 80)


def _attempt_cfb(results: list, data: bytes, key: bytes, iv: bytes,
                 segment_size: int, label: str,
                 summary_label: Optional[str] = None) -> None:
    """Run one AES-CFB attempt and record it in *results* when it looks promising.

    ``label`` is shown in the analysis output; ``summary_label`` (defaulting
    to ``label``) is the text appended to ``results`` for the final summary.
    """
    decrypted = decrypt_aes_cfb(data, key, iv, segment_size)
    if decrypted and analyze_decrypted(decrypted, label):
        results.append(label if summary_label is None else summary_label)


def _attempt_mode(results: list, mode: int, key: bytes, iv: Optional[bytes],
                  data: bytes, label: str) -> None:
    """Run one PyCryptodome decryption in *mode* and record promising output.

    Pass ``iv=None`` for modes that take no IV (ECB). Parameter errors such
    as a short IV or ciphertext that is not block-aligned are expected while
    brute forcing and simply skip the attempt.
    """
    try:
        if iv is None:
            cipher = PyCryptoAES.new(key, mode)
        else:
            cipher = PyCryptoAES.new(key, mode, iv=iv)
        decrypted = cipher.decrypt(data)
    except (ValueError, TypeError):
        return
    if decrypted and analyze_decrypted(decrypted, label):
        results.append(label)


def main():
    """Analyze the .onemodel file at ``MODEL_PATH`` and print a report.

    The report covers the file layout, the key and its SHA-256, and a series
    of brute-force decryption attempts over combinations of key, IV source,
    CFB segment size and data region, followed by alternative layout
    hypotheses and alternative cipher modes (OFB, CBC, ECB; PyCryptodome
    only). Every attempt whose output looks promising is listed in the final
    summary. Returns ``None``; returns early with an error message when the
    file is too small for the layout that is being parsed.
    """
    print("=" * 80)
    print("OneOCR .onemodel File Analysis & Decryption Attempt")
    print("=" * 80)

    # ── Step 1: Read file ──
    with open(MODEL_PATH, "rb") as f:
        full_data = f.read()
    filesize = len(full_data)
    print(f"\nFile size: {filesize:,} bytes ({filesize/1024/1024:.2f} MB)")

    # ── Step 2: Parse top-level structure ──
    _print_section("SECTION 1: FILE STRUCTURE ANALYSIS")
    if filesize < 8:
        print("\n [ERROR] File is too small to contain the 8-byte header; aborting.")
        return
    header_offset = struct.unpack_from("<I", full_data, 0)[0]
    field_at_4 = struct.unpack_from("<I", full_data, 4)[0]
    print(f"\n [0-3] uint32_LE (header_offset/size): {header_offset} (0x{header_offset:08x})")
    print(f" [4-7] uint32_LE: {field_at_4} (0x{field_at_4:08x})")
    # Check if it's a uint64
    u64_at_0 = struct.unpack_from("<Q", full_data, 0)[0]
    print(f" [0-7] uint64_LE: {u64_at_0} (0x{u64_at_0:016x})")
    # The offset comes from the file itself, so make sure the 16-byte
    # metadata record it points at really lies inside the file.
    if header_offset + 16 > filesize:
        print(f"\n [ERROR] Metadata offset {header_offset} + 16 exceeds file size {filesize}; aborting.")
        return

    # Analyze the metadata record the header points at
    print(f"\n At offset {header_offset} (0x{header_offset:04x}):")
    meta_magic_8 = full_data[header_offset:header_offset+8]
    meta_size = struct.unpack_from("<Q", full_data, header_offset + 8)[0]
    print(f" [+0..+7] 8 bytes: {meta_magic_8.hex()}")
    print(f" [+8..+15] uint64_LE: {meta_size:,} (0x{meta_size:016x})")
    encrypted_start = header_offset + 16
    encrypted_size = meta_size
    print(f" Encrypted payload: offset {encrypted_start} ({encrypted_start:#x}), size {encrypted_size:,}")
    print(f" Check: {encrypted_start} + {encrypted_size} = {encrypted_start + encrypted_size} "
          f"vs filesize {filesize} {'MATCH ✓' if encrypted_start + encrypted_size == filesize else 'MISMATCH ✗'}")

    # ── Step 3: Analyze header region ──
    print(f"\n Header region [8 .. {header_offset-1}]: {header_offset - 8} bytes")
    header_data = full_data[8:header_offset]
    print(f" Entropy: {entropy(header_data[:4096]):.3f}")
    print(f" Unique bytes (first 4KB): {unique_byte_ratio(header_data[:4096])}")
    print(f" Null bytes: {header_data.count(0)}/{len(header_data)}")

    # ── Step 4: Analyze encrypted payload region ──
    print(f"\n Encrypted payload [{encrypted_start} .. {filesize-1}]: {encrypted_size:,} bytes")
    payload_sample = full_data[encrypted_start:encrypted_start+4096]
    print(f" Entropy (first 4KB): {entropy(payload_sample):.3f}")
    print(f" Unique bytes (first 4KB): {unique_byte_ratio(payload_sample)}")

    # ── Step 5: Look for structure in metadata ──
    print(f"\n Detailed metadata dump at offset {header_offset}:")
    print(hex_dump(full_data[header_offset:header_offset+128], offset=header_offset))
    # Parse more fields from the metadata region
    print(f"\n Parsing fields after metadata header:")
    meta_region = full_data[header_offset:header_offset + 256]
    # Stop early when fewer than 128 bytes follow the offset so that every
    # 4-byte read stays inside the region.
    for i in range(0, min(128, len(meta_region) - 3), 4):
        u32 = struct.unpack_from("<I", meta_region, i)[0]
        if 0 < u32 < filesize:
            print(f" +{i:3d}: u32={u32:12,d} (0x{u32:08x})"
                  f" {'← could be offset/size' if 100 < u32 < filesize else ''}")

    # ── Step 6: Hash analysis of key ──
    _print_section("SECTION 2: KEY ANALYSIS")
    print(f"\n Raw key ({len(KEY_RAW)} bytes): {KEY_RAW}")
    print(f" Raw key hex: {KEY_RAW.hex()}")
    print(f" SHA256 of key: {KEY_SHA256.hex()}")
    # Check if SHA256 of key appears in the file header
    if KEY_SHA256 in full_data[:header_offset + 256]:
        idx = full_data.index(KEY_SHA256)
        print(f" ★ SHA256 of key FOUND in file at offset {idx}!")
    else:
        print(f" SHA256 of key not found in first {header_offset + 256} bytes")
    # Check if the 8-byte magic at the metadata offset could be related to the key hash
    key_sha256_first8 = KEY_SHA256[:8]
    print(f" First 8 bytes of SHA256(key): {key_sha256_first8.hex()}")
    print(f" 8 bytes at offset {header_offset}: {meta_magic_8.hex()}")
    print(f" Match: {'YES ★' if key_sha256_first8 == meta_magic_8 else 'NO'}")

    # ── Step 7: Decryption attempts ──
    _print_section("SECTION 3: DECRYPTION ATTEMPTS")
    # Prepare IV candidates
    iv_zero = b"\x00" * 16
    iv_candidates = {
        "all-zeros": iv_zero,
        "file[8:24]": full_data[8:24],
        "file[4:20]": full_data[4:20],
        "file[0:16]": full_data[0:16],
        f"file[{header_offset}:{header_offset+16}]": full_data[header_offset:header_offset + 16],
        # pad the 8-byte magic to 16
        "meta_magic+padding": meta_magic_8 + b"\x00" * 8,
        # SHA256 of key, take first 16 bytes as IV
        "SHA256(key)[:16]": KEY_SHA256[:16],
    }
    # Key candidates
    key_candidates = {
        "RAW key (32 bytes)": KEY_RAW,
        "SHA256(RAW key)": KEY_SHA256,
    }
    # Data regions to try decrypting: the header data, the start of the
    # encrypted payload, and everything from byte 8 treated as one blob.
    regions = {
        f"header[8:{header_offset}]": full_data[8:min(8 + 4096, header_offset)],
        f"payload[{encrypted_start}:]": full_data[encrypted_start:encrypted_start + 4096],
        "all_encrypted[8:]": full_data[8:8 + 4096],
    }
    # Segment sizes: Windows BCrypt CFB defaults to 8-bit (CFB8), also try 128-bit (CFB128)
    segment_sizes = [8, 128]
    total_attempts = 0
    promising_results = []
    for key_name, key in key_candidates.items():
        for iv_name, iv in iv_candidates.items():
            for seg_size in segment_sizes:
                for region_name, region_data in regions.items():
                    total_attempts += 1
                    label = f"key={key_name}, iv={iv_name}, CFB{seg_size}, region={region_name}"
                    _attempt_cfb(promising_results, region_data, key, iv, seg_size, label)
    print(f"\n Total attempts: {total_attempts}")
    print(f" Promising results: {len(promising_results)}")

    # ── Step 8: Additional IV strategies ──
    _print_section("SECTION 4: ADVANCED IV STRATEGIES")
    # Strategy: the IV might be stored somewhere near the start of the file,
    # so try every 4-byte aligned position in the first 256 bytes.
    print("\n Trying every 4-byte aligned offset in first 256 bytes as IV...")
    payload_head = full_data[encrypted_start:encrypted_start + 4096]
    header_head = full_data[8:8 + 4096]
    for iv_offset in range(0, 256, 4):
        iv_cand = full_data[iv_offset:iv_offset + 16]
        if len(iv_cand) < 16:
            continue
        for key in [KEY_RAW, KEY_SHA256]:
            key_label = "raw" if key == KEY_RAW else "sha256"
            for seg in [8, 128]:
                # Try decrypting the payload
                _attempt_cfb(
                    promising_results, payload_head, key, iv_cand, seg,
                    f"iv_offset={iv_offset}, key={key_label}, CFB{seg}, payload",
                    summary_label=f"Advanced: iv_offset={iv_offset}",
                )
                # Try decrypting from byte 8 (header encrypted area)
                _attempt_cfb(
                    promising_results, header_head, key, iv_cand, seg,
                    f"iv_offset={iv_offset}, key={key_label}, CFB{seg}, header[8:]",
                    summary_label=f"Advanced: iv_offset={iv_offset} header",
                )

    # ── Step 9: Try with IV = SHA256 of various things ──
    _print_section("SECTION 5: DERIVED IV STRATEGIES")
    derived_ivs = {
        "SHA256(key)[:16]": KEY_SHA256[:16],
        "SHA256(key)[16:]": KEY_SHA256[16:],
        "SHA256('')[:16]": hashlib.sha256(b"").digest()[:16],
        "MD5(key)": hashlib.md5(KEY_RAW).digest(),
        "SHA256(file[0:8])[:16]": hashlib.sha256(full_data[0:8]).digest()[:16],
        "SHA256(file[0:4])[:16]": hashlib.sha256(full_data[0:4]).digest()[:16],
        "SHA256('oneocr')[:16]": hashlib.sha256(b"oneocr").digest()[:16],
        "SHA256('oneocr.onemodel')[:16]": hashlib.sha256(b"oneocr.onemodel").digest()[:16],
    }
    for iv_name, iv in derived_ivs.items():
        for key_name, key in key_candidates.items():
            for seg in [8, 128]:
                for region_name, region_data in regions.items():
                    label = f"key={key_name}, iv={iv_name}, CFB{seg}, region={region_name}"
                    _attempt_cfb(promising_results, region_data, key, iv, seg, label)

    # ── Step 10: What if the structure is different? ──
    _print_section("SECTION 6: ALTERNATIVE STRUCTURE HYPOTHESES")
    # Hypothesis A: Bytes 0-3 = offset, 4-7 = 0, 8-23 = IV, 24+ = encrypted data
    print("\n Hypothesis A: [0-3]=offset, [4-7]=flags, [8-23]=IV, [24+]=encrypted")
    iv_hyp_a = full_data[8:24]
    encrypted_hyp_a = full_data[24:24 + 4096]
    for key_name, key in key_candidates.items():
        for seg in [8, 128]:
            _attempt_cfb(promising_results, encrypted_hyp_a, key, iv_hyp_a, seg,
                         f"HypA: key={key_name}, CFB{seg}")
    # Hypothesis B: the 16-byte metadata record doubles as the payload IV
    print(f"\n Hypothesis B: [0-7]=header, [{header_offset}-{header_offset + 15}]=16-byte meta, "
          f"payload starts at {encrypted_start}")
    print(f" If meta[{header_offset}:{encrypted_start}] contains IV for payload:")
    iv_hyp_b = full_data[header_offset:header_offset + 16]
    for key_name, key in key_candidates.items():
        for seg in [8, 128]:
            _attempt_cfb(promising_results, payload_head, key, iv_hyp_b, seg,
                         f"HypB: key={key_name}, CFB{seg}, payload with meta-IV")
    # Hypothesis C: The entire file from byte 8 to end is one encrypted stream (IV = zeros)
    print("\n Hypothesis C: Single encrypted stream from byte 8, IV=zeros")
    for key_name, key in key_candidates.items():
        for seg in [8, 128]:
            _attempt_cfb(promising_results, header_head, key, iv_zero, seg,
                         f"HypC: key={key_name}, CFB{seg}")
    # Hypothesis D: Encrypted data starts right at byte 0 (the header_size
    # field IS part of the encrypted data, i.e. its value is coincidental)
    print("\n Hypothesis D: Encrypted from byte 0, IV=zeros")
    for key_name, key in key_candidates.items():
        for seg in [8, 128]:
            _attempt_cfb(promising_results, full_data[:4096], key, iv_zero, seg,
                         f"HypD: key={key_name}, CFB{seg}, from byte 0")
    # Hypothesis E: Windows CNG might prepend the IV to the ciphertext
    print("\n Hypothesis E: IV prepended to ciphertext at various offsets")
    for data_start in [0, 4, 8]:
        iv_e = full_data[data_start:data_start + 16]
        ct_e = full_data[data_start + 16:data_start + 16 + 4096]
        for key_name, key in key_candidates.items():
            for seg in [8, 128]:
                _attempt_cfb(promising_results, ct_e, key, iv_e, seg,
                             f"HypE: data_start={data_start}, key={key_name}, CFB{seg}")

    # ── Step 11: Try OFB, CBC and ECB too (just in case CFB was misidentified) ──
    _print_section("SECTION 7: ALTERNATIVE CIPHER MODES (OFB, CBC, ECB)")
    if HAS_PYCRYPTODOME:
        for data_start in [8, 24, encrypted_start]:
            test_data = full_data[data_start:data_start + 4096]
            for iv_offset in [0, 4, 8]:
                iv_alt = full_data[iv_offset:iv_offset + 16]
                for key in [KEY_RAW, KEY_SHA256]:
                    key_label = "raw" if key == KEY_RAW else "sha256"
                    _attempt_mode(promising_results, PyCryptoAES.MODE_OFB, key, iv_alt, test_data,
                                  f"OFB: data@{data_start}, iv@{iv_offset}, key={key_label}")
                    # CBC needs block-aligned input; unaligned data is skipped
                    _attempt_mode(promising_results, PyCryptoAES.MODE_CBC, key, iv_alt, test_data,
                                  f"CBC: data@{data_start}, iv@{iv_offset}, key={key_label}")
            # ECB takes no IV, so it is tried once per key instead of once
            # per IV offset. It needs data aligned to 16 bytes.
            aligned = test_data[:len(test_data) - (len(test_data) % 16)]
            for key in [KEY_RAW, KEY_SHA256]:
                key_label = "raw" if key == KEY_RAW else "sha256"
                _attempt_mode(promising_results, PyCryptoAES.MODE_ECB, key, None, aligned,
                              f"ECB: data@{data_start}, key={key_label}")

    # ── Step 12: Summary ──
    _print_section("SUMMARY")
    print(f"\n File structure (confirmed):")
    print(f" [0x0000 - 0x0007] 8-byte header: offset = {header_offset}")
    print(f" [0x0008 - 0x{header_offset-1:04x}] Encrypted header data ({header_offset - 8} bytes)")
    print(f" [0x{header_offset:04x} - 0x{header_offset+7:04x}] 8-byte magic/hash: {meta_magic_8.hex()}")
    print(f" [0x{header_offset+8:04x} - 0x{header_offset+15:04x}] uint64 payload size: {meta_size:,}")
    print(f" [0x{encrypted_start:04x} - 0x{filesize-1:07x}] Encrypted payload ({encrypted_size:,} bytes)")
    print(f"\n Key info:")
    print(f" Raw key: {KEY_RAW}")
    print(f" Raw key hex: {KEY_RAW.hex()}")
    print(f" SHA256(key): {KEY_SHA256.hex()}")
    print(f"\n Total promising decryption results: {len(promising_results)}")
    for r in promising_results:
        print(f" ★ {r}")
    if not promising_results:
        print("\n No successful decryption found with standard approaches.")
        print(" Possible reasons:")
        print(" 1. The key might be processed differently (PBKDF2, HKDF, etc.)")
        print(" 2. The IV might be derived in a non-standard way")
        print(" 3. The file structure might be more complex")
        print(" 4. The CBC/CFB segment size might be non-standard")
        print(" 5. There might be additional authentication (AEAD)")
        print(" 6. The BCrypt CNG API might use specific key blob format")
        print(" 7. Think about BCRYPT_KEY_DATA_BLOB_HEADER structure")


if __name__ == "__main__":
    main()