"""OneOCR .onemodel file analysis and decryption attempt.

Known facts:
  - AES-256-CFB via Windows BCrypt CNG API
  - SHA256 used somewhere in the process
  - Key: kj)TGtrK>f]b[Piow.gU+nC@s\"\"\"\"\"\"4 (32 ASCII bytes = 256 bits)
  - After decryption → decompression (zlib/lz4/etc.)
  - Error on wrong key: meta->magic_number == MAGIC_NUMBER (0 vs. 1)
"""
# NOTE: the six double quotes inside the key are escaped above on purpose.
# Unescaped, they would close and re-open the triple-quoted docstring and the
# quotes would silently vanish from the documented key.

import hashlib
import math
import os
import struct
import zlib
from collections import Counter
from typing import Optional

# ── Try to import crypto libraries ──
try:
    from Crypto.Cipher import AES as PyCryptoAES
    HAS_PYCRYPTODOME = True
except ImportError:
    HAS_PYCRYPTODOME = False
    print("[WARN] PyCryptodome not available, install with: pip install pycryptodome")

try:
    from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
    from cryptography.hazmat.backends import default_backend
    HAS_CRYPTOGRAPHY = True
except ImportError:
    HAS_CRYPTOGRAPHY = False
    print("[WARN] cryptography not available, install with: pip install cryptography")

# ═══════════════════════════════════════════════════════════════
# CONFIGURATION
# ═══════════════════════════════════════════════════════════════

MODEL_PATH = r"c:\Users\MattyMroz\Desktop\PROJECTS\ONEOCR\ocr_data\oneocr.onemodel"

# The key as raw bytes (32 bytes = 256 bits for AES-256)
KEY_RAW = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
assert len(KEY_RAW) == 32, f"Key must be 32 bytes, got {len(KEY_RAW)}"

# SHA256 of the key (another possible key derivation)
KEY_SHA256 = hashlib.sha256(KEY_RAW).digest()

# Byte prefixes recognised by check_known_headers(), mapped to a description.
# Several entries overlap on purpose (e.g. b"\x08" and b"\x08\x01"); every
# matching prefix is reported, in this order.
_KNOWN_MAGICS = {
    b"\x08": "Protobuf varint field tag (field 1, wire type 0)",
    b"\x0a": "Protobuf length-delimited field tag (field 1, wire type 2)",
    b"\x78\x01": "Zlib (low compression)",
    b"\x78\x5e": "Zlib (default compression)",
    b"\x78\x9c": "Zlib (best speed/default)",
    b"\x78\xda": "Zlib (best compression)",
    b"\x1f\x8b": "Gzip",
    b"\x04\x22\x4d\x18": "LZ4 frame",
    b"\x28\xb5\x2f\xfd": "Zstandard",
    b"\xfd\x37\x7a\x58\x5a\x00": "XZ",
    b"\x42\x5a\x68": "Bzip2",
    b"PK": "ZIP archive",
    b"\x89PNG": "PNG image",
    b"ONNX": "ONNX text",
    b"\x08\x00": "Protobuf: field 1, varint, value will follow",
    b"\x08\x01": "Protobuf: field 1, varint = 1 (could be magic_number=1!)",
    b"\x08\x02": "Protobuf: field 1, varint = 2",
    b"\x08\x03": "Protobuf: field 1, varint = 3",
    b"\x08\x04": "Protobuf: field 1, varint = 4",
    b"\x50\x42": "Possible PB (protobuf) marker",
    b"\x01\x00\x00\x00": "uint32 LE = 1 (possible magic_number=1)",
    b"\x00\x00\x00\x01": "uint32 BE = 1 (possible magic_number=1)",
}


# ═══════════════════════════════════════════════════════════════
# HELPER FUNCTIONS
# ═══════════════════════════════════════════════════════════════

def hex_dump(data: bytes, offset: int = 0, max_lines: int = 32) -> str:
    """Format bytes as hex dump with ASCII column.

    Args:
        data: Bytes to render.
        offset: Value added to each row's displayed address.
        max_lines: Maximum number of 16-byte rows to emit.

    Returns:
        The rows joined with newlines; an empty string for empty input.
    """
    lines = []
    for i in range(0, min(len(data), max_lines * 16), 16):
        row = data[i:i + 16]
        hex_part = " ".join(f"{b:02x}" for b in row)
        # Non-printable bytes are shown as "." in the ASCII column.
        ascii_part = "".join(chr(b) if 32 <= b < 127 else "." for b in row)
        lines.append(f" {offset+i:08x}: {hex_part:<48s} {ascii_part}")
    return "\n".join(lines)


def entropy(data: bytes) -> float:
    """Calculate Shannon entropy (0-8 bits per byte).

    Returns 0.0 for empty input.
    """
    if not data:
        return 0.0
    total = len(data)
    return -sum((c / total) * math.log2(c / total) for c in Counter(data).values())


def unique_byte_ratio(data: bytes) -> str:
    """Return the number of distinct byte values in *data* as ``"N/256"``."""
    return f"{len(set(data))}/256"


def check_known_headers(data: bytes) -> list[str]:
    """Check if data starts with known file/compression magic numbers.

    Args:
        data: Candidate plaintext.

    Returns:
        One formatted line per matching magic prefix, followed by a line for
        each of the little-/big-endian uint32 readings at offset 0 that
        equals 1. Inputs shorter than 4 bytes yield an empty list.
    """
    findings = []
    if len(data) < 4:
        return findings

    # Magic number checks
    for magic, desc in _KNOWN_MAGICS.items():
        if data.startswith(magic):
            findings.append(f" ★ MATCH: {desc} ({magic.hex()})")

    # Check first uint32 LE/BE. Byte order is spelled out explicitly: a bare
    # "I" format would use the host's native order, and the big-endian value
    # was previously read without ever being assigned (NameError).
    u32_le = struct.unpack_from("<I", data, 0)[0]
    u32_be = struct.unpack_from(">I", data, 0)[0]
    if u32_le == 1:
        findings.append(" ★ uint32_LE at offset 0 = 1 (MAGIC_NUMBER match!)")
    if u32_be == 1:
        findings.append(" ★ uint32_BE at offset 0 = 1 (MAGIC_NUMBER match!)")
    return findings

def try_decompress(data: bytes, label: str = "") -> Optional[bytes]:
    """Try various decompression methods.

    Probes zlib (standard, raw deflate, gzip) and, when the optional modules
    are importable, LZ4 frame/block and Zstandard. Every success is printed
    together with entropy and header findings.

    Args:
        data: Candidate compressed bytes.
        label: Prefix used in the printed success lines.

    Returns:
        The output of the first method that succeeded, or None if none did.
    """
    results = []

    # Zlib (with and without header)
    for wbits in [15, -15, 31]:  # standard, raw deflate, gzip
        try:
            dec = zlib.decompress(data, wbits)
        except zlib.error:
            continue
        results.append(("zlib" + (f" wbits={wbits}" if wbits != 15 else ""), dec))

    # The third-party codecs below are optional and raise library-specific
    # errors on garbage input. Failures are expected and ignored, but only
    # Exception is caught so Ctrl-C still interrupts a long run.

    # LZ4
    try:
        import lz4.frame
        results.append(("lz4.frame", lz4.frame.decompress(data)))
    except Exception:
        pass

    try:
        import lz4.block
    except Exception:
        pass
    else:
        for size in [1 << 20, 1 << 22, 1 << 24]:
            try:
                dec = lz4.block.decompress(data, uncompressed_size=size)
            except Exception:
                continue
            results.append((f"lz4.block (uncompressed_size={size})", dec))
            break

    # Zstandard
    try:
        import zstandard as zstd
        dctx = zstd.ZstdDecompressor()
        dec = dctx.decompress(data, max_output_size=len(data) * 10)
        results.append(("zstandard", dec))
    except Exception:
        pass

    if not results:
        return None

    for method, dec in results:
        print(f" ✓ {label} Decompression SUCCESS with {method}: {len(dec)} bytes")
        print(f" First 64 bytes: {dec[:64].hex()}")
        print(f" Entropy: {entropy(dec[:4096]):.3f}, unique: {unique_byte_ratio(dec[:4096])}")
        for h in check_known_headers(dec):
            print(f" {h}")
    return results[0][1]


def decrypt_aes_cfb(data: bytes, key: bytes, iv: bytes, segment_size: int = 8) -> Optional[bytes]:
    """Decrypt using AES-CFB with given parameters.

    PyCryptodome is used when available; otherwise the ``cryptography``
    package is used, which here supports only 8- and 128-bit segments.

    Args:
        data: Ciphertext.
        key: AES key.
        iv: Initialisation vector.
        segment_size: CFB segment size in bits.

    Returns:
        The decrypted bytes, or None when no backend is available, the
        segment size is unsupported, or the backend rejects the parameters.
    """
    if HAS_PYCRYPTODOME:
        try:
            cipher = PyCryptoAES.new(key, PyCryptoAES.MODE_CFB, iv=iv, segment_size=segment_size)
            return cipher.decrypt(data)
        except (ValueError, TypeError):
            # Invalid key/IV length or type: this combination is not usable.
            return None

    if HAS_CRYPTOGRAPHY:
        try:
            if segment_size == 128:
                mode = modes.CFB(iv)
            elif segment_size == 8:
                mode = modes.CFB8(iv)
            else:
                return None
            decryptor = Cipher(algorithms.AES(key), mode, backend=default_backend()).decryptor()
            return decryptor.update(data) + decryptor.finalize()
        except (ValueError, TypeError):
            return None

    return None


def analyze_decrypted(data: bytes, label: str) -> bool:
    """Analyze decrypted data and return True if it looks promising.

    Data counts as promising when the entropy of its first 4096 bytes is
    below 7.5, when check_known_headers() reports anything, or when it starts
    with one of the magic_number=1 encodings. Promising data is printed and
    fed to try_decompress(), both as-is and after skipping 4-20 bytes.

    Args:
        data: Decrypted bytes; None is accepted and yields False.
        label: Description of the attempt, used in the printed output.

    Returns:
        True if the data was judged promising, False otherwise.
    """
    if data is None:
        return False

    ent = entropy(data[:4096])
    unique = unique_byte_ratio(data[:4096])
    headers = check_known_headers(data)

    is_promising = (
        ent < 7.5  # reduced entropy
        or len(headers) > 0  # known header match
        or data[:4] == b"\x01\x00\x00\x00"  # magic_number = 1 LE
        or data[:4] == b"\x00\x00\x00\x01"  # magic_number = 1 BE
        or data[:2] == b"\x08\x01"  # protobuf magic_number = 1
    )
    if not is_promising:
        return False

    print(f" ★★★ PROMISING: {label}")
    print(f" Entropy: {ent:.3f}, Unique bytes: {unique}")
    print(" First 128 bytes:")
    print(hex_dump(data[:128]))
    for h in headers:
        print(f" {h}")

    # Try decompression on promising results
    try_decompress(data, label)

    # If starts with protobuf-like data or magic=1, also try decompressing
    # after skipping some bytes
    for skip in [4, 8, 12, 16, 20]:
        if len(data) > skip + 10:
            try_decompress(data[skip:], f"{label} [skip {skip} bytes]")
    return True


def _parse_layout(full_data: bytes) -> tuple[int, bytes, int, int, int]:
    """Split the file into the regions the rest of the analysis refers to.

    NOTE(review): the original parsing statements were lost from the source
    text. This reconstruction follows the layout printed in the SUMMARY
    section and assumed by Hypotheses A/B (bytes 0-3 = offset; 8-byte magic
    and uint64 payload size at that offset; payload 16 bytes later) — confirm
    against the original script.

    Returns:
        ``(header_offset, meta_magic_8, meta_size, encrypted_start,
        encrypted_size)``.

    Raises:
        ValueError: If the file is too short for this layout.
    """
    filesize = len(full_data)
    if filesize < 8:
        raise ValueError(f"File too small to contain the 8-byte header: {filesize} bytes")
    header_offset = struct.unpack_from("<I", full_data, 0)[0]
    encrypted_start = header_offset + 16
    if encrypted_start > filesize:
        raise ValueError(
            f"Header offset {header_offset} points past the end of the file ({filesize} bytes)"
        )
    meta_magic_8 = full_data[header_offset:header_offset + 8]
    meta_size = struct.unpack_from("<Q", full_data, header_offset + 8)[0]
    encrypted_size = filesize - encrypted_start
    return header_offset, meta_magic_8, meta_size, encrypted_start, encrypted_size


def _attempt(data: bytes, key: bytes, iv: bytes, seg: int, label: str) -> bool:
    """Run one AES-CFB attempt; True if it produced promising, non-empty output."""
    decrypted = decrypt_aes_cfb(data, key, iv, seg)
    return bool(decrypted) and analyze_decrypted(decrypted, label)


# ═══════════════════════════════════════════════════════════════
# MAIN ANALYSIS
# ═══════════════════════════════════════════════════════════════

def main():
    """Read MODEL_PATH and run every structure/key/IV/mode hypothesis.

    All results are printed; nothing is written to disk. Raises OSError if
    the model file cannot be read and ValueError if it is too short for the
    assumed layout.
    """
    print("=" * 80)
    print("OneOCR .onemodel File Analysis & Decryption Attempt")
    print("=" * 80)

    # ── Step 1: Read file ──
    with open(MODEL_PATH, "rb") as f:
        full_data = f.read()
    filesize = len(full_data)
    print(f"\nFile size: {filesize:,} bytes ({filesize/1024/1024:.2f} MB)")

    # ── Step 2: Parse top-level structure ──
    print("\n" + "═" * 80)
    print("SECTION 1: FILE STRUCTURE ANALYSIS")
    print("═" * 80)

    header_offset, meta_magic_8, meta_size, encrypted_start, encrypted_size = _parse_layout(full_data)

    # NOTE(review): Steps 3-5 of the original were lost together with the
    # header of this scan loop. Scanning the first 64 bytes of the file is an
    # assumption; only the filter and the print line below are original.
    for i in range(0, max(0, min(64, filesize - 3)), 4):
        u32 = struct.unpack_from("<I", full_data, i)[0]
        if 0 < u32 < filesize:
            print(f" +{i:3d}: u32={u32:12,d} (0x{u32:08x})"
                  f" {'← could be offset/size' if 100 < u32 < filesize else ''}")

    # ── Step 6: Hash analysis of key ──
    print("\n" + "═" * 80)
    print("SECTION 2: KEY ANALYSIS")
    print("═" * 80)

    print(f"\n Raw key ({len(KEY_RAW)} bytes): {KEY_RAW}")
    print(f" Raw key hex: {KEY_RAW.hex()}")
    print(f" SHA256 of key: {KEY_SHA256.hex()}")

    # Check if SHA256 of key appears in the file header
    idx = full_data.find(KEY_SHA256, 0, header_offset + 256)
    if idx != -1:
        print(f" ★ SHA256 of key FOUND in file at offset {idx}!")
    else:
        print(f" SHA256 of key not found in first {header_offset + 256} bytes")

    # Check if the 8-byte magic at the header offset could be related to key hash
    key_sha256_first8 = KEY_SHA256[:8]
    print(f" First 8 bytes of SHA256(key): {key_sha256_first8.hex()}")
    print(f" 8 bytes at offset {header_offset}: {meta_magic_8.hex()}")
    print(f" Match: {'YES ★' if key_sha256_first8 == meta_magic_8 else 'NO'}")

    # ── Step 7: Decryption attempts ──
    print("\n" + "═" * 80)
    print("SECTION 3: DECRYPTION ATTEMPTS")
    print("═" * 80)

    # Prepare IV candidates
    iv_zero = b"\x00" * 16
    iv_candidates = {
        "all-zeros": iv_zero,
        "file[8:24]": full_data[8:24],
        "file[4:20]": full_data[4:20],
        "file[0:16]": full_data[0:16],
        f"file[{header_offset}:{header_offset+16}]": full_data[header_offset:header_offset + 16],
        # pad the 8-byte magic to 16
        "meta_magic+padding": meta_magic_8 + b"\x00" * 8,
        # SHA256 of key, take first 16 bytes as IV
        "SHA256(key)[:16]": KEY_SHA256[:16],
    }

    # Key candidates
    key_candidates = {
        "RAW key (32 bytes)": KEY_RAW,
        "SHA256(RAW key)": KEY_SHA256,
    }
    # Short labels for the same two keys, used in Sections 4 and 7.
    short_keys = (("raw", KEY_RAW), ("sha256", KEY_SHA256))

    # Only the first 4 KiB of each region is decrypted; that is enough for
    # the entropy and header heuristics.
    payload_head = full_data[encrypted_start:encrypted_start + 4096]
    from_byte_8 = full_data[8:8 + 4096]

    # Data regions to try decrypting
    # We try both the header data and the start of the encrypted payload
    regions = {
        "header[8:22636]": full_data[8:min(8 + 4096, header_offset)],
        f"payload[{encrypted_start}:]": payload_head,
        # Also try: what if the entire region from byte 8 to end is one
        # encrypted blob?
        "all_encrypted[8:]": from_byte_8,
    }

    # Segment sizes: Windows BCrypt CFB defaults to 8-bit (CFB8), also try 128-bit (CFB128)
    segment_sizes = [8, 128]

    total_attempts = 0
    promising_results = []

    for key_name, key in key_candidates.items():
        for iv_name, iv in iv_candidates.items():
            for seg_size in segment_sizes:
                for region_name, region_data in regions.items():
                    total_attempts += 1
                    label = f"key={key_name}, iv={iv_name}, CFB{seg_size}, region={region_name}"
                    if _attempt(region_data, key, iv, seg_size, label):
                        promising_results.append(label)

    print(f"\n Total attempts: {total_attempts}")
    print(f" Promising results: {len(promising_results)}")

    # ── Step 8: Additional IV strategies ──
    print("\n" + "═" * 80)
    print("SECTION 4: ADVANCED IV STRATEGIES")
    print("═" * 80)

    # Strategy: IV might be derived from the file content.
    # The banner below says "16-byte aligned", but the loop steps by 4 bytes.
    print("\n Trying every 16-byte aligned offset in first 256 bytes as IV...")
    for iv_offset in range(0, 256, 4):  # try every 4-byte step
        iv_cand = full_data[iv_offset:iv_offset + 16]
        if len(iv_cand) < 16:
            continue
        for key_label, key in short_keys:
            for seg in [8, 128]:
                # Try decrypting the payload
                if _attempt(payload_head, key, iv_cand, seg,
                            f"iv_offset={iv_offset}, key={key_label}, CFB{seg}, payload"):
                    promising_results.append(f"Advanced: iv_offset={iv_offset}")

                # Try decrypting from byte 8 (header encrypted area)
                if _attempt(from_byte_8, key, iv_cand, seg,
                            f"iv_offset={iv_offset}, key={key_label}, CFB{seg}, header[8:]"):
                    promising_results.append(f"Advanced: iv_offset={iv_offset} header")

    # ── Step 9: Try with IV = SHA256 of various things ──
    print("\n" + "═" * 80)
    print("SECTION 5: DERIVED IV STRATEGIES")
    print("═" * 80)

    derived_ivs = {
        "SHA256(key)[:16]": hashlib.sha256(KEY_RAW).digest()[:16],
        "SHA256(key)[16:]": hashlib.sha256(KEY_RAW).digest()[16:],
        "SHA256('')[:16]": hashlib.sha256(b"").digest()[:16],
        "MD5(key)": hashlib.md5(KEY_RAW).digest(),
        "SHA256(file[0:8])[:16]": hashlib.sha256(full_data[0:8]).digest()[:16],
        "SHA256(file[0:4])[:16]": hashlib.sha256(full_data[0:4]).digest()[:16],
        "SHA256('oneocr')[:16]": hashlib.sha256(b"oneocr").digest()[:16],
        "SHA256('oneocr.onemodel')[:16]": hashlib.sha256(b"oneocr.onemodel").digest()[:16],
    }

    for iv_name, iv in derived_ivs.items():
        for key_name, key in key_candidates.items():
            for seg in [8, 128]:
                for region_name, region_data in regions.items():
                    label = f"key={key_name}, iv={iv_name}, CFB{seg}, region={region_name}"
                    if _attempt(region_data, key, iv, seg, label):
                        promising_results.append(label)

    # ── Step 10: What if the structure is different? ──
    # Results of the hypotheses below are printed by analyze_decrypted() but
    # not added to promising_results.
    print("\n" + "═" * 80)
    print("SECTION 6: ALTERNATIVE STRUCTURE HYPOTHESES")
    print("═" * 80)

    # Hypothesis A: Bytes 0-3 = offset, 4-7 = 0, 8-23 = IV, 24+ = encrypted data
    print("\n Hypothesis A: [0-3]=offset, [4-7]=flags, [8-23]=IV, [24+]=encrypted")
    iv_hyp_a = full_data[8:24]
    encrypted_hyp_a = full_data[24:24 + 4096]
    for key_name, key in key_candidates.items():
        for seg in [8, 128]:
            _attempt(encrypted_hyp_a, key, iv_hyp_a, seg, f"HypA: key={key_name}, CFB{seg}")

    # Hypothesis B: [0-7]=header, [8-23]=IV, [24-22635]=encrypted meta, then payload also encrypted
    print("\n Hypothesis B: [0-7]=header, [22636-22651]=16-byte meta, payload starts at 22652")
    print(" If meta[22636:22652] contains IV for payload:")
    iv_hyp_b = full_data[header_offset:header_offset + 16]
    for key_name, key in key_candidates.items():
        for seg in [8, 128]:
            _attempt(payload_head, key, iv_hyp_b, seg,
                     f"HypB: key={key_name}, CFB{seg}, payload with meta-IV")

    # Hypothesis C: The entire file from byte 8 to end is one encrypted stream (IV = zeros)
    print("\n Hypothesis C: Single encrypted stream from byte 8, IV=zeros")
    for key_name, key in key_candidates.items():
        for seg in [8, 128]:
            _attempt(from_byte_8, key, iv_zero, seg, f"HypC: key={key_name}, CFB{seg}")

    # Hypothesis D: Encrypted data starts right at byte 0 (the header_size field IS part of encrypted data)
    # This would mean the header_size value 22636 is coincidental
    print("\n Hypothesis D: Encrypted from byte 0, IV=zeros")
    from_byte_0 = full_data[:4096]
    for key_name, key in key_candidates.items():
        for seg in [8, 128]:
            _attempt(from_byte_0, key, iv_zero, seg, f"HypD: key={key_name}, CFB{seg}, from byte 0")

    # Hypothesis E: Windows CNG might prepend IV to ciphertext
    # So bytes 0-3 = header_size, 4-7 = 0, 8-23 = IV (embedded in encrypted blob), 24+ = ciphertext
    print("\n Hypothesis E: IV prepended to ciphertext at various offsets")
    for data_start in [0, 4, 8]:
        iv_e = full_data[data_start:data_start + 16]
        ct_e = full_data[data_start + 16:data_start + 16 + 4096]
        for key_name, key in key_candidates.items():
            for seg in [8, 128]:
                _attempt(ct_e, key, iv_e, seg,
                         f"HypE: data_start={data_start}, key={key_name}, CFB{seg}")

    # ── Step 11: Try other modes too (just in case CFB was misidentified) ──
    print("\n" + "═" * 80)
    print("SECTION 7: ALTERNATIVE CIPHER MODES (OFB, CBC)")
    print("═" * 80)

    if HAS_PYCRYPTODOME:
        for data_start in [8, 24, encrypted_start]:
            test_data = full_data[data_start:data_start + 4096]

            for iv_offset in [0, 4, 8]:
                iv_alt = full_data[iv_offset:iv_offset + 16]
                for key_label, key in short_keys:
                    # OFB
                    try:
                        cipher = PyCryptoAES.new(key, PyCryptoAES.MODE_OFB, iv=iv_alt)
                        dec = cipher.decrypt(test_data)
                    except (ValueError, TypeError):
                        pass
                    else:
                        analyze_decrypted(dec, f"OFB: data@{data_start}, iv@{iv_offset}, key={key_label}")

                    # CBC (needs padding but try anyway)
                    try:
                        cipher = PyCryptoAES.new(key, PyCryptoAES.MODE_CBC, iv=iv_alt)
                        dec = cipher.decrypt(test_data)
                    except (ValueError, TypeError):
                        pass
                    else:
                        analyze_decrypted(dec, f"CBC: data@{data_start}, iv@{iv_offset}, key={key_label}")

            # ECB (no IV): independent of iv_offset, so it runs once per data
            # start and key instead of being repeated for every IV candidate.
            for key_label, key in short_keys:
                try:
                    cipher = PyCryptoAES.new(key, PyCryptoAES.MODE_ECB)
                    # ECB needs data aligned to 16 bytes
                    aligned = test_data[:len(test_data) - (len(test_data) % 16)]
                    dec = cipher.decrypt(aligned)
                except (ValueError, TypeError):
                    pass
                else:
                    analyze_decrypted(dec, f"ECB: data@{data_start}, key={key_label}")

    # ── Step 12: Summary ──
    print("\n" + "═" * 80)
    print("SUMMARY")
    print("═" * 80)

    print("\n File structure (confirmed):")
    print(f" [0x0000 - 0x0007] 8-byte header: offset = {header_offset}")
    print(f" [0x0008 - 0x{header_offset-1:04x}] Encrypted header data ({header_offset - 8} bytes)")
    print(f" [0x{header_offset:04x} - 0x{header_offset+7:04x}] 8-byte magic/hash: {meta_magic_8.hex()}")
    print(f" [0x{header_offset+8:04x} - 0x{header_offset+15:04x}] uint64 payload size: {meta_size:,}")
    print(f" [0x{encrypted_start:04x} - 0x{filesize-1:07x}] Encrypted payload ({encrypted_size:,} bytes)")

    print("\n Key info:")
    print(f" Raw key: {KEY_RAW}")
    print(f" Raw key hex: {KEY_RAW.hex()}")
    print(f" SHA256(key): {KEY_SHA256.hex()}")

    print(f"\n Total promising decryption results: {len(promising_results)}")
    for r in promising_results:
        print(f" ★ {r}")

    if not promising_results:
        print("\n No successful decryption found with standard approaches.")
        print(" Possible reasons:")
        print(" 1. The key might be processed differently (PBKDF2, HKDF, etc.)")
        print(" 2. The IV might be derived in a non-standard way")
        print(" 3. The file structure might be more complex")
        print(" 4. The CBC/CFB segment size might be non-standard")
        print(" 5. There might be additional authentication (AEAD)")
        print(" 6. The BCrypt CNG API might use specific key blob format")
        print(" 7. Think about BCRYPT_KEY_DATA_BLOB_HEADER structure")


if __name__ == "__main__":
    main()