""" Static decryptor for OneOCR .onemodel files using BCrypt CNG API. Finds chunk boundaries by re-encrypting known plaintext patterns. Works on Windows only (BCrypt CNG). For Linux, use the hook-based approach. Usage: python static_decrypt.py [model_path] [-o output_dir] """ import ctypes import ctypes.wintypes as wt from ctypes import c_void_p, c_ulong, POINTER, byref import struct import sys import os from pathlib import Path # ═══════════════════════════════════════════════════════════════ # CRYPTO PARAMETERS (discovered via IAT hook interception) # ═══════════════════════════════════════════════════════════════ KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4' IV = b"Copyright @ OneO" CONTAINER_HEADER = bytes.fromhex("4a1a082b25000000") ONNX_VALID_FIELDS = {1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 20} # BCrypt constants BCRYPT_AES = "AES\0".encode('utf-16-le') BCRYPT_CHAINING_MODE = "ChainingMode\0".encode('utf-16-le') BCRYPT_CHAIN_MODE_CFB = "ChainingModeCFB\0".encode('utf-16-le') bcrypt = ctypes.windll.bcrypt class BCRYPT_KEY_DATA_BLOB_HEADER(ctypes.Structure): _fields_ = [ ("dwMagic", c_ulong), ("dwVersion", c_ulong), ("cbKeyData", c_ulong), ] def setup_bcrypt(): hAlg = c_void_p() assert bcrypt.BCryptOpenAlgorithmProvider(byref(hAlg), BCRYPT_AES, None, 0) == 0 assert bcrypt.BCryptSetProperty(hAlg, BCRYPT_CHAINING_MODE, BCRYPT_CHAIN_MODE_CFB, len(BCRYPT_CHAIN_MODE_CFB), 0) == 0 header = BCRYPT_KEY_DATA_BLOB_HEADER(dwMagic=0x4d42444b, dwVersion=1, cbKeyData=len(KEY)) blob = bytes(header) + KEY hKey = c_void_p() assert bcrypt.BCryptGenerateSymmetricKey(hAlg, byref(hKey), None, 0, blob, len(blob), 0) == 0 return hAlg, hKey def bcrypt_op(hKey, data, encrypt=False): """Encrypt or decrypt data using BCrypt AES-CFB with fresh IV.""" iv = bytearray(IV) func = bcrypt.BCryptEncrypt if encrypt else bcrypt.BCryptDecrypt result_size = c_ulong(0) func(hKey, data, len(data), None, None, 0, None, 0, byref(result_size), 0) output = (ctypes.c_ubyte * result_size.value)() actual = c_ulong(0) status = func(hKey, data, len(data), None, (ctypes.c_ubyte * len(iv))(*iv), len(iv), output, result_size.value, byref(actual), 0) assert status == 0, f"BCrypt op failed: {status:#x}" return bytes(output[:actual.value]) def read_varint(data, pos): val = 0; shift = 0 while pos < len(data): b = data[pos]; pos += 1 val |= (b & 0x7f) << shift if not (b & 0x80): break shift += 7 return val, pos def measure_onnx(data): pos = 0; last = 0 while pos < len(data): start = pos tag, pos = read_varint(data, pos) if pos > len(data): break fn = tag >> 3; wt = tag & 7 if fn not in ONNX_VALID_FIELDS: return start if wt == 0: _, pos = read_varint(data, pos) elif wt == 1: pos += 8 elif wt == 2: l, pos = read_varint(data, pos); pos += l elif wt == 5: pos += 4 else: return start if pos > len(data): return start last = pos return last def main(): import argparse parser = argparse.ArgumentParser(description="OneOCR .onemodel decryptor (Windows BCrypt)") parser.add_argument("model_path", nargs="?", default="ocr_data/oneocr.onemodel") parser.add_argument("-o", "--output", default="onnx_models_static") args = parser.parse_args() model_path = Path(args.model_path) output_dir = Path(args.output) output_dir.mkdir(exist_ok=True, parents=True) for old in output_dir.glob("*"): old.unlink() data = model_path.read_bytes() print(f"{'='*70}") print(f"OneOCR Static Decryptor (BCrypt CNG)") print(f"{'='*70}") print(f"File: {model_path} ({len(data):,} bytes)") hAlg, hKey = setup_bcrypt() print(f"AES-256-CFB initialized") # Step 1: Decrypt DX index (offset 24, size 22624) dx_offset = 24 dx_size = 22624 dx_dec = bcrypt_op(hKey, data[dx_offset:dx_offset + dx_size]) print(f"\nDX index: starts with {dx_dec[:2].hex()}") assert dx_dec[:2] == b'DX', f"DX header not found! Got: {dx_dec[:8].hex()}" (output_dir / "dx_index.bin").write_bytes(dx_dec) # Step 2: Parse DX to find embedded chunks # DX contains sub-chunks that need independent decryption # We'll also find main payload chunks by scanning the file # The DX contains a list of uint64 values that might be chunk sizes/offsets dx_values = [] for i in range(0, len(dx_dec) - 7, 8): v = struct.unpack_from(' 0 and v < len(data): dx_values.append((i, v)) # Step 3: Try to decrypt every possible chunk in the payload area # Payload starts after DX (offset 22648) + 36 bytes gap = 22684 payload_start = dx_offset + dx_size + 36 print(f"\n--- Scanning payload for encrypted chunks ---") print(f"Payload starts at offset {payload_start}") # Strategy: try decrypting at current offset, check if result starts # with container magic. If yes, extract chunk, determine its size # from the DX index or by scanning forward. # Known chunk sizes from the DX index analysis: # We know the DX has entries like 11943, 11903, 11927 etc. # And the main payload has large ONNX models. # Let's try a different approach: scan the encrypted file for positions # where decryption produces valid container magic print(f"\nSearching for chunk boundaries by trial decryption...") # The container magic `4a1a082b25000000` after decryption = specific encrypted pattern # Compute what the container magic encrypts TO: magic_encrypted = bcrypt_op(hKey, CONTAINER_HEADER, encrypt=True) print(f"Container magic encrypted: {magic_encrypted.hex()}") # Search for this pattern in the payload area chunk_starts = [] search_start = payload_start # Also check DX sub-chunks # First, find container magic encryptions within the DX encrypted data while search_start < len(data) - 16: idx = data.find(magic_encrypted[:8], search_start) if idx < 0: break # Verify by decrypting 16 bytes test = bcrypt_op(hKey, data[idx:idx+16]) if test[:8] == CONTAINER_HEADER: chunk_starts.append(idx) search_start = idx + 1 else: search_start = idx + 1 print(f"Found {len(chunk_starts)} potential chunk starts") if not chunk_starts: # Fallback: just try sequential decryption print("No chunk starts found via magic pattern. Trying sequential...") # Try decrypting from payload_start with large block sizes remaining = len(data) - payload_start dec = bcrypt_op(hKey, data[payload_start:payload_start + remaining]) # Find container magic in decrypted data pos = 0 chunks_data = [] while True: idx = dec.find(CONTAINER_HEADER, pos) if idx < 0: # Handle remaining data if pos < len(dec): chunks_data.append(dec[pos:]) break if idx > pos: chunks_data.append(dec[pos:idx]) pos = idx # Will be split on next iteration # Find next occurrence next_idx = dec.find(CONTAINER_HEADER, pos + 8) if next_idx < 0: chunks_data.append(dec[pos:]) break chunks_data.append(dec[pos:next_idx]) pos = next_idx print(f"Found {len(chunks_data)} chunks in sequential decryption") else: # Decrypt each chunk chunk_starts.sort() chunks_data = [] for i, start in enumerate(chunk_starts): end = chunk_starts[i + 1] if i + 1 < len(chunk_starts) else len(data) encrypted = data[start:end] try: dec = bcrypt_op(hKey, encrypted) chunks_data.append(dec) except: pass # Extract models from chunks print(f"\n--- Extracting ONNX models ---") models = [] data_files = [] for chunk in chunks_data: if chunk[:8] == CONTAINER_HEADER: payload = chunk[8:] else: payload = chunk if len(payload) >= 2 and payload[0] == 0x08 and 1 <= payload[1] <= 12: valid_len = measure_onnx(payload) onnx_data = payload[:valid_len] if valid_len < 100: # Too small to be a real model continue producer = "unknown" if b"PyTorch" in payload[:100]: producer = "pytorch" elif b"onnx.quantize" in payload[:100]: producer = "onnx_quantize" elif b"pytorch" in payload[:100]: producer = "pytorch_small" ir = payload[1] idx = len(models) fname = f"model_{idx:02d}_ir{ir}_{producer}_{valid_len//1024}KB.onnx" (output_dir / fname).write_bytes(onnx_data) models.append({'name': fname, 'size': valid_len}) print(f" ONNX: {fname} ({valid_len:,} bytes)") elif len(payload) > 100: preview = payload[:30].decode('utf-8', errors='replace') idx = len(data_files) fname = f"data_{idx:02d}_{len(payload)}B.bin" (output_dir / fname).write_bytes(payload) data_files.append({'name': fname, 'size': len(payload)}) print(f" Data: {fname} ({len(payload):,} bytes) {preview[:30]!r}") # Summary print(f"\n{'='*70}") print(f"EXTRACTION COMPLETE") print(f"{'='*70}") print(f"ONNX models: {len(models)}") print(f"Data files: {len(data_files)}") if models: total = sum(m['size'] for m in models) print(f"Total ONNX: {total:,} bytes ({total/1024/1024:.1f} MB)") # Verify try: import onnx ok = sum(1 for m in models if not _try_load(onnx, output_dir / m['name'])) ok = 0 for m in models: try: onnx.load(str(output_dir / m['name'])) ok += 1 except: pass print(f"Verified with onnx.load: {ok}/{len(models)}") except ImportError: pass bcrypt.BCryptDestroyKey(hKey) bcrypt.BCryptCloseAlgorithmProvider(hAlg, 0) if __name__ == "__main__": main()