oneocr / _archive /attempts /static_decrypt.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""
Static decryptor for OneOCR .onemodel files using BCrypt CNG API.
Finds chunk boundaries by re-encrypting known plaintext patterns.
Works on Windows only (BCrypt CNG). For Linux, use the hook-based approach.
Usage: python static_decrypt.py [model_path] [-o output_dir]
"""
import ctypes
import ctypes.wintypes as wt
from ctypes import c_void_p, c_ulong, POINTER, byref
import struct
import sys
import os
from pathlib import Path
# ═══════════════════════════════════════════════════════════════
# CRYPTO PARAMETERS (discovered via IAT hook interception)
# ═══════════════════════════════════════════════════════════════
# 32-byte AES key (AES-256); setup_bcrypt wraps it in a key-data-blob header.
KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
# 16-byte initialisation vector; bcrypt_op starts every call from a fresh copy.
IV = b"Copyright @ OneO"
# 8-byte magic that main() expects at the start of a decrypted container chunk.
CONTAINER_HEADER = bytes.fromhex("4a1a082b25000000")
# Protobuf field numbers measure_onnx accepts at the top level of a model.
ONNX_VALID_FIELDS = {1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 20}
# BCrypt constants
# NUL-terminated UTF-16-LE strings, handed to the BCrypt API as raw bytes.
BCRYPT_AES = "AES\0".encode('utf-16-le')
BCRYPT_CHAINING_MODE = "ChainingMode\0".encode('utf-16-le')
BCRYPT_CHAIN_MODE_CFB = "ChainingModeCFB\0".encode('utf-16-le')
# Handle to bcrypt.dll. ctypes.windll exists only on Windows, so importing
# this module on any other platform fails on this line.
bcrypt = ctypes.windll.bcrypt
class BCRYPT_KEY_DATA_BLOB_HEADER(ctypes.Structure):
    """Three-``c_ulong`` header that setup_bcrypt places in front of the raw key bytes."""

    # All three members share one C type, so the layout is generated from the
    # member names in declaration order: magic, version, key length in bytes.
    _fields_ = [
        (member, c_ulong) for member in ("dwMagic", "dwVersion", "cbKeyData")
    ]
def _check_ntstatus(status, operation):
    """Raise ``OSError`` naming ``operation`` unless ``status`` is 0 (success)."""
    if status != 0:
        # ctypes returns the status as a signed int; mask it so the message
        # shows the usual unsigned 32-bit form (e.g. 0xc000000d).
        raise OSError(f"{operation} failed: NTSTATUS {status & 0xFFFFFFFF:#010x}")


def setup_bcrypt():
    """Open the AES provider in CFB chaining mode and create the key object.

    KEY is prefixed with a BCRYPT_KEY_DATA_BLOB_HEADER and the combined blob
    is passed to BCryptGenerateSymmetricKey as the secret.

    Returns:
        ``(hAlg, hKey)``: the algorithm-provider handle and the key handle.
        The caller releases them with BCryptDestroyKey and
        BCryptCloseAlgorithmProvider.

    Raises:
        OSError: if any BCrypt call returns a non-zero status. The provider
            handle is closed before the error propagates.
    """
    # The calls are deliberately not wrapped in ``assert``: under ``python -O``
    # assert statements are removed, which would skip the BCrypt calls entirely.
    hAlg = c_void_p()
    _check_ntstatus(
        bcrypt.BCryptOpenAlgorithmProvider(byref(hAlg), BCRYPT_AES, None, 0),
        "BCryptOpenAlgorithmProvider",
    )
    try:
        _check_ntstatus(
            bcrypt.BCryptSetProperty(
                hAlg, BCRYPT_CHAINING_MODE,
                BCRYPT_CHAIN_MODE_CFB, len(BCRYPT_CHAIN_MODE_CFB), 0,
            ),
            "BCryptSetProperty(ChainingMode)",
        )
        # 0x4d42444b is the bytes "KDBM" read as a little-endian integer.
        header = BCRYPT_KEY_DATA_BLOB_HEADER(
            dwMagic=0x4d42444b, dwVersion=1, cbKeyData=len(KEY)
        )
        blob = bytes(header) + KEY
        hKey = c_void_p()
        _check_ntstatus(
            bcrypt.BCryptGenerateSymmetricKey(
                hAlg, byref(hKey), None, 0, blob, len(blob), 0
            ),
            "BCryptGenerateSymmetricKey",
        )
    except Exception:
        # Do not leak the provider handle when a later step fails.
        bcrypt.BCryptCloseAlgorithmProvider(hAlg, 0)
        raise
    return hAlg, hKey
def bcrypt_op(hKey, data, encrypt=False):
    """Encrypt or decrypt ``data`` with the BCrypt key, starting from a fresh IV.

    Args:
        hKey: key handle as returned by setup_bcrypt.
        data: bytes to transform.
        encrypt: when true call BCryptEncrypt, otherwise BCryptDecrypt.

    Returns:
        The transformed bytes. Empty input returns ``b""`` without calling
        into BCrypt.

    Raises:
        OSError: if either BCrypt call returns a non-zero status.
    """
    if not data:
        return b""

    if encrypt:
        func, func_name = bcrypt.BCryptEncrypt, "BCryptEncrypt"
    else:
        func, func_name = bcrypt.BCryptDecrypt, "BCryptDecrypt"

    # First call passes a NULL output buffer, which only asks for the size.
    result_size = c_ulong(0)
    status = func(hKey, data, len(data), None, None, 0, None, 0, byref(result_size), 0)
    if status != 0:
        raise OSError(
            f"{func_name} size query failed: NTSTATUS {status & 0xFFFFFFFF:#010x}"
        )

    # BCrypt overwrites the IV buffer it is given, so each call gets a private
    # copy of the module-level IV.
    iv_buf = (ctypes.c_ubyte * len(IV)).from_buffer_copy(IV)
    output = ctypes.create_string_buffer(result_size.value)
    actual = c_ulong(0)
    status = func(hKey, data, len(data), None,
                  iv_buf, len(IV),
                  output, result_size.value, byref(actual), 0)
    if status != 0:
        raise OSError(f"BCrypt op failed: NTSTATUS {status & 0xFFFFFFFF:#010x}")

    # string_at copies the result once; slicing a c_ubyte array would first
    # build a Python list with one int object per byte.
    return ctypes.string_at(output, actual.value)
def read_varint(data, pos):
    """Decode a little-endian base-128 varint from ``data`` starting at ``pos``.

    Returns ``(value, next_pos)``. Reading stops silently at the end of
    ``data``: a varint cut off mid-way yields the bits gathered so far, and a
    ``pos`` at or past the end yields ``(0, pos)``.
    """
    value = 0
    bit_offset = 0
    end = len(data)
    while pos < end:
        byte = data[pos]
        pos += 1
        # The low seven bits carry payload, least-significant group first.
        value |= (byte & 0x7F) << bit_offset
        if (byte & 0x80) == 0:
            # A clear high bit marks the final byte of the varint.
            break
        bit_offset += 7
    return value, pos
def _read_complete_varint(data, pos):
    """Read a varint at ``pos``; return ``None`` if it is missing or cut off.

    On success returns ``(value, next_pos)`` exactly like read_varint.
    """
    value, end = read_varint(data, pos)
    # read_varint stops silently at the end of the input, so the varint is
    # complete only if at least one byte was consumed and the last consumed
    # byte has its continuation bit clear.
    if end == pos or data[end - 1] & 0x80:
        return None
    return value, end


def measure_onnx(data):
    """Return the length of the leading part of ``data`` that parses as fields.

    Walks ``data`` as a sequence of top-level protobuf fields and stops at the
    first field that has a field number outside ONNX_VALID_FIELDS, uses an
    unsupported wire type (anything other than 0, 1, 2 or 5), or is cut off
    by the end of ``data``.

    Returns:
        The byte offset just past the last complete, accepted field
        (0 if there is none).
    """
    pos = 0
    size = len(data)
    while pos < size:
        field_start = pos

        header = _read_complete_varint(data, pos)
        if header is None:
            return field_start
        tag, pos = header
        field_number = tag >> 3
        wire_type = tag & 7
        if field_number not in ONNX_VALID_FIELDS:
            return field_start

        if wire_type == 0:  # varint value
            value = _read_complete_varint(data, pos)
            if value is None:
                return field_start
            pos = value[1]
        elif wire_type == 1:  # fixed 64-bit value
            pos += 8
        elif wire_type == 2:  # length-delimited value
            length = _read_complete_varint(data, pos)
            if length is None:
                return field_start
            payload_len, pos = length
            pos += payload_len
        elif wire_type == 5:  # fixed 32-bit value
            pos += 4
        else:
            return field_start

        # The field's payload runs past the end of the buffer.
        if pos > size:
            return field_start
    return pos
def _find_chunk_starts(hKey, data, search_start, magic_encrypted):
    """Return ascending offsets >= ``search_start`` whose bytes decrypt to CONTAINER_HEADER."""
    starts = []
    needle = magic_encrypted[:8]
    limit = len(data) - 16
    while search_start < limit:
        idx = data.find(needle, search_start)
        if idx < 0:
            break
        # Verify the candidate by decrypting its first 16 bytes.
        if bcrypt_op(hKey, data[idx:idx + 16])[:8] == CONTAINER_HEADER:
            starts.append(idx)
        search_start = idx + 1
    return starts


def _split_on_magic(dec):
    """Split decrypted bytes into pieces that each begin at a CONTAINER_HEADER.

    Bytes before the first header, if any, form their own leading piece.
    Without any header the whole (non-empty) buffer is the single piece.
    """
    first = dec.find(CONTAINER_HEADER)
    if first < 0:
        return [dec] if dec else []
    pieces = [dec[:first]] if first > 0 else []
    pos = first
    while True:
        # Skip the 8-byte header at ``pos`` before looking for the next one.
        next_idx = dec.find(CONTAINER_HEADER, pos + 8)
        if next_idx < 0:
            pieces.append(dec[pos:])
            return pieces
        pieces.append(dec[pos:next_idx])
        pos = next_idx


def _extract_chunks(chunks_data, output_dir):
    """Write every chunk to ``output_dir`` as an ONNX model or a raw data file.

    Returns ``(models, data_files)``, two lists of ``{'name', 'size'}`` dicts.
    """
    models = []
    data_files = []
    for chunk in chunks_data:
        payload = chunk[8:] if chunk[:8] == CONTAINER_HEADER else chunk
        # Model candidates start with byte 0x08 followed by a small version
        # number, which is used as the "ir" value in the file name.
        if len(payload) >= 2 and payload[0] == 0x08 and 1 <= payload[1] <= 12:
            valid_len = measure_onnx(payload)
            if valid_len < 100:  # Too small to be a real model
                continue
            head = payload[:100]
            if b"PyTorch" in head:
                producer = "pytorch"
            elif b"onnx.quantize" in head:
                producer = "onnx_quantize"
            elif b"pytorch" in head:
                producer = "pytorch_small"
            else:
                producer = "unknown"
            ir = payload[1]
            fname = f"model_{len(models):02d}_ir{ir}_{producer}_{valid_len//1024}KB.onnx"
            (output_dir / fname).write_bytes(payload[:valid_len])
            models.append({'name': fname, 'size': valid_len})
            print(f" ONNX: {fname} ({valid_len:,} bytes)")
        elif len(payload) > 100:
            preview = payload[:30].decode('utf-8', errors='replace')
            fname = f"data_{len(data_files):02d}_{len(payload)}B.bin"
            (output_dir / fname).write_bytes(payload)
            data_files.append({'name': fname, 'size': len(payload)})
            print(f" Data: {fname} ({len(payload):,} bytes) {preview[:30]!r}")
    return models, data_files


def _count_loadable(models, output_dir):
    """Return how many models ``onnx.load`` accepts, or ``None`` if onnx is not installed."""
    try:
        import onnx
    except ImportError:
        return None
    ok = 0
    for m in models:
        try:
            onnx.load(str(output_dir / m['name']))
        except Exception as exc:
            # Verification is best-effort: report the failure and keep going.
            print(f" onnx.load failed for {m['name']}: {exc}")
            continue
        ok += 1
    return ok


def main():
    """Command-line entry point: decrypt a .onemodel file and extract its chunks.

    Steps:
        1. Decrypt the fixed-position DX index and save it as ``dx_index.bin``
           (saved verbatim; its internal layout is not parsed here).
        2. Locate encrypted chunks after the index by searching for the
           encrypted form of CONTAINER_HEADER; if none is found, decrypt the
           whole payload in one pass and split it on the plaintext header.
        3. Write each chunk out as an ONNX model or a raw data file.
        4. Print a summary and, when the ``onnx`` package is available, count
           how many extracted models load.

    Raises:
        ValueError: if the decrypted DX index does not start with ``b'DX'``.
    """
    import argparse

    parser = argparse.ArgumentParser(description="OneOCR .onemodel decryptor (Windows BCrypt)")
    parser.add_argument("model_path", nargs="?", default="ocr_data/oneocr.onemodel")
    parser.add_argument("-o", "--output", default="onnx_models_static")
    args = parser.parse_args()

    model_path = Path(args.model_path)
    output_dir = Path(args.output)

    # Read the model before touching the output directory so that a wrong
    # model path cannot wipe the results of an earlier run.
    data = model_path.read_bytes()

    output_dir.mkdir(exist_ok=True, parents=True)
    for old in output_dir.glob("*"):
        # Only remove files (and links); unlink() raises on a real directory.
        if old.is_file() or old.is_symlink():
            old.unlink()

    print("=" * 70)
    print("OneOCR Static Decryptor (BCrypt CNG)")
    print("=" * 70)
    print(f"File: {model_path} ({len(data):,} bytes)")

    hAlg, hKey = setup_bcrypt()
    try:
        print("AES-256-CFB initialized")

        # Step 1: Decrypt DX index (offset 24, size 22624)
        dx_offset = 24
        dx_size = 22624
        dx_dec = bcrypt_op(hKey, data[dx_offset:dx_offset + dx_size])
        print(f"\nDX index: starts with {dx_dec[:2].hex()}")
        if dx_dec[:2] != b'DX':
            raise ValueError(f"DX header not found! Got: {dx_dec[:8].hex()}")
        (output_dir / "dx_index.bin").write_bytes(dx_dec)

        # Step 2: Find chunk starts in the payload area.
        # Payload starts after DX (offset 22648) + 36 bytes gap = 22684
        payload_start = dx_offset + dx_size + 36
        print("\n--- Scanning payload for encrypted chunks ---")
        print(f"Payload starts at offset {payload_start}")
        print("\nSearching for chunk boundaries by trial decryption...")

        # bcrypt_op always starts from the same IV, so the encrypted form of
        # the magic is a fixed byte pattern that can be searched for directly.
        magic_encrypted = bcrypt_op(hKey, CONTAINER_HEADER, encrypt=True)
        print(f"Container magic encrypted: {magic_encrypted.hex()}")

        chunk_starts = _find_chunk_starts(hKey, data, payload_start, magic_encrypted)
        print(f"Found {len(chunk_starts)} potential chunk starts")

        if not chunk_starts:
            # Fallback: decrypt the whole payload at once and split afterwards.
            print("No chunk starts found via magic pattern. Trying sequential...")
            dec = bcrypt_op(hKey, data[payload_start:])
            chunks_data = _split_on_magic(dec)
            print(f"Found {len(chunks_data)} chunks in sequential decryption")
        else:
            # Each chunk runs up to the next chunk start (or the end of file).
            chunks_data = []
            chunk_ends = chunk_starts[1:] + [len(data)]
            for start, end in zip(chunk_starts, chunk_ends):
                try:
                    chunks_data.append(bcrypt_op(hKey, data[start:end]))
                except Exception as exc:
                    # Best-effort: one bad chunk must not abort the extraction.
                    print(f" Skipping chunk at offset {start}: {exc}")

        # Step 3: Extract models from chunks
        print("\n--- Extracting ONNX models ---")
        models, data_files = _extract_chunks(chunks_data, output_dir)

        # Summary
        print("\n" + "=" * 70)
        print("EXTRACTION COMPLETE")
        print("=" * 70)
        print(f"ONNX models: {len(models)}")
        print(f"Data files: {len(data_files)}")
        if models:
            total = sum(m['size'] for m in models)
            print(f"Total ONNX: {total:,} bytes ({total/1024/1024:.1f} MB)")
            # Verify
            ok = _count_loadable(models, output_dir)
            if ok is not None:
                print(f"Verified with onnx.load: {ok}/{len(models)}")
    finally:
        # Release the BCrypt handles even when extraction fails part-way.
        bcrypt.BCryptDestroyKey(hKey)
        bcrypt.BCryptCloseAlgorithmProvider(hAlg, 0)
if __name__ == "__main__":
main()