# Source: oneocr / _archive / onemodel_decrypt.py
# Branch: OneOCR Dev
# Commit ce847d4: "OneOCR - reverse engineering complete, ONNX pipeline 53% match rate"
#!/usr/bin/env python3
"""
OneOCR .onemodel Static Decryptor
=================================
Cross-platform tool to extract ONNX models and config data from
Windows OneOCR's encrypted .onemodel container files.
No Windows APIs, DLLs, or runtime hooking required.
Only dependency: pycryptodome (pip install pycryptodome)
Crypto scheme (fully reverse-engineered):
- Algorithm: AES-256-CFB128
- Master Key: hardcoded 32-byte ASCII string
- IV: "Copyright @ OneO" (16 bytes, same for all chunks)
- DX index key: SHA256(master_key + file[8:24])
- Config key: SHA256(DX[48:64] + DX[32:48]) (sizes + checksum)
- Per-chunk key: SHA256(chunk_header[16:32] + chunk_header[0:16])
- Chunk header in file: checksum(16) + size1(8) + size2(8) = 32 bytes
- On-disk encrypted data follows immediately: size1 + 8 bytes
File structure:
[0:8] uint64 LE H (header value)
[8:24] 16 bytes file_hash (used in DX key derivation)
[24:H+12] encrypted DX index
[H+12:H+16] 4 zero bytes (gap)
[H+16:] payload chunks (checksum(16) + sizes(16) + encrypted_data)
Usage:
python onemodel_decrypt.py [onemodel_file] [output_dir]
python onemodel_decrypt.py # uses defaults
"""
import struct
import hashlib
import sys
import os
from pathlib import Path
try:
from Crypto.Cipher import AES
except ImportError:
print("ERROR: pycryptodome is required. Install with: pip install pycryptodome")
sys.exit(1)
# ─── Constants ───────────────────────────────────────────────────────────────
# Hardcoded 32-byte ASCII master key (AES-256) recovered from the OneOCR binary.
MASTER_KEY = b'kj)TGtrK>f]b[Piow.gU+nC@s""""""4'
# Fixed 16-byte IV reused for every AES-CFB decryption in the container.
IV = b"Copyright @ OneO"
# 8-byte magic expected at the start of every decrypted chunk/config block.
CONTAINER_MAGIC = bytes.fromhex("4a1a082b25000000")
# ─── Crypto ──────────────────────────────────────────────────────────────────
def aes_cfb128_decrypt(key: bytes, data: bytes) -> bytes:
    """Decrypt *data* with AES-256 in CFB mode (128-bit segments).

    Every call reuses the fixed module-level IV, matching the container
    format's single-IV crypto scheme.
    """
    decryptor = AES.new(key, AES.MODE_CFB, iv=IV, segment_size=128)
    return decryptor.decrypt(data)
def derive_key(sha256_input: bytes) -> bytes:
    """Derive a 32-byte AES key as SHA256(*sha256_input*)."""
    hasher = hashlib.sha256()
    hasher.update(sha256_input)
    return hasher.digest()
# ─── Protobuf helpers (for ONNX size measurement) ───────────────────────────
def read_varint(data: bytes, pos: int) -> tuple[int, int]:
    """Decode a protobuf base-128 varint starting at *pos*.

    Returns (value, position after the varint).  If *pos* is already at or
    past the end of *data*, returns (0, pos) unchanged; a varint truncated
    by the end of *data* yields the partial value accumulated so far.
    """
    value = 0
    shift = 0
    end = len(data)
    while pos < end:
        byte = data[pos]
        pos += 1
        value |= (byte & 0x7F) << shift
        if byte < 0x80:  # continuation bit clear: varint complete
            break
        shift += 7
    return value, pos
def measure_protobuf(data: bytes) -> int:
    """Return how many leading bytes of *data* form a valid ONNX ModelProto.

    Walks top-level protobuf fields and stops at the first field whose
    number is not a ModelProto field (1-9, 14, 20), whose wire type is
    unsupported, or whose body would run past the end of *data*; the offset
    of that field is returned.  If the whole buffer parses, len(data) is
    returned.
    """
    allowed_fields = frozenset({1, 2, 3, 4, 5, 6, 7, 8, 9, 14, 20})
    total = len(data)
    cursor = 0
    while cursor < total:
        field_start = cursor
        tag, cursor = read_varint(data, cursor)
        if cursor > total:
            return field_start
        if (tag >> 3) not in allowed_fields:
            return field_start
        wire = tag & 7
        if wire == 0:  # varint
            _, cursor = read_varint(data, cursor)
        elif wire == 1:  # fixed 64-bit
            cursor += 8
        elif wire == 2:  # length-delimited
            size, cursor = read_varint(data, cursor)
            cursor += size
        elif wire == 5:  # fixed 32-bit
            cursor += 4
        else:  # wire types 3/4 (groups) never appear in ONNX
            return field_start
        if cursor > total:
            return field_start
    return cursor
# ─── File parsing ────────────────────────────────────────────────────────────
class OneModelFile:
    """Parser for .onemodel encrypted containers.

    Attributes set at construction:
        data:          the entire file contents
        filepath:      path the data was read from
        H:             uint64 LE header value at offset 0
        file_hash:     16 bytes at offset 8, feeds DX key derivation
        dx_offset:     start of the encrypted DX index (always 24)
        dx_size:       encrypted DX index length (H - 12)
        payload_start: offset of the first payload chunk (H + 16)
    """

    def __init__(self, filepath: str):
        with open(filepath, "rb") as handle:
            self.data = handle.read()
        self.filepath = filepath
        # File header: uint64 LE "H" value, then a 16-byte file hash.
        (self.H,) = struct.unpack_from("<Q", self.data, 0)
        self.file_hash = self.data[8:24]
        # DX index boundaries derived from H.
        self.dx_offset = 24
        self.dx_size = self.H - 12
        self.payload_start = self.H + 16

    def decrypt_dx(self) -> bytes:
        """Decrypt and return the DX index."""
        dx_key = derive_key(MASTER_KEY + self.file_hash)
        encrypted = self.data[self.dx_offset : self.dx_offset + self.dx_size]
        return aes_cfb128_decrypt(dx_key, encrypted)

    def decrypt_config(self, dx: bytes) -> bytes:
        """Decrypt the config chunk whose header is embedded in the DX index."""
        # Config key = SHA256(size fields || checksum), same scheme as chunks.
        cfg_key = derive_key(dx[48:64] + dx[32:48])
        (size1,) = struct.unpack_from("<Q", dx, 48)
        return aes_cfb128_decrypt(cfg_key, dx[64 : 64 + size1 + 8])

    def iter_payload_chunks(self):
        """Yield (index, metadata, decrypted_payload) for each payload chunk.

        Each payload chunk in the file:
            [16 bytes] checksum
            [8 bytes]  uint64 LE size1 (data size excl. 8-byte container header)
            [8 bytes]  uint64 LE size2 (always size1 + 24)
            [size1 + 8 bytes] encrypted data
        Iteration stops at the first header that fails validation.
        """
        pos = self.payload_start
        total = len(self.data)
        index = 0
        while pos + 32 <= total:
            checksum = self.data[pos : pos + 16]
            size1, size2 = struct.unpack_from("<QQ", self.data, pos + 16)
            # Header invariant: size2 == size1 + 24; reject empty/oversized.
            if size2 != size1 + 24 or size1 == 0 or size1 > total:
                break
            enc_len = size1 + 8
            body = pos + 32
            if body + enc_len > total:
                break
            # Per-chunk AES key: SHA256(size fields || checksum).
            chunk_key = derive_key(self.data[pos + 16 : pos + 32] + checksum)
            plain = aes_cfb128_decrypt(chunk_key, self.data[body : body + enc_len])
            # Each decrypted chunk should open with the container magic.
            if plain[:8] != CONTAINER_MAGIC:
                idx = index
                print(f" WARNING: chunk#{idx} container magic mismatch!")
            metadata = {
                "index": index,
                "file_offset": pos,
                "size1": size1,
                "size2": size2,
                "checksum": checksum.hex(),
            }
            # Strip the 8-byte container header from the yielded payload.
            yield index, metadata, plain[8:]
            pos = body + enc_len
            index += 1
# ─── ONNX extraction ────────────────────────────────────────────────────────
def classify_chunk(payload: bytes) -> str:
    """Heuristically label a decrypted chunk payload.

    Returns one of: "onnx", "rnn_info", "char2ind", "char2inschar",
    "score_calibration", "ocr_config", "composite_chars", "text_data",
    or "binary_data".
    """
    # ONNX ModelProto begins with field 1 (ir_version) as a varint:
    # tag byte 0x08 followed by the version (6 or 7 here).
    if len(payload) > 100 and payload[0] == 0x08 and payload[1] in (0x06, 0x07):
        return "onnx"
    # Otherwise try to recognize the known text-based chunk formats.
    try:
        head = payload[:100].decode("ascii")
    except (UnicodeDecodeError, ValueError):
        return "binary_data"
    if all(ch.isprintable() or ch in "\n\r\t" for ch in head):
        if "<LogPrior>" in head:
            return "rnn_info"
        if head.startswith(("! ", '" ')):
            # char2ind maps characters to numeric indices; the digit check
            # distinguishes it from the char2inschar table.
            return "char2ind" if any(ch.isdigit() for ch in head[:20]) else "char2inschar"
        if head.startswith("0."):
            return "score_calibration"
        if "text_script" in head:
            return "ocr_config"
        if "//" in head[:5]:
            return "composite_chars"
        return "text_data"
    return "binary_data"
def get_onnx_info(data: bytes) -> dict:
    """Extract basic model metadata from raw ONNX ModelProto bytes.

    Scans at most the first 500 bytes for ModelProto field 1 (ir_version,
    varint), field 3 (producer_name) and field 4 (producer_version), and
    stops early once both ir_version and producer are known.

    Args:
        data: raw protobuf bytes of an ONNX model.
    Returns:
        dict with any of the keys "ir_version", "producer",
        "producer_version" that could be parsed (possibly empty).
    """
    info: dict = {}
    pos = 0
    limit = min(len(data), 500)  # header fields live at the front
    while pos < limit:
        tag, pos = read_varint(data, pos)
        field_num = tag >> 3
        wire_type = tag & 7
        if wire_type == 0:  # varint
            val, pos = read_varint(data, pos)
            if field_num == 1:
                info["ir_version"] = val
        elif wire_type == 2:  # length-delimited
            length, pos = read_varint(data, pos)
            payload_bytes = data[pos : pos + length]
            if field_num == 3:
                try:
                    info["producer"] = payload_bytes.decode("utf-8")
                except UnicodeDecodeError:  # fix: was a bare except
                    pass
            elif field_num == 4:
                try:
                    info["producer_version"] = payload_bytes.decode("utf-8")
                except UnicodeDecodeError:  # fix: was a bare except
                    pass
            pos += length
        elif wire_type == 5:  # fixed 32-bit
            pos += 4
        elif wire_type == 1:  # fixed 64-bit
            pos += 8
        else:  # unknown wire type: stop scanning
            break
        if "producer" in info and "ir_version" in info:
            break
    return info
def extract_all(input_file: str, output_dir: str, verify: bool = True):
    """Extract all content from a .onemodel file.

    Decrypts the DX index and config manifest, then walks every payload
    chunk, classifies it, and writes ONNX models to <output_dir>/onnx_models
    and everything else to <output_dir>/config_data.

    Args:
        input_file: path to the encrypted .onemodel container.
        output_dir: directory to create/populate with extracted files.
        verify: when True, try to load each extracted ONNX model with the
            optional ``onnx`` / ``onnxruntime`` packages and report results.
    """
    model_file = OneModelFile(input_file)
    print(f"File: {input_file}")
    print(f"Size: {len(model_file.data):,} bytes")
    print(f"Header value: {model_file.H}")
    print(f"DX size: {model_file.dx_size:,} bytes")
    # Decrypt DX
    dx = model_file.decrypt_dx()
    valid_size = struct.unpack_from("<Q", dx, 8)[0]
    print(f"DX valid size: {valid_size:,}")
    # NOTE(review): assert is stripped under `python -O`; an explicit raise
    # would validate unconditionally — confirm before changing callers.
    assert dx[:2] == b"DX", "DX magic mismatch!"
    # Decrypt config
    config_dec = model_file.decrypt_config(dx)
    assert config_dec[:8] == CONTAINER_MAGIC, "Config magic mismatch!"
    config_payload = config_dec[8:]  # strip 8-byte container header
    # Prepare output dirs
    out = Path(output_dir)
    onnx_dir = out / "onnx_models"
    config_dir = out / "config_data"
    onnx_dir.mkdir(parents=True, exist_ok=True)
    config_dir.mkdir(parents=True, exist_ok=True)
    # Save config
    config_path = config_dir / "manifest.bin"
    config_path.write_bytes(config_payload)
    print(f"\nConfig manifest saved: {config_path} ({len(config_payload):,} bytes)")
    # Extract payload chunks
    onnx_models = []
    config_files = []
    print(f"\n{'='*70}")
    print(f"{'#':>4} {'Type':<18} {'Size':>12} {'Filename':<40}")
    print(f"{'='*70}")
    for idx, meta, payload in model_file.iter_payload_chunks():  # meta unused here
        chunk_type = classify_chunk(payload)
        if chunk_type == "onnx":
            # Trim ONNX to exact protobuf boundary (decrypted chunks may
            # carry trailing bytes past the end of the ModelProto).
            exact_size = measure_protobuf(payload)
            onnx_data = payload[:exact_size]
            info = get_onnx_info(onnx_data)
            ir = info.get("ir_version", "?")
            producer = info.get("producer", "unknown")
            size_kb = len(onnx_data) // 1024
            # Generate filename: encode index, ir_version, producer tag, size
            if "quantize" in producer.lower() or "onnx" in producer.lower():
                prod_tag = "onnx_quantize"
            elif "pytorch" in producer.lower() or "torch" in producer.lower():
                if size_kb < 50:
                    prod_tag = "pytorch_small"
                else:
                    prod_tag = "pytorch"
            else:
                prod_tag = producer.replace(" ", "_")
            onnx_idx = len(onnx_models)
            fname = f"model_{onnx_idx:02d}_ir{ir}_{prod_tag}_{size_kb}KB.onnx"
            fpath = onnx_dir / fname
            fpath.write_bytes(onnx_data)
            onnx_models.append(fpath)
            print(f"{idx:4d} {'ONNX':18s} {len(onnx_data):12,} {fname}")
        else:
            # Config/text file: pick an extension from the classified type
            ext_map = {
                "rnn_info": ".rnn_info",
                "char2ind": ".char2ind.txt",
                "char2inschar": ".char2inschar.txt",
                "score_calibration": ".calibration.txt",
                "ocr_config": ".config.txt",
                "composite_chars": ".composite.txt",
                "text_data": ".txt",
                "binary_data": ".bin",
            }
            ext = ext_map.get(chunk_type, ".bin")
            fname = f"chunk_{idx:02d}_{chunk_type}{ext}"
            fpath = config_dir / fname
            fpath.write_bytes(payload)
            config_files.append(fpath)
            print(f"{idx:4d} {chunk_type:18s} {len(payload):12,} {fname}")
    print(f"\n{'='*70}")
    print(f"ONNX models extracted: {len(onnx_models)}")
    print(f"Config files extracted: {len(config_files)}")
    # Verify ONNX models (both verifiers are optional dependencies)
    if verify:
        print(f"\n{'='*70}")
        print("ONNX Verification")
        print(f"{'='*70}")
        try:
            import onnx
            onnx_ok = 0
            onnx_fail = 0
            for fpath in onnx_models:
                try:
                    model = onnx.load(str(fpath))
                    onnx.checker.check_model(model)
                    onnx_ok += 1
                    print(f" OK {fpath.name}")
                except Exception as e:
                    try:
                        # Try just loading without full check
                        model = onnx.load(str(fpath))
                        onnx_ok += 1
                        print(f" OK* {fpath.name} (loads but checker warning: {str(e)[:50]})")
                    except Exception as e2:
                        onnx_fail += 1
                        print(f" FAIL {fpath.name}: {e2}")
            print(f"\nVerification: {onnx_ok}/{len(onnx_models)} models load successfully")
        except ImportError:
            print(" (onnx package not installed, skipping verification)")
        try:
            import onnxruntime as ort
            rt_ok = 0
            rt_custom_ops = 0
            for fpath in onnx_models:
                try:
                    sess = ort.InferenceSession(str(fpath))
                    rt_ok += 1
                except Exception as e:
                    # Models using OneOCR's custom operators cannot run on
                    # stock onnxruntime; count them separately.
                    if "custom ops" in str(e).lower() or "oneocr" in str(e).lower():
                        rt_custom_ops += 1
                    else:
                        pass  # Other runtime errors
            print(f" onnxruntime: {rt_ok} standard, {rt_custom_ops} need custom ops")
        except ImportError:
            pass
    print(f"\nDone! All files saved to: {out.resolve()}")
# ─── Main ────────────────────────────────────────────────────────────────────
if __name__ == "__main__":
    # CLI: optional positional args override the default input/output paths.
    args = sys.argv[1:]
    input_file = args[0] if args else "ocr_data/oneocr.onemodel"
    output_dir = args[1] if len(args) > 1 else "oneocr_extracted"
    if not os.path.exists(input_file):
        print(f"ERROR: Input file not found: {input_file}")
        sys.exit(1)
    extract_all(input_file, output_dir)