# oneocr/_archive/attempts/disasm_proper.py
# OneOCR Dev
# OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
# ce847d4
"""
Proper disassembly of the Cipher function in oneocr.dll using capstone.
Focus on the crypto setup flow: key derivation, IV, AES parameters.
"""
import struct
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
# Absolute location of the DLL under analysis (specific to the author's machine).
dll_path = r"c:\Users\MattyMroz\Desktop\PROJECTS\ONEOCR\ocr_data\oneocr.dll"

# Read the whole image into memory; all later parsing and disassembly index
# directly into `data` by file offset.
with open(dll_path, "rb") as dll_file:
    data = dll_file.read()
# PE parsing (simplified)
# Only the fields needed to translate between file offsets and RVAs are read.
# Layout per the PE/COFF specification:
#   DOS header +0x3C -> e_lfanew, file offset of the "PE\0\0" signature
#   COFF header      -> 20 bytes, immediately after the 4-byte signature
#   optional header  -> immediately after the COFF header
#   section table    -> immediately after the optional header, 40 bytes/entry
if data[:2] != b"MZ":
    raise ValueError(f"{dll_path}: missing 'MZ' DOS signature, not a PE file")

pe_sig_offset = struct.unpack_from("<I", data, 0x3C)[0]
if data[pe_sig_offset:pe_sig_offset + 4] != b"PE\x00\x00":
    raise ValueError(
        f"{dll_path}: no 'PE\\0\\0' signature at offset 0x{pe_sig_offset:x}"
    )

coff_start = pe_sig_offset + 4
num_sections = struct.unpack_from("<H", data, coff_start + 2)[0]
opt_header_size = struct.unpack_from("<H", data, coff_start + 16)[0]
opt_start = coff_start + 20

# ImageBase is an 8-byte field at optional-header offset 24 only in PE32+
# images (magic 0x20B). A PE32 image stores it elsewhere as 4 bytes, so
# refuse anything else instead of reading a garbage base address.
opt_magic = struct.unpack_from("<H", data, opt_start)[0]
if opt_magic != 0x20B:
    raise ValueError(
        f"{dll_path}: optional header magic 0x{opt_magic:04x} is not PE32+ (0x020b)"
    )
image_base = struct.unpack_from("<Q", data, opt_start + 24)[0]

section_start = opt_start + opt_header_size

# First 24 bytes of a section header:
#   Name[8], VirtualSize, VirtualAddress, SizeOfRawData, PointerToRawData
_SECTION_HEADER = struct.Struct("<8sIIII")

# Each entry: (name, virtual address (RVA), virtual size, raw file pointer, raw size)
sections = []
for i in range(num_sections):
    s_off = section_start + i * 40
    raw_name, vsize, va, raw_size, raw_ptr = _SECTION_HEADER.unpack_from(data, s_off)
    name = raw_name.rstrip(b"\x00").decode("ascii", errors="replace")
    sections.append((name, va, vsize, raw_ptr, raw_size))
def rva_to_file_offset(rva):
    """Translate a relative virtual address into an offset in the DLL file.

    Args:
        rva: Address relative to the image base.

    Returns:
        The file offset backing ``rva``, or ``None`` when ``rva`` lies outside
        every section or inside a section's virtual-only tail (the part of
        the virtual size that extends past the section's raw data and
        therefore has no bytes in the file).
    """
    for _name, va, vsize, raw_ptr, raw_size in sections:
        delta = rva - va
        if 0 <= delta < vsize:
            # Bytes beyond raw_size exist only in memory (zero-filled by the
            # loader); raw_ptr + delta would land in another section's data.
            return raw_ptr + delta if delta < raw_size else None
    return None
def file_offset_to_rva(offset):
    """Translate an offset in the DLL file into a relative virtual address.

    The first section whose raw-data range ``[raw_ptr, raw_ptr + raw_size)``
    contains ``offset`` determines the result; ``None`` is returned when no
    section's raw data covers the offset.
    """
    return next(
        (
            va + (offset - raw_ptr)
            for _name, va, _vsize, raw_ptr, raw_size in sections
            if raw_ptr <= offset < raw_ptr + raw_size
        ),
        None,
    )
# Capstone disassembler configured for 64-bit x86; used by disasm_region().
md = Cs(CS_ARCH_X86, CS_MODE_64)
# Enable capstone's per-instruction detail. NOTE(review): the code visible in
# this file only reads address/bytes/mnemonic/op_str, so detail may be
# unnecessary here -- confirm before removing.
md.detail = True
def disasm_region(file_start, file_end, label=""):
    """Disassemble the file-offset range [file_start, file_end) and print it.

    Args:
        file_start: File offset of the first byte to disassemble.
        file_end: File offset one past the last byte (used as a slice end).
        label: Heading printed above the listing.

    Raises:
        ValueError: If ``file_start`` is not inside any section's raw data,
            so no RVA or virtual address can be derived for the listing.

    Every output line shows the instruction's RVA, its file offset, the raw
    bytes in hex, the mnemonic and the operand string.
    """
    base_rva = file_offset_to_rva(file_start)
    if base_rva is None:
        # Without this check the addition below fails with an opaque
        # "unsupported operand type(s)" TypeError.
        raise ValueError(
            f"file offset 0x{file_start:08x} is not inside any section's raw data"
        )

    code_bytes = data[file_start:file_end]
    base_addr = image_base + base_rva

    print(f"\n{'='*80}")
    print(f"{label}")
    print(f"File: 0x{file_start:08x}-0x{file_end:08x}, RVA: 0x{base_rva:08x}, VA: 0x{base_addr:016x}")
    print(f"{'='*80}")

    # NOTE: unless skipdata is enabled, capstone's disasm() stops at the first
    # bytes it cannot decode, so the listing may end before file_end.
    for instr in md.disasm(code_bytes, base_addr):
        # Distance of this instruction from the start of the region; the same
        # delta applies to the file offset and to the RVA.
        delta = instr.address - base_addr
        file_off = file_start + delta
        rva = base_rva + delta
        hex_bytes = " ".join(f"{b:02x}" for b in instr.bytes)
        print(f" {rva:08x} ({file_off:08x}): {hex_bytes:<30s} {instr.mnemonic:10s} {instr.op_str}")
# Crypto-related code ranges identified in earlier analysis.
# Each entry is (start, end, heading); start/end are offsets into the DLL
# file, not RVAs.
regions = [
    # SHA256 provider setup
    (0x0015a3a0, 0x0015a500, "SHA256Hash function - BCryptOpenAlgorithmProvider for SHA256"),
    # AES provider setup and ChainingMode/MessageBlockLength
    (0x0015a6b0, 0x0015a880, "Cipher function - AES setup, ChainingModeCFB, MessageBlockLength"),
    # Key generation and decrypt/encrypt
    (0x0015a880, 0x0015aa00, "Cipher function - key generation and encrypt/decrypt"),
    # Magic number check and uncompress
    (0x0015a170, 0x0015a300, "Magic number check and uncompress"),
]

# Dump every region in the order listed above.
for region in regions:
    disasm_region(*region)
# Also look at the function that calls BCryptDecrypt.
# BCryptDecrypt is called via an indirect call through the import address
# table (IAT).
# NOTE(review): despite the heading printed below, nothing in this part of
# the script locates the BCryptDecrypt IAT entry or scans for call sites; it
# only dumps one wider region.
print("\n\n=== Finding BCryptDecrypt call sites ===")
# Reference from earlier analysis: the call at 0015b3de,
#   ff 15 23 f2 6b 00  ->  CALL [rip+0x006bf223]
# is an indirect call through the IAT. Similar patterns are expected near the
# ChainingModeCFB reference.
# After ChainingMode and MessageBlockLength are set, the next step is
# GenerateSymmetricKey, so disassemble the broader decrypt region.
# This range starts at the same file offset as the "key generation and
# encrypt/decrypt" entry in `regions` and extends further, to 0x0015abe0.
disasm_region(0x0015a880, 0x0015abe0, "Post-setup: key generation, IV, encrypt/decrypt")