oneocr / _archive /attempts /disasm_full_cipher.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""
Full disassembly of the Cipher function from AES setup through BCryptDecrypt.
Based on findings:
- SHA256 provider at file 0x0015a3e2 (RVA 0x0015afe2)
- AES provider at file 0x0015a702 (RVA 0x0015b302)
- ChainingModeCFB at file 0x0015a7cd (RVA 0x0015b3cd)
- MessageBlockLength at file 0x0015a7fc (RVA 0x0015b3fc)
- BCryptGenerateSymmetricKey import at ~0x027ef0a2
- Need to find: key handling, IV passing, BCryptDecrypt call
"""
import struct
from capstone import Cs, CS_ARCH_X86, CS_MODE_64
# Absolute path of the oneocr.dll image to analyse (machine-specific location).
DLL_PATH = r"c:\Users\MattyMroz\Desktop\PROJECTS\ONEOCR\ocr_data\oneocr.dll"
# Base address added to RVAs to form the virtual addresses handed to the
# disassembler and shown in the listings.
IMAGE_BASE = 0x180000000
# RVA that corresponds to TEXT_FILE_OFFSET in the RVA <-> file-offset helpers.
# Presumably the .text section's virtual address — TODO confirm against the
# PE section table.
TEXT_VA = 0x1000
TEXT_FILE_OFFSET = 0x400 # .text section file offset (pairs with TEXT_VA)
def rva_to_file(rva):
    """Translate an RVA into a raw file offset.

    The conversion uses the single TEXT_VA / TEXT_FILE_OFFSET pair, so the
    result is only meaningful for addresses inside that section. No bounds
    checking is performed.
    """
    offset_in_section = rva - TEXT_VA
    return offset_in_section + TEXT_FILE_OFFSET
def file_to_rva(foff):
    """Translate a raw file offset into an RVA (inverse of rva_to_file).

    The conversion uses the single TEXT_VA / TEXT_FILE_OFFSET pair, so the
    result is only meaningful for offsets inside that section. No bounds
    checking is performed.
    """
    offset_in_section = foff - TEXT_FILE_OFFSET
    return offset_in_section + TEXT_VA
# Read the whole DLL image into memory once; every scan and listing below
# slices this buffer by file offset.
with open(DLL_PATH, "rb") as dll_file:
    dll_data = dll_file.read()

# One shared x86-64 disassembler. Per-instruction detail stays off because
# only address, raw bytes, mnemonic and operand text are printed.
md = Cs(CS_ARCH_X86, CS_MODE_64)
md.detail = False
def disasm_region(name, file_start, file_end):
    """Print a linear disassembly of ``dll_data[file_start:file_end]``.

    Args:
        name: Title printed in the banner above the listing.
        file_start: File offset of the first byte to decode.
        file_end: File offset one past the last byte to decode.

    Every output line shows the instruction's RVA, its file offset, the raw
    bytes in hex, the mnemonic and the operand string.

    NOTE(review): decoding is a straight sweep from ``file_start``; capstone's
    ``disasm()`` ends the iteration at the first byte sequence it cannot
    decode, so a listing may stop before ``file_end`` — confirm when a region
    looks truncated.
    """
    banner = "=" * 100
    start_rva = file_to_rva(file_start)
    start_va = IMAGE_BASE + start_rva

    print(f"\n{banner}")
    print(f"{name}")
    print(f"File: 0x{file_start:08x}-0x{file_end:08x}, RVA: 0x{start_rva:08x}, VA: 0x{start_va:016x}")
    print(f"{banner}")

    region_bytes = dll_data[file_start:file_end]
    for insn in md.disasm(region_bytes, start_va):
        insn_rva = insn.address - IMAGE_BASE
        insn_foff = rva_to_file(insn_rva)
        raw_hex = insn.bytes.hex()
        print(f" {insn_rva:08x} ({insn_foff:08x}): {raw_hex:<40s} {insn.mnemonic:<14s} {insn.op_str}")
# The Cipher function appears to start before the AES setup.
# Let's find the function prologue by scanning backwards from the AES setup.
# The AES LEA is at file 0x0015a702. Let's look for a typical function prologue.
# First, let's find the actual function start
# Look for common prologues (push rbp, sub rsp, mov [rsp+...], etc.) before the AES reference
print("\n" + "="*100)
print("SCANNING FOR FUNCTION PROLOGUE before AES setup (file 0x0015a702)")
print("="*100)
# Scan forward through the bytes that precede the AES reference at file
# 0x0015a702, reporting every offset that looks like a function prologue.
search_start = 0x0015a500 # Start from after SHA256Hash function
search_end = 0x0015a710
search_region = dll_data[search_start:search_end]
# Byte patterns treated as likely x64 function prologues:
#   48 89 5C 24 08        = mov [rsp+8], rbx
#   40 55                 = push rbp (with REX prefix)
#   55                    = push rbp, only when preceded by ret / int3 / nop
#   48 83 EC 28           = sub rsp, 0x28
#   48 81 EC xx xx xx xx  = sub rsp, imm32
# This is a raw byte match at every offset, not an instruction-aligned scan,
# so each hit is only a candidate.
for i in range(len(search_region) - 4):
    foff = search_start + i
    rva = file_to_rva(foff)
    # Take the 8-byte window from the whole image rather than from
    # search_region: a window sliced from search_region is truncated near
    # search_end, and decoding the imm32 of `sub rsp, imm32` from a short
    # window raises struct.error.
    b = dll_data[foff:foff + 8]
    # Look for function start patterns
    if b[:5] == bytes([0x48, 0x89, 0x5C, 0x24, 0x08]): # mov [rsp+8], rbx
        print(f" Possible prologue at file 0x{foff:08x} (RVA 0x{rva:08x}): mov [rsp+8], rbx")
    elif b[:2] == bytes([0x40, 0x55]): # push rbp with REX
        print(f" Possible prologue at file 0x{foff:08x} (RVA 0x{rva:08x}): REX push rbp")
    elif b[:1] == bytes([0x55]) and (i == 0 or search_region[i-1] in (0xC3, 0xCC, 0x90)):
        print(f" Possible prologue at file 0x{foff:08x} (RVA 0x{rva:08x}): push rbp (after ret/nop/int3)")
    elif b[:4] == bytes([0x48, 0x83, 0xEC, 0x28]): # sub rsp, 0x28
        print(f" Possible prologue at file 0x{foff:08x} (RVA 0x{rva:08x}): sub rsp, 0x28")
    elif b[:3] == bytes([0x48, 0x81, 0xEC]) and len(b) >= 7: # sub rsp, imm32
        # The length guard only matters if the match sits at the very end of
        # the file, where the 4-byte immediate would be incomplete.
        val = struct.unpack_from('<I', b, 3)[0]
        print(f" Possible prologue at file 0x{foff:08x} (RVA 0x{rva:08x}): sub rsp, 0x{val:X}")
# Dump the Cipher function in meaningful chunks, starting right after
# SHA256Hash and running well past the setup code. The function is large, so
# each chunk gets its own banner.
cipher_regions = (
    # Region 1: Function start to AES provider setup
    ("Cipher function part 1: prologue to AES provider", 0x0015a500, 0x0015a720),
    # Region 2: AES provider setup through ChainingMode and MessageBlockLength
    ("Cipher function part 2: AES provider, ChainingModeCFB, MessageBlockLength", 0x0015a720, 0x0015a880),
    # Region 3: After IV extraction, BCryptGenerateSymmetricKey, BCryptDecrypt calls
    # This is the critical region we need
    ("Cipher function part 3: key gen and decrypt (extended)", 0x0015abd0, 0x0015ae00),
)
for region_name, region_start, region_end in cipher_regions:
    disasm_region(region_name, region_start, region_end)
# Also check what's around the BCryptDecrypt import call
# BCrypt imports are indirect calls through IAT
# Let's find all indirect calls (FF 15) in the cipher function range
print("\n" + "="*100)
print("ALL INDIRECT CALLS (ff 15) in Cipher function region 0x0015a500-0x0015ae00")
print("="*100)
search_start = 0x0015a500
search_end = 0x0015ae00
# Raw byte scan, not instruction-aligned: an `ff 15` pair inside another
# instruction's operand bytes is reported too.
for i in range(search_end - search_start - 6):
    foff = search_start + i
    if dll_data[foff] == 0xFF and dll_data[foff+1] == 0x15:
        rva = file_to_rva(foff)
        # Signed 32-bit displacement that follows the two opcode bytes.
        disp = struct.unpack_from('<i', dll_data, foff + 2)[0]
        target_rva = rva + 6 + disp # RIP-relative: measured from the end of the 6-byte instruction
        # The IAT slot is deliberately not read from the file. rva_to_file()
        # only knows the .text mapping, so it would give a wrong (possibly
        # negative) offset for a slot in another section; the value was never
        # printed, and for a false-positive match the read could raise
        # struct.error.
        # The signed format keeps `rip+0x...` for non-negative displacements
        # and prints `rip-0x...` for negative ones.
        print(f" File 0x{foff:08x} (RVA 0x{rva:08x}): call [rip{disp:+#x}] -> IAT at RVA 0x{target_rva:08x}")
# Dump the stretch after the IV handling (around 0x0015abdb) up to the end of
# the Cipher range; the actual BCryptDecrypt call might be in here.
disasm_region(
    name="Cipher function part 4: from end of IV path to function cleanup",
    file_start=0x0015ac00,
    file_end=0x0015ae00,
)

# Dump the code immediately before the magic number check function at
# 0x0015a170. The Cipher function should return, and then a caller invokes
# the magic check.
disasm_region(
    name="Pre-magic-check function caller",
    file_start=0x0015a0c0,
    file_end=0x0015a170,
)