oneocr / _archive /attempts /disasm_crypto.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""
Disassemble the Cipher function in oneocr.dll to find the exact crypto parameters.
Find code references to the crypto strings we identified.
"""
import struct
import re
# NOTE: hard-coded location of the local copy of the DLL under analysis.
dll_path = r"c:\Users\MattyMroz\Desktop\PROJECTS\ONEOCR\ocr_data\oneocr.dll"
with open(dll_path, "rb") as f:
    data = f.read()

# Parse PE headers to find section info.
# The 32-bit value at file offset 0x3C gives the offset of the PE signature.
pe_sig_offset = struct.unpack_from("<I", data, 0x3C)[0]
# Explicit raise instead of assert: asserts are stripped under `python -O`,
# which would let a non-PE file be parsed as if it were valid.
if data[pe_sig_offset:pe_sig_offset + 4] != b"PE\x00\x00":
    raise ValueError(f"{dll_path}: PE signature not found at 0x{pe_sig_offset:x}")

# COFF header (immediately after the 4-byte signature)
coff_start = pe_sig_offset + 4
num_sections = struct.unpack_from("<H", data, coff_start + 2)[0]
opt_header_size = struct.unpack_from("<H", data, coff_start + 16)[0]

# Optional header (the COFF header is 20 bytes long)
opt_start = coff_start + 20
magic = struct.unpack_from("<H", data, opt_start)[0]
# Only PE32+ is handled: the 64-bit image base read below assumes that layout.
if magic != 0x20B:
    raise ValueError(f"{dll_path}: expected PE32+ optional header (0x20B), got 0x{magic:x}")
image_base = struct.unpack_from("<Q", data, opt_start + 24)[0]

# Sections: the table starts right after the optional header, 40 bytes per entry.
# Each tuple in `sections` is (name, va, vsize, raw_ptr, raw_size).
section_start = opt_start + opt_header_size
sections = []
for i in range(num_sections):
    s_off = section_start + i * 40
    # Entry layout: 8-byte name, then virtual size, virtual address,
    # raw data size and raw data pointer as consecutive little-endian uint32s.
    raw_name, vsize, va, raw_size, raw_ptr = struct.unpack_from("<8sIIII", data, s_off)
    name = raw_name.rstrip(b"\x00").decode("ascii", errors="replace")
    sections.append((name, va, vsize, raw_ptr, raw_size))
    print(f"Section: {name:10s} VA=0x{va:08x} VSize=0x{vsize:08x} RawPtr=0x{raw_ptr:08x} RawSize=0x{raw_size:08x}")

print(f"\nImage base: 0x{image_base:016x}")
def rva_to_file_offset(rva):
    """Translate a relative virtual address into an offset within the DLL file.

    Looks up ``rva`` in the module-level ``sections`` table, whose entries are
    ``(name, va, vsize, raw_ptr, raw_size)`` tuples.

    Returns the file offset, or ``None`` when no section covers ``rva`` or when
    ``rva`` lies in the part of a section that has no bytes on disk.
    """
    for _name, va, vsize, raw_ptr, raw_size in sections:
        delta = rva - va
        # A section can be larger in memory than on disk (vsize > raw_size).
        # Bounding by raw_size as well keeps the result inside this section's
        # raw data instead of pointing into whatever follows it in the file.
        if 0 <= delta < min(vsize, raw_size):
            return raw_ptr + delta
    return None
def file_offset_to_rva(offset):
    """Translate an offset within the DLL file into a relative virtual address.

    Looks up ``offset`` in the module-level ``sections`` table, whose entries
    are ``(name, va, vsize, raw_ptr, raw_size)`` tuples, and uses the first
    section whose raw data range contains it.

    Returns the RVA, or ``None`` when ``offset`` is outside every section's
    raw data.
    """
    candidates = (
        section_va + (offset - section_raw)
        for _name, section_va, _vsize, section_raw, section_raw_size in sections
        if section_raw <= offset < section_raw + section_raw_size
    )
    return next(candidates, None)
# Key string offsets we found (file offsets into the DLL, keyed by a label
# describing the string located there)
crypto_strings = {
    "SHA256 (wide)": 0x02724b60,
    "AES (wide)": 0x02724b70,
    "BlockLength (wide)": 0x02724b78,
    "ChainingModeCFB (wide)": 0x02724b90,
    "meta->magic_number == MAGIC_NUMBER": 0x02724bb0,
    "Unable to uncompress": 0x02724bd8,
    "Crypto.cpp": 0x02724c08,
    "Error returned from crypto API": 0x02724c40,
    "ChainingMode (wide)": 0x02724c80,
    "MessageBlockLength (wide)": 0x02724ca0,
}

# Calculate RVAs of these strings
print("\n=== String RVAs ===")
for name, file_off in crypto_strings.items():
    rva = file_offset_to_rva(file_off)
    # file_offset_to_rva signals "not inside any section" with None; test for
    # that explicitly so a legitimate RVA of 0 is not mistaken for a miss.
    if rva is not None:
        print(f" {name}: file=0x{file_off:08x} RVA=0x{rva:08x}")
# Find code references to these strings via LEA instruction patterns
# In x64, LEA reg, [rip+disp32] is encoded as:
#   48 8D xx yy yy yy yy (where xx determines the register)
#   or 4C 8D xx yy yy yy yy
# The target address = instruction_address + 7 + disp32
print("\n=== Searching for code references to crypto strings ===")

# Focus on the most important strings
key_strings = {
    "ChainingModeCFB (wide)": 0x02724b90,
    "SHA256 (wide)": 0x02724b60,
    "AES (wide)": 0x02724b70,
    "Crypto.cpp": 0x02724c08,
    "MessageBlockLength (wide)": 0x02724ca0,
    "meta->magic_number": 0x02724bb0,
}

# Length in bytes of the LEA encodings matched below:
# prefix (1) + opcode (1) + ModRM (1) + disp32 (4).
LEA_LENGTH = 7

# Destination register names, indexed by the ModRM reg field
# (plus 8 when the prefix byte is 4C).
REG_NAMES = ["rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi",
             "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"]

# Prefix 48 or 4C, opcode 8D, then a ModRM byte with mod=00 and rm=101
# (RIP-relative). The eight listed values are exactly the ModRM bytes for
# which (modrm & 0xC7) == 0x05. Neither 8D nor any of those ModRM values is a
# valid first byte, so two candidates can never overlap and the regex's
# non-overlapping scan sees every one of them.
LEA_RIP_PATTERN = re.compile(rb"[\x48\x4C]\x8D[\x05\x0D\x15\x1D\x25\x2D\x35\x3D]")

# Find the .text section (code)
text_section = None
for name, va, vsize, raw_ptr, raw_size in sections:
    if name == ".text":
        text_section = (va, vsize, raw_ptr, raw_size)
        break

if text_section:
    text_va, text_vsize, text_raw, text_rawsize = text_section
    print(f"\n.text section: VA=0x{text_va:08x} size=0x{text_vsize:08x}")

    # Resolve every string to its RVA once, so the code section is scanned a
    # single time instead of once per string. Strings whose file offset is
    # not inside any section are skipped.
    string_rvas = {}
    names_by_target = {}
    for string_name, string_file_off in key_strings.items():
        string_rva = file_offset_to_rva(string_file_off)
        if string_rva is None:
            continue
        string_rvas[string_name] = string_rva
        names_by_target.setdefault(string_rva, []).append(string_name)

    refs_by_name = {string_name: [] for string_name in string_rvas}

    # A candidate needs all 7 bytes inside the section. The regex match covers
    # the first 3, so stopping matches 4 bytes before the end still admits an
    # instruction that finishes exactly on the section's last byte.
    text_end = min(text_raw + text_rawsize, len(data))
    for match in LEA_RIP_PATTERN.finditer(data, text_raw, text_end - 4):
        code_off = match.start()
        instr_rva = file_offset_to_rva(code_off)
        if instr_rva is None:
            continue
        # RIP-relative addressing: target = address of next instruction + disp32
        disp32 = struct.unpack_from("<i", data, code_off + 3)[0]
        hit_names = names_by_target.get(instr_rva + LEA_LENGTH + disp32)
        if not hit_names:
            continue
        reg_idx = (data[code_off + 2] >> 3) & 7
        if data[code_off] == 0x4C:
            reg_idx += 8
        reg = REG_NAMES[reg_idx]
        for string_name in hit_names:
            refs_by_name[string_name].append((code_off, instr_rva, reg))

    # Report in the order the strings are listed in key_strings.
    for string_name, string_rva in string_rvas.items():
        refs_found = refs_by_name[string_name]
        if refs_found:
            print(f"\n References to '{string_name}' (RVA=0x{string_rva:08x}):")
            # Only the first five references are shown.
            for code_off, instr_rva, reg in refs_found[:5]:
                print(f" at file=0x{code_off:08x} RVA=0x{instr_rva:08x}: LEA {reg}, [{string_name}]")
                # Dump surrounding code: 64 bytes before and 128 bytes after
                # the instruction, clamped to the section's raw data.
                ctx_start = max(text_raw, code_off - 64)
                ctx_end = min(text_raw + text_rawsize, code_off + 128)
                # Plain hex dump, 16 bytes per row; the row containing the
                # instruction start is flagged with "<<<".
                print(f" Context (file offset 0x{ctx_start:08x} - 0x{ctx_end:08x}):")
                for i in range(ctx_start, ctx_end, 16):
                    chunk = data[i:i + 16]
                    hex_part = " ".join(f"{b:02x}" for b in chunk)
                    rva_i = file_offset_to_rva(i)
                    marker = " <<<" if i <= code_off < i + 16 else ""
                    print(f" {rva_i:08x}: {hex_part}{marker}")
        else:
            print(f"\n No code references found for '{string_name}'")
else:
    # Without a .text section there is nothing to scan; say so rather than
    # ending silently.
    print("\nNo .text section found; skipping code reference search")