"""
Disassemble the Cipher function in oneocr.dll to find the exact crypto parameters.
Find code references to the crypto strings we identified.
"""
import struct
import re

dll_path = r"c:\Users\MattyMroz\Desktop\PROJECTS\ONEOCR\ocr_data\oneocr.dll"
with open(dll_path, "rb") as f:
    data = f.read()

# Parse PE headers to find section info.
# The 32-bit value at file offset 0x3C is the file offset of the "PE\0\0"
# signature. Validation uses explicit raises rather than `assert` so the
# checks still run under `python -O`.
pe_sig_offset = struct.unpack_from("<I", data, 0x3C)[0]
if data[pe_sig_offset:pe_sig_offset + 4] != b"PE\x00\x00":
    raise ValueError(
        f"{dll_path}: no PE signature at file offset 0x{pe_sig_offset:x}"
    )

# COFF header (20 bytes, immediately after the 4-byte signature):
# NumberOfSections is at +2, SizeOfOptionalHeader at +16.
coff_start = pe_sig_offset + 4
num_sections = struct.unpack_from("<H", data, coff_start + 2)[0]
opt_header_size = struct.unpack_from("<H", data, coff_start + 16)[0]

# Optional header: only PE32+ (magic 0x20B) is supported, because ImageBase
# is read below as a 64-bit field at +24.
opt_start = coff_start + 20
magic = struct.unpack_from("<H", data, opt_start)[0]
if magic != 0x20B:  # PE32+
    raise ValueError(
        f"{dll_path}: optional header magic is 0x{magic:04x}, expected 0x020b (PE32+)"
    )

image_base = struct.unpack_from("<Q", data, opt_start + 24)[0]

# Section table: `num_sections` consecutive 40-byte headers that start right
# after the optional header. Each entry is stored in `sections` as
# (name, virtual address, virtual size, raw data pointer, raw data size).
section_start = opt_start + opt_header_size
sections = []
for i in range(num_sections):
    s_off = section_start + i * 40
    # The first 24 bytes of a header hold, in order: the 8-byte name,
    # the virtual size, the virtual address, the raw size and the raw pointer.
    raw_name, vsize, va, raw_size, raw_ptr = struct.unpack_from("<8sIIII", data, s_off)
    name = raw_name.rstrip(b"\x00").decode("ascii", errors="replace")
    sections.append((name, va, vsize, raw_ptr, raw_size))
    print(f"Section: {name:10s} VA=0x{va:08x} VSize=0x{vsize:08x} RawPtr=0x{raw_ptr:08x} RawSize=0x{raw_size:08x}")

print(f"\nImage base: 0x{image_base:016x}")

def rva_to_file_offset(rva):
    """Translate a relative virtual address into an offset in the DLL file.

    Looks `rva` up in the module-level `sections` list, whose entries are
    (name, va, vsize, raw_ptr, raw_size) tuples.

    Returns the file offset, or None when `rva` lies outside every section
    or in the part of a section that has no bytes on disk.
    """
    for _name, va, vsize, raw_ptr, raw_size in sections:
        delta = rva - va
        # A section may be larger in memory than on disk (vsize > raw_size).
        # Only the first raw_size bytes have a file offset; mapping the rest
        # would point into whatever follows this section's raw data.
        if 0 <= delta < vsize and delta < raw_size:
            return raw_ptr + delta
    return None

def file_offset_to_rva(offset):
    """Translate an offset in the DLL file into a relative virtual address.

    Walks the module-level `sections` list of
    (name, va, vsize, raw_ptr, raw_size) tuples and uses the first section
    whose raw data range [raw_ptr, raw_ptr + raw_size) contains `offset`.

    Returns the RVA, or None when no section's raw data contains `offset`.
    """
    for _name, sec_rva, _vsize, sec_raw, sec_raw_len in sections:
        delta = offset - sec_raw
        if 0 <= delta < sec_raw_len:
            return sec_rva + delta
    return None

# Key string offsets we found (file offsets into the DLL, keyed by a label)
crypto_strings = {
    "SHA256 (wide)": 0x02724b60,
    "AES (wide)": 0x02724b70,
    "BlockLength (wide)": 0x02724b78,
    "ChainingModeCFB (wide)": 0x02724b90,
    "meta->magic_number == MAGIC_NUMBER": 0x02724bb0,
    "Unable to uncompress": 0x02724bd8,
    "Crypto.cpp": 0x02724c08,
    "Error returned from crypto API": 0x02724c40,
    "ChainingMode (wide)": 0x02724c80,
    "MessageBlockLength (wide)": 0x02724ca0,
}

# Calculate RVAs of these strings
print("\n=== String RVAs ===")
for name, file_off in crypto_strings.items():
    rva = file_offset_to_rva(file_off)
    # Compare against None explicitly: "not mapped" is signalled by None, and
    # a truthiness test would also drop a legitimate RVA of 0.
    if rva is None:
        print(f"  {name}: file=0x{file_off:08x} is not inside any section")
        continue
    print(f"  {name}: file=0x{file_off:08x} RVA=0x{rva:08x}")

# Find code references to these strings via LEA instruction patterns.
# In x64, LEA reg, [rip+disp32] is encoded as:
#   48 8D modrm d0 d1 d2 d3   (REX.W,   destination rax..rdi)
#   4C 8D modrm d0 d1 d2 d3   (REX.W+R, destination r8..r15)
# where modrm has mod=00 and rm=101 (RIP-relative) and bits 3-5 select the
# destination register. The instruction is 7 bytes long, so:
#   target address = instruction_address + 7 + disp32

print("\n=== Searching for code references to crypto strings ===")

# Focus on the most important strings
key_strings = {
    "ChainingModeCFB (wide)": 0x02724b90,
    "SHA256 (wide)": 0x02724b60,
    "AES (wide)": 0x02724b70,
    "Crypto.cpp": 0x02724c08,
    "MessageBlockLength (wide)": 0x02724ca0,
    "meta->magic_number": 0x02724bb0,
}

# Destination register names indexed by ModRM.reg (+8 when the prefix is 4C).
reg_names = ["rax","rcx","rdx","rbx","rsp","rbp","rsi","rdi",
             "r8","r9","r10","r11","r12","r13","r14","r15"]

# Prefix (48 or 4C) followed by the LEA opcode 8D. Two such matches cannot
# overlap (the second byte, 8D, is never a valid first byte), so finditer
# yields every position a byte-by-byte scan would.
lea_prefix = re.compile(rb"[\x48\x4c]\x8d")
lea_length = 7

# Find the .text section (code)
text_section = None
for name, va, vsize, raw_ptr, raw_size in sections:
    if name == ".text":
        text_section = (va, vsize, raw_ptr, raw_size)
        break

if text_section:
    text_va, text_vsize, text_raw, text_rawsize = text_section
    print(f"\n.text section: VA=0x{text_va:08x} size=0x{text_vsize:08x}")

    # Resolve every string to its RVA once. Strings whose file offset is not
    # inside any section are skipped, and the report order follows key_strings.
    string_rvas = {}
    for string_name, string_file_off in key_strings.items():
        string_rva = file_offset_to_rva(string_file_off)
        if string_rva is not None:
            string_rvas[string_name] = string_rva

    # Target RVA -> list of (file offset, instruction RVA, register) hits.
    refs_by_rva = {string_rva: [] for string_rva in string_rvas.values()}

    # Scan the code once for all strings. Candidates must leave room for the
    # whole 7-byte instruction, including one that ends exactly at the end of
    # the section's raw data (or of the file, if the section is truncated).
    text_end = min(text_raw + text_rawsize, len(data))
    scan_end = max(text_raw, text_end - (lea_length - 2))
    for match in lea_prefix.finditer(data, text_raw, scan_end):
        code_off = match.start()
        modrm = data[code_off + 2]
        if (modrm & 0xC7) != 0x05:  # need mod=00, rm=101 (RIP-relative)
            continue
        disp32 = struct.unpack_from("<i", data, code_off + 3)[0]
        instr_rva = file_offset_to_rva(code_off)
        if instr_rva is None:
            continue
        # RIP-relative displacements are relative to the next instruction.
        target_rva = instr_rva + lea_length + disp32
        hits = refs_by_rva.get(target_rva)
        if hits is None:
            continue
        reg_idx = (modrm >> 3) & 7
        if data[code_off] == 0x4C:
            reg_idx += 8
        hits.append((code_off, instr_rva, reg_names[reg_idx]))

    for string_name, string_rva in string_rvas.items():
        refs_found = refs_by_rva[string_rva]

        if refs_found:
            print(f"\n  References to '{string_name}' (RVA=0x{string_rva:08x}):")
            # Only the first five references are dumped to keep output readable.
            for code_off, instr_rva, reg in refs_found[:5]:
                print(f"    at file=0x{code_off:08x} RVA=0x{instr_rva:08x}: LEA {reg}, [{string_name}]")
                # Dump surrounding code: 64 bytes before and 128 bytes after
                # the instruction, clamped to the .text raw data range.
                ctx_start = max(text_raw, code_off - 64)
                ctx_end = min(text_raw + text_rawsize, code_off + 128)

                # Plain hex dump, 16 bytes per row; the row holding the LEA
                # is flagged with " <<<".
                print(f"    Context (file offset 0x{ctx_start:08x} - 0x{ctx_end:08x}):")
                for i in range(ctx_start, ctx_end, 16):
                    chunk = data[i:i+16]
                    hex_part = " ".join(f"{b:02x}" for b in chunk)
                    rva_i = file_offset_to_rva(i)
                    marker = " <<<" if i <= code_off < i + 16 else ""
                    print(f"      {rva_i:08x}: {hex_part}{marker}")
        else:
            print(f"\n  No code references found for '{string_name}'")
else:
    print("\nNo .text section found; cannot search for code references")