oneocr / _archive /attempts /find_offset.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
"""Map encrypted input bytes from hook to file offsets."""
from pathlib import Path
import struct
# The complete model file, read into memory once; everything below searches
# and slices this bytes object instead of touching the disk again.
data = Path("ocr_data/oneocr.onemodel").read_bytes()

# Captures recorded by the hook, one tuple per call:
#   (call number, hex of the first 32 encrypted input bytes, chunk size)
chunks_encrypted = [
    (
        0,
        "2e0c10c7c967f66b6d03821271115ad6c19ca7d91b668e5c484018e02c9632b4",
        22624,
    ),
    (
        2,
        "f7d14a6dbd04af02b6de5e5454af59d007bb5c174e3b6be6a73513b995c7dc1a",
        11920,
    ),
    (
        4,
        "7bf021af201c559217035b95ebf758ff70c860f126c9c1529421bb2d75898bf9",
        11553680,
    ),
]
# Locate every captured chunk inside the model file and report its offset,
# its byte range, and the distance from the previously located chunk.
print("Searching for encrypted chunk starts in file:")
print(f"File size: {len(data):,}")
print()

# Exclusive end offset of the most recently located chunk (0 = none yet).
prev_end = 0
for call_num, hex_str, chunk_size in chunks_encrypted:
    full = bytes.fromhex(hex_str)  # all 32 captured bytes

    # Prefer an offset where every captured byte matches.  Only when no such
    # offset exists fall back to the first hit of the 8-byte prefix, so that a
    # partial match is still shown -- but explicitly flagged instead of being
    # silently reported as if it had been verified.
    idx = data.find(full)
    verified = idx >= 0
    if not verified:
        idx = data.find(full[:8])  # First 8 bytes

    if idx < 0:
        print(f" Call #{call_num}: NOT FOUND")
        continue

    # For the first located chunk the gap is measured from the file start.
    gap = idx - prev_end if prev_end > 0 else idx
    print(f" Call #{call_num}: offset {idx} ({idx:#x}), size={chunk_size:,}, gap={gap}")
    print(f" Range: [{idx:#x}, {idx+chunk_size:#x})")
    prev_end = idx + chunk_size

    if verified:
        print(" 32-byte match: OK")
    else:
        print(" 32-byte match: MISMATCH (only the first 8 bytes match)")
# File structure
def _read_u32(offset):
    """Return the little-endian uint32 stored in ``data`` at *offset*."""
    (value,) = struct.unpack_from('<I', data, offset)
    return value


# Dump the first 24 bytes: two uint32 fields followed by 16 raw bytes.
print("\n--- File structure ---")
print(f"Offset 0: header_size = {_read_u32(0)}")
print(f"Offset 4: {_read_u32(4)}")
print(f"Offset 8-23: {data[8:24].hex()}")

# The first chunk is assumed to start right after those 24 bytes, so its end
# is 24 plus the size recorded for call #0.
chunk1_end = 24 + 22624  # = 22648
print(f"\nChunk 1 ends at offset {chunk1_end}")

# Print the 4-byte words around that boundary: three words before it
# (from 22636) and eight words from the boundary onwards (up to 22680).
for word_offset in range(22636, 22680, 4):
    word_value = _read_u32(word_offset)
    raw_hex = data[word_offset:word_offset+4].hex()
    print(f" offset {word_offset}: {raw_hex} = uint32 {word_value} ({word_value:#x})")