oneocr / _archive /attempts /extract_strings.py
OneOCR Dev
OneOCR - reverse engineering complete, ONNX pipeline 53% match rate
ce847d4
import re
# Default input: the OneOCR DLL at the location used when this script was written.
DLL_PATH = r"c:\Users\MattyMroz\Desktop\PROJECTS\ONEOCR\ocr_data\oneocr.dll"

# Runs of at least 6 printable ASCII bytes (0x20-0x7e).
ASCII_STRING_RE = re.compile(rb"[\x20-\x7e]{6,}")
# Runs of at least 4 (printable ASCII byte, NUL byte) pairs, i.e. ASCII text
# stored as UTF-16LE.
WIDE_STRING_RE = re.compile(rb"(?:[\x20-\x7e]\x00){4,}")

# Lower-case substrings that flag an ASCII string as interesting.  Matching is
# case-insensitive, so one lower-case spelling per keyword is enough, and
# 'bcrypt' / 'decrypt' / 'encrypt' / 'crypto' are already covered by 'crypt'.
ASCII_KEYWORDS = (
    b"crypt", b"aes", b"key", b"iv", b"cipher", b"cfb", b"hash",
    b"sha", b"magic", b"decomp", b"uncomp", b"compress", b"model", b"meta",
    b"onnx", b"init", b"blob", b"check", b"fail", b"number",
)

# Lower-case substrings that flag a wide (UTF-16LE) string as interesting.
WIDE_KEYWORDS = (
    "crypt", "aes", "cfb", "chain", "algorithm", "key", "sha", "hash",
)


def find_strings(pattern, data: bytes) -> list:
    """Return ``(offset, raw_bytes)`` for every match of *pattern* in *data*.

    Matches are non-overlapping and listed in file order.  The offset is the
    position of that particular match, so repeated strings each keep their own
    location instead of all reporting the first occurrence of their bytes.
    """
    return [(m.start(), m.group()) for m in pattern.finditer(data)]


def contains_keyword(text, keywords) -> bool:
    """Return True if any of *keywords* occurs in *text*, ignoring case.

    *text* and *keywords* must be of the same kind (both ``bytes`` or both
    ``str``), and *keywords* must already be lower-case.
    """
    lowered = text.lower()
    return any(kw in lowered for kw in keywords)


def scan_ascii(data: bytes) -> tuple:
    """Collect keyword-bearing printable-ASCII strings from *data*.

    Returns ``(total, hits)`` where *total* is the number of ASCII strings
    found (before any filtering) and *hits* is a list of ``(offset, text)``
    pairs.  Each distinct string is reported once, at its first occurrence.
    """
    matches = find_strings(ASCII_STRING_RE, data)
    seen = set()
    hits = []
    for offset, raw in matches:
        if raw in seen or not contains_keyword(raw, ASCII_KEYWORDS):
            continue
        seen.add(raw)
        # The regex only admits bytes 0x20-0x7e, so this decode cannot fail.
        hits.append((offset, raw.decode("ascii")))
    return len(matches), hits


def scan_wide(data: bytes) -> list:
    """Collect keyword-bearing UTF-16LE strings from *data*.

    Returns a list of ``(offset, text)`` pairs in file order.  Repeated
    strings are not de-duplicated; every occurrence is listed with its own
    offset.
    """
    hits = []
    for offset, raw in find_strings(WIDE_STRING_RE, data):
        # Every matched pair is (printable ASCII, NUL): always valid UTF-16LE.
        text = raw.decode("utf-16-le")
        if contains_keyword(text, WIDE_KEYWORDS):
            hits.append((offset, text))
    return hits


def main(path: str = DLL_PATH) -> None:
    """Read the binary at *path* and print the interesting strings found in it.

    Raises:
        OSError: if *path* cannot be opened or read.
    """
    # Context manager so the handle is closed even if the read fails.
    with open(path, "rb") as fh:
        data = fh.read()

    total, ascii_hits = scan_ascii(data)
    print(f"Total strings: {total}")
    print()
    print("=== Crypto/model-related strings ===")
    for offset, text in ascii_hits:
        print(f" [0x{offset:08x}] {text}")

    # Also look for wide strings (UTF-16LE) related to BCrypt
    print()
    print("=== Wide (UTF-16LE) strings ===")
    for offset, text in scan_wide(data):
        print(f" [0x{offset:08x}] {text}")


if __name__ == "__main__":
    import sys

    # Optional first argument overrides the default DLL location.
    main(sys.argv[1] if len(sys.argv) > 1 else DLL_PATH)