oneocr / _archive /attempts /extract_strings.py

OneOCR Dev

OneOCR - reverse engineering complete, ONNX pipeline 53% match rate

ce847d4 1 day ago

1.47 kB

	import re

	# Default input: the DLL this script was originally written against.
	# Pass a different path as the first command-line argument to override it.
	DEFAULT_DLL_PATH = r"c:\Users\MattyMroz\Desktop\PROJECTS\ONEOCR\ocr_data\oneocr.dll"

	# Runs of at least 6 printable ASCII bytes.
	ASCII_RUN = re.compile(rb'[\x20-\x7e]{6,}')
	# Runs of at least 4 printable ASCII characters, each followed by a NUL
	# byte, i.e. ASCII-range text stored as UTF-16LE.
	WIDE_RUN = re.compile(rb'(?:[\x20-\x7e]\x00){4,}')

	# Matching is case-insensitive, so one lower-case spelling per keyword is
	# enough. 'bcrypt', 'decrypt', 'encrypt' and 'crypto' are all covered by
	# the substring 'crypt'.
	ASCII_KEYWORDS = (
	    b'crypt', b'aes', b'key', b'iv', b'cipher', b'cfb', b'hash', b'sha',
	    b'magic', b'decomp', b'uncomp', b'compress', b'model', b'meta',
	    b'onnx', b'init', b'blob', b'check', b'fail', b'number',
	)
	WIDE_KEYWORDS = (
	    'crypt', 'aes', 'cfb', 'chain', 'algorithm', 'key', 'sha', 'hash',
	)


	def scan_ascii(data: bytes, keywords=ASCII_KEYWORDS) -> tuple:
	    """Find distinct printable-ASCII runs that contain any keyword.

	    Args:
	        data: Raw bytes to scan.
	        keywords: Byte-string keywords, matched case-insensitively as
	            substrings of each run.

	    Returns:
	        A ``(total, hits)`` tuple. ``total`` is the number of ASCII runs
	        found in ``data`` (matching a keyword or not). ``hits`` is a list
	        of ``(offset, text)`` pairs, one per distinct matching run, in
	        file order; ``offset`` is where that run first starts.
	    """
	    lowered = [kw.lower() for kw in keywords]
	    seen = set()
	    hits = []
	    total = 0
	    for match in ASCII_RUN.finditer(data):
	        total += 1
	        run = match.group()
	        if run in seen:
	            continue
	        run_lower = run.lower()
	        if any(kw in run_lower for kw in lowered):
	            seen.add(run)
	            # Use the match position itself: searching the whole buffer
	            # for the bytes again could land inside an earlier, longer run.
	            hits.append((match.start(), run.decode("ascii", errors="replace")))
	    return total, hits


	def scan_wide(data: bytes, keywords=WIDE_KEYWORDS) -> list:
	    """Find UTF-16LE (ASCII-range) runs that contain any keyword.

	    Args:
	        data: Raw bytes to scan.
	        keywords: Text keywords, matched case-insensitively as substrings
	            of each decoded run.

	    Returns:
	        A list of ``(offset, text)`` pairs in file order. Repeated strings
	        are not de-duplicated; each occurrence is reported with its own
	        offset.
	    """
	    lowered = [kw.lower() for kw in keywords]
	    hits = []
	    for match in WIDE_RUN.finditer(data):
	        decoded = match.group().decode("utf-16-le", errors="replace")
	        decoded_lower = decoded.lower()
	        if any(kw in decoded_lower for kw in lowered):
	            hits.append((match.start(), decoded))
	    return hits


	def main(argv=None) -> None:
	    """Scan a binary and print crypto/model-related strings with offsets.

	    Args:
	        argv: Command-line arguments without the program name. Defaults to
	            ``sys.argv[1:]``. The first argument, if present, is the file
	            to scan; otherwise ``DEFAULT_DLL_PATH`` is used.
	    """
	    import sys

	    args = sys.argv[1:] if argv is None else list(argv)
	    path = args[0] if args else DEFAULT_DLL_PATH

	    # Context manager so the file handle is closed deterministically.
	    with open(path, "rb") as fh:
	        data = fh.read()

	    total, ascii_hits = scan_ascii(data)
	    print(f"Total strings: {total}")
	    print()
	    print("=== Crypto/model-related strings ===")
	    for offset, text in ascii_hits:
	        print(f" [0x{offset:08x}] {text}")

	    # Also look for wide strings (UTF-16LE) related to BCrypt
	    print()
	    print("=== Wide (UTF-16LE) strings ===")
	    for offset, text in scan_wide(data):
	        print(f" [0x{offset:08x}] {text}")


	if __name__ == "__main__":
	    main()