File size: 3,088 Bytes
ce847d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
"""Exploratory dump of a oneocr.onemodel binary: header layout, magic
bytes, entropy proxy, and candidate offset tables."""
import struct
import sys

# Default to the known local path, but allow overriding from the command
# line so the script is reusable on other machines:
#   python inspect_model.py <path-to-onemodel>
filepath = sys.argv[1] if len(sys.argv) > 1 else r"c:\Users\MattyMroz\Desktop\PROJECTS\ONEOCR\ocr_data\oneocr.onemodel"

with open(filepath, "rb") as f:
    data = f.read(23000)  # read a bit more than the suspected 22636-byte header
    f.seek(0, 2)          # jump to EOF so tell() reports the total file size
    filesize = f.tell()

print(f"File size: {filesize} bytes ({filesize/1024/1024:.2f} MB)")
print()

# Classic hex dump of the first 512 bytes: offset, 16 hex bytes, printable ASCII.
print("=== First 512 bytes hex dump ===")
for base in range(0, 512, 16):
    row = data[base:base + 16]
    hex_cols = " ".join(f"{byte:02x}" for byte in row)
    text_cols = "".join(chr(byte) if 32 <= byte < 127 else "." for byte in row)
    print(f"{base:08x}: {hex_cols:<48s}  {text_cols}")

print()
print("=== uint32 LE values at key offsets ===")
# Walk the first 64 bytes as sixteen little-endian 32-bit words; large or
# round values here often turn out to be sizes or offsets.
for word_off in range(0, 64, 4):
    (word,) = struct.unpack_from("<I", data, word_off)
    print(f"  offset {word_off:4d} (0x{word_off:04x}): {word:12d} (0x{word:08x})")

print()
print("=== Check around offset 22636 (header size?) ===")
off = 22636
# Dump the 32 bytes before and 64 bytes after the suspected header/body
# boundary to see whether the byte texture changes there.
for base in range(off - 32, off + 64, 16):
    row = data[base:base + 16]
    hex_cols = " ".join(f"{b:02x}" for b in row)
    text_cols = "".join(chr(b) if 32 <= b < 127 else "." for b in row)
    print(f"{base:08x}: {hex_cols:<48s}  {text_cols}")

print()
print("=== Entropy analysis of header vs body ===")
from collections import Counter

# Rough entropy proxy: count how many distinct byte values appear in each
# region. A compressed/encrypted body tends to use nearly all 256 values,
# while a sparse structured header usually does not.
header = data[:22636]
body_sample = data[22636:22636+4096]
h_counter = Counter(header)
b_counter = Counter(body_sample)
print(f"  Header unique bytes: {len(h_counter)}/256")
print(f"  Body sample unique bytes: {len(b_counter)}/256")

# Null-byte density hints at padding or sparse tables inside the header.
null_count = header.count(0)
if header:
    print(f"  Header null bytes: {null_count}/{len(header)} ({100*null_count/len(header):.1f}%)")
else:
    # Guard: an empty/truncated file would otherwise raise ZeroDivisionError.
    print("  Header is empty (file shorter than expected)")

# Scan the header for embedded text: runs of 4+ printable ASCII bytes often
# reveal format names, version tags, or section labels.
print()
print("=== Looking for potential sub-structures in header ===")
import re

ascii_runs = re.findall(b'[\x20-\x7e]{4,}', header)
if not ascii_runs:
    print("  No ASCII strings >= 4 chars found in header")
else:
    print("  ASCII strings found in header:")
    for run in ascii_runs[:30]:  # cap the output at the first 30 hits
        print(f"    {run.decode('ascii', errors='replace')}")

# Show the leading bytes both as hex and raw so a known magic signature is
# easy to spot by eye.
print()
print("=== Magic number checks at offset 0 ===")
magic4 = data[0:4]
magic8 = data[0:8]
print(f"  Bytes 0-3: {magic4.hex()}")
print(f"  Bytes 0-7: {magic8.hex()}")
print(f"  As string: {magic8}")

# Decimal dump of the first 64 byte values, 16 per row, for eyeballing
# alignment and repeating patterns.
print()
# Fixed misleading section title: this loop prints the raw byte VALUES at
# each position, not a frequency table.
print("=== Byte values in first 64 bytes ===")
for i in range(64):
    if i % 16 == 0:
        print(f"  {i:3d}: ", end="")   # row label = starting offset
    print(f"{data[i]:3d}", end=" ")
    if i % 16 == 15:
        print()                         # end the 16-value row

# Probe the first 256 bytes of the header for uint32 values that fall
# inside the file — plausible entries of an offset/size table.
print()
print("=== Potential offset/size table at start ===")
scan_limit = min(256, len(header))
for pos in range(0, scan_limit, 4):
    (candidate,) = struct.unpack_from("<I", data, pos)
    if 0 < candidate < filesize:
        print(f"  offset {pos}: uint32={candidate} (could be offset/size, {candidate/1024:.1f}KB)")

# A 16-byte block with high byte diversity near the start is a candidate
# IV, should the body turn out to be encrypted.
print()
print("=== 16-byte blocks that could be IV ===")
for start in (4, 8, 12, 16, 20):
    candidate_iv = data[start:start + 16]
    distinct = len(set(candidate_iv))
    print(f"  offset {start:3d}: {candidate_iv.hex()} (unique bytes: {distinct}/16)")