File size: 3,453 Bytes
ce847d4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
"""Analyze exact chunk boundary structure in the .onemodel file."""
import struct, json

with open("ocr_data/oneocr.onemodel", "rb") as f:
    fdata = f.read()
log = json.load(open("temp/crypto_log.json"))

sha256s = [op for op in log if op["op"] == "sha256"]
sha_map = {s["output"]: s["input"] for s in sha256s}
decrypts = [op for op in log if op["op"] == "decrypt"]

# Get info for first few payload chunks
def get_chunk_info(dec_idx):
    """Describe the payload chunk behind decrypt op *dec_idx*.

    Recovers the SHA-256 preimage of the chunk's AES key from the crypto
    log: bytes [0:16) hold two little-endian uint64 sizes and bytes
    [16:32) hold a 16-byte checksum that also appears verbatim in the
    on-disk file, so its position locates the chunk.

    Reads the module-level ``decrypts``, ``sha_map`` and ``fdata``.
    Returns a dict with keys dec_idx/enc_size/size1/size2/chk/chk_pos.
    """
    entry = decrypts[dec_idx]
    key_preimage = bytes.fromhex(sha_map[entry["aes_key"]])
    size1, size2 = struct.unpack_from("<QQ", key_preimage, 0)
    checksum = key_preimage[16:32]
    return {
        "dec_idx": dec_idx,
        "enc_size": entry["input_size"],
        "size1": size1,
        "size2": size2,
        "chk": checksum,
        "chk_pos": fdata.find(checksum),
    }

# Focus on first few consecutive large chunks
# From the sorted output, the order in file is: dec#02, dec#03, dec#06, dec#11, dec#16, dec#23, ...
chunks_in_order = [2, 3, 6, 11, 16, 23, 28, 33]
infos = [get_chunk_info(i) for i in chunks_in_order]

print("=== Chunk boundary analysis ===\n")
prev = None
for info in infos:
    print(f"dec#{info['dec_idx']:02d}: chk_pos={info['chk_pos']}, size1={info['size1']}, enc_size={info['enc_size']}")

    if prev is not None:
        # Hypothesis: on-disk encrypted data = size1 + 8 (data_size + container_header)
        # so the next checksum should start right after the previous chunk's data.
        expected_next_chk = (prev['chk_pos'] + 32) + (prev['size1'] + 8)
        actual_next_chk = info['chk_pos']
        delta = actual_next_chk - expected_next_chk
        print(f"  Expected chk_pos: {expected_next_chk}, actual: {actual_next_chk}, delta: {delta}")
    prev = info

# Now figure out the EXACT header structure
print("\n=== Bytes around first few chunk boundaries ===\n")

# Between DX and first chunk: dump 8-byte words from the end of the DX
# section up to just past the first chunk's checksum position.
dx_end = 24 + 22624  # = 22648
print(f"--- DX end ({dx_end}) to first chunk ---")
off = dx_end
limit = infos[0]['chk_pos'] + 48
while off < limit:
    raw = fdata[off:off + 8]
    # A short trailing slice can't be unpacked as a uint64; report 0.
    val = struct.unpack_from("<Q", raw)[0] if len(raw) == 8 else 0
    print(f"  {off:>8}: {raw.hex()}  (uint64={val})")
    off += 8

# Between chunk 0 and chunk 1
first, second = infos[0], infos[1]
# data starts at chk_pos + 32, on-disk size is approximately size1+8 or enc_size
# Let's look at bytes around where the boundary should be
data_start = first['chk_pos'] + 32
approx_end = data_start + first['size1'] + 8
print(f"\n--- End of dec#{first['dec_idx']:02d} / Start of dec#{second['dec_idx']:02d} ---")
print(f"  c0 data_start: {data_start}")
print(f"  c0 size1+8: {first['size1'] + 8}")
print(f"  c0 approx end: {approx_end}")
print(f"  c1 chk_pos: {second['chk_pos']}")

# Hex + ASCII dump of the gap, 8 bytes per row.
for off in range(approx_end - 16, second['chk_pos'] + 48, 8):
    raw = fdata[off:off + 8]
    val = struct.unpack_from("<Q", raw)[0] if len(raw) == 8 else 0
    ascii_s = ''.join(chr(b) if 32 <= b < 127 else '.' for b in raw)
    print(f"  {off:>8}: {raw.hex()}  val={val:<15d}  {ascii_s}")

# Check file header: try several interpretations of the leading bytes.
header_size = struct.unpack_from("<Q", fdata, 0)[0]
print(f"\nFile header uint64: {header_size}")
print(f"  = file[0:8] as uint64 LE")

# What if it's NOT a uint64 but two uint32?
lo32, hi32 = struct.unpack_from("<II", fdata, 0)
print(f"  As two uint32: ({lo32}, {hi32})")

# file[0:24] detailed view
print("\nFile header [0:24]:")
for pos in (0, 8, 16):
    word = fdata[pos:pos + 8]
    word_val = struct.unpack_from("<Q", fdata, pos)[0]
    print(f"  {pos:>3}: {word.hex()}  uint64={word_val}")