"""Analyze exact chunk boundary structure in the .onemodel file.""" import struct, json with open("ocr_data/oneocr.onemodel", "rb") as f: fdata = f.read() log = json.load(open("temp/crypto_log.json")) sha256s = [op for op in log if op["op"] == "sha256"] sha_map = {s["output"]: s["input"] for s in sha256s} decrypts = [op for op in log if op["op"] == "decrypt"] # Get info for first few payload chunks def get_chunk_info(dec_idx): d = decrypts[dec_idx] sha_inp = bytes.fromhex(sha_map[d["aes_key"]]) s1, s2 = struct.unpack_from(" 0: prev = infos[i-1] # Hypothesis: on-disk encrypted data = size1 + 8 (data_size + container_header) prev_data_start = prev['chk_pos'] + 32 prev_on_disk = prev['size1'] + 8 expected_next_chk = prev_data_start + prev_on_disk actual_next_chk = info['chk_pos'] delta = actual_next_chk - expected_next_chk print(f" Expected chk_pos: {expected_next_chk}, actual: {actual_next_chk}, delta: {delta}") # Now figure out the EXACT header structure print("\n=== Bytes around first few chunk boundaries ===\n") # Between DX and first chunk dx_end = 24 + 22624 # = 22648 print(f"--- DX end ({dx_end}) to first chunk ---") for off in range(dx_end, infos[0]['chk_pos'] + 48, 8): raw = fdata[off:off+8] val = struct.unpack_from("8}: {raw.hex()} (uint64={val})") # Between chunk 0 and chunk 1 c0 = infos[0] c1 = infos[1] # data starts at chk_pos + 32, on-disk size is approximately size1+8 or enc_size # Let's look at bytes around where the boundary should be c0_data_start = c0['chk_pos'] + 32 c0_approx_end = c0_data_start + c0['size1'] + 8 print(f"\n--- End of dec#{c0['dec_idx']:02d} / Start of dec#{c1['dec_idx']:02d} ---") print(f" c0 data_start: {c0_data_start}") print(f" c0 size1+8: {c0['size1']+8}") print(f" c0 approx end: {c0_approx_end}") print(f" c1 chk_pos: {c1['chk_pos']}") for off in range(c0_approx_end - 16, c1['chk_pos'] + 48, 8): raw = fdata[off:off+8] val = struct.unpack_from("8}: {raw.hex()} val={val:<15d} {ascii_s}") # Check file header header_size = struct.unpack_from("3}: {raw.hex()} uint64={val}")