File size: 4,457 Bytes
fb05c3a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
/*
 * Test program to demonstrate integer overflow in tensor size calculation.
 * Loads a malicious GGUF file and shows that ggml_nbytes() returns
 * an incorrect (too small) value due to integer overflow in ggml_row_size().
 *
 * Compile:
 *   cc -o test_tensor_overflow test_tensor_overflow.c \
 *      -I ../llama.cpp/ggml/include -I ../llama.cpp/ggml/src \
 *      -L ../llama.cpp/build/bin -lggml-base -lggml \
 *      -Wl,-rpath,../llama.cpp/build/bin
 *
 * Run:
 *   ./test_tensor_overflow poc_tensor_overflow.gguf
 */

#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <inttypes.h>
#include "ggml.h"
#include "gguf.h"

int main(int argc, char **argv) {
    if (argc < 2) {
        fprintf(stderr, "Usage: %s <gguf_file>\n", argv[0]);
        return 1;
    }

    const char *fname = argv[1];
    printf("=== Tensor Integer Overflow PoC ===\n");
    printf("Loading: %s\n\n", fname);

    struct ggml_context *ctx = NULL;
    struct gguf_init_params params = {
        .no_alloc = true,
        .ctx = &ctx,
    };

    struct gguf_context *gctx = gguf_init_from_file(fname, params);
    if (!gctx) {
        fprintf(stderr, "ERROR: gguf_init_from_file failed!\n");
        return 1;
    }

    printf("[+] GGUF file loaded successfully (all validation passed!)\n\n");

    int n_tensors = gguf_get_n_tensors(gctx);
    printf("Number of tensors: %d\n\n", n_tensors);

    // Iterate over tensors and show the overflow
    struct ggml_tensor *tensor = ggml_get_first_tensor(ctx);
    while (tensor) {
        printf("Tensor: '%s'\n", tensor->name);
        printf("  Type: %s (type_size=%zu, blck_size=%" PRId64 ")\n",
               ggml_type_name(tensor->type),
               ggml_type_size(tensor->type),
               ggml_blck_size(tensor->type));
        printf("  Dimensions: ne[0]=%" PRId64 ", ne[1]=%" PRId64
               ", ne[2]=%" PRId64 ", ne[3]=%" PRId64 "\n",
               tensor->ne[0], tensor->ne[1], tensor->ne[2], tensor->ne[3]);
        printf("  Strides:    nb[0]=%zu, nb[1]=%zu, nb[2]=%zu, nb[3]=%zu\n",
               tensor->nb[0], tensor->nb[1], tensor->nb[2], tensor->nb[3]);

        size_t nbytes = ggml_nbytes(tensor);
        int64_t nelements = ggml_nelements(tensor);

        // Compute what the correct size should be
        // For Q4_0: correct = type_size * nelements / blck_size = 18 * ne[0] / 32
        size_t type_size = ggml_type_size(tensor->type);
        int64_t blck_size = ggml_blck_size(tensor->type);
        // Use Python-style big number arithmetic to show correct value
        // correct_nbytes = nelements * type_size / blck_size
        // But we can't compute this without overflow in C, so just show the values

        printf("  ggml_nbytes():    %zu bytes\n", nbytes);
        printf("  ggml_nelements(): %" PRId64 "\n", nelements);
        printf("  ggml_row_size():  %zu bytes\n", ggml_row_size(tensor->type, tensor->ne[0]));

        // Show the overflow
        size_t row_size = ggml_row_size(tensor->type, tensor->ne[0]);
        printf("\n  === OVERFLOW DETECTION ===\n");
        printf("  ggml_row_size = type_size * ne[0] / blck_size\n");
        printf("                = %zu * %" PRId64 " / %" PRId64 "\n",
               type_size, tensor->ne[0], blck_size);
        printf("  Computed result: %zu bytes\n", row_size);

        // Check for overflow: if type_size * ne[0] / blck_size != row_size
        // then overflow occurred
        // We can detect this by checking: row_size * blck_size / type_size != ne[0]
        if (type_size > 0 && blck_size > 0) {
            int64_t reconstructed = (int64_t)(row_size * blck_size / type_size);
            if (reconstructed != tensor->ne[0]) {
                printf("  *** INTEGER OVERFLOW DETECTED! ***\n");
                printf("  Reconstructed ne[0] from row_size: %" PRId64 "\n", reconstructed);
                printf("  Actual ne[0]:                      %" PRId64 "\n", tensor->ne[0]);
                printf("  The buffer would be %zu bytes but tensor claims %" PRId64 " elements!\n",
                       nbytes, nelements);
                printf("  This is a HEAP BUFFER OVERFLOW vulnerability!\n");
            } else {
                printf("  No overflow detected for this tensor.\n");
            }
        }

        printf("\n");
        tensor = ggml_get_next_tensor(ctx, tensor);
    }

    ggml_free(ctx);
    gguf_free(gctx);

    printf("[+] Test complete.\n");
    return 0;
}