FROM debian:bookworm-slim AS builder
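# Build stage: compiles llama-server from source; only the resulting binary
# is copied into the slim runtime image below, keeping the final image small.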

# Install build dependencies
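# libcurl4-openssl-dev lets the llama.cpp build enable its libcurl-based
# model downloader, which the -hf flag in the CMD relies on.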
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    build-essential \
    cmake \
    ca-certificates \
    libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

# Clone and build llama.cpp
WORKDIR /build
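# GGML_NATIVE=OFF avoids -march=native so the binary is not tied to the
# build machine's CPU; GGML_AVX2=OFF additionally drops AVX2 code paths so
# the server also runs on older CPUs, at some performance cost.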
RUN git clone https://github.com/ggerganov/llama.cpp.git && \
    cd llama.cpp && \
    cmake -B build -DCMAKE_BUILD_TYPE=Release -DGGML_NATIVE=OFF -DGGML_AVX2=OFF && \
    cmake --build build --config Release --target llama-server -j$(nproc)

# Runtime stage
FROM debian:bookworm-slim

# Install runtime dependencies
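# Only libcurl and CA certificates are needed at runtime: the server uses
# them to fetch the GGUF model specified with -hf over HTTPS.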
RUN apt-get update && apt-get install -y --no-install-recommends \
    libcurl4 \
    ca-certificates \
    && rm -rf /var/lib/apt/lists/*

# Copy llama-server binary from builder
COPY --from=builder /build/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server

# Create non-root user
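# Hugging Face Spaces runs Docker Spaces with user ID 1000, so a matching
# user is created here along with the model cache directory it will own.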
RUN useradd -m -u 1000 user && \
    mkdir -p /home/user/.cache/llama.cpp && \
    chown -R user:user /home/user

USER user
WORKDIR /home/user

# Set environment variables
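# LLAMA_CACHE points the server's downloader at the pre-created cache
# directory, so the downloaded GGUF file lands in a path the non-root user owns.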
ENV HOME=/home/user \
    LLAMA_CACHE=/home/user/.cache/llama.cpp \
    PATH=/home/user/.local/bin:$PATH

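# Hugging Face Spaces routes external traffic to port 7860 by default.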
EXPOSE 7860

# Start llama-server with a model pulled from Hugging Face
# Using TheBloke's DeepSeek Coder 6.7B Instruct GGUF model (Q4_K_M quant)
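# --host 0.0.0.0 makes the server reachable from outside the container,
# -c 2048 sets the context window, and --metrics exposes a
# Prometheus-compatible /metrics endpoint.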
CMD ["llama-server", \
     "-hf", "TheBloke/deepseek-coder-6.7B-instruct-GGUF:deepseek-coder-6.7b-instruct.Q4_K_M.gguf", \
     "--host", "0.0.0.0", \
     "--port", "7860", \
     "-c", "2048", \
     "--metrics"]