File size: 2,159 Bytes
7f69342
441479b
7f69342
4ec7108
7f69342
 
6e29991
7f69342
09e70ff
4ec7108
 
84bb7ea
7f69342
9345f95
7f69342
 
25f92ca
309e664
84bb7ea
 
 
309e664
84bb7ea
db57dc8
 
441479b
7f69342
 
7b82554
7f69342
 
09e70ff
7f69342
d9a4451
 
7f69342
441479b
8c68c1f
7f69342
8c68c1f
 
 
 
ba2be63
dde400a
 
 
 
 
 
 
55e1aa1
dde400a
7f69342
6e29991
7f69342
cba98c9
6e29991
dde400a
 
 
ba2be63
7f69342
6e29991
7f69342
ba2be63
7f69342
dde400a
 
ba2be63
 
 
dde400a
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
FROM debian:bookworm-slim AS builder

# Build dependencies — single layer, alphabetized, apt lists removed in the
# same layer so the cleanup actually shrinks the image (hadolint DL3009/DL3015).
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    ca-certificates \
    cmake \
    git \
    libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*

# Clone and build llama.cpp with MINIMAL optimizations (fast build).
# LLAMA_CPP_REF selects the branch/tag to build; the default "master"
# preserves the original behavior, but pass a release tag
# (--build-arg LLAMA_CPP_REF=<tag>) for a reproducible build.
# CACHEBUST is kept as the manual cache-invalidation knob: bump it to
# force a fresh clone of the default branch.
WORKDIR /build
ARG CACHEBUST=4
ARG LLAMA_CPP_REF=master
# --depth 1 avoids downloading the full history (much faster clone).
# cmake -S/-B replaces the `cd llama.cpp && cmake` pattern (DL3003) while
# keeping the build tree at /build/llama.cpp/build, which the runtime
# stage's COPY --from=builder paths rely on.
RUN git clone --depth 1 --branch "${LLAMA_CPP_REF}" \
        https://github.com/ggerganov/llama.cpp.git && \
    cmake -S llama.cpp -B llama.cpp/build -DCMAKE_BUILD_TYPE=Release \
        -DGGML_NATIVE=OFF \
        -DGGML_AVX2=OFF \
        -DGGML_AVX=OFF \
        -DGGML_FMA=OFF \
        -DGGML_F16C=OFF && \
    cmake --build llama.cpp/build --config Release --target llama-server -j1 && \
    echo "=== Binary dependencies ===" && \
    ldd llama.cpp/build/bin/llama-server || true

# Runtime stage
FROM debian:bookworm-slim

# Install ALL runtime dependencies (shared libs + Python) in one layer,
# BEFORE any COPY --from=builder: in the original ordering a builder-stage
# rebuild invalidated the later apt/pip layers and forced a reinstall on
# every source change. Packages sorted alphabetically for diffability.
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates \
    libcurl4 \
    libgomp1 \
    libstdc++6 \
    python3 \
    python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Python packages for the FastAPI wrapper.
# NOTE(review): versions are unpinned — consider pinning (fastapi==x.y.z, …)
# or a requirements.txt for reproducible builds.
RUN pip3 install --no-cache-dir --break-system-packages \
    fastapi uvicorn requests pydantic duckduckgo-search beautifulsoup4 lxml

# Copy llama-server binary and its shared libraries from the builder, then
# refresh the dynamic-linker cache so the copied .so files are resolvable.
COPY --from=builder /build/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /build/llama.cpp/build/bin/*.so.* /usr/local/lib/
RUN ldconfig

# Non-root user with a writable model-cache directory (matches LLAMA_CACHE).
RUN useradd -m -u 1000 user && \
    mkdir -p /home/user/.cache/llama.cpp && \
    chown -R user:user /home/user

# Application code last — it changes most often, so keep it in the
# bottom-most layer to maximize cache reuse.
COPY --chown=user:user app.py /home/user/app.py

USER user
WORKDIR /home/user

# Runtime environment for the app and the managed llama-server.
ENV HOME=/home/user \
    LLAMA_CACHE=/home/user/.cache/llama.cpp \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1

EXPOSE 7860

# Cheap liveness probe using the already-installed python3 (no curl in image).
# Generous start-period: the app downloads/loads a model before serving.
# NOTE(review): probes "/" — point this at a dedicated health endpoint if
# app.py exposes one.
HEALTHCHECK --interval=30s --timeout=5s --start-period=120s --retries=3 \
    CMD ["python3", "-c", "import urllib.request; urllib.request.urlopen('http://127.0.0.1:7860/', timeout=4)"]

# Start FastAPI app (which manages llama-server internally)
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]