FROM debian:bookworm-slim AS builder
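# Build stage: compile llama.cpp here so the runtime image below only carries the finished binaries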
# Install build dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        git \
        build-essential \
        cmake \
        ca-certificates \
        libcurl4-openssl-dev \
    && rm -rf /var/lib/apt/lists/*
# Clone and build llama.cpp with CPU-specific SIMD disabled, producing a portable baseline x86-64 binary
WORKDIR /build
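# Bump CACHEBUST to invalidate the layer cache from here on and force a fresh clone/build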
ARG CACHEBUST=4
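# Single-threaded build (-j1) is slow but keeps peak memory low, presumably to avoid OOM on the build machine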
RUN git clone https://github.com/ggerganov/llama.cpp.git && \
    cd llama.cpp && \
    cmake -B build -DCMAKE_BUILD_TYPE=Release \
        -DGGML_NATIVE=OFF \
        -DGGML_AVX2=OFF \
        -DGGML_AVX=OFF \
        -DGGML_FMA=OFF \
        -DGGML_F16C=OFF && \
    cmake --build build --config Release --target llama-server -j1 && \
    echo "=== Binary dependencies ===" && \
    ldd build/bin/llama-server || true
# Runtime stage
FROM debian:bookworm-slim
# Install runtime dependencies
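# libcurl4 lets llama-server download models over HTTP(S); libgomp1 (OpenMP) and libstdc++6 are ggml/C++ runtime requirements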
RUN apt-get update && apt-get install -y --no-install-recommends \
        libcurl4 \
        ca-certificates \
        libgomp1 \
        libstdc++6 \
    && rm -rf /var/lib/apt/lists/*
# Copy llama-server binary and all shared libraries from builder
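# llama.cpp builds libllama/libggml as shared objects by default, so the server binary needs them alongside it at runtime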
COPY --from=builder /build/llama.cpp/build/bin/llama-server /usr/local/bin/llama-server
COPY --from=builder /build/llama.cpp/build/bin/*.so.* /usr/local/lib/
# Update library cache
RUN ldconfig
# Install Python and FastAPI dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
        python3 \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*
# Install Python packages
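# --break-system-packages bypasses Debian bookworm's PEP 668 'externally managed environment' guard; fine in a single-purpose container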
RUN pip3 install --no-cache-dir --break-system-packages \
        fastapi uvicorn requests pydantic \
        duckduckgo-search beautifulsoup4 lxml
# Create non-root user
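# Hugging Face Spaces runs the container as UID 1000, so matching it avoids permission problems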
RUN useradd -m -u 1000 user && \
    mkdir -p /home/user/.cache/llama.cpp && \
    chown -R user:user /home/user
# Copy application code
COPY --chown=user:user app.py /home/user/app.py
USER user
WORKDIR /home/user
# Set environment variables
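# LLAMA_CACHE points llama-server's model-download cache at the user-writable directory created above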
ENV HOME=/home/user \
    LLAMA_CACHE=/home/user/.cache/llama.cpp \
    PATH=/home/user/.local/bin:$PATH \
    PYTHONUNBUFFERED=1
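# 7860 is the default application port for Docker-based Hugging Face Spaces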
EXPOSE 7860
# Start FastAPI app (which manages llama-server internally)
CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]