# deepseek-coder-6b-api / Dockerfile
# Source: truegleai's Hugging Face Space — "Deploy FastAPI server with CodeLlama 7B"
# (commit 1e3c6e8, verified)
# Dockerfile for HuggingFace Spaces - Build llama-cpp-python from source
FROM python:3.11-slim-bookworm

# Set working directory (created automatically if missing)
WORKDIR /app

# Install the build toolchain needed to compile llama-cpp-python from source.
# libopenblas-dev + pkg-config are required because the build below passes
# -DGGML_BLAS_VENDOR=OpenBLAS; without the dev package CMake cannot find BLAS.
# --no-install-recommends keeps the layer small; apt lists are removed in the
# same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
    build-essential \
    cmake \
    curl \
    git \
    libopenblas-dev \
    pkg-config \
    && rm -rf /var/lib/apt/lists/*
# Copy the dependency manifest on its own so this layer stays cached
# until requirements.txt itself changes.
COPY requirements.txt .

# Install every Python dependency EXCEPT llama-cpp-python, which is compiled
# from source in the next step. `sed '/…/d'` is used instead of `grep -v`
# because grep exits non-zero when no lines survive the filter (e.g. a
# requirements file containing only llama-cpp-python), which would abort the
# build; sed always exits 0 and pip accepts an empty requirements file.
RUN pip install --no-cache-dir --upgrade pip && \
    sed '/llama-cpp-python/d' requirements.txt > /tmp/requirements_filtered.txt && \
    pip install --no-cache-dir -r /tmp/requirements_filtered.txt && \
    rm /tmp/requirements_filtered.txt
# Build llama-cpp-python from source with CMAKE (this is the key!)
# Force CMAKE build to ensure it compiles against glibc, not using musl wheels
# FORCE_CMAKE=1 makes pip skip any prebuilt wheel and invoke the CMake build;
# GGML_BLAS / GGML_BLAS_VENDOR configure the ggml backend to link OpenBLAS.
# Version is pinned (0.2.90) for reproducible builds.
# NOTE(review): this configuration needs the OpenBLAS dev headers present in
# the image (libopenblas-dev) — confirm they are installed by the apt step.
RUN CMAKE_ARGS="-DGGML_BLAS=ON -DGGML_BLAS_VENDOR=OpenBLAS" \
FORCE_CMAKE=1 \
pip install --no-cache-dir --force-reinstall --upgrade --verbose \
llama-cpp-python==0.2.90
# Copy application code last — it changes most often, so earlier dependency
# layers stay cached across rebuilds.
COPY app.py .

# Run as a non-root user. Hugging Face Spaces runs containers as UID 1000,
# and an image without USER runs as root — a flagged anti-pattern.
RUN useradd --create-home --uid 1000 appuser \
    && chown -R appuser:appuser /app
USER appuser
ENV HOME=/home/appuser

# Expose port (HF Spaces serves on 7860; EXPOSE is documentation only)
EXPOSE 7860

# Health check against the app's own /health endpoint.
# -f fails on HTTP errors; -sS suppresses the progress bar but still
# surfaces real curl errors. Long start-period covers model load time.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Run the FastAPI app (exec form: python is PID 1 and receives SIGTERM)
CMD ["python", "app.py"]