# Image for a Python ASGI app (app:app served by uvicorn on port 7860),
# built on the CUDA 12.4.1 + cuDNN runtime image for Ubuntu 22.04.
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04

WORKDIR /app

# Build-time only: keeps apt-get from prompting during the RUN steps below
# without leaking the setting into the runtime environment of the container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
# - PYTHONUNBUFFERED: do not buffer Python stdout/stderr.
# - PIP_NO_CACHE_DIR: pip does not keep a download cache in the image.
# - HF_HOME / XDG_CACHE_HOME: point cache directories under /data.
# - LD_LIBRARY_PATH: prepend the CUDA library directories to the inherited value.
# NOTE(review): the "stubs" directory is normally only needed at link time;
# confirm it is really wanted on the runtime library path.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/data/.huggingface \
    XDG_CACHE_HOME=/data/.cache \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH}

# OS packages: Python toolchain, compiler toolchain, curl (used by the
# HEALTHCHECK below), CA certificates and git. The apt lists are removed in the
# same layer so they do not add to the image size.
RUN apt-get update && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-dev \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

RUN python3 -m pip install --upgrade pip setuptools wheel

# Install llama-cpp-python with the cu124 wheel index added as an extra index.
# This layer is deliberately placed before requirements.txt is copied so that
# editing requirements.txt does not invalidate it.
# NOTE(review): the package version is not pinned — consider pinning it for
# reproducible builds.
RUN python3 -m pip install --no-cache-dir \
        --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 \
        llama-cpp-python

# Remaining Python dependencies; copied separately from the application code
# so this layer is reused until requirements.txt itself changes.
COPY requirements.txt .
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Application files.
COPY app.py README.md ./

# NOTE(review): no USER instruction is set, so the process runs as root.
# Switching to a non-root user requires /data (HF_HOME / XDG_CACHE_HOME) to be
# writable by that user — confirm how /data is provisioned before changing.

EXPOSE 7860

# Probe the /health endpoint; the 120s start period gives the app time to come
# up before failed probes count against the retry limit.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Exec form so uvicorn runs as PID 1 and receives stop signals directly.
CMD ["python3", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]