# GPU image that serves the `app:app` application with uvicorn on port 7860,
# built on the CUDA 12.4.1 / cuDNN runtime image for Ubuntu 22.04.
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04

WORKDIR /app

# Runtime environment only. DEBIAN_FRONTEND is intentionally NOT set here: it is
# a build-time concern and is passed inline to apt-get below.
#   PYTHONUNBUFFERED  - unbuffered stdout/stderr so logs show up immediately.
#   PIP_NO_CACHE_DIR  - keep pip's download cache out of every layer.
#   HF_HOME / XDG_CACHE_HOME - put caches under /data instead of $HOME.
# NOTE(review): the CUDA "stubs" directory is kept on LD_LIBRARY_PATH exactly as
# before; stubs are normally only needed at link time, so confirm it is really
# required at runtime before removing it.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/data/.huggingface \
    XDG_CACHE_HOME=/data/.cache \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH}

# OS packages (sorted for diffability). update + install + list cleanup share a
# single layer so the apt index is never stale and never shipped in the image.
RUN apt-get update \
    && DEBIAN_FRONTEND=noninteractive apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-dev \
        python3-pip \
    && rm -rf /var/lib/apt/lists/*

# Unprivileged runtime user with a fixed UID/GID. It owns the working directory
# and /data, the parent of the cache locations configured above.
# NOTE(review): a volume bind-mounted over /data at runtime must be writable by
# UID 1000.
RUN groupadd --gid 1000 app \
    && useradd --uid 1000 --gid app --create-home app \
    && mkdir -p /data \
    && chown app:app /app /data

RUN python3 -m pip install --no-cache-dir --upgrade pip setuptools wheel

# llama-cpp-python from the CUDA 12.4 wheel index. This step comes BEFORE
# requirements.txt is copied, so editing requirements.txt does not invalidate
# this expensive layer.
# --prefer-binary makes pip choose a prebuilt wheel over a newer source
# distribution, instead of compiling the package locally.
# Pin a version at build time with, e.g.:
#   --build-arg LLAMA_CPP_PYTHON_SPEC="llama-cpp-python==<version>"
ARG LLAMA_CPP_PYTHON_SPEC=llama-cpp-python
RUN python3 -m pip install --no-cache-dir --prefer-binary \
        --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 \
        "${LLAMA_CPP_PYTHON_SPEC}"

# Remaining Python dependencies; this layer is rebuilt only when
# requirements.txt changes.
COPY requirements.txt ./
RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Application files last: they change most often.
COPY --chown=app:app app.py README.md ./

# Drop root for everything that runs in the container.
USER app

EXPOSE 7860

# Probe the /health endpoint; the 120s start period gives the service time to
# come up before failures count. -sS hides the progress meter but still prints
# the error when the probe fails.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Exec form: uvicorn runs as PID 1 and receives SIGTERM from `docker stop`.
CMD ["python3", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]