File size: 1,075 Bytes
ae7c6b5
12c3560
 
 
ae7c6b5
 
036edd5
 
ae7c6b5
 
036edd5
 
ae7c6b5
 
 
 
12c3560
036edd5
b8260d1
12c3560
 
ae7c6b5
 
12c3560
036edd5
ae7c6b5
 
 
036edd5
ae7c6b5
12c3560
b8260d1
 
12c3560
 
 
 
 
 
ae7c6b5
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
# Base image: NVIDIA CUDA 12.4.1 runtime with cuDNN on Ubuntu 22.04.
# NOTE(review): the tag is not pinned by digest, so a rebuild may pick up a
# re-published image; append an @sha256 digest if reproducible builds matter.
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04

# Every relative path used later in this file (COPY destinations and the
# working directory of the final CMD) resolves against /app.
WORKDIR /app

# Build-time only: keeps apt/debconf from prompting during the RUN steps below.
# Declared as ARG (not ENV) so it is visible to later RUN instructions in this
# stage but is not baked into the environment of the running container.
ARG DEBIAN_FRONTEND=noninteractive

# Runtime environment.
#   PYTHONUNBUFFERED  - unbuffered stdout/stderr so logs show up immediately.
#   PIP_NO_CACHE_DIR  - disables pip's download/wheel cache.
#   HF_HOME           - root directory for Hugging Face caches.
#   XDG_CACHE_HOME    - generic cache root for tools that follow the XDG spec.
#   LD_LIBRARY_PATH   - puts the CUDA libraries ahead of whatever the base
#                       image already set.
# NOTE(review): /data is presumably a mounted, writable volume; confirm it
# exists and is writable by the user the container runs as.
# NOTE(review): the ".../lib64/stubs" entry is a link-time directory in CUDA
# toolkits and is normally not needed at run time; confirm it is intentional.
ENV PYTHONUNBUFFERED=1 \
    PIP_NO_CACHE_DIR=1 \
    HF_HOME=/data/.huggingface \
    XDG_CACHE_HOME=/data/.cache \
    LD_LIBRARY_PATH=/usr/local/cuda/lib64:/usr/local/cuda/lib64/stubs:${LD_LIBRARY_PATH}

# OS-level packages, installed and cleaned up in a single layer so the apt
# package lists never persist in the image. Kept in alphabetical order to make
# diffs easy to read.
#   curl                         - used by the HEALTHCHECK probe.
#   python3 / python3-pip        - interpreter and installer for the app.
#   build-essential, python3-dev - presumably needed to compile Python packages
#                                  that ship no prebuilt wheel (confirm).
RUN apt-get update \
 && apt-get install -y --no-install-recommends \
        build-essential \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-dev \
        python3-pip \
 && rm -rf /var/lib/apt/lists/*

# Bring the packaging toolchain up to date before any project dependency is
# installed.
RUN python3 -m pip install --upgrade \
        pip \
        setuptools \
        wheel

# Install llama-cpp-python in its own layer BEFORE requirements.txt is copied
# in. This step does not read requirements.txt, so keeping the COPY below it
# means that editing requirements.txt no longer invalidates the build cache for
# this (slow) install.
# The extra index hosts wheels built against CUDA 12.4 (cu124).
# NOTE(review): the version is unpinned, and pip picks the highest version
# across PyPI and the extra index; if PyPI only has a newer sdist, pip may
# build from source instead of using the cu124 wheel. Consider pinning a
# version that exists on the extra index.
RUN python3 -m pip install --no-cache-dir \
    --extra-index-url https://abetlen.github.io/llama-cpp-python/whl/cu124 \
    llama-cpp-python

# Copy only the dependency manifest so the layer below is rebuilt solely when
# the dependency list changes, not on every application source change.
COPY requirements.txt .

RUN python3 -m pip install --no-cache-dir -r requirements.txt

# Application entry module and README, copied last so that source edits only
# rebuild this final, cheap layer. The trailing slash marks the destination as
# a directory (the current WORKDIR), which is required for multi-source COPY.
COPY app.py README.md ./

# Documents the port the server listens on; EXPOSE does not publish it.
EXPOSE 7860

# Liveness probe against the app's /health route on the served port.
#   -f   exit non-zero on HTTP error responses
#   -sS  suppress the progress meter but still print real errors, so the
#        container's health log stays readable
# "|| exit 1" maps any curl failure code onto Docker's "unhealthy" status (1).
# The 120s start period gives the application time to finish starting before
# failed probes count against the retry limit.
# NOTE(review): assumes app.py serves GET /health; confirm the route exists.
HEALTHCHECK --interval=30s --timeout=10s --start-period=120s --retries=3 \
    CMD curl -fsS http://localhost:7860/health || exit 1

# Exec form, so the Python process is started directly and receives stop
# signals itself. Serves the "app" object from app.py on all interfaces.
# NOTE(review): no USER directive is visible in this file, so the process runs
# as the base image's default user unless the runtime overrides it.
CMD ["python3", "-m", "uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]