File size: 1,108 Bytes
fda8fb3
 
 
 
 
 
 
 
 
 
 
 
7919262
fda8fb3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
# Base image: NVIDIA CUDA 12.4.1 runtime with cuDNN on Ubuntu 22.04 (per the tag).
# NOTE(review): the tag is not pinned by digest; consider appending @sha256:<digest>
# if fully reproducible builds are required.
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04

# Build-time only: keeps apt/debconf from prompting during `docker build`.
# Declared as ARG (not ENV) so the setting does not leak into the environment of
# running containers, where it would also silence interactive apt usage.
ARG DEBIAN_FRONTEND=noninteractive

# Python / uv behaviour: no .pyc files, unbuffered stdout/stderr, and uv copies
# packages into the environment instead of linking them.
ENV PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_LINK_MODE=copy

# Home directory of the unprivileged account; its ~/.local/bin is placed first on
# PATH so user-level tool installs are found.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH

# Hugging Face caches live under the user's home so they are writable without root.
# NOTE(review): newer transformers releases report TRANSFORMERS_CACHE as deprecated
# in favour of HF_HOME - confirm against the pinned version before removing it.
ENV HF_HOME=/home/user/.cache/huggingface \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface

# Default application settings; each can be overridden with `docker run -e`.
# API_PORT matches the port declared with EXPOSE.
# NOTE(review): TRUST_REMOTE_CODE=true presumably lets the model loader execute
# code shipped in the model repository - confirm this default is intended.
ENV API_HOST=0.0.0.0 \
    API_PORT=7860 \
    MODEL_NAME=Qwen/Qwen2.5-0.5B-Instruct \
    DEVICE_PREFERENCE=auto \
    DTYPE_PREFERENCE=auto \
    ATTN_IMPLEMENTATION=eager \
    LOW_CPU_MEM_USAGE=true \
    TRUST_REMOTE_CODE=true \
    PRELOAD_MODEL=true \
    REQUIRE_AUTH=true

# OS-level prerequisites, installed as root in a single layer:
#   - ca-certificates, curl: HTTPS downloads during the build
#   - git: available for VCS-based dependencies
#   - python3, python3-pip, python3-venv: system Python toolchain
# The package index is refreshed and removed again within the same layer so it
# never ends up in the image.
RUN apt-get update \
 && apt-get install --yes --no-install-recommends \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-pip \
        python3-venv \
 && rm -rf /var/lib/apt/lists/*

# Create the unprivileged account (UID 1000) and, while still root, its application
# directory. Creating and chowning the directory explicitly guarantees it is owned
# by `user` regardless of how the builder assigns ownership to directories that
# WORKDIR creates implicitly; later build steps need to write into it.
RUN useradd -m -u 1000 user \
    && mkdir -p /home/user/app \
    && chown user: /home/user/app

# Every following build step, and the container process itself, runs as `user`.
USER user
WORKDIR $HOME/app

# Install uv for the current (unprivileged) user with Astral's installer script.
# The script is downloaded to a file and then executed instead of being piped into
# `sh`: in a pipeline only the exit status of `sh` counts, so a failed download
# would go unnoticed and the build would only break later when `uv` is missing.
# NOTE(review): the installer URL is unversioned, so each uncached build picks up
# the current uv release; consider pinning a specific version for reproducibility.
RUN curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh \
    && sh /tmp/uv-install.sh \
    && rm -f /tmp/uv-install.sh

# Step 1: copy only the dependency manifests (plus the small metadata files that
# accompany them) and install the locked third-party dependencies. This layer is
# reused from cache until pyproject.toml / uv.lock change, so ordinary source edits
# no longer trigger a full dependency reinstall.
COPY --chown=user pyproject.toml uv.lock README.md .env.example ./
RUN uv sync --frozen --no-install-project

# Step 2: copy the application sources and notebooks.
COPY --chown=user app ./app
COPY --chown=user notebooks ./notebooks

# Step 3: finish the sync now that the sources are present; dependencies are
# already installed, so this only has to handle the project itself.
RUN uv sync --frozen

# Documents the port the API listens on (same value as API_PORT); EXPOSE does not
# publish the port by itself.
EXPOSE 7860/tcp

# Exec-form command: start the API module through uv's project environment.
CMD ["uv", "run", "python", "-m", "app.cli.run_api"]