# Image for the service started with `python -m app.cli.run_api`, built on the
# NVIDIA CUDA 12.4.1 + cuDNN runtime base image (Ubuntu 22.04).
FROM nvidia/cuda:12.4.1-cudnn-runtime-ubuntu22.04

# Run every RUN step under bash with pipefail so that a failure in any stage of
# a pipeline (e.g. the `curl ... | sh` installer below) fails the build instead
# of being masked by the exit status of the last command.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Build/runtime behaviour of apt, Python and uv.
ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_LINK_MODE=copy

# Home directory of the unprivileged user created below; ~/.local/bin is put on
# PATH because that is where the uv installer is expected to place its binary.
ENV HOME=/home/user \
    PATH=/home/user/.local/bin:$PATH \
    HF_HOME=/home/user/.cache/huggingface \
    TRANSFORMERS_CACHE=/home/user/.cache/huggingface

# Default application settings. They are presumably read by the `app` package
# at startup (not visible in this file -- confirm against the app's config
# loader) and can be overridden at `docker run` time with `-e NAME=value`.
ENV API_HOST=0.0.0.0 \
    API_PORT=7860 \
    MODEL_NAME=Qwen/Qwen2.5-0.5B-Instruct \
    DEVICE_PREFERENCE=auto \
    DTYPE_PREFERENCE=auto \
    ATTN_IMPLEMENTATION=eager \
    LOW_CPU_MEM_USAGE=true \
    TRUST_REMOTE_CODE=true \
    PRELOAD_MODEL=true \
    REQUIRE_AUTH=true

# System packages; the apt lists are removed in the same layer to keep it small.
RUN apt-get update && apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git \
        python3 \
        python3-pip \
        python3-venv \
    && rm -rf /var/lib/apt/lists/*

# Create the unprivileged user (uid 1000) and pre-create the application
# directory owned by that user, so the later `uv sync` can write into it even
# if the builder would otherwise create the WORKDIR as root.
RUN useradd -m -u 1000 user \
    && install -d -o user -g user /home/user/app

USER user
WORKDIR $HOME/app

# Install uv for the unprivileged user via the upstream installer script.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh

# Project metadata and sources, owned by the unprivileged user.
COPY --chown=user pyproject.toml uv.lock README.md .env.example ./
COPY --chown=user app ./app
COPY --chown=user notebooks ./notebooks

# Install dependencies exactly as pinned in uv.lock (no re-resolution).
RUN uv sync --frozen

# Matches the API_PORT default above.
EXPOSE 7860

CMD ["uv", "run", "python", "-m", "app.cli.run_api"]