# GPU image: CUDA 12.6.3 / cuDNN runtime on Ubuntu 24.04 that serves the ASGI
# application `main:app` with uvicorn on port 8000.

# Tag of the uv image that provides the `uv` and `uvx` binaries. Pinned so that
# rebuilds are reproducible; bump it deliberately, or override it with
# `--build-arg UV_VERSION=<tag>`.
ARG UV_VERSION=0.8.0

# A named stage keeps the uv tag configurable through the build argument above.
FROM ghcr.io/astral-sh/uv:${UV_VERSION} AS uv

FROM nvidia/cuda:12.6.3-cudnn-runtime-ubuntu24.04

# Install 'uv', which we will use to install Python dependencies
COPY --from=uv /uv /uvx /bin/

# Build-time only: visible to every RUN step below, but not baked into the
# environment of the running container.
ARG DEBIAN_FRONTEND="noninteractive"

ENV PYTHONDONTWRITEBYTECODE="1" \
    PYTHONUNBUFFERED="1"

# Install Python and create venv (uv creates it at /app/.venv)
WORKDIR /app/
RUN uv python install 3.12
RUN uv venv

# Make the venv the default Python environment for the remaining build steps
# AND for the running container. The ENTRYPOINT invokes a bare `python3`, which
# must resolve to the venv interpreter because that is where uvicorn and the
# requirements are installed.
ENV VIRTUAL_ENV="/app/.venv" \
    PATH="/app/.venv/bin:${PATH}"

RUN uv pip install --no-cache-dir --upgrade pip setuptools wheel

# Download models during build instead of copying from local.
# This happens before the requirements are installed so that a change to the
# requirements files does not invalidate the model layer.
# The venv is already on PATH (see ENV above), so no explicit activation is
# needed for the script to find the huggingface-hub tooling.
# The trailing `rm` only hides the script from the final filesystem; the COPY
# layer still contains it.
COPY scripts/model_download.bash /tmp/model_download.bash
RUN uv pip install --no-cache-dir huggingface-hub && \
    bash /tmp/model_download.bash && \
    rm /tmp/model_download.bash

# Install CPU requirements
COPY requirements.cpu.txt ./
RUN uv pip install --no-cache-dir -r ./requirements.cpu.txt

# Install GPU PyTorch requirements
COPY requirements.torch.gpu.txt ./
RUN uv pip install --no-cache-dir -r ./requirements.torch.gpu.txt

# Application code goes last: it changes most often, so every dependency and
# model layer above stays cached across code-only rebuilds.
COPY app ./app
COPY main.py ./

# NOTE(review): no USER is set, so the container runs as the base image's
# default user (presumably root). Switching to a non-root user needs the uv
# managed interpreter and whatever scripts/model_download.bash writes to be
# readable by that user - confirm both locations before changing this.

# Documentation only; the port still has to be published at `docker run` time.
EXPOSE 8000

ENTRYPOINT ["python3", "-m", "uvicorn", "main:app", "--host", "0.0.0.0", "--port", "8000"]