ml_trainer_env / Dockerfile
BART-ender's picture
Upload folder using huggingface_hub
685c05b verified
# syntax=docker/dockerfile:1
# Base image shared by both stages; override with
# `--build-arg BASE_IMAGE=<image>`.
# NOTE(review): the default points at the floating `:latest` tag, so rebuilds
# are not reproducible — consider pinning a version tag or digest.
ARG BASE_IMAGE=ghcr.io/meta-pytorch/openenv-base:latest
# Build stage: tooling, dependency installation and dataset download happen
# here; the final stage copies selected paths out of it.
FROM ${BASE_IMAGE} AS builder
WORKDIR /app
# Make git available in the build stage (presumably for dependencies that are
# fetched from git repositories — confirm against pyproject.toml).
# Index refresh, install and list cleanup share a single layer so the apt
# package lists are never stale and never stored in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends git \
    && rm -rf /var/lib/apt/lists/*
# Build arguments scoped to the builder stage.
# NOTE(review): neither BUILD_MODE nor ENV_NAME is referenced in the visible
# part of this Dockerfile — confirm whether external build tooling passes them
# before removing.
ARG BUILD_MODE=in-repo
ARG ENV_NAME=ml_trainer_env
# Copy environment code
# The entire build context is copied before dependencies are installed, so any
# source change invalidates every later layer of this stage.
COPY . /app/env
# Subsequent RUN steps (uv sync, dataset download) execute from /app/env.
WORKDIR /app/env
# Ensure uv is available
# uv is installed only when the base image does not already provide it.
# The installer is downloaded to a file and then executed, rather than piped
# straight into `sh`: with a pipe, the step's status is that of `sh`, so a
# failed or truncated download would be masked. Here a curl failure (-f) aborts
# the chain immediately with curl's own error.
# The installer script is removed in the same layer. The `mv` lines assume the
# installer placed `uv` and `uvx` in /root/.local/bin (i.e. the build runs as
# root); they are moved to /usr/local/bin.
RUN if ! command -v uv >/dev/null 2>&1; then \
        curl -LsSf https://astral.sh/uv/install.sh -o /tmp/uv-install.sh && \
        sh /tmp/uv-install.sh && \
        rm -f /tmp/uv-install.sh && \
        mv /root/.local/bin/uv /usr/local/bin/uv && \
        mv /root/.local/bin/uvx /usr/local/bin/uvx; \
    fi
# Install dependencies
# Two passes into /app/env/.venv:
#   1. third-party dependencies only (--no-install-project);
#   2. the project itself, installed non-editable.
# When a uv.lock file is present, --frozen is added to both passes so uv
# installs from the lockfile without updating it; without a lockfile uv
# resolves from the project metadata.
# ${lock_flag} is intentionally unquoted: an empty value must expand to no
# argument at all.
RUN lock_flag=""; \
    if [ -f uv.lock ]; then lock_flag="--frozen"; fi; \
    uv sync ${lock_flag} --no-install-project --no-editable
RUN lock_flag=""; \
    if [ -f uv.lock ]; then lock_flag="--frozen"; fi; \
    uv sync ${lock_flag} --no-editable
# Pre-download datasets during build so they're cached in the image
# DATA_DIR is set before the download runs; presumably download_all_datasets()
# writes under it — confirm in server.datasets. The final stage copies
# /app/data out of this stage, so that directory must exist once this step
# has finished.
ENV DATA_DIR=/app/data
# Runs with the project's virtualenv interpreter from the current WORKDIR
# (/app/env). A failure inside the download aborts the image build.
RUN .venv/bin/python -c "from server.datasets import download_all_datasets; download_all_datasets()"
# Final runtime stage
# Starts again from the same base image, so the builder's apt packages (git)
# and the uv installation are not carried over; only the paths copied below are.
# NOTE(review): no USER instruction is set in this stage, so the container
# runs as whatever user the base image defaults to — confirm that is intended.
FROM ${BASE_IMAGE}
WORKDIR /app
# Copy the virtual environment from builder
COPY --from=builder /app/env/.venv /app/.venv
# Copy the environment code
# NOTE(review): /app/env still contains its own .venv directory, so the
# virtual environment ends up in the image twice (/app/.venv and
# /app/env/.venv). Before excluding it, confirm that nothing in /app/.venv
# (e.g. console-script shebangs) still points at /app/env/.venv.
COPY --from=builder /app/env /app/env
# Copy pre-downloaded datasets
COPY --from=builder /app/data /app/data
# Set PATH to use the virtual environment
# Executables from /app/.venv/bin take precedence over those in the base image.
ENV PATH="/app/.venv/bin:$PATH"
# Set PYTHONPATH so imports work correctly
# /app/env is made importable. The inherited PYTHONPATH is appended only when
# it is non-empty: a plain "/app/env:$PYTHONPATH" would leave a trailing ':'
# when the base image defines none, and Python treats that empty entry as the
# current working directory.
ENV PYTHONPATH="/app/env${PYTHONPATH:+:${PYTHONPATH}}"
# Set data directory
# Same location the datasets were downloaded to in the builder stage.
ENV DATA_DIR="/app/data"
# Limit PyTorch threads to match 2 vCPU
# Caps the OpenMP and MKL thread pools at two threads each.
ENV OMP_NUM_THREADS=2
ENV MKL_NUM_THREADS=2
# Health check
# Every 30s (after a 10s start period) request http://localhost:8000/health
# using only the Python standard library. Each probe is given 3s, and three
# consecutive failures mark the container unhealthy.
# urlopen raises on connection errors and HTTP error statuses, which makes
# python exit non-zero; `|| exit 1` normalises any failure to exit status 1.
HEALTHCHECK --interval=30s --timeout=3s --start-period=10s --retries=3 \
CMD python -c "import urllib.request; urllib.request.urlopen('http://localhost:8000/health')" || exit 1
# Run the FastAPI server
# ENABLE_WEB_INTERFACE is exported to the server process (presumably read by
# the application to turn on its web UI — confirm in server.app).
ENV ENABLE_WEB_INTERFACE=true
# Document the port uvicorn listens on; EXPOSE does not publish it by itself.
EXPOSE 8000
# The shell wrapper is only needed for the `cd`. `exec` then replaces the shell
# with uvicorn so the server itself is the container's main process and
# receives stop signals (e.g. SIGTERM from `docker stop`) directly.
CMD ["sh", "-c", "cd /app/env && exec uvicorn server.app:app --host 0.0.0.0 --port 8000"]