# Training Dockerfile for GRPO on Northflank (A100/H100)
# Builds a GPU-ready image with PyTorch, TRL, vLLM, and the
# skill_invocation_env client.

FROM pytorch/pytorch:2.6.0-cuda12.4-cudnn9-devel

# Fail a RUN step when any command in a pipeline fails, not only the last one.
# Needed for the `curl ... | sh` installer below: without pipefail, a failed
# download can be hidden by the exit status of `sh`.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

WORKDIR /app

# System deps.
# ca-certificates is listed explicitly because --no-install-recommends skips
# recommended packages, and the HTTPS download in the next step needs a CA
# bundle. The apt lists are removed in the same layer to keep the image small.
RUN apt-get update && \
    apt-get install -y --no-install-recommends \
        ca-certificates \
        curl \
        git && \
    rm -rf /var/lib/apt/lists/*

# Install uv for fast dependency resolution.
# The installer places the binaries under /root/.local/bin; they are moved to
# /usr/local/bin so they are on PATH.
# NOTE(review): the installer script is fetched unpinned and unverified;
# consider pinning a uv release for reproducible builds.
RUN curl -LsSf https://astral.sh/uv/install.sh | sh && \
    mv /root/.local/bin/uv /usr/local/bin/uv && \
    mv /root/.local/bin/uvx /usr/local/bin/uvx

# Install Python training dependencies.
# --no-cache-dir keeps pip's download cache out of the image layer.
RUN pip install --no-cache-dir \
        "trl>=0.25.0" \
        "vllm>=0.8.0" \
        "transformers>=4.51.0" \
        "datasets>=3.0.0" \
        "accelerate>=1.0.0" \
        "peft>=0.14.0" \
        "openenv-core[core]>=0.2.1" \
        "pydantic>=2.0" \
        "huggingface_hub>=0.25.0"

# Copy only the client code needed for training (not the server).
# The three package files land in /app/skill_invocation_env/ so that, with
# /app on PYTHONPATH, they are importable as the `skill_invocation_env` package.
COPY __init__.py /app/skill_invocation_env/__init__.py
COPY models.py /app/skill_invocation_env/models.py
COPY client.py /app/skill_invocation_env/client.py
COPY train_demo.py /app/train_demo.py

# Put /app first on PYTHONPATH. Any value inherited from the base image is
# appended only when it is non-empty, which avoids a trailing ":" (an empty
# path entry) when PYTHONPATH was previously unset.
# PYTHONUNBUFFERED=1 makes Python write stdout/stderr without buffering so
# training logs show up immediately.
ENV PYTHONPATH="/app${PYTHONPATH:+:${PYTHONPATH}}" \
    PYTHONUNBUFFERED=1

# NOTE(review): no USER directive is set, so the container runs with the base
# image's default user. Confirm whether the training job can run as non-root.

# Default entrypoint — run the training script (resolved relative to WORKDIR).
CMD ["python", "train_demo.py"]