# syntax=docker/dockerfile:1
# =============================================================================
# Marxist-GRPO Training Container
# =============================================================================
# Headless GRPO fine-tuning container for RunPod deployment.
#
# Build (context must be the repo root — COPY references src/ and
# training_data/, which live outside docker/):
#   docker build -f docker/Dockerfile -t marxist-grpo:latest .
#
# Run locally (testing):
#   docker run --gpus all \
#     -e HF_TOKEN=$HF_TOKEN \
#     -e WANDB_API_KEY=$WANDB_API_KEY \
#     -e MAX_STEPS=10 \
#     marxist-grpo:latest
#
# Deploy to RunPod:
#   runpodctl create pod \
#     --name "marxist-grpo-training" \
#     --gpuType "NVIDIA A100 80GB PCIe" \
#     --imageName "myregistry/marxist-grpo:latest" \
#     --env HF_TOKEN=$HF_TOKEN \
#     --env WANDB_API_KEY=$WANDB_API_KEY \
#     --env HF_REPO=my-org/my-model
# =============================================================================
# RunPod PyTorch base image, pinned by tag: torch 2.1.0, Python 3.10, CUDA 11.8.
FROM runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel

# All build steps and runtime paths live under /workspace (RunPod convention).
WORKDIR /workspace

# Build-time only: suppress interactive apt prompts during package installs.
# ARG (not ENV) so the setting does not leak into the runtime environment of
# containers started from this image.
ARG DEBIAN_FRONTEND=noninteractive
# OS-level tooling: git + git-lfs for model/dataset repos, curl/wget for
# release downloads, htop/nvtop/tmux for interactive debugging on the pod.
# Package list is alphabetized; the apt cache is purged in the same layer
# so it never persists in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        git \
        git-lfs \
        htop \
        nvtop \
        tmux \
        wget \
    && git lfs install \
    && rm -rf /var/lib/apt/lists/*
# runpodctl (pinned v1.14.15) lets the training script terminate its own pod
# when the run finishes, so idle GPU time is not billed.
# NOTE(review): the release tarball is fetched without checksum verification;
# consider ADD --checksum=sha256:... once the digest is recorded.
RUN curl -fsSL -o /tmp/runpodctl.tar.gz \
        https://github.com/runpod/runpodctl/releases/download/v1.14.15/runpodctl-linux-amd64.tar.gz \
    && tar -xzf /tmp/runpodctl.tar.gz -C /tmp \
    && install -m 755 /tmp/runpodctl /usr/local/bin/runpodctl \
    && rm -f /tmp/runpodctl /tmp/runpodctl.tar.gz
# The dependency manifest is copied on its own so this expensive install
# layer is reused whenever only application code changes.
COPY docker/requirements.txt /workspace/requirements.txt

# Upgrade pip, then install the pinned Python dependencies; --no-cache-dir
# keeps the wheel cache out of the layer.
RUN python -m pip install --no-cache-dir --upgrade pip \
    && python -m pip install --no-cache-dir -r requirements.txt
# Unsloth is installed from source for the latest GRPO/LoRA optimizations.
# NOTE(review): this tracks the repo HEAD and is therefore not reproducible
# build-to-build; pin a commit or tag once one is validated.
RUN pip install --no-cache-dir \
        "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Version-constrained companions installed with --no-deps AFTER unsloth so
# their transitive dependencies cannot override what unsloth resolved.
RUN pip install --no-cache-dir --no-deps \
        accelerate \
        bitsandbytes \
        peft \
        "trl>=0.9.0,<0.12.0" \
        "xformers<0.0.27"

# English spaCy model required by the NLP-based reward functions.
RUN python -m spacy download en_core_web_sm
# Application code and the embedded dataset (~4.5 MB) — copied after the
# dependency layers so code changes do not invalidate the installs above.
COPY src/prolewiki_llm/ /workspace/prolewiki_llm/
COPY training_data/grpo_dataset.jsonl /workspace/dataset.jsonl

# Entrypoint script. --chmod sets the executable bit inside the COPY layer
# itself, avoiding a follow-up RUN chmod that would duplicate the file in
# an extra layer (requires BuildKit, the default builder since Docker 23).
COPY --chmod=755 docker/start.sh /workspace/start.sh

# Make the prolewiki_llm package importable from anywhere in the container.
ENV PYTHONPATH=/workspace
# =============================================================================
# Environment variables (defaults — override at runtime)
# =============================================================================
# Model configuration
ENV MODEL_NAME="unsloth/DeepSeek-R1-0528-Qwen3-8B" \
    MAX_SEQ_LENGTH=2048 \
    LORA_RANK=32

# Training configuration
ENV MAX_STEPS=500 \
    SAVE_STEPS=50 \
    LEARNING_RATE=5e-6 \
    BATCH_SIZE=2 \
    GRADIENT_ACCUMULATION=2 \
    NUM_GENERATIONS=4 \
    GPU_MEMORY_UTILIZATION=0.6

# Container-internal paths
ENV DATASET_PATH=/workspace/dataset.jsonl \
    CHECKPOINT_DIR=/workspace/checkpoints \
    LORA_OUTPUT=/workspace/lora-output \
    OUTPUT_DIR=/workspace/outputs

# Reward mode: FULL (recommended), ROBUST, or LEGACY
ENV REWARD_MODE=FULL

# Default upload destination on the HuggingFace Hub
ENV HF_REPO=prolewiki/marxist-grpo-lora

# Required secrets (provide at runtime — never bake into the image):
#   - HF_TOKEN:       HuggingFace API token
#   - WANDB_API_KEY:  Weights & Biases API key
# Optional:
#   - RUNPOD_POD_ID:  enables pod self-termination after training
# Cheap liveness probe: the container is only healthy while the CUDA runtime
# can see a GPU. A failed assert exits non-zero; `|| exit 1` normalizes any
# failure status for Docker's health-state machine.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import torch; assert torch.cuda.is_available()" || exit 1

# Exec-form entrypoint: start.sh runs as PID 1 and receives SIGTERM directly
# on `docker stop` / pod termination.
ENTRYPOINT ["/workspace/start.sh"]