# syntax=docker/dockerfile:1
# =============================================================================
# Marxist-GRPO Training Container
# =============================================================================
# Headless GRPO fine-tuning container for RunPod deployment.
#
# Build:
#   docker build -t marxist-grpo:latest docker/
#
# Run locally (testing):
#   docker run --gpus all \
#     -e HF_TOKEN=$HF_TOKEN \
#     -e WANDB_API_KEY=$WANDB_API_KEY \
#     -e MAX_STEPS=10 \
#     marxist-grpo:latest
#
# Deploy to RunPod:
#   runpodctl create pod \
#     --name "marxist-grpo-training" \
#     --gpuType "NVIDIA A100 80GB PCIe" \
#     --imageName "myregistry/marxist-grpo:latest" \
#     --env HF_TOKEN=$HF_TOKEN \
#     --env WANDB_API_KEY=$WANDB_API_KEY \
#     --env HF_REPO=my-org/my-model
# =============================================================================

# RunPod's PyTorch base image with CUDA 11.8 (explicitly pinned tag).
FROM runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel

WORKDIR /workspace

# Build-time only: suppress interactive apt prompts. Declared as ARG (not ENV)
# so the setting does not leak into the container's runtime environment.
ARG DEBIAN_FRONTEND=noninteractive

# System dependencies (alphabetical). htop/nvtop/tmux are intentionally
# included for interactive monitoring of long training runs on RunPod pods.
# apt lists are removed in the same layer so they never persist in the image.
RUN apt-get update && apt-get install -y --no-install-recommends \
        curl \
        git \
        git-lfs \
        htop \
        nvtop \
        tmux \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install

# runpodctl (pinned release) for pod self-termination after training.
# Download, install, and temp-file cleanup happen in one layer.
# NOTE(review): consider verifying the tarball against the release's
# published sha256 checksum for a tamper-evident download.
RUN curl -fsSL -o /tmp/runpodctl.tar.gz \
        https://github.com/runpod/runpodctl/releases/download/v1.14.15/runpodctl-linux-amd64.tar.gz \
    && tar -xzf /tmp/runpodctl.tar.gz -C /tmp \
    && mv /tmp/runpodctl /usr/local/bin/runpodctl \
    && chmod +x /usr/local/bin/runpodctl \
    && rm /tmp/runpodctl.tar.gz

# Copy requirements first so the dependency layer stays cached until
# requirements.txt itself changes.
COPY docker/requirements.txt /workspace/requirements.txt

# Python dependencies.
# Note: Unsloth requires a specific installation order — base requirements
# first, then unsloth, then the pinned companion packages below.
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Install Unsloth from source for the latest optimizations.
# NOTE(review): this tracks the repository's default branch, so builds are
# not reproducible — pin to a commit/tag (git+...@<sha>) once validated.
RUN pip install --no-cache-dir \
        "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Versions known to work with Unsloth. These must be installed after unsloth;
# --no-deps keeps pip from pulling in conflicting dependency versions.
RUN pip install --no-cache-dir --no-deps \
        "xformers<0.0.27" \
        "trl>=0.9.0,<0.12.0" \
        peft \
        accelerate \
        bitsandbytes

# spaCy model used by the NLP-based reward functions.
RUN python -m spacy download en_core_web_sm

# Training code and embedded dataset (only ~4.5 MB), copied after the
# dependency layers so code changes do not invalidate the pip caches.
COPY src/prolewiki_llm/ /workspace/prolewiki_llm/
COPY training_data/grpo_dataset.jsonl /workspace/dataset.jsonl

# Entrypoint script — --chmod sets the execute bit in the COPY layer itself,
# avoiding a separate RUN chmod layer.
COPY --chmod=755 docker/start.sh /workspace/start.sh

# Make the prolewiki_llm package importable from /workspace.
ENV PYTHONPATH=/workspace

# =============================================================================
# Environment Variables (defaults - override at runtime)
# =============================================================================

# Model configuration.
ENV MODEL_NAME="unsloth/DeepSeek-R1-0528-Qwen3-8B" \
    MAX_SEQ_LENGTH=2048 \
    LORA_RANK=32

# Training configuration.
ENV MAX_STEPS=500 \
    SAVE_STEPS=50 \
    LEARNING_RATE=5e-6 \
    BATCH_SIZE=2 \
    GRADIENT_ACCUMULATION=2 \
    NUM_GENERATIONS=4 \
    GPU_MEMORY_UTILIZATION=0.6

# Paths (container internal).
ENV DATASET_PATH=/workspace/dataset.jsonl \
    CHECKPOINT_DIR=/workspace/checkpoints \
    LORA_OUTPUT=/workspace/lora-output \
    OUTPUT_DIR=/workspace/outputs

# Reward mode: FULL (recommended), ROBUST, or LEGACY.
ENV REWARD_MODE=FULL

# Default upload destination for the trained adapter.
ENV HF_REPO=prolewiki/marxist-grpo-lora

# Required secrets (must be provided at runtime; never bake into the image):
#   - HF_TOKEN:       HuggingFace API token
#   - WANDB_API_KEY:  Weights & Biases API key
# Optional:
#   - RUNPOD_POD_ID:  for self-termination after training

# Health check - verify CUDA is actually visible inside the container.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import torch; assert torch.cuda.is_available()" || exit 1

# Exec-form entrypoint so start.sh runs as PID 1 and receives stop signals.
ENTRYPOINT ["/workspace/start.sh"]