# syntax=docker/dockerfile:1
# =============================================================================
# Marxist-GRPO Training Container
# =============================================================================
# Headless GRPO fine-tuning container for RunPod deployment.
#
# Build:
# docker build -t marxist-grpo:latest docker/
#
# Run locally (testing):
# docker run --gpus all \
# -e HF_TOKEN=$HF_TOKEN \
# -e WANDB_API_KEY=$WANDB_API_KEY \
# -e MAX_STEPS=10 \
# marxist-grpo:latest
#
# Deploy to RunPod:
# runpodctl create pod \
# --name "marxist-grpo-training" \
# --gpuType "NVIDIA A100 80GB PCIe" \
# --imageName "myregistry/marxist-grpo:latest" \
# --env HF_TOKEN=$HF_TOKEN \
# --env WANDB_API_KEY=$WANDB_API_KEY \
# --env HF_REPO=my-org/my-model
# =============================================================================
# Use RunPod's PyTorch base image, pinned to a specific tag so builds are
# reproducible; CUDA 11.8 matches the xformers/Unsloth wheels installed below.
FROM runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel

# All build steps and the training job run from /workspace.
WORKDIR /workspace

# Build-time only: suppress interactive apt prompts. Declared as ARG rather
# than ENV so the setting does not persist into the running container
# (baking DEBIAN_FRONTEND into the runtime env is a known anti-pattern).
ARG DEBIAN_FRONTEND=noninteractive
# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
git \
git-lfs \
curl \
htop \
nvtop \
tmux \
wget \
&& rm -rf /var/lib/apt/lists/* \
&& git lfs install
# Install runpodctl (pinned to v1.14.15) so the training script can terminate
# its own pod when the run finishes, stopping GPU billing.
# NOTE(review): the tarball is fetched over HTTPS but not checksum-verified;
# consider verifying against the release's published sha256 (or using
# ADD --checksum=sha256:...) before trusting the binary.
RUN curl -fsSL -o /tmp/runpodctl.tar.gz \
    https://github.com/runpod/runpodctl/releases/download/v1.14.15/runpodctl-linux-amd64.tar.gz \
    && tar -xzf /tmp/runpodctl.tar.gz -C /tmp \
    && mv /tmp/runpodctl /usr/local/bin/runpodctl \
    && chmod +x /usr/local/bin/runpodctl \
    && rm /tmp/runpodctl.tar.gz
# Copy the requirements manifest on its own, before the application source,
# so this (slow) dependency layer stays cached until requirements.txt changes.
COPY docker/requirements.txt /workspace/requirements.txt

# Upgrade pip, then install the training dependencies.
# --no-cache-dir keeps pip's wheel cache out of the image layer.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r /workspace/requirements.txt
# Install Unsloth straight from the Git main branch to pick up the latest
# kernel optimizations; the "colab-new" extra selects its dependency set.
# NOTE(review): an unpinned git ref makes this layer non-reproducible —
# consider pinning to a commit SHA once a known-good revision is validated.
RUN pip install --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Compatibility pins that must land AFTER unsloth so its install does not
# drag these packages to incompatible versions; --no-deps prevents them from
# pulling conflicting transitive versions of torch/CUDA libraries in turn.
# Do not merge this into the layer above — the install order is deliberate.
RUN pip install --no-cache-dir --no-deps \
    "xformers<0.0.27" \
    "trl>=0.9.0,<0.12.0" \
    peft \
    accelerate \
    bitsandbytes
# Pre-download the small English spaCy model at build time so the NLP-based
# reward functions need no network access (and pay no startup cost) at run time.
RUN python -m spacy download en_core_web_sm
# Application code — copied after all dependency layers so code edits do not
# invalidate the slow pip/apt layers above.
COPY src/prolewiki_llm/ /workspace/prolewiki_llm/

# The GRPO dataset is small enough (~4.5 MB) to embed directly in the image,
# avoiding a volume mount or runtime download.
COPY training_data/grpo_dataset.jsonl /workspace/dataset.jsonl

# Entrypoint script. --chmod sets the execute bit at copy time instead of a
# follow-up "RUN chmod" layer, which would store a second full copy of the
# file in the image (requires BuildKit — the default builder since Docker 23).
COPY --chmod=0755 docker/start.sh /workspace/start.sh

# Make the prolewiki_llm package importable from anywhere in the container.
ENV PYTHONPATH=/workspace
# =============================================================================
# Environment Variables (defaults - override at runtime)
# =============================================================================
# Model configuration — grouped into one ENV instruction per logical section
# for readability and fewer image layers.
ENV MODEL_NAME="unsloth/DeepSeek-R1-0528-Qwen3-8B" \
    MAX_SEQ_LENGTH=2048 \
    LORA_RANK=32

# Training configuration
ENV MAX_STEPS=500 \
    SAVE_STEPS=50 \
    LEARNING_RATE=5e-6 \
    BATCH_SIZE=2 \
    GRADIENT_ACCUMULATION=2 \
    NUM_GENERATIONS=4 \
    GPU_MEMORY_UTILIZATION=0.6

# Paths (container internal)
ENV DATASET_PATH=/workspace/dataset.jsonl \
    CHECKPOINT_DIR=/workspace/checkpoints \
    LORA_OUTPUT=/workspace/lora-output \
    OUTPUT_DIR=/workspace/outputs

# Reward mode: FULL (recommended), ROBUST, or LEGACY
ENV REWARD_MODE=FULL

# Default HuggingFace repo the trained adapter is uploaded to
ENV HF_REPO=prolewiki/marxist-grpo-lora
# Required secrets (provide at runtime only — never bake into the image):
#   - HF_TOKEN: HuggingFace API token
#   - WANDB_API_KEY: Weights & Biases API key
# Optional:
#   - RUNPOD_POD_ID: For self-termination after training

# Health check: cheap, side-effect-free CUDA probe so an orchestrator can
# flag a container whose GPU/driver has wedged mid-run.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
    CMD python -c "import torch; assert torch.cuda.is_available()" || exit 1

# Exec-form entrypoint: start.sh runs as PID 1 and receives SIGTERM on
# "docker stop", so it can checkpoint/clean up before exit.
ENTRYPOINT ["/workspace/start.sh"]