# Source: llm-training/docker/Dockerfile
# Uploaded by percyraskova using huggingface_hub (commit 81b3473, verified)
# =============================================================================
# Marxist-GRPO Training Container
# =============================================================================
# Headless GRPO fine-tuning container for RunPod deployment.
#
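# Build (run from the repository root: the COPY instructions below reference
# docker/, src/ and training_data/ relative to the build context):
# docker build -t marxist-grpo:latest -f docker/Dockerfile .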
#
# Run locally (testing):
# docker run --gpus all \
# -e HF_TOKEN=$HF_TOKEN \
# -e WANDB_API_KEY=$WANDB_API_KEY \
# -e MAX_STEPS=10 \
# marxist-grpo:latest
#
# Deploy to RunPod:
# runpodctl create pod \
# --name "marxist-grpo-training" \
# --gpuType "NVIDIA A100 80GB PCIe" \
# --imageName "myregistry/marxist-grpo:latest" \
# --env HF_TOKEN=$HF_TOKEN \
# --env WANDB_API_KEY=$WANDB_API_KEY \
# --env HF_REPO=my-org/my-model
# =============================================================================
# Use RunPod's PyTorch base image with CUDA 11.8.
# The tag names PyTorch 2.1.0, Python 3.10 and the CUDA 11.8.0 "devel" variant.
# NOTE(review): the tag is not pinned by digest, so the base may change between builds.
FROM runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel
# Set working directory; relative paths in later instructions
# (e.g. `pip install -r requirements.txt`) resolve against /workspace.
WORKDIR /workspace
# Prevent interactive prompts during package installation.
# Declared as ARG rather than ENV: the value is still exported to every RUN step
# in this stage, but it is not baked into the runtime environment of containers
# started from the image.
ARG DEBIAN_FRONTEND=noninteractive
# System packages: version control (git, git-lfs), download clients (curl, wget)
# and terminal utilities (htop, nvtop, tmux).
# The apt package lists are deleted in the same layer that created them, and
# `git lfs install` then registers the Git LFS hooks.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        curl \
        git \
        git-lfs \
        htop \
        nvtop \
        tmux \
        wget \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install
# runpodctl (release v1.14.15, linux/amd64) - used for pod self-termination.
# Download the release tarball, unpack it under /tmp, move the binary onto PATH,
# make it executable and remove the tarball in the same layer.
RUN curl -fsSL \
        https://github.com/runpod/runpodctl/releases/download/v1.14.15/runpodctl-linux-amd64.tar.gz \
        -o /tmp/runpodctl.tar.gz \
    && tar -C /tmp -xzf /tmp/runpodctl.tar.gz \
    && mv /tmp/runpodctl /usr/local/bin/runpodctl \
    && chmod +x /usr/local/bin/runpodctl \
    && rm /tmp/runpodctl.tar.gz
# Bring in only the requirements file at this point, so the dependency layer
# below stays cached until requirements.txt itself changes.
COPY docker/requirements.txt /workspace/requirements.txt
# Upgrade pip, then install the Python dependencies listed in requirements.txt.
# This runs before the Unsloth install because Unsloth requires a specific
# installation order.
RUN pip install --no-cache-dir --upgrade pip \
    && pip install --no-cache-dir -r requirements.txt
# Install Unsloth with its "colab-new" extra directly from the GitHub repository
# (from source for latest optimizations).
# NOTE(review): no tag or commit is pinned in the URL, so an uncached build may
# pick up a different Unsloth revision - consider pinning a commit for
# reproducible images.
RUN pip install --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
# Install the training-stack packages alongside Unsloth.
# Note: these must be installed after Unsloth, and with --no-deps, so that pip
# does not pull in or replace the dependency versions chosen by the Unsloth
# install above.
# trl is required at >=0.14.0 because GRPOTrainer/GRPOConfig first shipped in
# TRL 0.14.0; the earlier "<0.12.0" cap forced a release without GRPO support
# (and downgraded any newer TRL that Unsloth had already installed).
RUN pip install --no-cache-dir --no-deps \
    "xformers<0.0.27" \
    "trl>=0.14.0" \
    peft \
    accelerate \
    bitsandbytes
# Download the small English spaCy model (en_core_web_sm) at build time,
# for the NLP-based reward functions.
RUN python -m spacy download en_core_web_sm
# Copy the training code (the prolewiki_llm package) into /workspace.
# Application files are copied after all dependency layers so that code changes
# do not invalidate the cached dependency installs.
COPY src/prolewiki_llm/ /workspace/prolewiki_llm/
# Copy the dataset (embedded - only 4.5MB); it lands at the path that
# DATASET_PATH defaults to further down.
COPY training_data/grpo_dataset.jsonl /workspace/dataset.jsonl
# Copy entrypoint script and make it executable (it is the image ENTRYPOINT).
COPY docker/start.sh /workspace/start.sh
RUN chmod +x /workspace/start.sh
# Set PYTHONPATH so the prolewiki_llm module copied to /workspace can be imported
ENV PYTHONPATH=/workspace
# =============================================================================
# Runtime defaults (each value can be overridden with -e / --env at run time)
# =============================================================================
# Model configuration
ENV MODEL_NAME="unsloth/DeepSeek-R1-0528-Qwen3-8B" \
    MAX_SEQ_LENGTH=2048 \
    LORA_RANK=32
# Training configuration
ENV MAX_STEPS=500 \
    SAVE_STEPS=50 \
    LEARNING_RATE=5e-6 \
    BATCH_SIZE=2 \
    GRADIENT_ACCUMULATION=2 \
    NUM_GENERATIONS=4 \
    GPU_MEMORY_UTILIZATION=0.6
# Paths inside the container
ENV DATASET_PATH=/workspace/dataset.jsonl \
    CHECKPOINT_DIR=/workspace/checkpoints \
    LORA_OUTPUT=/workspace/lora-output \
    OUTPUT_DIR=/workspace/outputs
# Reward mode: FULL (recommended), ROBUST, or LEGACY
ENV REWARD_MODE=FULL
# Upload destination
ENV HF_REPO=prolewiki/marxist-grpo-lora
# Required secrets (must be provided at runtime):
# - HF_TOKEN: HuggingFace API token
# - WANDB_API_KEY: Weights & Biases API key
# Optional:
# - RUNPOD_POD_ID: For self-termination after training
# Health check - verify CUDA is available.
# Runs every 30s with a 10s timeout, after a 5s start period; 3 consecutive
# failures mark the container unhealthy. The probe fails when
# torch.cuda.is_available() is false or the Python command itself errors;
# `|| exit 1` maps any non-zero status to exit code 1.
HEALTHCHECK --interval=30s --timeout=10s --start-period=5s --retries=3 \
CMD python -c "import torch; assert torch.cuda.is_available()" || exit 1
# Entry point (exec form): start.sh is executed directly, without a wrapping shell.
ENTRYPOINT ["/workspace/start.sh"]