File size: 4,150 Bytes
81b3473
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
# =============================================================================
# Marxist-GRPO Training Container
# =============================================================================
# Headless GRPO fine-tuning container for RunPod deployment.
#
# Build:
#   docker build -t marxist-grpo:latest docker/
#
# Run locally (testing):
#   docker run --gpus all \
#     -e HF_TOKEN=$HF_TOKEN \
#     -e WANDB_API_KEY=$WANDB_API_KEY \
#     -e MAX_STEPS=10 \
#     marxist-grpo:latest
#
# Deploy to RunPod:
#   runpodctl create pod \
#     --name "marxist-grpo-training" \
#     --gpuType "NVIDIA A100 80GB PCIe" \
#     --imageName "myregistry/marxist-grpo:latest" \
#     --env HF_TOKEN=$HF_TOKEN \
#     --env WANDB_API_KEY=$WANDB_API_KEY \
#     --env HF_REPO=my-org/my-model
# =============================================================================

# Use RunPod's PyTorch base image with CUDA 11.8
# (pinned tag; consider also pinning by digest for full reproducibility)
FROM runpod/pytorch:2.1.0-py3.10-cuda11.8.0-devel

# Set working directory (created automatically if missing)
WORKDIR /workspace

# Prevent interactive prompts during package installation.
# ARG (not ENV) keeps this build-time only: it is still exported to every
# subsequent RUN in this stage, but does not leak into the runtime
# environment of the final container.
ARG DEBIAN_FRONTEND=noninteractive

# Install system dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
    git \
    git-lfs \
    curl \
    htop \
    nvtop \
    tmux \
    wget \
    && rm -rf /var/lib/apt/lists/* \
    && git lfs install

# Install runpodctl for pod self-termination after training completes.
# The release is pinned to v1.14.15 so rebuilds fetch the same binary.
# NOTE(review): the tarball is not checksum-verified — consider verifying a
# sha256 (or using ADD --checksum=...) to guard against upstream tampering.
RUN curl -fsSL -o /tmp/runpodctl.tar.gz \
    https://github.com/runpod/runpodctl/releases/download/v1.14.15/runpodctl-linux-amd64.tar.gz \
    && tar -xzf /tmp/runpodctl.tar.gz -C /tmp \
    && mv /tmp/runpodctl /usr/local/bin/runpodctl \
    && chmod +x /usr/local/bin/runpodctl \
    && rm /tmp/runpodctl.tar.gz

# Copy requirements first (for layer caching): the dependency layer below is
# reused until requirements.txt itself changes; source edits don't reinstall.
COPY docker/requirements.txt /workspace/requirements.txt

# Install Python dependencies.
# --no-cache-dir keeps pip's wheel cache out of the image layer.
# Note: Unsloth requires specific installation order
RUN pip install --no-cache-dir --upgrade pip && \
    pip install --no-cache-dir -r requirements.txt

# Install Unsloth (from source for latest optimizations)
# NOTE(review): this tracks the unpinned git HEAD, so rebuilds are not
# reproducible — consider pinning to a tag/commit (...git@<sha>) once a
# known-good revision is validated.
RUN pip install --no-cache-dir "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"

# Install specific versions that work with Unsloth.
# --no-deps stops pip from resolving transitive dependencies here, which
# could otherwise replace versions the unsloth install above put in place.
# Note: These must be installed after unsloth to avoid conflicts
RUN pip install --no-cache-dir --no-deps \
    "xformers<0.0.27" \
    "trl>=0.9.0,<0.12.0" \
    peft \
    accelerate \
    bitsandbytes

# Download spaCy model for NLP-based reward functions.
# NOTE(review): the model version is unpinned — spaCy picks one compatible
# with the installed spaCy release; pin it (e.g. via a wheel URL in
# requirements.txt) if exact reproducibility matters.
RUN python -m spacy download en_core_web_sm

# Copy the training code
COPY src/prolewiki_llm/ /workspace/prolewiki_llm/

# Copy the dataset (embedded - only 4.5MB)
COPY training_data/grpo_dataset.jsonl /workspace/dataset.jsonl

# Copy entrypoint script with the execute bit set at copy time.
# COPY --chmod (BuildKit) avoids the follow-up RUN chmod layer, which would
# duplicate the file's contents into a second layer.
COPY --chmod=755 docker/start.sh /workspace/start.sh

# Set PYTHONPATH so prolewiki_llm module can be imported
ENV PYTHONPATH=/workspace

# =============================================================================
# Environment Variables (defaults - override at runtime)
# =============================================================================
# Related settings are grouped into one ENV instruction per section; values
# are unchanged and every variable can still be overridden with `-e` at run.

# Model configuration
ENV MODEL_NAME="unsloth/DeepSeek-R1-0528-Qwen3-8B" \
    MAX_SEQ_LENGTH=2048 \
    LORA_RANK=32

# Training configuration
ENV MAX_STEPS=500 \
    SAVE_STEPS=50 \
    LEARNING_RATE=5e-6 \
    BATCH_SIZE=2 \
    GRADIENT_ACCUMULATION=2 \
    NUM_GENERATIONS=4 \
    GPU_MEMORY_UTILIZATION=0.6

# Paths (container internal)
ENV DATASET_PATH=/workspace/dataset.jsonl \
    CHECKPOINT_DIR=/workspace/checkpoints \
    LORA_OUTPUT=/workspace/lora-output \
    OUTPUT_DIR=/workspace/outputs

# Reward mode: FULL (recommended), ROBUST, or LEGACY
ENV REWARD_MODE=FULL

# Upload destination
ENV HF_REPO=prolewiki/marxist-grpo-lora

# Required secrets (must be provided at runtime):
# - HF_TOKEN: HuggingFace API token
# - WANDB_API_KEY: Weights & Biases API key
# Optional:
# - RUNPOD_POD_ID: For self-termination after training

# Health check - verify CUDA is available.
# The probe imports torch, which takes several seconds by itself, so the old
# 10s timeout risked false "unhealthy" flips and a 5s start period is shorter
# than container warm-up. The interval is also relaxed so a full Python/torch
# startup isn't spawned next to the training process every 30 seconds.
HEALTHCHECK --interval=60s --timeout=30s --start-period=60s --retries=3 \
    CMD python -c "import torch; assert torch.cuda.is_available()" || exit 1

# Entry point (exec form: start.sh runs as PID 1 and receives SIGTERM
# directly from `docker stop`; start.sh should `exec` the trainer so the
# Python process inherits PID 1).
# NOTE(review): the image has no USER directive and runs as root — common
# for throwaway GPU training pods, but worth confirming it's intentional.
ENTRYPOINT ["/workspace/start.sh"]