# =============================================================================
# Marxist-GRPO Training Environment Variables
# =============================================================================
# Copy this file to .env and fill in your values.
# These can be passed to docker run with --env-file or individually with -e.
#
# Usage:
#   docker run --gpus all --env-file docker/.env marxist-grpo:latest
#
# Or with runpodctl:
#   runpodctl create pod \
#     --imageName myregistry/marxist-grpo:latest \
#     --env HF_TOKEN=$HF_TOKEN \
#     --env WANDB_API_KEY=$WANDB_API_KEY \
#     --env HF_REPO=my-org/my-model
# =============================================================================
# -----------------------------------------------------------------------------
# REQUIRED SECRETS (must be set)
# -----------------------------------------------------------------------------
# HuggingFace API token (for model upload)
# Get yours at: https://huggingface.co/settings/tokens
HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

# Weights & Biases API key (for training monitoring)
# Get yours at: https://wandb.ai/authorize
WANDB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# -----------------------------------------------------------------------------
# MODEL CONFIGURATION
# -----------------------------------------------------------------------------
# Base model to fine-tune
MODEL_NAME=unsloth/DeepSeek-R1-0528-Qwen3-8B

# Maximum sequence length for tokenizer
MAX_SEQ_LENGTH=2048

# LoRA adapter rank (higher = more expressive, more params)
LORA_RANK=32
# -----------------------------------------------------------------------------
# TRAINING HYPERPARAMETERS
# -----------------------------------------------------------------------------
# Total training steps
MAX_STEPS=500

# Save checkpoint every N steps
SAVE_STEPS=50

# Learning rate
LEARNING_RATE=5e-6

# Warmup ratio (fraction of steps for LR warmup)
WARMUP_RATIO=0.1

# Per-device batch size
BATCH_SIZE=2

# Gradient accumulation steps (effective batch = BATCH_SIZE * GRADIENT_ACCUMULATION)
GRADIENT_ACCUMULATION=2

# Number of generations per prompt during GRPO
NUM_GENERATIONS=4

# Fraction of GPU memory to allocate
GPU_MEMORY_UTILIZATION=0.6

# Maximum prompt length (tokens)
MAX_PROMPT_LENGTH=512

# Maximum completion length (tokens)
MAX_COMPLETION_LENGTH=1500
# -----------------------------------------------------------------------------
# REWARD CONFIGURATION
# -----------------------------------------------------------------------------
# Reward mode: FULL (recommended), ROBUST, or LEGACY
#   FULL:   NLI + self-consistency + structure + topic relevance + depth
#   ROBUST: NLI + self-consistency + structure
#   LEGACY: Semantic similarity + terminology (faster but vulnerable to word soup)
REWARD_MODE=FULL
# -----------------------------------------------------------------------------
# OUTPUT CONFIGURATION
# -----------------------------------------------------------------------------
# HuggingFace repo to upload the trained LoRA adapter
HF_REPO=prolewiki/marxist-grpo-lora
# -----------------------------------------------------------------------------
# PATHS (container internal - usually don't change)
# -----------------------------------------------------------------------------
# Path to training dataset (JSONL)
DATASET_PATH=/workspace/dataset.jsonl

# Directory for training checkpoints
CHECKPOINT_DIR=/workspace/checkpoints

# Directory for final LoRA output
LORA_OUTPUT=/workspace/lora-output

# Directory for training outputs
OUTPUT_DIR=/workspace/outputs
# -----------------------------------------------------------------------------
# OPTIONAL: RUNPOD AUTO-TERMINATION
# -----------------------------------------------------------------------------
# Set this to enable automatic pod termination after training.
# This prevents "zombie pods" from racking up bills.
# Value is automatically set by RunPod, or can be set manually.
# RUNPOD_POD_ID=

# -----------------------------------------------------------------------------
# OPTIONAL: REMOTE DATASET
# -----------------------------------------------------------------------------
# If dataset is not embedded in the image, set this URL to download it.
# DATASET_URL=https://my-bucket.s3.amazonaws.com/grpo_dataset.jsonl