# =============================================================================
# Marxist-GRPO Training Environment Variables
# =============================================================================
# Copy this file to .env and fill in your values.
# These can be passed to docker run with --env-file or individually with -e.
#
# Usage:
#   docker run --gpus all --env-file docker/.env marxist-grpo:latest
#
# Or with runpodctl:
#   runpodctl create pod \
#     --imageName myregistry/marxist-grpo:latest \
#     --env HF_TOKEN=$HF_TOKEN \
#     --env WANDB_API_KEY=$WANDB_API_KEY \
#     --env HF_REPO=my-org/my-model
# =============================================================================

# -----------------------------------------------------------------------------
# REQUIRED SECRETS (must be set)
# -----------------------------------------------------------------------------

# HuggingFace API token (for model upload)
# Get yours at: https://huggingface.co/settings/tokens
HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

# Weights & Biases API key (for training monitoring)
# Get yours at: https://wandb.ai/authorize
WANDB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

# -----------------------------------------------------------------------------
# MODEL CONFIGURATION
# -----------------------------------------------------------------------------

# Base model to fine-tune
MODEL_NAME=unsloth/DeepSeek-R1-0528-Qwen3-8B

# Maximum sequence length for tokenizer
MAX_SEQ_LENGTH=2048

# LoRA adapter rank (higher = more expressive, more params)
LORA_RANK=32

# -----------------------------------------------------------------------------
# TRAINING HYPERPARAMETERS
# -----------------------------------------------------------------------------

# Total training steps
MAX_STEPS=500

# Save checkpoint every N steps
SAVE_STEPS=50

# Learning rate
LEARNING_RATE=5e-6

# Warmup ratio (fraction of steps for LR warmup)
WARMUP_RATIO=0.1

# Per-device batch size
BATCH_SIZE=2

# Gradient accumulation steps (effective batch = BATCH_SIZE * GRADIENT_ACCUMULATION)
GRADIENT_ACCUMULATION=2

# Number of generations per prompt during GRPO
NUM_GENERATIONS=4

# Fraction of GPU memory to allocate
GPU_MEMORY_UTILIZATION=0.6

# Maximum prompt length (tokens)
MAX_PROMPT_LENGTH=512

# Maximum completion length (tokens)
MAX_COMPLETION_LENGTH=1500

# -----------------------------------------------------------------------------
# REWARD CONFIGURATION
# -----------------------------------------------------------------------------

# Reward mode: FULL (recommended), ROBUST, or LEGACY
#   FULL:   NLI + self-consistency + structure + topic relevance + depth
#   ROBUST: NLI + self-consistency + structure
#   LEGACY: Semantic similarity + terminology (faster but vulnerable to word soup)
REWARD_MODE=FULL

# -----------------------------------------------------------------------------
# OUTPUT CONFIGURATION
# -----------------------------------------------------------------------------

# HuggingFace repo to upload the trained LoRA adapter
HF_REPO=prolewiki/marxist-grpo-lora

# -----------------------------------------------------------------------------
# PATHS (container internal - usually don't change)
# -----------------------------------------------------------------------------

# Path to training dataset (JSONL)
DATASET_PATH=/workspace/dataset.jsonl

# Directory for training checkpoints
CHECKPOINT_DIR=/workspace/checkpoints

# Directory for final LoRA output
LORA_OUTPUT=/workspace/lora-output

# Directory for training outputs
OUTPUT_DIR=/workspace/outputs

# -----------------------------------------------------------------------------
# OPTIONAL: RUNPOD AUTO-TERMINATION
# -----------------------------------------------------------------------------

# Set this to enable automatic pod termination after training
# This prevents "zombie pods" from racking up bills
# Value is automatically set by RunPod, or can be set manually
# RUNPOD_POD_ID=

# -----------------------------------------------------------------------------
# OPTIONAL: REMOTE DATASET
# -----------------------------------------------------------------------------

# If dataset is not embedded in the image, set this URL to download it
# DATASET_URL=https://my-bucket.s3.amazonaws.com/grpo_dataset.jsonl