# llm-training/docker/.env.example
# (uploaded by percyraskova via huggingface_hub, revision 81b3473 verified)
# =============================================================================
# Marxist-GRPO Training Environment Variables
# =============================================================================
# Copy this file to .env and fill in your values.
# These can be passed to docker run with --env-file or individually with -e.
#
# Usage:
# docker run --gpus all --env-file docker/.env marxist-grpo:latest
#
# Or with runpodctl:
# runpodctl create pod \
# --imageName myregistry/marxist-grpo:latest \
# --env HF_TOKEN=$HF_TOKEN \
# --env WANDB_API_KEY=$WANDB_API_KEY \
# --env HF_REPO=my-org/my-model
# =============================================================================
# -----------------------------------------------------------------------------
# REQUIRED SECRETS (must be set)
# -----------------------------------------------------------------------------
# HuggingFace API token (for model upload)
# Get yours at: https://huggingface.co/settings/tokens
HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# Weights & Biases API key (for training monitoring)
# Get yours at: https://wandb.ai/authorize
WANDB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# -----------------------------------------------------------------------------
# MODEL CONFIGURATION
# -----------------------------------------------------------------------------
# Base model to fine-tune
MODEL_NAME=unsloth/DeepSeek-R1-0528-Qwen3-8B
# Maximum sequence length for tokenizer
MAX_SEQ_LENGTH=2048
# LoRA adapter rank (higher = more expressive, more params)
LORA_RANK=32
# -----------------------------------------------------------------------------
# TRAINING HYPERPARAMETERS
# -----------------------------------------------------------------------------
# Total training steps
MAX_STEPS=500
# Save checkpoint every N steps
SAVE_STEPS=50
# Learning rate
LEARNING_RATE=5e-6
# Warmup ratio (fraction of steps for LR warmup)
WARMUP_RATIO=0.1
# Per-device batch size
BATCH_SIZE=2
# Gradient accumulation steps (effective batch = BATCH_SIZE * GRADIENT_ACCUMULATION)
GRADIENT_ACCUMULATION=2
# Number of generations per prompt during GRPO
NUM_GENERATIONS=4
# Fraction of GPU memory to allocate
GPU_MEMORY_UTILIZATION=0.6
# Maximum prompt length (tokens)
MAX_PROMPT_LENGTH=512
# Maximum completion length (tokens)
MAX_COMPLETION_LENGTH=1500
# -----------------------------------------------------------------------------
# REWARD CONFIGURATION
# -----------------------------------------------------------------------------
# Reward mode: FULL (recommended), ROBUST, or LEGACY
# FULL: NLI + self-consistency + structure + topic relevance + depth
# ROBUST: NLI + self-consistency + structure
# LEGACY: Semantic similarity + terminology (faster but vulnerable to word soup)
REWARD_MODE=FULL
# -----------------------------------------------------------------------------
# OUTPUT CONFIGURATION
# -----------------------------------------------------------------------------
# HuggingFace repo to upload the trained LoRA adapter
HF_REPO=prolewiki/marxist-grpo-lora
# -----------------------------------------------------------------------------
# PATHS (container internal - usually don't change)
# -----------------------------------------------------------------------------
# Path to training dataset (JSONL)
DATASET_PATH=/workspace/dataset.jsonl
# Directory for training checkpoints
CHECKPOINT_DIR=/workspace/checkpoints
# Directory for final LoRA output
LORA_OUTPUT=/workspace/lora-output
# Directory for training outputs
OUTPUT_DIR=/workspace/outputs
# -----------------------------------------------------------------------------
# OPTIONAL: RUNPOD AUTO-TERMINATION
# -----------------------------------------------------------------------------
# Set this to enable automatic pod termination after training
# This prevents "zombie pods" from racking up bills
# Value is automatically set by RunPod, or can be set manually
# RUNPOD_POD_ID=
# -----------------------------------------------------------------------------
# OPTIONAL: REMOTE DATASET
# -----------------------------------------------------------------------------
# If dataset is not embedded in the image, set this URL to download it
# DATASET_URL=https://my-bucket.s3.amazonaws.com/grpo_dataset.jsonl