# =============================================================================
# Marxist-GRPO Training Environment Variables
# =============================================================================
# Copy this file to .env and fill in your values.
# These can be passed to docker run with --env-file or individually with -e.
#
# Usage:
#   docker run --gpus all --env-file docker/.env marxist-grpo:latest
#
# Or with runpodctl:
#   runpodctl create pod \
#     --imageName myregistry/marxist-grpo:latest \
#     --env HF_TOKEN=$HF_TOKEN \
#     --env WANDB_API_KEY=$WANDB_API_KEY \
#     --env HF_REPO=my-org/my-model
# =============================================================================

# -----------------------------------------------------------------------------
# REQUIRED SECRETS (must be set)
# -----------------------------------------------------------------------------

# HuggingFace API token (for model upload)
# Get yours at: https://huggingface.co/settings/tokens
HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

# Weights & Biases API key (for training monitoring)
# Get yours at: https://wandb.ai/authorize
WANDB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

# -----------------------------------------------------------------------------
# MODEL CONFIGURATION
# -----------------------------------------------------------------------------

# Base model to fine-tune
MODEL_NAME=unsloth/DeepSeek-R1-0528-Qwen3-8B

# Maximum sequence length for tokenizer
MAX_SEQ_LENGTH=2048

# LoRA adapter rank (higher = more expressive, more params)
LORA_RANK=32

# -----------------------------------------------------------------------------
# TRAINING HYPERPARAMETERS
# -----------------------------------------------------------------------------

# Total training steps
MAX_STEPS=500

# Save checkpoint every N steps
SAVE_STEPS=50

# Learning rate
LEARNING_RATE=5e-6

# Warmup ratio (fraction of steps for LR warmup)
WARMUP_RATIO=0.1

# Per-device batch size
BATCH_SIZE=2

# Gradient accumulation steps (effective batch = BATCH_SIZE * GRADIENT_ACCUMULATION)
GRADIENT_ACCUMULATION=2

# Number of generations per prompt during GRPO
NUM_GENERATIONS=4

# Fraction of GPU memory to allocate
GPU_MEMORY_UTILIZATION=0.6

# Maximum prompt length (tokens)
MAX_PROMPT_LENGTH=512

# Maximum completion length (tokens)
MAX_COMPLETION_LENGTH=1500

# -----------------------------------------------------------------------------
# REWARD CONFIGURATION
# -----------------------------------------------------------------------------

# Reward mode: FULL (recommended), ROBUST, or LEGACY
#   FULL:   NLI + self-consistency + structure + topic relevance + depth
#   ROBUST: NLI + self-consistency + structure
#   LEGACY: Semantic similarity + terminology (faster but vulnerable to word soup)
REWARD_MODE=FULL

# -----------------------------------------------------------------------------
# OUTPUT CONFIGURATION
# -----------------------------------------------------------------------------

# HuggingFace repo to upload the trained LoRA adapter
HF_REPO=prolewiki/marxist-grpo-lora

# -----------------------------------------------------------------------------
# PATHS (container internal - usually don't change)
# -----------------------------------------------------------------------------

# Path to training dataset (JSONL)
DATASET_PATH=/workspace/dataset.jsonl

# Directory for training checkpoints
CHECKPOINT_DIR=/workspace/checkpoints

# Directory for final LoRA output
LORA_OUTPUT=/workspace/lora-output

# Directory for training outputs
OUTPUT_DIR=/workspace/outputs

# -----------------------------------------------------------------------------
# OPTIONAL: RUNPOD AUTO-TERMINATION
# -----------------------------------------------------------------------------

# Set this to enable automatic pod termination after training
# This prevents "zombie pods" from racking up bills
# Value is automatically set by RunPod, or can be set manually
# RUNPOD_POD_ID=

# -----------------------------------------------------------------------------
# OPTIONAL: REMOTE DATASET
# -----------------------------------------------------------------------------

# If dataset is not embedded in the image, set this URL to download it
# DATASET_URL=https://my-bucket.s3.amazonaws.com/grpo_dataset.jsonl