# =============================================================================
# Marxist-GRPO Training Environment Variables
# =============================================================================
# Copy this file to .env and fill in your values.
# These can be passed to docker run with --env-file or individually with -e.
#
# Usage:
#   docker run --gpus all --env-file docker/.env marxist-grpo:latest
#
# Or with runpodctl:
#   runpodctl create pod \
#     --imageName myregistry/marxist-grpo:latest \
#     --env HF_TOKEN=$HF_TOKEN \
#     --env WANDB_API_KEY=$WANDB_API_KEY \
#     --env HF_REPO=my-org/my-model
# =============================================================================
# -----------------------------------------------------------------------------
# REQUIRED SECRETS (must be set)
# -----------------------------------------------------------------------------
# HuggingFace API token (for model upload)
# Get yours at: https://huggingface.co/settings/tokens
HF_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx

# Weights & Biases API key (for training monitoring)
# Get yours at: https://wandb.ai/authorize
WANDB_API_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
# -----------------------------------------------------------------------------
# MODEL CONFIGURATION
# -----------------------------------------------------------------------------
# Base model to fine-tune
MODEL_NAME=unsloth/DeepSeek-R1-0528-Qwen3-8B

# Maximum sequence length for tokenizer
MAX_SEQ_LENGTH=2048

# LoRA adapter rank (higher = more expressive, more params)
LORA_RANK=32
# -----------------------------------------------------------------------------
# TRAINING HYPERPARAMETERS
# -----------------------------------------------------------------------------
# Total training steps
MAX_STEPS=500

# Save checkpoint every N steps
SAVE_STEPS=50

# Learning rate
LEARNING_RATE=5e-6

# Warmup ratio (fraction of steps for LR warmup)
WARMUP_RATIO=0.1

# Per-device batch size
BATCH_SIZE=2

# Gradient accumulation steps (effective batch = BATCH_SIZE * GRADIENT_ACCUMULATION)
GRADIENT_ACCUMULATION=2

# Number of generations per prompt during GRPO
NUM_GENERATIONS=4

# Fraction of GPU memory to allocate
GPU_MEMORY_UTILIZATION=0.6

# Maximum prompt length (tokens)
MAX_PROMPT_LENGTH=512

# Maximum completion length (tokens)
MAX_COMPLETION_LENGTH=1500
# -----------------------------------------------------------------------------
# REWARD CONFIGURATION
# -----------------------------------------------------------------------------
# Reward mode: FULL (recommended), ROBUST, or LEGACY
#   FULL:   NLI + self-consistency + structure + topic relevance + depth
#   ROBUST: NLI + self-consistency + structure
#   LEGACY: Semantic similarity + terminology (faster but vulnerable to word soup)
REWARD_MODE=FULL
# -----------------------------------------------------------------------------
# OUTPUT CONFIGURATION
# -----------------------------------------------------------------------------
# HuggingFace repo to upload the trained LoRA adapter
HF_REPO=prolewiki/marxist-grpo-lora
# -----------------------------------------------------------------------------
# PATHS (container internal - usually don't change)
# -----------------------------------------------------------------------------
# Path to training dataset (JSONL)
DATASET_PATH=/workspace/dataset.jsonl

# Directory for training checkpoints
CHECKPOINT_DIR=/workspace/checkpoints

# Directory for final LoRA output
LORA_OUTPUT=/workspace/lora-output

# Directory for training outputs
OUTPUT_DIR=/workspace/outputs
# -----------------------------------------------------------------------------
# OPTIONAL: RUNPOD AUTO-TERMINATION
# -----------------------------------------------------------------------------
# Set this to enable automatic pod termination after training.
# This prevents "zombie pods" from racking up bills.
# Value is automatically set by RunPod, or can be set manually.
# RUNPOD_POD_ID=

# -----------------------------------------------------------------------------
# OPTIONAL: REMOTE DATASET
# -----------------------------------------------------------------------------
# If dataset is not embedded in the image, set this URL to download it.
# DATASET_URL=https://my-bucket.s3.amazonaws.com/grpo_dataset.jsonl