Vedisasi
/

UltraThinking-LLM-Training

Model card Files Files and versions

UltraThinking-LLM-Training / .env.example

Vedisasi's picture

Upload folder using huggingface_hub

54c5666 verified 6 months ago

history blame contribute delete

1.8 kB

	# ULTRATHINK Environment Configuration
	# Copy this file to .env and customize for your environment

	# ===== Training Directories =====
	ULTRATHINK_OUTPUT_DIR=./outputs
	ULTRATHINK_CACHE_DIR=./cache
	ULTRATHINK_DATA_DIR=./data
	ULTRATHINK_CHECKPOINT_DIR=./checkpoints

	# ===== Distributed Training =====
	# Set these when using torchrun or accelerate
	MASTER_ADDR=localhost
	MASTER_PORT=29500
	WORLD_SIZE=1
	RANK=0
	LOCAL_RANK=0


	# ===== Logging =====
	# TensorBoard log directory
	TENSORBOARD_LOG_DIR=./runs

	# ===== GPU Settings =====
	# Specify which GPUs to use (comma-separated)
	CUDA_VISIBLE_DEVICES=0,1,2,3

	# CUDA memory allocation
	PYTORCH_CUDA_ALLOC_CONF=expandable_segments:True

	# ===== Performance Settings =====
	# Number of OpenMP threads per process for CPU operations (not the DataLoader worker count)
	OMP_NUM_THREADS=8

	# Tokenizers parallelism (set to false if you see warnings)
	TOKENIZERS_PARALLELISM=false

	# Disable TorchDynamo / torch.compile (1 = disabled; set to 0 or remove this line to enable)
	TORCHDYNAMO_DISABLE=1

	# ===== Hugging Face =====
	# Hugging Face Hub token for private models/datasets
	HF_TOKEN=your_huggingface_token_here
	HF_HOME=./hf_cache

	# ===== DeepSpeed =====
	# DeepSpeed configuration
	DEEPSPEED_CONFIG=./deepspeed_config_zero2.json

	# ===== Security =====
	# Maximum file upload size (MB)
	MAX_FILE_SIZE_MB=1000

	# Allowed checkpoint directories (colon-separated)
	ALLOWED_CHECKPOINT_DIRS=./checkpoints:./outputs

	# ===== Development =====
	# Logging level (one of: DEBUG, INFO, WARNING, ERROR)
	LOG_LEVEL=INFO

	# Debug mode
	DEBUG=false

	# ===== Model Defaults =====
	# Default model configuration
	DEFAULT_MODEL_SIZE=small
	DEFAULT_BATCH_SIZE=8
	DEFAULT_SEQ_LENGTH=512
	DEFAULT_LEARNING_RATE=3e-4

	# ===== Dataset Settings =====
	# Default dataset
	DEFAULT_DATASET=wikitext
	DATASET_CACHE_DIR=./dataset_cache

	# Streaming settings
	DATASET_STREAMING=false
	DATASET_NUM_PROC=4