# Env: 4 * A100
# ZeRO-3 (DeepSpeed) baseline for comparison:
# https://github.com/modelscope/ms-swift/blob/main/examples/train/long_text/zero3.sh
# Max length: 32K
# GPU memory: 4 * 50GB; training speed: 23s/it
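# One-time preparation (a sketch, assuming the base model Qwen/Qwen2.5-7B):
# convert the HF weights into the Megatron (mcore) format that --load expects
# below, producing the Qwen2.5-7B-mcore directory. Uncomment and run once.
# CUDA_VISIBLE_DEVICES=0 \
# swift export \
#     --model Qwen/Qwen2.5-7B \
#     --to_mcore true \
#     --torch_dtype bfloat16 \
#     --output_dir Qwen2.5-7B-mcore
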
NPROC_PER_NODE=4 \
CUDA_VISIBLE_DEVICES=0,1,2,3 \
megatron sft \
    --load Qwen2.5-7B-mcore \
    --dataset 'ZhipuAI/LongWriter-6k' \
    --tensor_model_parallel_size 4 \
    --micro_batch_size 1 \
    --global_batch_size 8 \
    --packing true \
    --recompute_granularity full \
    --recompute_method uniform \
    --recompute_num_layers 1 \
    --train_iters 1000 \
    --eval_iters 50 \
    --finetune true \
    --cross_entropy_loss_fusion true \
    --lr 1e-5 \
    --lr_warmup_iters 100 \
    --min_lr 1e-6 \
    --save megatron_output/Qwen2.5-7B \
    --eval_interval 200 \
    --save_interval 200 \
    --max_length 32768 \
    --num_workers 8 \
    --dataset_num_proc 8 \
    --no_save_optim true \
    --no_save_rng true \
    --sequence_parallel true \
    --use_flash_attn true
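
# After training (a sketch): convert the saved mcore checkpoint back to HF
# safetensors for inference or deployment. vx-xxx is a placeholder for the
# versioned checkpoint directory created under --save; adjust it to your run.
# CUDA_VISIBLE_DEVICES=0 \
# swift export \
#     --mcore_model megatron_output/Qwen2.5-7B/vx-xxx \
#     --to_hf true \
#     --torch_dtype bfloat16 \
#     --output_dir megatron_output/Qwen2.5-7B/vx-xxx-hf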