# Atlas A2: 2 nodes * 8 cards per node
ASCEND_RT_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
NNODES=2 \
NODE_RANK=1 \
MASTER_ADDR=xxx.xxx.xxx.xxx \
MASTER_PORT=29500 \
NPROC_PER_NODE=8 \
HCCL_SOCKET_IFNAME=xxx \
megatron sft \
    --model 'Qwen/Qwen3-8B' \
    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#1000' \
    --save './SAVE' \
    --tuner_type 'lora' \
    --lora_rank 8 \
    --lora_alpha 32 \
    --target_modules 'all-linear' \
    --tensor_model_parallel_size 2 \
    --pipeline_model_parallel_size 1 \
    --context_parallel_size 1 \
    --sequence_parallel true \
    --micro_batch_size 1 \
    --global_batch_size 64 \
    --recompute_granularity selective \
    --recompute_modules core_attn \
    --cross_entropy_loss_fusion true \
    --no_gradient_accumulation_fusion true \
    --lr 1e-4 \
    --lr_warmup_fraction 0.05 \
    --min_lr 1e-5 \
    --max_epochs 1 \
    --log_interval 5 \
    --num_workers 4
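
Launch the same command on both nodes, changing only `NODE_RANK` (0 on the node whose IP is `MASTER_ADDR`, 1 on the other, as shown here); fill in `MASTER_ADDR`, `MASTER_PORT`, and `HCCL_SOCKET_IFNAME` with the master node's IP, a free port, and the network interface used for HCCL communication.

For orientation, the data-parallel degree and gradient-accumulation steps implied by this configuration follow the standard Megatron-LM relations. The sketch below is illustrative only; its variable names are not flags of the command above.

```python
# Derived parallelism figures for the 2-node * 8-card example above,
# assuming the standard Megatron-LM relations (illustrative sketch).
world_size = 2 * 8        # NNODES * NPROC_PER_NODE
tp, pp, cp = 2, 1, 1      # tensor / pipeline / context parallel sizes
micro_batch_size = 1
global_batch_size = 64

# Data-parallel replicas = total ranks / (TP * PP * CP)
data_parallel_size = world_size // (tp * pp * cp)                                # 16 // 2 = 8
# Micro-batches accumulated per optimizer step
grad_accum_steps = global_batch_size // (micro_batch_size * data_parallel_size)  # 64 // 8 = 8

print(data_parallel_size, grad_accum_steps)  # -> 8 8
```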