BechusRantus
/

injected_thinking

Model card Files Files and versions

injected_thinking / third_party /ms-swift /examples /megatron /export /full.sh

BechusRantus's picture

Upload folder using huggingface_hub

7134ce7 verified 3 months ago

history blame contribute delete

736 Bytes

	# Step: safetensors -> torch_dist
	#   --model  source model (Qwen/Qwen3-30B-A3B-Instruct-2507)
	#   --save   output location for the converted checkpoint (relative path)
	# GPUs 0-3 are made visible and NPROC_PER_NODE is set to 4 to match.
	# Tensor, expert and pipeline model-parallel sizes are each set to 2.
	# NOTE(review): --test_convert_precision presumably runs a precision check on
	# the converted weights -- confirm against the ms-swift documentation.
	NPROC_PER_NODE=4 CUDA_VISIBLE_DEVICES=0,1,2,3 \
	megatron export \
	--model Qwen/Qwen3-30B-A3B-Instruct-2507 --save Qwen3-30B-A3B-Instruct-2507-mcore \
	--to_mcore true \
	--tensor_model_parallel_size 2 --expert_model_parallel_size 2 --pipeline_model_parallel_size 2 \
	--test_convert_precision true

	# Step: torch_dist -> safetensors
	#   --load   existing checkpoint to read (Qwen3-30B-A3B-Instruct-2507-mcore)
	#   --save   output location for the exported checkpoint (relative path)
	# Runs with GPUs 0-3 visible and NPROC_PER_NODE=4; the tensor, expert and
	# pipeline model-parallel sizes are all 2.
	# NOTE(review): --test_convert_precision presumably runs a precision check on
	# the exported weights -- confirm against the ms-swift documentation.
	NPROC_PER_NODE=4 CUDA_VISIBLE_DEVICES=0,1,2,3 \
	megatron export \
	--load Qwen3-30B-A3B-Instruct-2507-mcore \
	--save Qwen3-30B-A3B-Instruct-2507-hf \
	--to_hf true \
	--tensor_model_parallel_size 2 --expert_model_parallel_size 2 --pipeline_model_parallel_size 2 \
	--test_convert_precision true