File size: 706 Bytes
cb2428f |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 |
# LoRA fine-tuning run with step-based evaluation during training.
#
# Runs `swift sft` on Qwen/Qwen2.5-0.5B-Instruct using the dataset spec
# "AI-ModelScope/alpaca-gpt4-data-zh#100" (the "#100" suffix presumably
# samples 100 rows -- confirm against the swift dataset documentation).
# CUDA_VISIBLE_DEVICES=0 is a prefix assignment, so it applies to this one
# command only and is not exported to anything that follows.
#
# Flags, in the order passed:
#   model / data : --model, --train_type, --dataset, --torch_dtype
#   training     : 1 epoch, per-device batch size 1, learning rate 1e-4,
#                  LoRA rank 8 / alpha 32 on all-linear target modules,
#                  16 gradient-accumulation steps
#   bookkeeping  : --save_steps 50, --save_total_limit 5, --logging_steps 5,
#                  --max_length 2048
#   evaluation   : --eval_strategy steps with --eval_steps 5 and eval batch
#                  size 5; --eval_use_evalscope with gsm8k, few_shot_num 0,
#                  and --eval_limit 10 (presumably a cap on evaluated
#                  samples -- verify against the swift docs)
#
# NOTE: comments cannot be placed between the backslash-continued lines
# below; a comment line there would terminate the command early.
CUDA_VISIBLE_DEVICES=0 swift sft \
  --model "Qwen/Qwen2.5-0.5B-Instruct" \
  --train_type lora \
  --dataset "AI-ModelScope/alpaca-gpt4-data-zh#100" \
  --torch_dtype bfloat16 \
  --num_train_epochs 1 \
  --per_device_train_batch_size 1 \
  --learning_rate 1e-4 \
  --lora_rank 8 \
  --lora_alpha 32 \
  --target_modules all-linear \
  --gradient_accumulation_steps 16 \
  --save_steps 50 \
  --save_total_limit 5 \
  --logging_steps 5 \
  --max_length 2048 \
  --eval_strategy steps \
  --eval_steps 5 \
  --per_device_eval_batch_size 5 \
  --eval_use_evalscope \
  --eval_datasets gsm8k \
  --eval_datasets_args '{"gsm8k": {"few_shot_num": 0}}' \
  --eval_limit 10
|