Upload ms-swift/sft.sh with huggingface_hub
Browse files- ms-swift/sft.sh +29 -0
ms-swift/sft.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
#!/usr/bin/env bash
# LoRA supervised fine-tuning of a bio-domain Qwen2.5-7B-Instruct checkpoint
# with ms-swift (`swift sft`) on the DeepLoc-multi localization dataset.
# Requires ~22GB GPU memory per device; uses all 8 GPUs.
set -euo pipefail

CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
swift sft \
  --model /oss/wangyujia/BIO/pretrain_output/qwen2.5-7b-instruct-bio/bio_all/save1epoch/checkpoint-1300 \
  --train_type lora \
  --dataset /oss/wangyujia/pretrain-bench/locate/deeplocmulti/train.jsonl \
  --torch_dtype bfloat16 \
  --num_train_epochs 2 \
  --per_device_train_batch_size 1 \
  --per_device_eval_batch_size 1 \
  --learning_rate 1e-4 \
  --lora_rank 8 \
  --lora_alpha 32 \
  --target_modules all-linear \
  --gradient_accumulation_steps 16 \
  --eval_steps 5 \
  --save_steps 5 \
  --save_total_limit 5 \
  --logging_steps 5 \
  --max_length 8192 \
  --output_dir /oss/wangyujia/BIO/sft/qwen2.5-7b-bio-deeplocmulti \
  --system 'You are a helpful assistant.' \
  --warmup_ratio 0.05 \
  --dataloader_num_workers 8 \
  --model_author swift \
  --model_name qwen_bio_sft_deeplocmulti \
  --save_only_model true

# Alternative dataset, kept for reference:
# --dataset /fs-computility/ai-shen/wangyujia/pretrain-bench/data/SciKnowEval_bio_swift_new_.jsonl \
|