#!/usr/bin/env bash
# Launcher for ms-swift fine-tuning jobs (Qwen2.5-Omni-7B).
# The active command is below; earlier LoRA / multi-GPU pretrain variants
# are kept commented out for reference.
# Full-parameter chain-of-thought SFT of Qwen2.5-Omni-7B on a single GPU,
# resuming from an earlier full-SFT checkpoint (checkpoint-608).
# NOTE(review): original had `qwen2_5_omni\` with no space before the
# continuation backslash; with flags at column 0 that fused into the token
# `qwen2_5_omni--train_type`, breaking both --model_type and --train_type.
CUDA_VISIBLE_DEVICES=0 swift sft \
  --model /root/autodl-tmp/output_7B_FULL_4JOB/v2-20250621-170947/checkpoint-608 \
  --dataset ./dataset_newCOTSFT1_filtered_90.0s_resampled_16000.jsonl \
  --model_type qwen2_5_omni \
  --train_type full \
  --output_dir /root/autodl-tmp/output_7B_FULL_cotSFT \
  --torch_dtype bfloat16 \
  --learning_rate 1e-4 \
  --num_train_epochs 2 \
  --freeze_vit false \
  --freeze_aligner false \
  --per_device_train_batch_size 1 \
  --per_device_eval_batch_size 1
# ... (remaining options truncated in the original)
#CUDA_VISIBLE_DEVICES=0 swift sft \
# --model /root/autodl-tmp/Qwen2.5-Omni-7B \
# --dataset /root/ms-swift/dataset_cotSFT.json \
# --model_type qwen2_5_omni \
# --train_type lora \
# --output_dir /root/autodl-tmp/output_7B_Lora_cotSFT \
# --torch_dtype bfloat16 \
# --learning_rate 1e-4 \
# --lora_rank 8 \
# --lora_alpha 32 \
# --target_modules all-linear \
# --num_train_epochs 3 \
# --freeze_vit false \
# --freeze_aligner false \
# --per_device_train_batch_size 3 \
# --per_device_eval_batch_size 1 \
# ...
# # 8*A100
# NPROC_PER_NODE=8 \
# CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 \
# swift pt \
# --model Qwen/Qwen2.5-7B \
# --dataset swift/chinese-c4 \
# --streaming true \
# --train_type full \
# --deepspeed zero2 \
# --output_dir output \
# --max_steps 10000 \
# ...
# --lora_rank 8 \
# --lora_alpha 32 \
# --target_modules all-linear \
# --gradient_accumulation_steps 16 \
# --eval_steps 50 \
# --save_steps 50 \
# --save_total_limit 2 \
# --logging_steps 5 \
# --max_length 2048 \
# --output_dir output \  # NOTE(review): duplicates --output_dir given earlier in this command
# --system 'You are a helpful assistant.' \
# --warmup_ratio 0.05 \
# --dataloader_num_workers 4 \
# --model_author swift \
# --model_name swift-robot