Student0809 committed on
Commit
356aced
·
verified ·
1 Parent(s): c3c781c

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. ms-swift/examples/train/megatron/multi-node/node1.sh +34 -0
  2. ms-swift/examples/train/multi-gpu/ddp/train.sh +30 -0
  3. ms-swift/examples/train/multi-node/deepspeed/host.txt +2 -0
  4. ms-swift/examples/train/multi-node/swift/train_node2.sh +30 -0
  5. ms-swift/examples/train/multi-node/torchrun/train_node2.sh +31 -0
  6. ms-swift/examples/train/multimodal/caption.sh +27 -0
  7. ms-swift/examples/train/multimodal/lora_llm_full_vit/infer.sh +8 -0
  8. ms-swift/examples/train/multimodal/lora_llm_full_vit/merge_lora.sh +3 -0
  9. ms-swift/examples/train/multimodal/omni/sft.sh +39 -0
  10. ms-swift/examples/train/multimodal/rlhf/kto.sh +32 -0
  11. ms-swift/examples/train/multimodal/video.sh +31 -0
  12. ms-swift/examples/train/packing/llm.sh +31 -0
  13. ms-swift/examples/train/packing/streaming.sh +34 -0
  14. ms-swift/examples/train/plugins/tuner_phi4_mm.sh +20 -0
  15. ms-swift/examples/train/predict_with_generate/train.sh +28 -0
  16. ms-swift/examples/train/qlora/gptq.sh +25 -0
  17. ms-swift/examples/train/rft/math.json +0 -0
  18. ms-swift/examples/train/rlhf/README.md +3 -0
  19. ms-swift/examples/train/rlhf/dpo/lora.sh +25 -0
  20. ms-swift/examples/train/rlhf/simpo.sh +26 -0
  21. ms-swift/examples/train/seq_cls/bert/deploy.sh +11 -0
  22. ms-swift/examples/train/seq_cls/bert/infer.sh +7 -0
  23. ms-swift/examples/train/seq_cls/qwen2_vl/sft.sh +28 -0
  24. ms-swift/examples/train/seq_cls/regression/deploy.sh +8 -0
  25. ms-swift/examples/train/seq_cls/regression/sft.sh +28 -0
  26. ms-swift/examples/train/think_model/qwen3_demo2.sh +29 -0
  27. ms-swift/examples/train/tuners/bone/train.sh +16 -0
  28. ms-swift/examples/train/tuners/lisa/train.sh +17 -0
  29. ms-swift/examples/train/tuners/longlora/train.sh +16 -0
  30. ms-swift/ms_swift.egg-info/SOURCES.txt +373 -0
  31. ms-swift/ms_swift.egg-info/entry_points.txt +3 -0
  32. ms-swift/ms_swift.egg-info/top_level.txt +2 -0
  33. ms-swift/requirements/framework.txt +38 -0
  34. ms-swift/requirements/tests.txt +6 -0
  35. ms-swift/swift/__pycache__/__init__.cpython-310.pyc +0 -0
  36. ms-swift/swift/cli/_megatron/main.py +20 -0
  37. ms-swift/swift/cli/infer.py +5 -0
  38. ms-swift/swift/cli/merge_lora.py +14 -0
  39. ms-swift/swift/cli/rlhf.py +5 -0
  40. ms-swift/swift/hub/constant.py +6 -0
  41. ms-swift/swift/hub/hub.py +451 -0
  42. ms-swift/swift/llm/__pycache__/utils.cpython-310.pyc +0 -0
  43. ms-swift/swift/llm/app/app.py +44 -0
  44. ms-swift/swift/llm/app/locale.py +23 -0
  45. ms-swift/swift/llm/argument/__init__.py +12 -0
  46. ms-swift/swift/llm/argument/__pycache__/__init__.cpython-310.pyc +0 -0
  47. ms-swift/swift/llm/argument/__pycache__/deploy_args.cpython-310.pyc +0 -0
  48. ms-swift/swift/llm/argument/__pycache__/infer_args.cpython-310.pyc +0 -0
  49. ms-swift/swift/llm/argument/__pycache__/merge_args.cpython-310.pyc +0 -0
  50. ms-swift/swift/llm/argument/base_args/__init__.py +3 -0
ms-swift/examples/train/megatron/multi-node/node1.sh ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # For more information on multi-node training launch methods, refer to:
2
+ # https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-node
3
+
4
+ CUDA_VISIBLE_DEVICES=0,1,2,3 \
5
+ NNODES=2 \
6
+ NODE_RANK=0 \
7
+ MASTER_ADDR=127.0.0.1 \
8
+ MASTER_PORT=29500 \
9
+ NPROC_PER_NODE=4 \
10
+ megatron sft \
11
+ --load Qwen2.5-14B-mcore \
12
+ --dataset 'liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT' \
13
+ --tensor_model_parallel_size 4 \
14
+ --micro_batch_size 1 \
15
+ --global_batch_size 16 \
16
+ --packing true \
17
+ --recompute_granularity selective \
18
+ --train_iters 2000 \
19
+ --eval_iters 50 \
20
+ --finetune true \
21
+ --cross_entropy_loss_fusion true \
22
+ --lr 1e-5 \
23
+ --lr_warmup_iters 100 \
24
+ --min_lr 1e-6 \
25
+ --save megatron_output/Qwen2.5-14B \
26
+ --eval_interval 200 \
27
+ --save_interval 200 \
28
+ --max_length 8192 \
29
+ --num_workers 8 \
30
+ --dataset_num_proc 8 \
31
+ --no_save_optim true \
32
+ --no_save_rng true \
33
+ --sequence_parallel true \
34
+ --use_flash_attn true
ms-swift/examples/train/multi-gpu/ddp/train.sh ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 27.5GiB * 2
2
+ nproc_per_node=2
3
+
4
+ CUDA_VISIBLE_DEVICES=0,1 \
5
+ NPROC_PER_NODE=$nproc_per_node \
6
+ swift sft \
7
+ --model Qwen/Qwen2.5-7B-Instruct \
8
+ --train_type lora \
9
+ --torch_dtype bfloat16 \
10
+ --dataset 'swift/self-cognition#1000' \
11
+ --num_train_epochs 1 \
12
+ --per_device_train_batch_size 1 \
13
+ --per_device_eval_batch_size 1 \
14
+ --learning_rate 1e-4 \
15
+ --lora_rank 8 \
16
+ --lora_alpha 32 \
17
+ --target_modules all-linear \
18
+ --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
19
+ --eval_steps 100 \
20
+ --save_steps 100 \
21
+ --save_total_limit 2 \
22
+ --logging_steps 5 \
23
+ --max_length 2048 \
24
+ --output_dir output \
25
+ --system 'You are a helpful assistant.' \
26
+ --warmup_ratio 0.05 \
27
+ --dataloader_num_workers 4 \
28
+ --model_author swift \
29
+ --model_name swift-robot \
30
+ --gradient_checkpointing_kwargs '{"use_reentrant": false}'
ms-swift/examples/train/multi-node/deepspeed/host.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ worker-0 slots=2
2
+ worker-1 slots=2
ms-swift/examples/train/multi-node/swift/train_node2.sh ADDED
@@ -0,0 +1,30 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nnodes=2
2
+ nproc_per_node=4
3
+
4
+ CUDA_VISIBLE_DEVICES=0,1,2,3 \
5
+ NNODES=$nnodes \
6
+ NODE_RANK=1 \
7
+ MASTER_ADDR=xxx.xxx.xxx.xxx \
8
+ MASTER_PORT=29500 \
9
+ NPROC_PER_NODE=$nproc_per_node \
10
+ swift sft \
11
+ --model Qwen/Qwen2.5-7B-Instruct \
12
+ --train_type full \
13
+ --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
14
+ 'AI-ModelScope/alpaca-gpt4-data-en#20000' \
15
+ --torch_dtype bfloat16 \
16
+ --num_train_epochs 1 \
17
+ --per_device_train_batch_size 1 \
18
+ --per_device_eval_batch_size 1 \
19
+ --learning_rate 1e-5 \
20
+ --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
21
+ --eval_steps 100 \
22
+ --save_steps 100 \
23
+ --save_total_limit 2 \
24
+ --logging_steps 5 \
25
+ --max_length 8192 \
26
+ --output_dir output \
27
+ --system 'You are a helpful assistant.' \
28
+ --warmup_ratio 0.05 \
29
+ --dataloader_num_workers 4 \
30
+ --deepspeed zero2
ms-swift/examples/train/multi-node/torchrun/train_node2.sh ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ nnodes=2
2
+ nproc_per_node=4
3
+
4
+ CUDA_VISIBLE_DEVICES=0,1,2,3 \
5
+ torchrun \
6
+ --master_port 29500 \
7
+ --nproc_per_node=$nproc_per_node \
8
+ --nnodes=$nnodes \
9
+ --node_rank=1 \
10
+ --master_addr=xxx.xxx.xxx.xxx \
11
+ swift/cli/sft.py \
12
+ --model Qwen/Qwen2.5-7B-Instruct \
13
+ --train_type full \
14
+ --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
15
+ 'AI-ModelScope/alpaca-gpt4-data-en#20000' \
16
+ --torch_dtype bfloat16 \
17
+ --num_train_epochs 1 \
18
+ --per_device_train_batch_size 1 \
19
+ --per_device_eval_batch_size 1 \
20
+ --learning_rate 1e-5 \
21
+ --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
22
+ --eval_steps 100 \
23
+ --save_steps 100 \
24
+ --save_total_limit 2 \
25
+ --logging_steps 5 \
26
+ --max_length 8192 \
27
+ --output_dir output \
28
+ --system 'You are a helpful assistant.' \
29
+ --warmup_ratio 0.05 \
30
+ --dataloader_num_workers 4 \
31
+ --deepspeed zero2
ms-swift/examples/train/multimodal/caption.sh ADDED
@@ -0,0 +1,27 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 22GiB
2
+ # You can refer to `https://github.com/QwenLM/Qwen2.5-VL` for the meaning of the `MAX_PIXELS` parameter.
3
+ # 1003520 = 1280 * 28 * 28
4
+ CUDA_VISIBLE_DEVICES=0 \
5
+ MAX_PIXELS=1003520 \
6
+ swift sft \
7
+ --model Qwen/Qwen2.5-VL-7B-Instruct \
8
+ --dataset 'modelscope/coco_2014_caption:validation#20000' \
9
+ --train_type lora \
10
+ --torch_dtype bfloat16 \
11
+ --num_train_epochs 1 \
12
+ --per_device_train_batch_size 1 \
13
+ --per_device_eval_batch_size 1 \
14
+ --learning_rate 1e-4 \
15
+ --lora_rank 8 \
16
+ --lora_alpha 32 \
17
+ --target_modules all-linear \
18
+ --freeze_vit true \
19
+ --gradient_accumulation_steps 16 \
20
+ --eval_steps 100 \
21
+ --save_steps 100 \
22
+ --save_total_limit 2 \
23
+ --logging_steps 5 \
24
+ --max_length 2048 \
25
+ --output_dir output \
26
+ --warmup_ratio 0.05 \
27
+ --dataloader_num_workers 4
ms-swift/examples/train/multimodal/lora_llm_full_vit/infer.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ # If the weights have been merged, please use `--model`.
2
+ CUDA_VISIBLE_DEVICES=0 \
3
+ swift infer \
4
+ --adapters output/vx-xxx/checkpoint-xxx \
5
+ --stream true \
6
+ --load_data_args true \
7
+ --temperature 0 \
8
+ --max_new_tokens 2048
ms-swift/examples/train/multimodal/lora_llm_full_vit/merge_lora.sh ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ swift export \
2
+ --adapters output/vx-xxx/checkpoint-xxx \
3
+ --merge_lora true
ms-swift/examples/train/multimodal/omni/sft.sh ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 4*35GB
2
+ # A demo for four modalities that can be run directly
3
+ pip uninstall transformers
4
+ pip install git+https://github.com/huggingface/transformers
5
+
6
+ nproc_per_node=4
7
+
8
+ CUDA_VISIBLE_DEVICES=0,1,2,3 \
9
+ NPROC_PER_NODE=$nproc_per_node \
10
+ VIDEO_MAX_PIXELS=50176 \
11
+ FPS_MAX_FRAMES=12 \
12
+ MAX_PIXELS=1003520 \
13
+ ENABLE_AUDIO_OUTPUT=0 \
14
+ swift sft \
15
+ --model Qwen/Qwen2.5-Omni-7B \
16
+ --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#2000' \
17
+ 'AI-ModelScope/LaTeX_OCR:human_handwrite#2000' \
18
+ 'speech_asr/speech_asr_aishell1_trainsets:validation#2000' \
19
+ 'swift/VideoChatGPT:all#2000' \
20
+ --train_type lora \
21
+ --torch_dtype bfloat16 \
22
+ --num_train_epochs 1 \
23
+ --per_device_train_batch_size 1 \
24
+ --per_device_eval_batch_size 1 \
25
+ --learning_rate 1e-4 \
26
+ --lora_rank 8 \
27
+ --lora_alpha 32 \
28
+ --target_modules all-linear \
29
+ --freeze_vit true \
30
+ --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
31
+ --eval_steps 50 \
32
+ --save_steps 50 \
33
+ --save_total_limit 2 \
34
+ --logging_steps 5 \
35
+ --max_length 2048 \
36
+ --output_dir output \
37
+ --warmup_ratio 0.05 \
38
+ --dataloader_num_workers 4 \
39
+ --deepspeed zero2
ms-swift/examples/train/multimodal/rlhf/kto.sh ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Due to the absence of a multi-modal open-source dataset for kto,
2
+ # we will use a pure text kto dataset as an example here.
3
+ nproc_per_node=2
4
+
5
+ CUDA_VISIBLE_DEVICES=0,1 \
6
+ NPROC_PER_NODE=$nproc_per_node \
7
+ MAX_PIXELS=1003520 \
8
+ swift rlhf \
9
+ --rlhf_type kto \
10
+ --model Qwen/Qwen2.5-VL-7B-Instruct \
11
+ --dataset 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#10000' \
12
+ --train_type lora \
13
+ --torch_dtype bfloat16 \
14
+ --num_train_epochs 1 \
15
+ --per_device_train_batch_size 1 \
16
+ --per_device_eval_batch_size 1 \
17
+ --learning_rate 1e-4 \
18
+ --lora_rank 8 \
19
+ --lora_alpha 32 \
20
+ --target_modules all-linear \
21
+ --freeze_vit true \
22
+ --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
23
+ --eval_steps 100 \
24
+ --save_steps 100 \
25
+ --save_total_limit 2 \
26
+ --deepspeed zero2 \
27
+ --logging_steps 5 \
28
+ --max_length 4096 \
29
+ --output_dir output \
30
+ --warmup_ratio 0.05 \
31
+ --dataloader_num_workers 4 \
32
+ --dataset_num_proc 4
ms-swift/examples/train/multimodal/video.sh ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 4*80GB
2
+ # You can refer to `https://github.com/QwenLM/Qwen2.5-VL` for the meaning of the `VIDEO_MAX_PIXELS` parameter.
3
+ nproc_per_node=4
4
+
5
+ CUDA_VISIBLE_DEVICES=0,1,2,3 \
6
+ NPROC_PER_NODE=$nproc_per_node \
7
+ VIDEO_MAX_PIXELS=50176 \
8
+ FPS_MAX_FRAMES=12 \
9
+ swift sft \
10
+ --model Qwen/QVQ-72B-Preview \
11
+ --dataset swift/VideoChatGPT:all \
12
+ --train_type lora \
13
+ --torch_dtype bfloat16 \
14
+ --num_train_epochs 1 \
15
+ --per_device_train_batch_size 1 \
16
+ --per_device_eval_batch_size 1 \
17
+ --learning_rate 1e-4 \
18
+ --lora_rank 8 \
19
+ --lora_alpha 32 \
20
+ --target_modules all-linear \
21
+ --freeze_vit true \
22
+ --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
23
+ --eval_steps 50 \
24
+ --save_steps 50 \
25
+ --save_total_limit 2 \
26
+ --logging_steps 5 \
27
+ --max_length 2048 \
28
+ --output_dir output \
29
+ --warmup_ratio 0.05 \
30
+ --dataloader_num_workers 4 \
31
+ --deepspeed zero3
ms-swift/examples/train/packing/llm.sh ADDED
@@ -0,0 +1,31 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 22GB
2
+ CUDA_VISIBLE_DEVICES=0 \
3
+ swift sft \
4
+ --model Qwen/Qwen2.5-7B-Instruct \
5
+ --train_type lora \
6
+ --packing true \
7
+ --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
8
+ 'AI-ModelScope/alpaca-gpt4-data-en#500' \
9
+ 'swift/self-cognition#500' \
10
+ --torch_dtype bfloat16 \
11
+ --num_train_epochs 3 \
12
+ --attn_impl flash_attn \
13
+ --per_device_train_batch_size 1 \
14
+ --per_device_eval_batch_size 1 \
15
+ --learning_rate 1e-4 \
16
+ --lora_rank 8 \
17
+ --lora_alpha 32 \
18
+ --target_modules all-linear \
19
+ --gradient_accumulation_steps 4 \
20
+ --eval_steps 50 \
21
+ --save_steps 50 \
22
+ --save_total_limit 2 \
23
+ --logging_steps 5 \
24
+ --max_length 2048 \
25
+ --output_dir output \
26
+ --system 'You are a helpful assistant.' \
27
+ --warmup_ratio 0.05 \
28
+ --dataloader_num_workers 4 \
29
+ --dataset_num_proc 4 \
30
+ --model_author swift \
31
+ --model_name swift-robot
ms-swift/examples/train/packing/streaming.sh ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 4 * 36GB
2
+ # A demo using the Hugging Face dataset
3
+ # The first model weights will be saved around step 70.
4
+ NPROC_PER_NODE=4 \
5
+ MAX_PIXELS=1003520 \
6
+ CUDA_VISIBLE_DEVICES=0,1,2,3 \
7
+ HF_ENDPOINT=https://hf-mirror.com \
8
+ swift sft \
9
+ --model Qwen/Qwen2.5-VL-7B-Instruct \
10
+ --train_type lora \
11
+ --dataset 'HF::linxy/LaTeX_OCR:full#20000' \
12
+ --torch_dtype bfloat16 \
13
+ --attn_impl flash_attn \
14
+ --streaming true \
15
+ --shuffle_buffer_size 1000 \
16
+ --packing true \
17
+ --save_strategy epoch \
18
+ --max_steps 1000 \
19
+ --max_epochs 5 \
20
+ --per_device_train_batch_size 1 \
21
+ --per_device_eval_batch_size 1 \
22
+ --learning_rate 1e-4 \
23
+ --lora_rank 8 \
24
+ --lora_alpha 32 \
25
+ --target_modules all-linear \
26
+ --gradient_accumulation_steps 1 \
27
+ --save_total_limit 2 \
28
+ --logging_steps 5 \
29
+ --max_length 8192 \
30
+ --output_dir output \
31
+ --warmup_ratio 0.05 \
32
+ --dataloader_num_workers 1 \
33
+ --dataset_num_proc 8 \
34
+ --deepspeed zero2
ms-swift/examples/train/plugins/tuner_phi4_mm.sh ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # `--train_type dummy`
2
+ CUDA_VISIBLE_DEVICES=0 \
3
+ swift sft \
4
+ --model LLM-Research/Phi-4-multimodal-instruct \
5
+ --dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \
6
+ --train_type dummy \
7
+ --torch_dtype bfloat16 \
8
+ --num_train_epochs 1 \
9
+ --per_device_train_batch_size 1 \
10
+ --per_device_eval_batch_size 1 \
11
+ --learning_rate 1e-4 \
12
+ --gradient_accumulation_steps 16 \
13
+ --eval_steps 200 \
14
+ --save_steps 200 \
15
+ --save_total_limit 2 \
16
+ --logging_steps 5 \
17
+ --max_length 2048 \
18
+ --output_dir output \
19
+ --warmup_ratio 0.05 \
20
+ --dataloader_num_workers 4
ms-swift/examples/train/predict_with_generate/train.sh ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 20GiB
2
+ CUDA_VISIBLE_DEVICES=0 \
3
+ MAX_PIXELS=1003520 \
4
+ swift sft \
5
+ --model Qwen/Qwen2.5-VL-7B-Instruct \
6
+ --dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \
7
+ --train_type lora \
8
+ --torch_dtype bfloat16 \
9
+ --num_train_epochs 1 \
10
+ --per_device_train_batch_size 1 \
11
+ --per_device_eval_batch_size 2 \
12
+ --learning_rate 1e-4 \
13
+ --lora_rank 8 \
14
+ --lora_alpha 32 \
15
+ --target_modules all-linear \
16
+ --freeze_vit true \
17
+ --gradient_accumulation_steps 16 \
18
+ --eval_steps 100 \
19
+ --save_steps 100 \
20
+ --save_total_limit 2 \
21
+ --logging_steps 5 \
22
+ --max_length 2048 \
23
+ --output_dir output \
24
+ --warmup_ratio 0.05 \
25
+ --dataloader_num_workers 4 \
26
+ --predict_with_generate true \
27
+ --metric_for_best_model rouge-l \
28
+ --greater_is_better true
ms-swift/examples/train/qlora/gptq.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 2 * 30GiB
2
+ CUDA_VISIBLE_DEVICES=0,1 \
3
+ MAX_PIXELS=1003520 \
4
+ swift sft \
5
+ --model Qwen/Qwen2.5-VL-72B-Instruct-GPTQ-Int4 \
6
+ --dataset 'modelscope/coco_2014_caption:validation#20000' \
7
+ --train_type lora \
8
+ --torch_dtype bfloat16 \
9
+ --num_train_epochs 1 \
10
+ --per_device_train_batch_size 1 \
11
+ --per_device_eval_batch_size 1 \
12
+ --learning_rate 1e-4 \
13
+ --lora_rank 8 \
14
+ --lora_alpha 32 \
15
+ --target_modules all-linear \
16
+ --freeze_vit true \
17
+ --gradient_accumulation_steps 16 \
18
+ --eval_steps 100 \
19
+ --save_steps 100 \
20
+ --save_total_limit 2 \
21
+ --logging_steps 5 \
22
+ --max_length 2048 \
23
+ --output_dir output \
24
+ --warmup_ratio 0.05 \
25
+ --dataloader_num_workers 4
ms-swift/examples/train/rft/math.json ADDED
The diff for this file is too large to render. See raw diff
 
ms-swift/examples/train/rlhf/README.md ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # TIPS
2
+
3
+ RLHF for multi-modal models is also supported! Check the multimodal folder for details.
ms-swift/examples/train/rlhf/dpo/lora.sh ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 24GiB
2
+ CUDA_VISIBLE_DEVICES=0 \
3
+ swift rlhf \
4
+ --rlhf_type dpo \
5
+ --model Qwen/Qwen2.5-7B-Instruct \
6
+ --train_type lora \
7
+ --dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
8
+ --torch_dtype bfloat16 \
9
+ --num_train_epochs 1 \
10
+ --per_device_train_batch_size 1 \
11
+ --per_device_eval_batch_size 1 \
12
+ --learning_rate 1e-4 \
13
+ --lora_rank 8 \
14
+ --lora_alpha 32 \
15
+ --target_modules all-linear \
16
+ --gradient_accumulation_steps 16 \
17
+ --eval_steps 100 \
18
+ --save_steps 100 \
19
+ --save_total_limit 2 \
20
+ --logging_steps 5 \
21
+ --max_length 2048 \
22
+ --output_dir output \
23
+ --warmup_ratio 0.05 \
24
+ --dataloader_num_workers 4 \
25
+ --dataset_num_proc 4
ms-swift/examples/train/rlhf/simpo.sh ADDED
@@ -0,0 +1,26 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 2*50GB
2
+ nproc_per_node=2
3
+
4
+ CUDA_VISIBLE_DEVICES=0,1 \
5
+ NPROC_PER_NODE=$nproc_per_node \
6
+ swift rlhf \
7
+ --rlhf_type simpo \
8
+ --model Qwen/Qwen2.5-3B-Instruct \
9
+ --train_type full \
10
+ --dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
11
+ --torch_dtype bfloat16 \
12
+ --num_train_epochs 1 \
13
+ --per_device_train_batch_size 1 \
14
+ --per_device_eval_batch_size 1 \
15
+ --learning_rate 1e-5 \
16
+ --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
17
+ --eval_steps 100 \
18
+ --save_steps 100 \
19
+ --save_total_limit 2 \
20
+ --logging_steps 5 \
21
+ --max_length 2048 \
22
+ --output_dir output \
23
+ --warmup_ratio 0.05 \
24
+ --dataloader_num_workers 4 \
25
+ --deepspeed zero2 \
26
+ --dataset_num_proc 4
ms-swift/examples/train/seq_cls/bert/deploy.sh ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDA_VISIBLE_DEVICES=0 \
2
+ swift deploy \
3
+ --adapters output/vx-xxx/checkpoint-xxx \
4
+ --served_model_name bert-base-chinese \
5
+ --truncation_strategy right \
6
+ --max_length 512
7
+
8
+ # curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
9
+ # "model": "bert-base-chinese",
10
+ # "messages": [{"role": "user", "content": "包装差,容易被调包。"}]
11
+ # }'
ms-swift/examples/train/seq_cls/bert/infer.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ CUDA_VISIBLE_DEVICES=0 \
2
+ swift infer \
3
+ --adapters output/vx-xxx/checkpoint-xxx \
4
+ --load_data_args true \
5
+ --max_batch_size 16 \
6
+ --truncation_strategy right \
7
+ --max_length 512
ms-swift/examples/train/seq_cls/qwen2_vl/sft.sh ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # If `num_labels` is provided, it will be considered a classification task.
2
+ # You can also specify `--model Qwen/Qwen2.5-VL-2B-Instruct --use_chat_template true`.
3
+ CUDA_VISIBLE_DEVICES=0 \
4
+ MAX_PIXELS=1003520 \
5
+ swift sft \
6
+ --model Qwen/Qwen2-VL-2B \
7
+ --train_type lora \
8
+ --dataset 'tany0699/garbage265#20000' \
9
+ --torch_dtype bfloat16 \
10
+ --num_train_epochs 1 \
11
+ --per_device_train_batch_size 1 \
12
+ --per_device_eval_batch_size 1 \
13
+ --learning_rate 1e-4 \
14
+ --lora_rank 8 \
15
+ --lora_alpha 32 \
16
+ --target_modules all-linear \
17
+ --gradient_accumulation_steps 16 \
18
+ --eval_steps 50 \
19
+ --save_steps 50 \
20
+ --save_total_limit 2 \
21
+ --logging_steps 5 \
22
+ --max_length 2048 \
23
+ --output_dir output \
24
+ --warmup_ratio 0.05 \
25
+ --dataloader_num_workers 4 \
26
+ --num_labels 265 \
27
+ --task_type seq_cls \
28
+ --use_chat_template false
ms-swift/examples/train/seq_cls/regression/deploy.sh ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ CUDA_VISIBLE_DEVICES=0 \
2
+ swift deploy \
3
+ --adapters output/vx-xxx/checkpoint-xxx
4
+
5
+ # curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
6
+ # "model": "Qwen2.5-0.5B",
7
+ # "messages": [{"role": "user", "content": "Task: Based on the given two sentences, provide a similarity score between 0.0 and 1.0.\nSentence 1: The animal is eating.\nSentence 2: A woman is dancing.\nSimilarity score: "}]
8
+ # }'
ms-swift/examples/train/seq_cls/regression/sft.sh ADDED
@@ -0,0 +1,28 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 2GB
2
+ CUDA_VISIBLE_DEVICES=0 \
3
+ swift sft \
4
+ --model Qwen/Qwen2.5-0.5B \
5
+ --train_type lora \
6
+ --dataset 'sentence-transformers/stsb:reg#20000' \
7
+ --torch_dtype bfloat16 \
8
+ --num_train_epochs 1 \
9
+ --per_device_train_batch_size 16 \
10
+ --per_device_eval_batch_size 16 \
11
+ --learning_rate 1e-4 \
12
+ --lora_rank 8 \
13
+ --lora_alpha 32 \
14
+ --target_modules all-linear \
15
+ --gradient_accumulation_steps 1 \
16
+ --eval_steps 100 \
17
+ --save_steps 100 \
18
+ --save_total_limit 2 \
19
+ --logging_steps 5 \
20
+ --max_length 2048 \
21
+ --output_dir output \
22
+ --warmup_ratio 0.05 \
23
+ --dataloader_num_workers 4 \
24
+ --dataset_num_proc 4 \
25
+ --num_labels 1 \
26
+ --task_type seq_cls \
27
+ --use_chat_template false \
28
+ --problem_type regression
ms-swift/examples/train/think_model/qwen3_demo2.sh ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # use `swift/self-cognition:qwen3`
2
+ # Avoid losing the thinking capability by appending `/no_think` to the dataset query.
3
+ # https://github.com/modelscope/ms-swift/blob/77985c2ccdac8ed4037174ee222e79d1f1d5059d/swift/llm/dataset/dataset/llm.py#L835
4
+ CUDA_VISIBLE_DEVICES=0 \
5
+ swift sft \
6
+ --model Qwen/Qwen3-8B \
7
+ --train_type lora \
8
+ --dataset 'swift/Qwen3-SFT-Mixin#2000' \
9
+ 'swift/self-cognition:qwen3#600' \
10
+ --torch_dtype bfloat16 \
11
+ --num_train_epochs 1 \
12
+ --per_device_train_batch_size 1 \
13
+ --per_device_eval_batch_size 1 \
14
+ --learning_rate 1e-4 \
15
+ --lora_rank 8 \
16
+ --lora_alpha 32 \
17
+ --target_modules all-linear \
18
+ --gradient_accumulation_steps 16 \
19
+ --eval_steps 50 \
20
+ --save_steps 50 \
21
+ --save_total_limit 2 \
22
+ --logging_steps 5 \
23
+ --max_length 2048 \
24
+ --output_dir output \
25
+ --warmup_ratio 0.05 \
26
+ --dataloader_num_workers 4 \
27
+ --use_liger_kernel true \
28
+ --model_author swift \
29
+ --model_name swift-robot
ms-swift/examples/train/tuners/bone/train.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 17.3GiB
2
+ CUDA_VISIBLE_DEVICES=0 \
3
+ swift sft \
4
+ --model Qwen/Qwen2.5-7B-Instruct \
5
+ --train_type bone \
6
+ --dataset 'swift/self-cognition#1000' \
7
+ --num_train_epochs 1 \
8
+ --per_device_train_batch_size 1 \
9
+ --learning_rate 1e-4 \
10
+ --gradient_accumulation_steps 16 \
11
+ --eval_steps 100 \
12
+ --save_steps 100 \
13
+ --save_total_limit 2 \
14
+ --logging_steps 5 \
15
+ --model_author swift \
16
+ --model_name swift-robot
ms-swift/examples/train/tuners/lisa/train.sh ADDED
@@ -0,0 +1,17 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # 29GiB
2
+ CUDA_VISIBLE_DEVICES=0 \
3
+ swift sft \
4
+ --model Qwen/Qwen2.5-7B-Instruct \
5
+ --train_type full \
6
+ --dataset 'swift/self-cognition#1000' \
7
+ --lisa_activated_layers 2 \
8
+ --num_train_epochs 1 \
9
+ --per_device_train_batch_size 1 \
10
+ --learning_rate 1e-5 \
11
+ --gradient_accumulation_steps 16 \
12
+ --eval_steps 100 \
13
+ --save_steps 100 \
14
+ --save_total_limit 2 \
15
+ --logging_steps 5 \
16
+ --model_author swift \
17
+ --model_name swift-robot
ms-swift/examples/train/tuners/longlora/train.sh ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ CUDA_VISIBLE_DEVICES=0 \
2
+ swift sft \
3
+ --model LLM-Research/Meta-Llama-3.1-8B-Instruct \
4
+ --train_type longlora \
5
+ --dataset 'AI-ModelScope/LongAlpaca-12k#1000' \
6
+ --num_train_epochs 1 \
7
+ --learning_rate 1e-4 \
8
+ --attn_impl flash_attn \
9
+ --gradient_accumulation_steps 16 \
10
+ --lora_rank 8 \
11
+ --lora_alpha 32 \
12
+ --eval_steps 100 \
13
+ --save_steps 100 \
14
+ --max_length 10000 \
15
+ --save_total_limit 2 \
16
+ --logging_steps 5
ms-swift/ms_swift.egg-info/SOURCES.txt ADDED
@@ -0,0 +1,373 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ LICENSE
2
+ MANIFEST.in
3
+ README.md
4
+ setup.cfg
5
+ setup.py
6
+ ms_swift.egg-info/PKG-INFO
7
+ ms_swift.egg-info/SOURCES.txt
8
+ ms_swift.egg-info/dependency_links.txt
9
+ ms_swift.egg-info/entry_points.txt
10
+ ms_swift.egg-info/not-zip-safe
11
+ ms_swift.egg-info/requires.txt
12
+ ms_swift.egg-info/top_level.txt
13
+ requirements/docs.txt
14
+ requirements/eval.txt
15
+ requirements/framework.txt
16
+ requirements/seq_parallel.txt
17
+ requirements/swanlab.txt
18
+ requirements/tests.txt
19
+ swift/__init__.py
20
+ swift/version.py
21
+ swift/cli/__init__.py
22
+ swift/cli/app.py
23
+ swift/cli/deploy.py
24
+ swift/cli/eval.py
25
+ swift/cli/export.py
26
+ swift/cli/infer.py
27
+ swift/cli/main.py
28
+ swift/cli/merge_lora.py
29
+ swift/cli/pt.py
30
+ swift/cli/rlhf.py
31
+ swift/cli/rollout.py
32
+ swift/cli/sample.py
33
+ swift/cli/sft.py
34
+ swift/cli/web_ui.py
35
+ swift/cli/_megatron/__init__.py
36
+ swift/cli/_megatron/main.py
37
+ swift/cli/_megatron/pt.py
38
+ swift/cli/_megatron/sft.py
39
+ swift/hub/__init__.py
40
+ swift/hub/constant.py
41
+ swift/hub/hub.py
42
+ swift/llm/__init__.py
43
+ swift/llm/base.py
44
+ swift/llm/data_loader.py
45
+ swift/llm/utils.py
46
+ swift/llm/app/__init__.py
47
+ swift/llm/app/app.py
48
+ swift/llm/app/build_ui.py
49
+ swift/llm/app/locale.py
50
+ swift/llm/argument/__init__.py
51
+ swift/llm/argument/app_args.py
52
+ swift/llm/argument/deploy_args.py
53
+ swift/llm/argument/eval_args.py
54
+ swift/llm/argument/export_args.py
55
+ swift/llm/argument/infer_args.py
56
+ swift/llm/argument/merge_args.py
57
+ swift/llm/argument/rlhf_args.py
58
+ swift/llm/argument/sampling_args.py
59
+ swift/llm/argument/train_args.py
60
+ swift/llm/argument/tuner_args.py
61
+ swift/llm/argument/webui_args.py
62
+ swift/llm/argument/base_args/__init__.py
63
+ swift/llm/argument/base_args/base_args.py
64
+ swift/llm/argument/base_args/data_args.py
65
+ swift/llm/argument/base_args/generation_args.py
66
+ swift/llm/argument/base_args/model_args.py
67
+ swift/llm/argument/base_args/quant_args.py
68
+ swift/llm/argument/base_args/template_args.py
69
+ swift/llm/argument/base_args/utils.py
70
+ swift/llm/dataset/__init__.py
71
+ swift/llm/dataset/loader.py
72
+ swift/llm/dataset/media.py
73
+ swift/llm/dataset/register.py
74
+ swift/llm/dataset/utils.py
75
+ swift/llm/dataset/data/dataset_info.json
76
+ swift/llm/dataset/dataset/__init__.py
77
+ swift/llm/dataset/dataset/llm.py
78
+ swift/llm/dataset/dataset/mllm.py
79
+ swift/llm/dataset/preprocessor/__init__.py
80
+ swift/llm/dataset/preprocessor/core.py
81
+ swift/llm/dataset/preprocessor/extra.py
82
+ swift/llm/ds_config/zero0.json
83
+ swift/llm/ds_config/zero1.json
84
+ swift/llm/ds_config/zero2.json
85
+ swift/llm/ds_config/zero2_offload.json
86
+ swift/llm/ds_config/zero3.json
87
+ swift/llm/ds_config/zero3_offload.json
88
+ swift/llm/eval/__init__.py
89
+ swift/llm/eval/eval.py
90
+ swift/llm/eval/utils.py
91
+ swift/llm/export/__init__.py
92
+ swift/llm/export/export.py
93
+ swift/llm/export/merge_lora.py
94
+ swift/llm/export/ollama.py
95
+ swift/llm/export/quant.py
96
+ swift/llm/infer/__init__.py
97
+ swift/llm/infer/deploy.py
98
+ swift/llm/infer/infer.py
99
+ swift/llm/infer/protocol.py
100
+ swift/llm/infer/rollout.py
101
+ swift/llm/infer/utils.py
102
+ swift/llm/infer/infer_engine/__init__.py
103
+ swift/llm/infer/infer_engine/base.py
104
+ swift/llm/infer/infer_engine/grpo_vllm_engine.py
105
+ swift/llm/infer/infer_engine/infer_client.py
106
+ swift/llm/infer/infer_engine/infer_engine.py
107
+ swift/llm/infer/infer_engine/lmdeploy_engine.py
108
+ swift/llm/infer/infer_engine/patch.py
109
+ swift/llm/infer/infer_engine/pt_engine.py
110
+ swift/llm/infer/infer_engine/utils.py
111
+ swift/llm/infer/infer_engine/vllm_engine.py
112
+ swift/llm/model/__init__.py
113
+ swift/llm/model/constant.py
114
+ swift/llm/model/model_arch.py
115
+ swift/llm/model/patcher.py
116
+ swift/llm/model/register.py
117
+ swift/llm/model/utils.py
118
+ swift/llm/model/model/__init__.py
119
+ swift/llm/model/model/baai.py
120
+ swift/llm/model/model/baichuan.py
121
+ swift/llm/model/model/bert.py
122
+ swift/llm/model/model/codefuse.py
123
+ swift/llm/model/model/deepseek.py
124
+ swift/llm/model/model/gemma.py
125
+ swift/llm/model/model/glm.py
126
+ swift/llm/model/model/internlm.py
127
+ swift/llm/model/model/llama.py
128
+ swift/llm/model/model/llava.py
129
+ swift/llm/model/model/llm.py
130
+ swift/llm/model/model/mamba.py
131
+ swift/llm/model/model/microsoft.py
132
+ swift/llm/model/model/minicpm.py
133
+ swift/llm/model/model/minimax.py
134
+ swift/llm/model/model/mistral.py
135
+ swift/llm/model/model/mllm.py
136
+ swift/llm/model/model/moonshot.py
137
+ swift/llm/model/model/mplug.py
138
+ swift/llm/model/model/openbuddy.py
139
+ swift/llm/model/model/qwen.py
140
+ swift/llm/model/model/skywork.py
141
+ swift/llm/model/model/stepfun.py
142
+ swift/llm/model/model/telechat.py
143
+ swift/llm/model/model/valley.py
144
+ swift/llm/model/model/yi.py
145
+ swift/llm/sampling/__init__.py
146
+ swift/llm/sampling/base.py
147
+ swift/llm/sampling/distill_sampler.py
148
+ swift/llm/sampling/mcts.py
149
+ swift/llm/sampling/sampling.py
150
+ swift/llm/sampling/utils.py
151
+ swift/llm/sampling/vanilla_sampler.py
152
+ swift/llm/template/__init__.py
153
+ swift/llm/template/base.py
154
+ swift/llm/template/constant.py
155
+ swift/llm/template/grounding.py
156
+ swift/llm/template/register.py
157
+ swift/llm/template/template_inputs.py
158
+ swift/llm/template/template_meta.py
159
+ swift/llm/template/utils.py
160
+ swift/llm/template/vision_utils.py
161
+ swift/llm/template/template/__init__.py
162
+ swift/llm/template/template/deepseek.py
163
+ swift/llm/template/template/emu3.py
164
+ swift/llm/template/template/gemma.py
165
+ swift/llm/template/template/glm.py
166
+ swift/llm/template/template/idefics3.py
167
+ swift/llm/template/template/internlm.py
168
+ swift/llm/template/template/internvl.py
169
+ swift/llm/template/template/llama.py
170
+ swift/llm/template/template/llava.py
171
+ swift/llm/template/template/llm.py
172
+ swift/llm/template/template/megrez.py
173
+ swift/llm/template/template/microsoft.py
174
+ swift/llm/template/template/minicpm.py
175
+ swift/llm/template/template/minimax.py
176
+ swift/llm/template/template/mistral.py
177
+ swift/llm/template/template/molmo.py
178
+ swift/llm/template/template/moonshot.py
179
+ swift/llm/template/template/mplug.py
180
+ swift/llm/template/template/openbuddy.py
181
+ swift/llm/template/template/pixtral.py
182
+ swift/llm/template/template/qwen.py
183
+ swift/llm/template/template/stepfun.py
184
+ swift/llm/template/template/utils.py
185
+ swift/llm/template/template/valley.py
186
+ swift/llm/template/template/yi.py
187
+ swift/llm/train/__init__.py
188
+ swift/llm/train/callback.py
189
+ swift/llm/train/kto.py
190
+ swift/llm/train/pt.py
191
+ swift/llm/train/rlhf.py
192
+ swift/llm/train/sft.py
193
+ swift/llm/train/tuner.py
194
+ swift/megatron/__init__.py
195
+ swift/megatron/init.py
196
+ swift/megatron/argument/__init__.py
197
+ swift/megatron/argument/megatron_args.py
198
+ swift/megatron/argument/train_args.py
199
+ swift/megatron/model/__init__.py
200
+ swift/megatron/model/config.py
201
+ swift/megatron/model/constant.py
202
+ swift/megatron/model/register.py
203
+ swift/megatron/model/rope.py
204
+ swift/megatron/model/gpt/__init__.py
205
+ swift/megatron/model/gpt/config.py
206
+ swift/megatron/model/gpt/hf2mcore.py
207
+ swift/megatron/model/gpt/mcore2hf.py
208
+ swift/megatron/model/gpt/model.py
209
+ swift/megatron/train/__init__.py
210
+ swift/megatron/train/patcher.py
211
+ swift/megatron/train/pt.py
212
+ swift/megatron/train/sft.py
213
+ swift/megatron/train/utils.py
214
+ swift/megatron/utils/__init__.py
215
+ swift/megatron/utils/convert.py
216
+ swift/megatron/utils/patcher.py
217
+ swift/plugin/__init__.py
218
+ swift/plugin/callback.py
219
+ swift/plugin/loss.py
220
+ swift/plugin/metric.py
221
+ swift/plugin/multi_turn.py
222
+ swift/plugin/optimizer.py
223
+ swift/plugin/orm.py
224
+ swift/plugin/prm.py
225
+ swift/plugin/rm_plugin.py
226
+ swift/plugin/tuner.py
227
+ swift/plugin/agent_template/__init__.py
228
+ swift/plugin/agent_template/base.py
229
+ swift/plugin/agent_template/extra.py
230
+ swift/plugin/agent_template/glm4.py
231
+ swift/plugin/agent_template/hermes.py
232
+ swift/plugin/agent_template/llama.py
233
+ swift/plugin/agent_template/qwen.py
234
+ swift/plugin/agent_template/react.py
235
+ swift/plugin/agent_template/toolbench.py
236
+ swift/plugin/loss_scale/__init__.py
237
+ swift/plugin/loss_scale/loss_scale.py
238
+ swift/plugin/loss_scale/utils.py
239
+ swift/plugin/loss_scale/config/agentflan.json
240
+ swift/plugin/loss_scale/config/alpha_umi.json
241
+ swift/plugin/loss_scale/config/hermes.json
242
+ swift/plugin/loss_scale/config/ignore_empty_think.json
243
+ swift/plugin/loss_scale/config/qwen.json
244
+ swift/plugin/loss_scale/config/react.json
245
+ swift/trainers/__init__.py
246
+ swift/trainers/arguments.py
247
+ swift/trainers/callback.py
248
+ swift/trainers/mixin.py
249
+ swift/trainers/rlhf_arguments.py
250
+ swift/trainers/torchacc_mixin.py
251
+ swift/trainers/trainer_factory.py
252
+ swift/trainers/trainers.py
253
+ swift/trainers/utils.py
254
+ swift/trainers/optimizers/__init__.py
255
+ swift/trainers/optimizers/galore/__init__.py
256
+ swift/trainers/optimizers/galore/adafactor.py
257
+ swift/trainers/optimizers/galore/adamw.py
258
+ swift/trainers/optimizers/galore/adamw8bit.py
259
+ swift/trainers/optimizers/galore/galore_projector.py
260
+ swift/trainers/optimizers/galore/utils.py
261
+ swift/trainers/rlhf_trainer/__init__.py
262
+ swift/trainers/rlhf_trainer/cpo_trainer.py
263
+ swift/trainers/rlhf_trainer/dpo_trainer.py
264
+ swift/trainers/rlhf_trainer/grpo_trainer.py
265
+ swift/trainers/rlhf_trainer/kto_trainer.py
266
+ swift/trainers/rlhf_trainer/orpo_trainer.py
267
+ swift/trainers/rlhf_trainer/ppo_trainer.py
268
+ swift/trainers/rlhf_trainer/reward_trainer.py
269
+ swift/trainers/rlhf_trainer/rlhf_mixin.py
270
+ swift/trainers/rlhf_trainer/utils.py
271
+ swift/trainers/rlhf_trainer/vllm_client.py
272
+ swift/trainers/sequence_parallel/__init__.py
273
+ swift/trainers/sequence_parallel/base.py
274
+ swift/trainers/sequence_parallel/ulysses.py
275
+ swift/trainers/sequence_parallel/xtuner.py
276
+ swift/tuners/__init__.py
277
+ swift/tuners/adapter.py
278
+ swift/tuners/base.py
279
+ swift/tuners/llamapro.py
280
+ swift/tuners/lora.py
281
+ swift/tuners/lora_layers.py
282
+ swift/tuners/mapping.py
283
+ swift/tuners/neftune.py
284
+ swift/tuners/part.py
285
+ swift/tuners/peft.py
286
+ swift/tuners/prompt.py
287
+ swift/tuners/reft.py
288
+ swift/tuners/restuning.py
289
+ swift/tuners/restuning_components.py
290
+ swift/tuners/side.py
291
+ swift/tuners/utils.py
292
+ swift/tuners/longlora/__init__.py
293
+ swift/tuners/longlora/llama.py
294
+ swift/tuners/longlora/longlora.py
295
+ swift/tuners/scetuning/__init__.py
296
+ swift/tuners/scetuning/scetuning.py
297
+ swift/tuners/scetuning/scetuning_components.py
298
+ swift/ui/__init__.py
299
+ swift/ui/app.py
300
+ swift/ui/base.py
301
+ swift/ui/llm_eval/__init__.py
302
+ swift/ui/llm_eval/eval.py
303
+ swift/ui/llm_eval/llm_eval.py
304
+ swift/ui/llm_eval/model.py
305
+ swift/ui/llm_eval/runtime.py
306
+ swift/ui/llm_export/__init__.py
307
+ swift/ui/llm_export/export.py
308
+ swift/ui/llm_export/llm_export.py
309
+ swift/ui/llm_export/model.py
310
+ swift/ui/llm_export/runtime.py
311
+ swift/ui/llm_infer/__init__.py
312
+ swift/ui/llm_infer/generate.py
313
+ swift/ui/llm_infer/llm_infer.py
314
+ swift/ui/llm_infer/model.py
315
+ swift/ui/llm_infer/runtime.py
316
+ swift/ui/llm_train/__init__.py
317
+ swift/ui/llm_train/advanced.py
318
+ swift/ui/llm_train/dataset.py
319
+ swift/ui/llm_train/galore.py
320
+ swift/ui/llm_train/hyper.py
321
+ swift/ui/llm_train/lisa.py
322
+ swift/ui/llm_train/llamapro.py
323
+ swift/ui/llm_train/llm_train.py
324
+ swift/ui/llm_train/lora.py
325
+ swift/ui/llm_train/model.py
326
+ swift/ui/llm_train/quantization.py
327
+ swift/ui/llm_train/report_to.py
328
+ swift/ui/llm_train/rlhf.py
329
+ swift/ui/llm_train/runtime.py
330
+ swift/ui/llm_train/save.py
331
+ swift/ui/llm_train/self_cog.py
332
+ swift/ui/llm_train/utils.py
333
+ swift/utils/__init__.py
334
+ swift/utils/constants.py
335
+ swift/utils/env.py
336
+ swift/utils/import_utils.py
337
+ swift/utils/io_utils.py
338
+ swift/utils/logger.py
339
+ swift/utils/np_utils.py
340
+ swift/utils/tb_utils.py
341
+ swift/utils/torch_utils.py
342
+ swift/utils/torchacc_utils.py
343
+ swift/utils/utils.py
344
+ tests/__init__.py
345
+ tests/model_tag.py
346
+ tests/run.py
347
+ tests/test_utils.py
348
+ tests/hub/__init__.py
349
+ tests/hub/test_check_model.py
350
+ tests/llm/__init__.py
351
+ tests/llm/load_model.py
352
+ tests/llm/load_template.py
353
+ tests/llm/test_custom.py
354
+ tests/llm/test_dataset.py
355
+ tests/llm/test_ollama_export.py
356
+ tests/llm/test_run.py
357
+ tests/llm/test_run3.py
358
+ tests/llm/test_template.py
359
+ tests/llm/test_utils.py
360
+ tests/tuners/__init__.py
361
+ tests/tuners/test_extra_state_dict.py
362
+ tests/tuners/test_merged_linear.py
363
+ tests/tuners/test_neft.py
364
+ tests/tuners/test_peft.py
365
+ tests/tuners/test_scetuning.py
366
+ tests/tuners/test_swift_base.py
367
+ tests/tuners/test_swift_device_map.py
368
+ tests/tuners/test_swift_restuning.py
369
+ tests/utils/__init__.py
370
+ tests/utils/test_file_utils.py
371
+ tests/utils/test_io_utils.py
372
+ tests/utils/test_split_str_parts_by.py
373
+ tests/utils/test_torch_utils.py
ms-swift/ms_swift.egg-info/entry_points.txt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ [console_scripts]
2
+ megatron = swift.cli._megatron.main:cli_main
3
+ swift = swift.cli.main:cli_main
ms-swift/ms_swift.egg-info/top_level.txt ADDED
@@ -0,0 +1,2 @@
 
 
 
1
+ swift
2
+ tests
ms-swift/requirements/framework.txt ADDED
@@ -0,0 +1,38 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ accelerate
2
+ addict
3
+ aiohttp
4
+ attrdict
5
+ binpacking
6
+ charset_normalizer
7
+ cpm_kernels
8
+ dacite
9
+ datasets>=3.0,<3.4
10
+ einops
11
+ fastapi
12
+ gradio>=3.40.0
13
+ importlib_metadata
14
+ jieba
15
+ matplotlib
16
+ modelscope>=1.23
17
+ nltk
18
+ numpy<2.0
19
+ openai
20
+ oss2
21
+ pandas
22
+ peft>=0.11,<0.16
23
+ pillow
24
+ requests
25
+ rouge
26
+ safetensors
27
+ scipy
28
+ sentencepiece
29
+ simplejson>=3.3.0
30
+ sortedcontainers>=1.5.9
31
+ tensorboard
32
+ tiktoken
33
+ tqdm
34
+ transformers>=4.33,<4.53
35
+ transformers_stream_generator
36
+ trl>=0.13,<0.18
37
+ uvicorn
38
+ zstandard
ms-swift/requirements/tests.txt ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ expecttest
2
+ flake8
3
+ isort>=4.3.21
4
+ modelscope
5
+ pre-commit
6
+ yapf==0.30.0 # pin a fixed version to ensure consistent auto-styling
ms-swift/swift/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (2.13 kB). View file
 
ms-swift/swift/cli/_megatron/main.py ADDED
@@ -0,0 +1,20 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from typing import Dict
3
+
4
+ from swift.utils import get_logger
5
+ from ..main import cli_main as swift_cli_main
6
+
7
+ logger = get_logger()
8
+
9
+ ROUTE_MAPPING: Dict[str, str] = {
10
+ 'sft': 'swift.cli._megatron.sft',
11
+ 'pt': 'swift.cli._megatron.pt',
12
+ }
13
+
14
+
15
def cli_main():
    """Run the swift CLI dispatcher with the Megatron route table.

    Returns:
        Whatever ``swift_cli_main`` returns for ``ROUTE_MAPPING``.
    """
    result = swift_cli_main(ROUTE_MAPPING)
    return result
17
+
18
+
19
+ if __name__ == '__main__':
20
+ cli_main()
ms-swift/swift/cli/infer.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from swift.llm import infer_main
3
+
4
+ if __name__ == '__main__':
5
+ infer_main()
ms-swift/swift/cli/merge_lora.py ADDED
@@ -0,0 +1,14 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from swift.llm import ExportArguments, SwiftPipeline, merge_lora
3
+
4
+
5
class SwiftMergeLoRA(SwiftPipeline):
    """Pipeline whose ``run`` step calls ``merge_lora`` on its export arguments."""
    # Argument class associated with this pipeline.
    args_class = ExportArguments
    args: args_class

    def run(self):
        """Call ``merge_lora`` with this pipeline's arguments."""
        export_args = self.args
        merge_lora(export_args)
11
+
12
+
13
+ if __name__ == '__main__':
14
+ SwiftMergeLoRA().main()
ms-swift/swift/cli/rlhf.py ADDED
@@ -0,0 +1,5 @@
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from swift.llm import rlhf_main
3
+
4
+ if __name__ == '__main__':
5
+ rlhf_main()
ms-swift/swift/hub/constant.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from modelscope.hub import constants
3
+
4
+ constants.API_HTTP_CLIENT_TIMEOUT = 5
5
+ constants.API_FILE_DOWNLOAD_TIMEOUT = 300
6
+ constants.API_FILE_DOWNLOAD_CHUNK_SIZE = 1024 * 1024 * 16
ms-swift/swift/hub/hub.py ADDED
@@ -0,0 +1,451 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ import os
3
+ import tempfile
4
+ from contextlib import contextmanager
5
+ from functools import partial
6
+ from pathlib import Path
7
+ from typing import List, Literal, Optional, Union
8
+
9
+ import huggingface_hub
10
+ from huggingface_hub import RepoUrl
11
+ from huggingface_hub.hf_api import api, future_compatible
12
+ from requests.exceptions import HTTPError
13
+ from transformers import trainer
14
+ from transformers.utils import logging, strtobool
15
+
16
+ from swift.utils.env import use_hf_hub
17
+
18
+ logger = logging.get_logger(__name__)
19
+
20
+
21
class HubOperation:
    """Abstract interface shared by the hub backends.

    Every operation is a classmethod. ``patch_hub`` is a no-op context manager
    in this base class; all other methods raise ``NotImplementedError`` and are
    meant to be overridden by a concrete backend.
    """

    @classmethod
    @contextmanager
    def patch_hub(cls):
        """Context-manager hook for backends that patch hub APIs.

        The base implementation patches nothing and simply yields.
        """
        yield

    @classmethod
    def try_login(cls, token: Optional[str] = None) -> bool:
        """Attempt to log in to the hub.

        Args:
            token: Hub token to log in with.

        Returns:
            bool: ``True`` when the login succeeded.
        """
        raise NotImplementedError

    @classmethod
    def create_model_repo(
        cls,
        repo_id: str,
        token: Optional[str] = None,
        private: bool = False,
    ):
        """Create a model repository on the hub.

        Args:
            repo_id: Model id on the hub.
            token: Hub token to use.
            private: Whether the repository is private.
        """
        raise NotImplementedError

    @classmethod
    def push_to_hub(
        cls,
        repo_id: str,
        folder_path: Union[str, Path],
        path_in_repo: Optional[str] = None,
        commit_message: Optional[str] = None,
        commit_description: Optional[str] = None,
        token: Union[str, bool, None] = None,
        private: bool = False,
        revision: Optional[str] = 'master',
        ignore_patterns: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Upload a model-like local folder to the hub.

        Args:
            repo_id: Target repository id.
            folder_path: Local folder to upload.
            path_in_repo: Remote folder that receives the local files.
            commit_message: Git commit message.
            commit_description: Extended commit description.
            token: Hub token.
            private: Whether the repository is private.
            revision: Revision to push to.
            ignore_patterns: File patterns to leave out of the upload.
        """
        raise NotImplementedError

    @classmethod
    def load_dataset(
        cls,
        dataset_id: str,
        subset_name: str,
        split: str,
        streaming: bool = False,
        revision: Optional[str] = None,
    ):
        """Load a dataset from the hub.

        Args:
            dataset_id: Dataset id.
            subset_name: Name of the dataset subset.
            split: Split specification.
            streaming: Whether to load in streaming mode.
            revision: Dataset revision.

        Returns:
            The dataset instance.
        """
        raise NotImplementedError

    @classmethod
    def download_model(
        cls,
        model_id_or_path: Optional[str] = None,
        revision: Optional[str] = None,
        download_model: bool = True,
        ignore_patterns: Optional[List[str]] = None,
        **kwargs,
    ):
        """Download a model from the hub.

        Args:
            model_id_or_path: Model id.
            revision: Model revision.
            download_model: Whether to fetch the bin/safetensors weight files;
                turning it off is mainly useful when only the tokenizer is needed.
            ignore_patterns: Custom ignore patterns.
            **kwargs: Extra backend-specific options.

        Returns:
            The local directory of the download.
        """
        raise NotImplementedError
120
+
121
+
122
class MSHub(HubOperation):
    """ModelScope implementation of the hub operations.

    Besides the ``HubOperation`` interface it provides ``create_repo`` and
    ``upload_folder`` stand-ins that ``patch_hub`` temporarily installs over the
    same-named functions of ``huggingface_hub`` and ``transformers.trainer``.
    """
    # Token remembered by the latest `create_model_repo` call; fallback token for `push_to_hub`.
    ms_token = None

    @staticmethod
    def create_repo(repo_id: str, *, token: Union[str, bool, None] = None, private: bool = False, **kwargs) -> RepoUrl:
        """Create a new repository on the hub.

        Args:
            repo_id: The ID of the repository to create.
            token: The authentication token to use.
            private: Whether the repository should be private.
            **kwargs: Accepted for signature compatibility; ignored.

        Returns:
            RepoUrl: Built from the repo id returned by ``create_model_repo``.
        """
        hub_model_id = MSHub.create_model_repo(repo_id, token, private)
        return RepoUrl(url=hub_model_id, )

    @staticmethod
    @future_compatible
    def upload_folder(
        self,
        *,
        repo_id: str,
        folder_path: Union[str, Path],
        path_in_repo: Optional[str] = None,
        commit_message: Optional[str] = None,
        commit_description: Optional[str] = None,
        token: Union[str, bool, None] = None,
        revision: Optional[str] = 'master',
        ignore_patterns: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Upload a folder through ``MSHub.push_to_hub``.

        ``self`` is the object bound via ``functools.partial`` in ``patch_hub``;
        it is not used here. Extra ``kwargs`` are ignored.

        Returns:
            CommitInfo: Carries the ModelScope files URL of ``repo_id`` and the
            commit message/description; ``oid`` is ``None``.
        """
        from modelscope.utils.repo_utils import CommitInfo
        # NOTE(review): `private` is always passed as True here — confirm this is intended.
        MSHub.push_to_hub(repo_id, folder_path, path_in_repo, commit_message, commit_description, token, True, revision,
                          ignore_patterns)
        return CommitInfo(
            commit_url=f'https://www.modelscope.cn/models/{repo_id}/files',
            commit_message=commit_message,
            commit_description=commit_description,
            oid=None,
        )

    @classmethod
    @contextmanager
    def patch_hub(cls):
        """Temporarily route repo creation and folder upload to ModelScope.

        Replaces ``create_repo`` / ``upload_folder`` on both ``huggingface_hub``
        and ``transformers.trainer`` and restores the originals on exit, even if
        the body raises.
        """
        hub_create_repo = huggingface_hub.create_repo
        hub_upload_folder = huggingface_hub.upload_folder
        trainer_create_repo = trainer.create_repo
        trainer_upload_folder = trainer.upload_folder

        huggingface_hub.create_repo = cls.create_repo
        huggingface_hub.upload_folder = partial(cls.upload_folder, api)
        trainer.create_repo = cls.create_repo
        trainer.upload_folder = partial(cls.upload_folder, api)
        try:
            yield
        finally:
            huggingface_hub.create_repo = hub_create_repo
            huggingface_hub.upload_folder = hub_upload_folder
            trainer.create_repo = trainer_create_repo
            trainer.upload_folder = trainer_upload_folder

    @classmethod
    def try_login(cls, token: Optional[str] = None) -> bool:
        """Log in to ModelScope when a token is available.

        Args:
            token: Token to use; when ``None`` the ``MODELSCOPE_API_TOKEN``
                environment variable is consulted.

        Returns:
            bool: ``True`` if a non-empty token was found and ``HubApi.login``
            was called with it, ``False`` otherwise.
        """
        from modelscope import HubApi
        if token is None:
            token = os.environ.get('MODELSCOPE_API_TOKEN')
        if token:
            api = HubApi()
            api.login(token)
            return True
        return False

    @classmethod
    def create_model_repo(cls, repo_id: str, token: Optional[str] = None, private: bool = False) -> str:
        """Create the model repo and seed it with default git metadata files.

        Args:
            repo_id: Model id. A bare name without ``/`` is prefixed with the
                logged-in user's name.
            token: Hub token; falls back to ``MODELSCOPE_API_TOKEN``.
            private: Whether the repository is private.

        Returns:
            str: The (possibly user-qualified) repo id that was used.

        Raises:
            ValueError: If no token is available for login.
        """
        from modelscope import HubApi
        from modelscope.hub.api import ModelScopeConfig
        from modelscope.hub.constants import ModelVisibility
        assert repo_id is not None, 'Please enter a valid hub_model_id'

        # Resolve the environment token here so that `ms_token` records the token actually used.
        if token is None:
            token = os.environ.get('MODELSCOPE_API_TOKEN')
        if not cls.try_login(token):
            raise ValueError('Please specify a token by `--hub_token` or `MODELSCOPE_API_TOKEN=xxx`')
        cls.ms_token = token
        visibility = ModelVisibility.PRIVATE if private else ModelVisibility.PUBLIC
        api = HubApi()
        if '/' not in repo_id:
            user_name = ModelScopeConfig.get_user_info()[0]
            assert isinstance(user_name, str)
            # Use the qualified id from here on, so the repo that is created, cloned and
            # returned is the one named in the log message.
            repo_id = f'{user_name}/{repo_id}'
            logger.info(f"'/' not in hub_model_id, pushing to personal repo {repo_id}")
        try:
            api.create_model(repo_id, visibility)
        except HTTPError:
            # The remote repository has been created
            pass

        with tempfile.TemporaryDirectory() as temp_cache_dir:
            from modelscope.hub.repository import Repository
            repo = Repository(temp_cache_dir, repo_id)
            cls.add_patterns_to_gitattributes(repo, ['*.safetensors', '*.bin', '*.pt'])
            # Add 'runs/' to .gitignore, ignore tensorboard files
            cls.add_patterns_to_gitignore(repo, ['runs/', 'images/'])
            cls.add_patterns_to_file(
                repo,
                'configuration.json', ['{"framework": "pytorch", "task": "text-generation", "allow_remote": true}'],
                ignore_push_error=True)
            # Add '*.sagemaker' to .gitignore if using SageMaker
            if os.environ.get('SM_TRAINING_ENV'):
                cls.add_patterns_to_gitignore(repo, ['*.sagemaker-uploading', '*.sagemaker-uploaded'],
                                              'Add `*.sagemaker` patterns to .gitignore')
        return repo_id

    @classmethod
    def push_to_hub(cls,
                    repo_id: str,
                    folder_path: Union[str, Path],
                    path_in_repo: Optional[str] = None,
                    commit_message: Optional[str] = None,
                    commit_description: Optional[str] = None,
                    token: Union[str, bool, None] = None,
                    private: bool = False,
                    revision: Optional[str] = 'master',
                    ignore_patterns: Optional[Union[List[str], str]] = None,
                    **kwargs):
        """Create the repo if needed and push ``folder_path`` to it.

        Args:
            repo_id: Target repo id (a bare name is qualified by ``create_model_repo``).
            folder_path: Local folder to upload. A default ``configuration.json``
                is written into it when none exists.
            path_in_repo: When truthy, the parent of ``folder_path`` is pushed
                instead, the folder's base name is passed as ``tag`` and ignore
                patterns are cleared.
            commit_message: Commit message; defaults to ``'Upload folder using api'``.
            commit_description: Appended to the commit message on a new line.
            token: Hub token; falls back to the token stored by ``create_model_repo``.
            private: Whether the repository is private.
            revision: Target revision; ``None`` and ``'main'`` map to ``'master'``.
            ignore_patterns: A pattern or list of patterns to skip; ``'_*'`` is dropped.
            **kwargs: Ignored.
        """
        # Keep the id returned by create_model_repo: it may have been qualified with the user name.
        repo_id = cls.create_model_repo(repo_id, token, private)
        from modelscope import push_to_hub
        commit_message = commit_message or 'Upload folder using api'
        if commit_description:
            commit_message = commit_message + '\n' + commit_description
        if not os.path.exists(os.path.join(folder_path, 'configuration.json')):
            with open(os.path.join(folder_path, 'configuration.json'), 'w', encoding='utf-8') as f:
                f.write('{"framework": "pytorch", "task": "text-generation", "allow_remote": true}')
        # A single pattern may be given as a plain string; wrap it so it is not iterated per character.
        if isinstance(ignore_patterns, str):
            ignore_patterns = [ignore_patterns]
        if ignore_patterns:
            ignore_patterns = [p for p in ignore_patterns if p != '_*']
        if path_in_repo:
            # We don't support part submit for now
            path_in_repo = os.path.basename(folder_path)
            folder_path = os.path.dirname(folder_path)
            ignore_patterns = []
        if revision is None or revision == 'main':
            revision = 'master'
        push_to_hub(
            repo_id,
            folder_path,
            token or cls.ms_token,
            private,
            commit_message=commit_message,
            ignore_file_pattern=ignore_patterns,
            revision=revision,
            tag=path_in_repo)

    @classmethod
    def load_dataset(cls,
                     dataset_id: str,
                     subset_name: str,
                     split: str,
                     streaming: bool = False,
                     revision: Optional[str] = None,
                     download_mode: Literal['force_redownload', 'reuse_dataset_if_exists'] = 'reuse_dataset_if_exists',
                     token: Optional[str] = None,
                     **kwargs):
        """Load a dataset through ``MsDataset.load``.

        Attempts a login first. ``None`` and ``'main'`` revisions map to
        ``'master'``. Extra ``kwargs`` are ignored.

        Returns:
            The object returned by ``MsDataset.load``.
        """
        from modelscope import MsDataset
        cls.try_login(token)
        if revision is None or revision == 'main':
            revision = 'master'

        return MsDataset.load(
            dataset_id,
            subset_name=subset_name,
            split=split,
            version=revision,
            download_mode=download_mode,
            use_streaming=streaming,
        )

    @classmethod
    def download_model(cls,
                       model_id_or_path: Optional[str] = None,
                       revision: Optional[str] = None,
                       ignore_patterns: Optional[List[str]] = None,
                       token: Optional[str] = None,
                       **kwargs):
        """Download a model snapshot from ModelScope.

        Attempts a login first. ``None`` and ``'main'`` revisions map to
        ``'master'``. Extra ``kwargs`` are forwarded to ``snapshot_download``.

        Returns:
            The value returned by ``modelscope.snapshot_download``.
        """
        cls.try_login(token)
        if revision is None or revision == 'main':
            revision = 'master'
        logger.info(f'Downloading the model from ModelScope Hub, model_id: {model_id_or_path}')
        from modelscope import snapshot_download
        return snapshot_download(model_id_or_path, revision, ignore_patterns=ignore_patterns, **kwargs)

    @staticmethod
    def add_patterns_to_file(repo,
                             file_name: str,
                             patterns: List[str],
                             commit_message: Optional[str] = None,
                             ignore_push_error=False) -> None:
        """Append missing patterns to a file in the repo checkout and push.

        Args:
            repo: Repository object exposing ``model_dir`` and ``push``.
            file_name: File inside ``repo.model_dir`` to update.
            patterns: Lines to add; a single string is accepted too. A pattern
                already contained in the file content is skipped.
            commit_message: Commit message; a default naming the first pattern
                is used when ``None``.
            ignore_push_error: When true, a failing push is logged and ignored
                instead of raised.
        """
        if isinstance(patterns, str):
            patterns = [patterns]
        if commit_message is None:
            commit_message = f'Add `{patterns[0]}` patterns to {file_name}'

        # Get current file content
        repo_dir = repo.model_dir
        file_path = os.path.join(repo_dir, file_name)
        if os.path.exists(file_path):
            with open(file_path, 'r', encoding='utf-8') as f:
                current_content = f.read()
        else:
            current_content = ''
        # Add the patterns to file
        content = current_content
        for pattern in patterns:
            if pattern not in content:
                if len(content) > 0 and not content.endswith('\n'):
                    content += '\n'
                content += f'{pattern}\n'

        # Write the file if it has changed
        if content != current_content:
            with open(file_path, 'w', encoding='utf-8') as f:
                logger.debug(f'Writing {file_name} file. Content: {content}')
                f.write(content)
        try:
            repo.push(commit_message)
        except Exception as e:
            if not ignore_push_error:
                raise
            # Best-effort push: keep going, but leave a trace of what went wrong.
            logger.warning(f'Ignoring failure to push {file_name}: {e}')

    @staticmethod
    def add_patterns_to_gitignore(repo, patterns: List[str], commit_message: Optional[str] = None) -> None:
        """Add ``patterns`` to the repo's ``.gitignore``; push errors are ignored."""
        MSHub.add_patterns_to_file(repo, '.gitignore', patterns, commit_message, ignore_push_error=True)

    @staticmethod
    def add_patterns_to_gitattributes(repo, patterns: List[str], commit_message: Optional[str] = None) -> None:
        """Add ``patterns`` to ``.gitattributes`` as git-LFS entries; push errors are ignored.

        The LFS attribute suffix is appended to every pattern that does not
        already contain it.
        """
        new_patterns = []
        suffix = 'filter=lfs diff=lfs merge=lfs -text'
        for pattern in patterns:
            if suffix not in pattern:
                pattern = f'{pattern} {suffix}'
            new_patterns.append(pattern)
        file_name = '.gitattributes'
        if commit_message is None:
            commit_message = f'Add `{patterns[0]}` patterns to {file_name}'
        MSHub.add_patterns_to_file(repo, file_name, new_patterns, commit_message, ignore_push_error=True)
370
+
371
+
372
class HFHub(HubOperation):
    """Hugging Face implementation of the hub operations."""

    @classmethod
    def try_login(cls, token: Optional[str] = None) -> bool:
        """Do nothing; ``token`` is not used.

        NOTE(review): returns ``None`` despite the ``bool`` annotation — confirm
        what callers expect before changing it.
        """
        return None

    @classmethod
    def create_model_repo(cls, repo_id: str, token: Optional[str] = None, private: bool = False) -> str:
        """Create the repo through ``api.create_repo`` and return its result."""
        created = api.create_repo(repo_id, token=token, private=private)
        return created

    @classmethod
    def push_to_hub(
        cls,
        repo_id: str,
        folder_path: Union[str, Path],
        path_in_repo: Optional[str] = None,
        commit_message: Optional[str] = None,
        commit_description: Optional[str] = None,
        token: Union[str, bool, None] = None,
        private: bool = False,
        revision: Optional[str] = 'master',
        ignore_patterns: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        """Create the repo, then upload ``folder_path`` with ``api.upload_folder``.

        ``None`` and ``'master'`` revisions map to ``'main'``. Extra ``kwargs``
        are forwarded to ``api.upload_folder``.

        Returns:
            The value returned by ``api.upload_folder``.
        """
        cls.create_model_repo(repo_id, token, private)
        target_revision = 'main' if revision in (None, 'master') else revision
        upload_options = dict(
            repo_id=repo_id,
            folder_path=folder_path,
            path_in_repo=path_in_repo,
            commit_message=commit_message,
            commit_description=commit_description,
            token=token,
            revision=target_revision,
            ignore_patterns=ignore_patterns,
        )
        return api.upload_folder(**upload_options, **kwargs)

    @classmethod
    def load_dataset(
        cls,
        dataset_id: str,
        subset_name: str,
        split: str,
        streaming: bool = False,
        revision: Optional[str] = None,
        download_mode: Literal['force_redownload', 'reuse_dataset_if_exists'] = 'reuse_dataset_if_exists',
        num_proc: Optional[int] = None,
        **kwargs,
    ):
        """Load a dataset through ``datasets.load_dataset``.

        ``None`` and ``'master'`` revisions map to ``'main'``. Extra ``kwargs``
        are ignored.

        Returns:
            The object returned by ``datasets.load_dataset``.
        """
        from datasets import load_dataset
        target_revision = 'main' if revision in (None, 'master') else revision
        return load_dataset(
            dataset_id,
            name=subset_name,
            split=split,
            streaming=streaming,
            revision=target_revision,
            download_mode=download_mode,
            num_proc=num_proc,
        )

    @classmethod
    def download_model(
        cls,
        model_id_or_path: Optional[str] = None,
        revision: Optional[str] = None,
        ignore_patterns: Optional[List[str]] = None,
        **kwargs,
    ):
        """Download a model snapshot from the Hugging Face Hub.

        ``None`` and ``'master'`` revisions map to ``'main'``. When the
        ``USE_HF_TRANSFER`` environment variable parses as true, the
        ``HF_HUB_ENABLE_HF_TRANSFER`` flag of ``huggingface_hub._snapshot_download``
        is set before downloading. Extra ``kwargs`` are forwarded to
        ``snapshot_download``.

        Returns:
            The value returned by ``huggingface_hub.snapshot_download``.
        """
        target_revision = 'main' if revision in (None, 'master') else revision
        logger.info(f'Downloading the model from HuggingFace Hub, model_id: {model_id_or_path}')
        if strtobool(os.environ.get('USE_HF_TRANSFER', 'False')):
            from huggingface_hub import _snapshot_download
            _snapshot_download.HF_HUB_ENABLE_HF_TRANSFER = True
        from huggingface_hub import snapshot_download
        return snapshot_download(
            model_id_or_path,
            repo_type='model',
            revision=target_revision,
            ignore_patterns=ignore_patterns,
            **kwargs,
        )
446
+
447
+
448
def get_hub(use_hf: Optional[bool] = None):
    """Select the hub backend class.

    Args:
        use_hf: ``True`` selects ``HFHub``, ``False`` selects ``MSHub``. When
            ``None``, the truthiness of ``use_hf_hub()`` decides.

    Returns:
        The ``HFHub`` or ``MSHub`` class.
    """
    if use_hf is None:
        use_hf = bool(use_hf_hub())
    hub_by_flag = {False: MSHub, True: HFHub}
    return hub_by_flag[use_hf]
ms-swift/swift/llm/__pycache__/utils.cpython-310.pyc ADDED
Binary file (9.5 kB). View file
 
ms-swift/swift/llm/app/app.py ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from contextlib import nullcontext
3
+ from typing import List, Union
4
+
5
+ import gradio
6
+ from packaging import version
7
+
8
+ from swift.utils import get_logger
9
+ from ..argument import AppArguments
10
+ from ..base import SwiftPipeline
11
+ from ..infer import run_deploy
12
+ from .build_ui import build_ui
13
+
14
+ logger = get_logger()
15
+
16
+
17
class SwiftApp(SwiftPipeline):
    """Pipeline that builds the chat UI and launches it."""
    # Argument class associated with this pipeline.
    args_class = AppArguments
    args: args_class

    def run(self):
        """Build the UI with ``build_ui`` and launch it.

        When ``args.base_url`` is set it is used directly; otherwise
        ``run_deploy(args, return_url=True)`` is entered as a context manager
        and the value it yields is used as the base URL. The UI is launched
        inside that context.
        """
        args = self.args
        if args.base_url:
            deploy_context = nullcontext()
        else:
            deploy_context = run_deploy(args, return_url=True)

        with deploy_context as deployed_url:
            base_url = deployed_url or args.base_url
            demo = build_ui(
                base_url,
                args.model_suffix,
                request_config=args.get_request_config(),
                is_multimodal=args.is_multimodal,
                studio_title=args.studio_title,
                lang=args.lang,
                default_system=args.system)
            concurrency_count = 1 if args.infer_backend == 'pt' else 16
            # Pick the queue keyword according to the installed gradio major version.
            is_pre_v4 = version.parse(gradio.__version__) < version.parse('4')
            limit_key = 'concurrency_count' if is_pre_v4 else 'default_concurrency_limit'
            queued_demo = demo.queue(**{limit_key: concurrency_count})
            queued_demo.launch(server_name=args.server_name, server_port=args.server_port, share=args.share)
41
+
42
+
43
def app_main(args: Union[List[str], AppArguments, None] = None):
    """Construct a ``SwiftApp`` from ``args`` and return the result of its ``main()``."""
    pipeline = SwiftApp(args)
    return pipeline.main()
ms-swift/swift/llm/app/locale.py ADDED
@@ -0,0 +1,23 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ locale_mapping = {
3
+ 'modify_system': {
4
+ 'en': '🛠️ Set system and clear history',
5
+ 'zh': '🛠️ 设置system并清空历史'
6
+ },
7
+ 'clear_history': {
8
+ 'en': '🧹 Clear history',
9
+ 'zh': '🧹 清空历史'
10
+ },
11
+ 'submit': {
12
+ 'en': '🚀 Send',
13
+ 'zh': '🚀 发送'
14
+ },
15
+ 'regenerate': {
16
+ 'en': '🤔️ Regenerate',
17
+ 'zh': '🤔️ 重试'
18
+ },
19
+ 'upload': {
20
+ 'en': '📁 Upload',
21
+ 'zh': '📁 上传'
22
+ }
23
+ }
ms-swift/swift/llm/argument/__init__.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from .app_args import AppArguments
3
+ from .base_args import BaseArguments
4
+ from .deploy_args import DeployArguments
5
+ from .eval_args import EvalArguments
6
+ from .export_args import ExportArguments
7
+ from .infer_args import InferArguments
8
+ from .rlhf_args import RLHFArguments
9
+ from .sampling_args import SamplingArguments
10
+ from .train_args import TrainArguments
11
+ from .tuner_args import TunerArguments
12
+ from .webui_args import WebUIArguments
ms-swift/swift/llm/argument/__pycache__/__init__.cpython-310.pyc ADDED
Binary file (712 Bytes). View file
 
ms-swift/swift/llm/argument/__pycache__/deploy_args.cpython-310.pyc ADDED
Binary file (3.41 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/infer_args.cpython-310.pyc ADDED
Binary file (7.37 kB). View file
 
ms-swift/swift/llm/argument/__pycache__/merge_args.cpython-310.pyc ADDED
Binary file (934 Bytes). View file
 
ms-swift/swift/llm/argument/base_args/__init__.py ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ # Copyright (c) Alibaba, Inc. and its affiliates.
2
+ from .base_args import BaseArguments
3
+ from .utils import to_abspath