Student0809 committed on Jun 6, 2025

Commit

356aced

verified ·

1 Parent(s): c3c781c

Add files using upload-large-folder tool

Browse files

This view is limited to 50 files because it contains too many changes. See raw diff

Files changed (50) hide show

ms-swift/examples/train/megatron/multi-node/node1.sh +34 -0
ms-swift/examples/train/multi-gpu/ddp/train.sh +30 -0
ms-swift/examples/train/multi-node/deepspeed/host.txt +2 -0
ms-swift/examples/train/multi-node/swift/train_node2.sh +30 -0
ms-swift/examples/train/multi-node/torchrun/train_node2.sh +31 -0
ms-swift/examples/train/multimodal/caption.sh +27 -0
ms-swift/examples/train/multimodal/lora_llm_full_vit/infer.sh +8 -0
ms-swift/examples/train/multimodal/lora_llm_full_vit/merge_lora.sh +3 -0
ms-swift/examples/train/multimodal/omni/sft.sh +39 -0
ms-swift/examples/train/multimodal/rlhf/kto.sh +32 -0
ms-swift/examples/train/multimodal/video.sh +31 -0
ms-swift/examples/train/packing/llm.sh +31 -0
ms-swift/examples/train/packing/streaming.sh +34 -0
ms-swift/examples/train/plugins/tuner_phi4_mm.sh +20 -0
ms-swift/examples/train/predict_with_generate/train.sh +28 -0
ms-swift/examples/train/qlora/gptq.sh +25 -0
ms-swift/examples/train/rft/math.json +0 -0
ms-swift/examples/train/rlhf/README.md +3 -0
ms-swift/examples/train/rlhf/dpo/lora.sh +25 -0
ms-swift/examples/train/rlhf/simpo.sh +26 -0
ms-swift/examples/train/seq_cls/bert/deploy.sh +11 -0
ms-swift/examples/train/seq_cls/bert/infer.sh +7 -0
ms-swift/examples/train/seq_cls/qwen2_vl/sft.sh +28 -0
ms-swift/examples/train/seq_cls/regression/deploy.sh +8 -0
ms-swift/examples/train/seq_cls/regression/sft.sh +28 -0
ms-swift/examples/train/think_model/qwen3_demo2.sh +29 -0
ms-swift/examples/train/tuners/bone/train.sh +16 -0
ms-swift/examples/train/tuners/lisa/train.sh +17 -0
ms-swift/examples/train/tuners/longlora/train.sh +16 -0
ms-swift/ms_swift.egg-info/SOURCES.txt +373 -0
ms-swift/ms_swift.egg-info/entry_points.txt +3 -0
ms-swift/ms_swift.egg-info/top_level.txt +2 -0
ms-swift/requirements/framework.txt +38 -0
ms-swift/requirements/tests.txt +6 -0
ms-swift/swift/__pycache__/__init__.cpython-310.pyc +0 -0
ms-swift/swift/cli/_megatron/main.py +20 -0
ms-swift/swift/cli/infer.py +5 -0
ms-swift/swift/cli/merge_lora.py +14 -0
ms-swift/swift/cli/rlhf.py +5 -0
ms-swift/swift/hub/constant.py +6 -0
ms-swift/swift/hub/hub.py +451 -0
ms-swift/swift/llm/__pycache__/utils.cpython-310.pyc +0 -0
ms-swift/swift/llm/app/app.py +44 -0
ms-swift/swift/llm/app/locale.py +23 -0
ms-swift/swift/llm/argument/__init__.py +12 -0
ms-swift/swift/llm/argument/__pycache__/__init__.cpython-310.pyc +0 -0
ms-swift/swift/llm/argument/__pycache__/deploy_args.cpython-310.pyc +0 -0
ms-swift/swift/llm/argument/__pycache__/infer_args.cpython-310.pyc +0 -0
ms-swift/swift/llm/argument/__pycache__/merge_args.cpython-310.pyc +0 -0
ms-swift/swift/llm/argument/base_args/__init__.py +3 -0

ms-swift/examples/train/megatron/multi-node/node1.sh ADDED Viewed

	@@ -0,0 +1,34 @@

+# For more information on multi-node training launch methods, refer to:
+# https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-node
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+NNODES=2 \
+NODE_RANK=0 \
+MASTER_ADDR=127.0.0.1 \
+MASTER_PORT=29500 \
+NPROC_PER_NODE=4 \
+megatron sft \
+    --load Qwen2.5-14B-mcore \
+    --dataset 'liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT' \
+    --tensor_model_parallel_size 4 \
+    --micro_batch_size 1 \
+    --global_batch_size 16 \
+    --packing true \
+    --recompute_granularity selective \
+    --train_iters 2000 \
+    --eval_iters 50 \
+    --finetune true \
+    --cross_entropy_loss_fusion true \
+    --lr 1e-5 \
+    --lr_warmup_iters 100 \
+    --min_lr 1e-6 \
+    --save megatron_output/Qwen2.5-14B \
+    --eval_interval 200 \
+    --save_interval 200 \
+    --max_length 8192 \
+    --num_workers 8 \
+    --dataset_num_proc 8 \
+    --no_save_optim true \
+    --no_save_rng true \
+    --sequence_parallel true \
+    --use_flash_attn true

ms-swift/examples/train/multi-gpu/ddp/train.sh ADDED Viewed

	@@ -0,0 +1,30 @@

+# 27.5GiB * 2
+nproc_per_node=2
+CUDA_VISIBLE_DEVICES=0,1 \
+NPROC_PER_NODE=$nproc_per_node \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --dataset 'swift/self-cognition#1000' \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --model_author swift \
+    --model_name swift-robot \
+    --gradient_checkpointing_kwargs '{"use_reentrant": false}'

ms-swift/examples/train/multi-node/deepspeed/host.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ worker-0 slots=2
2	+ worker-1 slots=2

ms-swift/examples/train/multi-node/swift/train_node2.sh ADDED Viewed

	@@ -0,0 +1,30 @@

+nnodes=2
+nproc_per_node=4
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+NNODES=$nnodes \
+NODE_RANK=1 \
+MASTER_ADDR=xxx.xxx.xxx.xxx \
+MASTER_PORT=29500 \
+NPROC_PER_NODE=$nproc_per_node \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2

ms-swift/examples/train/multi-node/torchrun/train_node2.sh ADDED Viewed

	@@ -0,0 +1,31 @@

+nnodes=2
+nproc_per_node=4
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+torchrun \
+    --master_port 29500 \
+    --nproc_per_node=$nproc_per_node \
+    --nnodes=$nnodes \
+    --node_rank=1 \
+    --master_addr=xxx.xxx.xxx.xxx \
+    swift/cli/sft.py \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type full \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
+              'AI-ModelScope/alpaca-gpt4-data-en#20000' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2

ms-swift/examples/train/multimodal/caption.sh ADDED Viewed

	@@ -0,0 +1,27 @@

+# 22GiB
+# You can refer to `https://github.com/QwenLM/Qwen2.5-VL` for the meaning of the `MAX_PIXELS` parameter.
+# 1003520 = 1280 * 28 * 28
+CUDA_VISIBLE_DEVICES=0 \
+MAX_PIXELS=1003520 \
+swift sft \
+    --model Qwen/Qwen2.5-VL-7B-Instruct \
+    --dataset 'modelscope/coco_2014_caption:validation#20000' \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --freeze_vit true \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4

ms-swift/examples/train/multimodal/lora_llm_full_vit/infer.sh ADDED Viewed

	@@ -0,0 +1,8 @@

+# If the weights have been merged, please use `--model`.
+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --stream true \
+    --load_data_args true \
+    --temperature 0 \
+    --max_new_tokens 2048

ms-swift/examples/train/multimodal/lora_llm_full_vit/merge_lora.sh ADDED Viewed

	@@ -0,0 +1,3 @@

+swift export \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --merge_lora true

ms-swift/examples/train/multimodal/omni/sft.sh ADDED Viewed

	@@ -0,0 +1,39 @@

+# 4*35GB
+# A demo for four modalities that can be run directly
+pip uninstall transformers
+pip install git+https://github.com/huggingface/transformers
+nproc_per_node=4
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+NPROC_PER_NODE=$nproc_per_node \
+VIDEO_MAX_PIXELS=50176 \
+FPS_MAX_FRAMES=12 \
+MAX_PIXELS=1003520 \
+ENABLE_AUDIO_OUTPUT=0 \
+swift sft \
+    --model Qwen/Qwen2.5-Omni-7B \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#2000' \
+              'AI-ModelScope/LaTeX_OCR:human_handwrite#2000' \
+              'speech_asr/speech_asr_aishell1_trainsets:validation#2000' \
+              'swift/VideoChatGPT:all#2000' \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --freeze_vit true \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2

ms-swift/examples/train/multimodal/rlhf/kto.sh ADDED Viewed

	@@ -0,0 +1,32 @@

+# Because no open-source multi-modal KTO dataset is currently available,
+# a text-only KTO dataset is used as the example here.
+nproc_per_node=2
+CUDA_VISIBLE_DEVICES=0,1 \
+NPROC_PER_NODE=$nproc_per_node \
+MAX_PIXELS=1003520 \
+swift rlhf \
+    --rlhf_type kto \
+    --model Qwen/Qwen2.5-VL-7B-Instruct \
+    --dataset 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#10000' \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --freeze_vit true \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --deepspeed zero2 \
+    --logging_steps 5 \
+    --max_length 4096 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4

ms-swift/examples/train/multimodal/video.sh ADDED Viewed

	@@ -0,0 +1,31 @@

+# 4*80GB
+# You can refer to `https://github.com/QwenLM/Qwen2.5-VL` for the meaning of the `VIDEO_MAX_PIXELS` parameter.
+nproc_per_node=4
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+NPROC_PER_NODE=$nproc_per_node \
+VIDEO_MAX_PIXELS=50176 \
+FPS_MAX_FRAMES=12 \
+swift sft \
+    --model Qwen/QVQ-72B-Preview \
+    --dataset swift/VideoChatGPT:all \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --freeze_vit true \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero3

ms-swift/examples/train/packing/llm.sh ADDED Viewed

	@@ -0,0 +1,31 @@

+# 22GB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type lora \
+    --packing true \
+    --dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
+              'AI-ModelScope/alpaca-gpt4-data-en#500' \
+              'swift/self-cognition#500' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 3 \
+    --attn_impl flash_attn \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 4 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --system 'You are a helpful assistant.' \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --model_author swift \
+    --model_name swift-robot

ms-swift/examples/train/packing/streaming.sh ADDED Viewed

	@@ -0,0 +1,34 @@

+# 4 * 36GB
+# A demo using the Hugging Face dataset
+# The first model weights will be saved around step 70.
+NPROC_PER_NODE=4 \
+MAX_PIXELS=1003520 \
+CUDA_VISIBLE_DEVICES=0,1,2,3 \
+HF_ENDPOINT=https://hf-mirror.com \
+swift sft \
+    --model Qwen/Qwen2.5-VL-7B-Instruct \
+    --train_type lora \
+    --dataset 'HF::linxy/LaTeX_OCR:full#20000' \
+    --torch_dtype bfloat16 \
+    --attn_impl flash_attn \
+    --streaming true \
+    --shuffle_buffer_size 1000 \
+    --packing true \
+    --save_strategy epoch \
+    --max_steps 1000 \
+    --max_epochs 5 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 1 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 8192 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 1 \
+    --dataset_num_proc 8 \
+    --deepspeed zero2

ms-swift/examples/train/plugins/tuner_phi4_mm.sh ADDED Viewed

	@@ -0,0 +1,20 @@

+# `--train_type dummy`
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model LLM-Research/Phi-4-multimodal-instruct \
+    --dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \
+    --train_type dummy \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 200 \
+    --save_steps 200 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4

ms-swift/examples/train/predict_with_generate/train.sh ADDED Viewed

	@@ -0,0 +1,28 @@

+# 20GiB
+CUDA_VISIBLE_DEVICES=0 \
+MAX_PIXELS=1003520 \
+swift sft \
+    --model Qwen/Qwen2.5-VL-7B-Instruct \
+    --dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 2 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --freeze_vit true \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --predict_with_generate true \
+    --metric_for_best_model rouge-l \
+    --greater_is_better true

ms-swift/examples/train/qlora/gptq.sh ADDED Viewed

	@@ -0,0 +1,25 @@

+# 2 * 30GiB
+CUDA_VISIBLE_DEVICES=0,1 \
+MAX_PIXELS=1003520 \
+swift sft \
+    --model Qwen/Qwen2.5-VL-72B-Instruct-GPTQ-Int4 \
+    --dataset 'modelscope/coco_2014_caption:validation#20000' \
+    --train_type lora \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --freeze_vit true \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4

ms-swift/examples/train/rft/math.json ADDED Viewed

The diff for this file is too large to render. See raw diff

ms-swift/examples/train/rlhf/README.md ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ # TIPS
2	+
3	+ RLHF for multi-modal models is also supported! Check the multimodal folder for details.

ms-swift/examples/train/rlhf/dpo/lora.sh ADDED Viewed

	@@ -0,0 +1,25 @@

+# 24GiB
+CUDA_VISIBLE_DEVICES=0 \
+swift rlhf \
+    --rlhf_type dpo \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type lora \
+    --dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4

ms-swift/examples/train/rlhf/simpo.sh ADDED Viewed

	@@ -0,0 +1,26 @@

+# 2*50GB
+nproc_per_node=2
+CUDA_VISIBLE_DEVICES=0,1 \
+NPROC_PER_NODE=$nproc_per_node \
+swift rlhf \
+    --rlhf_type simpo \
+    --model Qwen/Qwen2.5-3B-Instruct \
+    --train_type full \
+    --dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --deepspeed zero2 \
+    --dataset_num_proc 4

ms-swift/examples/train/seq_cls/bert/deploy.sh ADDED Viewed

	@@ -0,0 +1,11 @@

+CUDA_VISIBLE_DEVICES=0 \
+swift deploy \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --served_model_name bert-base-chinese \
+    --truncation_strategy right \
+    --max_length 512
+# curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
+# "model": "bert-base-chinese",
+# "messages": [{"role": "user", "content": "包装差，容易被调包。"}]
+# }'

ms-swift/examples/train/seq_cls/bert/infer.sh ADDED Viewed

	@@ -0,0 +1,7 @@

+CUDA_VISIBLE_DEVICES=0 \
+swift infer \
+    --adapters output/vx-xxx/checkpoint-xxx \
+    --load_data_args true \
+    --max_batch_size 16 \
+    --truncation_strategy right \
+    --max_length 512

ms-swift/examples/train/seq_cls/qwen2_vl/sft.sh ADDED Viewed

	@@ -0,0 +1,28 @@

+# If `num_labels` is provided, it will be considered a classification task.
+# You can also specify `--model Qwen/Qwen2-VL-2B-Instruct --use_chat_template true`.
+CUDA_VISIBLE_DEVICES=0 \
+MAX_PIXELS=1003520 \
+swift sft \
+    --model Qwen/Qwen2-VL-2B \
+    --train_type lora \
+    --dataset 'tany0699/garbage265#20000' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --num_labels 265 \
+    --task_type seq_cls \
+    --use_chat_template false

ms-swift/examples/train/seq_cls/regression/deploy.sh ADDED Viewed

	@@ -0,0 +1,8 @@

+CUDA_VISIBLE_DEVICES=0 \
+swift deploy \
+    --adapters output/vx-xxx/checkpoint-xxx
+# curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
+# "model": "Qwen2.5-0.5B",
+# "messages": [{"role": "user", "content": "Task: Based on the given two sentences, provide a similarity score between 0.0 and 1.0.\nSentence 1: The animal is eating.\nSentence 2: A woman is dancing.\nSimilarity score: "}]
+# }'

ms-swift/examples/train/seq_cls/regression/sft.sh ADDED Viewed

	@@ -0,0 +1,28 @@

+# 2GB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-0.5B \
+    --train_type lora \
+    --dataset 'sentence-transformers/stsb:reg#20000' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 16 \
+    --per_device_eval_batch_size 16 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 1 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --dataset_num_proc 4 \
+    --num_labels 1 \
+    --task_type seq_cls \
+    --use_chat_template false \
+    --problem_type regression

ms-swift/examples/train/think_model/qwen3_demo2.sh ADDED Viewed

	@@ -0,0 +1,29 @@

+# use `swift/self-cognition:qwen3`
+# Avoid losing the thinking capability by appending `/no_think` to the dataset query.
+# https://github.com/modelscope/ms-swift/blob/77985c2ccdac8ed4037174ee222e79d1f1d5059d/swift/llm/dataset/dataset/llm.py#L835
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen3-8B \
+    --train_type lora \
+    --dataset 'swift/Qwen3-SFT-Mixin#2000' \
+              'swift/self-cognition:qwen3#600' \
+    --torch_dtype bfloat16 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --per_device_eval_batch_size 1 \
+    --learning_rate 1e-4 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --target_modules all-linear \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 50 \
+    --save_steps 50 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --max_length 2048 \
+    --output_dir output \
+    --warmup_ratio 0.05 \
+    --dataloader_num_workers 4 \
+    --use_liger_kernel true \
+    --model_author swift \
+    --model_name swift-robot

ms-swift/examples/train/tuners/bone/train.sh ADDED Viewed

	@@ -0,0 +1,16 @@

+# 17.3GiB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type bone \
+    --dataset 'swift/self-cognition#1000' \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --learning_rate 1e-4 \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --model_author swift \
+    --model_name swift-robot

ms-swift/examples/train/tuners/lisa/train.sh ADDED Viewed

	@@ -0,0 +1,17 @@

+# 29GiB
+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model Qwen/Qwen2.5-7B-Instruct \
+    --train_type full \
+    --dataset 'swift/self-cognition#1000' \
+    --lisa_activated_layers 2 \
+    --num_train_epochs 1 \
+    --per_device_train_batch_size 1 \
+    --learning_rate 1e-5 \
+    --gradient_accumulation_steps 16 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --save_total_limit 2 \
+    --logging_steps 5 \
+    --model_author swift \
+    --model_name swift-robot

ms-swift/examples/train/tuners/longlora/train.sh ADDED Viewed

	@@ -0,0 +1,16 @@

+CUDA_VISIBLE_DEVICES=0 \
+swift sft \
+    --model LLM-Research/Meta-Llama-3.1-8B-Instruct \
+    --train_type longlora \
+    --dataset 'AI-ModelScope/LongAlpaca-12k#1000' \
+    --num_train_epochs 1 \
+    --learning_rate 1e-4 \
+    --attn_impl flash_attn \
+    --gradient_accumulation_steps 16 \
+    --lora_rank 8 \
+    --lora_alpha 32 \
+    --eval_steps 100 \
+    --save_steps 100 \
+    --max_length 10000 \
+    --save_total_limit 2 \
+    --logging_steps 5

ms-swift/ms_swift.egg-info/SOURCES.txt ADDED Viewed

	@@ -0,0 +1,373 @@

+LICENSE
+MANIFEST.in
+README.md
+setup.cfg
+setup.py
+ms_swift.egg-info/PKG-INFO
+ms_swift.egg-info/SOURCES.txt
+ms_swift.egg-info/dependency_links.txt
+ms_swift.egg-info/entry_points.txt
+ms_swift.egg-info/not-zip-safe
+ms_swift.egg-info/requires.txt
+ms_swift.egg-info/top_level.txt
+requirements/docs.txt
+requirements/eval.txt
+requirements/framework.txt
+requirements/seq_parallel.txt
+requirements/swanlab.txt
+requirements/tests.txt
+swift/__init__.py
+swift/version.py
+swift/cli/__init__.py
+swift/cli/app.py
+swift/cli/deploy.py
+swift/cli/eval.py
+swift/cli/export.py
+swift/cli/infer.py
+swift/cli/main.py
+swift/cli/merge_lora.py
+swift/cli/pt.py
+swift/cli/rlhf.py
+swift/cli/rollout.py
+swift/cli/sample.py
+swift/cli/sft.py
+swift/cli/web_ui.py
+swift/cli/_megatron/__init__.py
+swift/cli/_megatron/main.py
+swift/cli/_megatron/pt.py
+swift/cli/_megatron/sft.py
+swift/hub/__init__.py
+swift/hub/constant.py
+swift/hub/hub.py
+swift/llm/__init__.py
+swift/llm/base.py
+swift/llm/data_loader.py
+swift/llm/utils.py
+swift/llm/app/__init__.py
+swift/llm/app/app.py
+swift/llm/app/build_ui.py
+swift/llm/app/locale.py
+swift/llm/argument/__init__.py
+swift/llm/argument/app_args.py
+swift/llm/argument/deploy_args.py
+swift/llm/argument/eval_args.py
+swift/llm/argument/export_args.py
+swift/llm/argument/infer_args.py
+swift/llm/argument/merge_args.py
+swift/llm/argument/rlhf_args.py
+swift/llm/argument/sampling_args.py
+swift/llm/argument/train_args.py
+swift/llm/argument/tuner_args.py
+swift/llm/argument/webui_args.py
+swift/llm/argument/base_args/__init__.py
+swift/llm/argument/base_args/base_args.py
+swift/llm/argument/base_args/data_args.py
+swift/llm/argument/base_args/generation_args.py
+swift/llm/argument/base_args/model_args.py
+swift/llm/argument/base_args/quant_args.py
+swift/llm/argument/base_args/template_args.py
+swift/llm/argument/base_args/utils.py
+swift/llm/dataset/__init__.py
+swift/llm/dataset/loader.py
+swift/llm/dataset/media.py
+swift/llm/dataset/register.py
+swift/llm/dataset/utils.py
+swift/llm/dataset/data/dataset_info.json
+swift/llm/dataset/dataset/__init__.py
+swift/llm/dataset/dataset/llm.py
+swift/llm/dataset/dataset/mllm.py
+swift/llm/dataset/preprocessor/__init__.py
+swift/llm/dataset/preprocessor/core.py
+swift/llm/dataset/preprocessor/extra.py
+swift/llm/ds_config/zero0.json
+swift/llm/ds_config/zero1.json
+swift/llm/ds_config/zero2.json
+swift/llm/ds_config/zero2_offload.json
+swift/llm/ds_config/zero3.json
+swift/llm/ds_config/zero3_offload.json
+swift/llm/eval/__init__.py
+swift/llm/eval/eval.py
+swift/llm/eval/utils.py
+swift/llm/export/__init__.py
+swift/llm/export/export.py
+swift/llm/export/merge_lora.py
+swift/llm/export/ollama.py
+swift/llm/export/quant.py
+swift/llm/infer/__init__.py
+swift/llm/infer/deploy.py
+swift/llm/infer/infer.py
+swift/llm/infer/protocol.py
+swift/llm/infer/rollout.py
+swift/llm/infer/utils.py
+swift/llm/infer/infer_engine/__init__.py
+swift/llm/infer/infer_engine/base.py
+swift/llm/infer/infer_engine/grpo_vllm_engine.py
+swift/llm/infer/infer_engine/infer_client.py
+swift/llm/infer/infer_engine/infer_engine.py
+swift/llm/infer/infer_engine/lmdeploy_engine.py
+swift/llm/infer/infer_engine/patch.py
+swift/llm/infer/infer_engine/pt_engine.py
+swift/llm/infer/infer_engine/utils.py
+swift/llm/infer/infer_engine/vllm_engine.py
+swift/llm/model/__init__.py
+swift/llm/model/constant.py
+swift/llm/model/model_arch.py
+swift/llm/model/patcher.py
+swift/llm/model/register.py
+swift/llm/model/utils.py
+swift/llm/model/model/__init__.py
+swift/llm/model/model/baai.py
+swift/llm/model/model/baichuan.py
+swift/llm/model/model/bert.py
+swift/llm/model/model/codefuse.py
+swift/llm/model/model/deepseek.py
+swift/llm/model/model/gemma.py
+swift/llm/model/model/glm.py
+swift/llm/model/model/internlm.py
+swift/llm/model/model/llama.py
+swift/llm/model/model/llava.py
+swift/llm/model/model/llm.py
+swift/llm/model/model/mamba.py
+swift/llm/model/model/microsoft.py
+swift/llm/model/model/minicpm.py
+swift/llm/model/model/minimax.py
+swift/llm/model/model/mistral.py
+swift/llm/model/model/mllm.py
+swift/llm/model/model/moonshot.py
+swift/llm/model/model/mplug.py
+swift/llm/model/model/openbuddy.py
+swift/llm/model/model/qwen.py
+swift/llm/model/model/skywork.py
+swift/llm/model/model/stepfun.py
+swift/llm/model/model/telechat.py
+swift/llm/model/model/valley.py
+swift/llm/model/model/yi.py
+swift/llm/sampling/__init__.py
+swift/llm/sampling/base.py
+swift/llm/sampling/distill_sampler.py
+swift/llm/sampling/mcts.py
+swift/llm/sampling/sampling.py
+swift/llm/sampling/utils.py
+swift/llm/sampling/vanilla_sampler.py
+swift/llm/template/__init__.py
+swift/llm/template/base.py
+swift/llm/template/constant.py
+swift/llm/template/grounding.py
+swift/llm/template/register.py
+swift/llm/template/template_inputs.py
+swift/llm/template/template_meta.py
+swift/llm/template/utils.py
+swift/llm/template/vision_utils.py
+swift/llm/template/template/__init__.py
+swift/llm/template/template/deepseek.py
+swift/llm/template/template/emu3.py
+swift/llm/template/template/gemma.py
+swift/llm/template/template/glm.py
+swift/llm/template/template/idefics3.py
+swift/llm/template/template/internlm.py
+swift/llm/template/template/internvl.py
+swift/llm/template/template/llama.py
+swift/llm/template/template/llava.py
+swift/llm/template/template/llm.py
+swift/llm/template/template/megrez.py
+swift/llm/template/template/microsoft.py
+swift/llm/template/template/minicpm.py
+swift/llm/template/template/minimax.py
+swift/llm/template/template/mistral.py
+swift/llm/template/template/molmo.py
+swift/llm/template/template/moonshot.py
+swift/llm/template/template/mplug.py
+swift/llm/template/template/openbuddy.py
+swift/llm/template/template/pixtral.py
+swift/llm/template/template/qwen.py
+swift/llm/template/template/stepfun.py
+swift/llm/template/template/utils.py
+swift/llm/template/template/valley.py
+swift/llm/template/template/yi.py
+swift/llm/train/__init__.py
+swift/llm/train/callback.py
+swift/llm/train/kto.py
+swift/llm/train/pt.py
+swift/llm/train/rlhf.py
+swift/llm/train/sft.py
+swift/llm/train/tuner.py
+swift/megatron/__init__.py
+swift/megatron/init.py
+swift/megatron/argument/__init__.py
+swift/megatron/argument/megatron_args.py
+swift/megatron/argument/train_args.py
+swift/megatron/model/__init__.py
+swift/megatron/model/config.py
+swift/megatron/model/constant.py
+swift/megatron/model/register.py
+swift/megatron/model/rope.py
+swift/megatron/model/gpt/__init__.py
+swift/megatron/model/gpt/config.py
+swift/megatron/model/gpt/hf2mcore.py
+swift/megatron/model/gpt/mcore2hf.py
+swift/megatron/model/gpt/model.py
+swift/megatron/train/__init__.py
+swift/megatron/train/patcher.py
+swift/megatron/train/pt.py
+swift/megatron/train/sft.py
+swift/megatron/train/utils.py
+swift/megatron/utils/__init__.py
+swift/megatron/utils/convert.py
+swift/megatron/utils/patcher.py
+swift/plugin/__init__.py
+swift/plugin/callback.py
+swift/plugin/loss.py
+swift/plugin/metric.py
+swift/plugin/multi_turn.py
+swift/plugin/optimizer.py
+swift/plugin/orm.py
+swift/plugin/prm.py
+swift/plugin/rm_plugin.py
+swift/plugin/tuner.py
+swift/plugin/agent_template/__init__.py
+swift/plugin/agent_template/base.py
+swift/plugin/agent_template/extra.py
+swift/plugin/agent_template/glm4.py
+swift/plugin/agent_template/hermes.py
+swift/plugin/agent_template/llama.py
+swift/plugin/agent_template/qwen.py
+swift/plugin/agent_template/react.py
+swift/plugin/agent_template/toolbench.py
+swift/plugin/loss_scale/__init__.py
+swift/plugin/loss_scale/loss_scale.py
+swift/plugin/loss_scale/utils.py
+swift/plugin/loss_scale/config/agentflan.json
+swift/plugin/loss_scale/config/alpha_umi.json
+swift/plugin/loss_scale/config/hermes.json
+swift/plugin/loss_scale/config/ignore_empty_think.json
+swift/plugin/loss_scale/config/qwen.json
+swift/plugin/loss_scale/config/react.json
+swift/trainers/__init__.py
+swift/trainers/arguments.py
+swift/trainers/callback.py
+swift/trainers/mixin.py
+swift/trainers/rlhf_arguments.py
+swift/trainers/torchacc_mixin.py
+swift/trainers/trainer_factory.py
+swift/trainers/trainers.py
+swift/trainers/utils.py
+swift/trainers/optimizers/__init__.py
+swift/trainers/optimizers/galore/__init__.py
+swift/trainers/optimizers/galore/adafactor.py
+swift/trainers/optimizers/galore/adamw.py
+swift/trainers/optimizers/galore/adamw8bit.py
+swift/trainers/optimizers/galore/galore_projector.py
+swift/trainers/optimizers/galore/utils.py
+swift/trainers/rlhf_trainer/__init__.py
+swift/trainers/rlhf_trainer/cpo_trainer.py
+swift/trainers/rlhf_trainer/dpo_trainer.py
+swift/trainers/rlhf_trainer/grpo_trainer.py
+swift/trainers/rlhf_trainer/kto_trainer.py
+swift/trainers/rlhf_trainer/orpo_trainer.py
+swift/trainers/rlhf_trainer/ppo_trainer.py
+swift/trainers/rlhf_trainer/reward_trainer.py
+swift/trainers/rlhf_trainer/rlhf_mixin.py
+swift/trainers/rlhf_trainer/utils.py
+swift/trainers/rlhf_trainer/vllm_client.py
+swift/trainers/sequence_parallel/__init__.py
+swift/trainers/sequence_parallel/base.py
+swift/trainers/sequence_parallel/ulysses.py
+swift/trainers/sequence_parallel/xtuner.py
+swift/tuners/__init__.py
+swift/tuners/adapter.py
+swift/tuners/base.py
+swift/tuners/llamapro.py
+swift/tuners/lora.py
+swift/tuners/lora_layers.py
+swift/tuners/mapping.py
+swift/tuners/neftune.py
+swift/tuners/part.py
+swift/tuners/peft.py
+swift/tuners/prompt.py
+swift/tuners/reft.py
+swift/tuners/restuning.py
+swift/tuners/restuning_components.py
+swift/tuners/side.py
+swift/tuners/utils.py
+swift/tuners/longlora/__init__.py
+swift/tuners/longlora/llama.py
+swift/tuners/longlora/longlora.py
+swift/tuners/scetuning/__init__.py
+swift/tuners/scetuning/scetuning.py
+swift/tuners/scetuning/scetuning_components.py
+swift/ui/__init__.py
+swift/ui/app.py
+swift/ui/base.py
+swift/ui/llm_eval/__init__.py
+swift/ui/llm_eval/eval.py
+swift/ui/llm_eval/llm_eval.py
+swift/ui/llm_eval/model.py
+swift/ui/llm_eval/runtime.py
+swift/ui/llm_export/__init__.py
+swift/ui/llm_export/export.py
+swift/ui/llm_export/llm_export.py
+swift/ui/llm_export/model.py
+swift/ui/llm_export/runtime.py
+swift/ui/llm_infer/__init__.py
+swift/ui/llm_infer/generate.py
+swift/ui/llm_infer/llm_infer.py
+swift/ui/llm_infer/model.py
+swift/ui/llm_infer/runtime.py
+swift/ui/llm_train/__init__.py
+swift/ui/llm_train/advanced.py
+swift/ui/llm_train/dataset.py
+swift/ui/llm_train/galore.py
+swift/ui/llm_train/hyper.py
+swift/ui/llm_train/lisa.py
+swift/ui/llm_train/llamapro.py
+swift/ui/llm_train/llm_train.py
+swift/ui/llm_train/lora.py
+swift/ui/llm_train/model.py
+swift/ui/llm_train/quantization.py
+swift/ui/llm_train/report_to.py
+swift/ui/llm_train/rlhf.py
+swift/ui/llm_train/runtime.py
+swift/ui/llm_train/save.py
+swift/ui/llm_train/self_cog.py
+swift/ui/llm_train/utils.py
+swift/utils/__init__.py
+swift/utils/constants.py
+swift/utils/env.py
+swift/utils/import_utils.py
+swift/utils/io_utils.py
+swift/utils/logger.py
+swift/utils/np_utils.py
+swift/utils/tb_utils.py
+swift/utils/torch_utils.py
+swift/utils/torchacc_utils.py
+swift/utils/utils.py
+tests/__init__.py
+tests/model_tag.py
+tests/run.py
+tests/test_utils.py
+tests/hub/__init__.py
+tests/hub/test_check_model.py
+tests/llm/__init__.py
+tests/llm/load_model.py
+tests/llm/load_template.py
+tests/llm/test_custom.py
+tests/llm/test_dataset.py
+tests/llm/test_ollama_export.py
+tests/llm/test_run.py
+tests/llm/test_run3.py
+tests/llm/test_template.py
+tests/llm/test_utils.py
+tests/tuners/__init__.py
+tests/tuners/test_extra_state_dict.py
+tests/tuners/test_merged_linear.py
+tests/tuners/test_neft.py
+tests/tuners/test_peft.py
+tests/tuners/test_scetuning.py
+tests/tuners/test_swift_base.py
+tests/tuners/test_swift_device_map.py
+tests/tuners/test_swift_restuning.py
+tests/utils/__init__.py
+tests/utils/test_file_utils.py
+tests/utils/test_io_utils.py
+tests/utils/test_split_str_parts_by.py
+tests/utils/test_torch_utils.py

ms-swift/ms_swift.egg-info/entry_points.txt ADDED Viewed

	@@ -0,0 +1,3 @@

+[console_scripts]
+megatron = swift.cli._megatron.main:cli_main
+swift = swift.cli.main:cli_main

ms-swift/ms_swift.egg-info/top_level.txt ADDED Viewed

	@@ -0,0 +1,2 @@


1	+ swift
2	+ tests

ms-swift/requirements/framework.txt ADDED Viewed

	@@ -0,0 +1,38 @@

+accelerate
+addict
+aiohttp
+attrdict
+binpacking
+charset_normalizer
+cpm_kernels
+dacite
+datasets>=3.0,<3.4
+einops
+fastapi
+gradio>=3.40.0
+importlib_metadata
+jieba
+matplotlib
+modelscope>=1.23
+nltk
+numpy<2.0
+openai
+oss2
+pandas
+peft>=0.11,<0.16
+pillow
+requests
+rouge
+safetensors
+scipy
+sentencepiece
+simplejson>=3.3.0
+sortedcontainers>=1.5.9
+tensorboard
+tiktoken
+tqdm
+transformers>=4.33,<4.53
+transformers_stream_generator
+trl>=0.13,<0.18
+uvicorn
+zstandard

ms-swift/requirements/tests.txt ADDED Viewed

	@@ -0,0 +1,6 @@

+expecttest
+flake8
+isort>=4.3.21
+modelscope
+pre-commit
+yapf==0.30.0 # use a fixed version to ensure consistent auto-styling

ms-swift/swift/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (2.13 kB). View file

ms-swift/swift/cli/_megatron/main.py ADDED Viewed

	@@ -0,0 +1,20 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+from typing import Dict
+from swift.utils import get_logger
+from ..main import cli_main as swift_cli_main
+logger = get_logger()
+# Sub-command name -> dotted module path; handed to the shared swift CLI entry point by `cli_main`.
+ROUTE_MAPPING: Dict[str, str] = {
+    'sft': 'swift.cli._megatron.sft',
+    'pt': 'swift.cli._megatron.pt',
+}
+def cli_main():
+    return swift_cli_main(ROUTE_MAPPING)
+# Only dispatch when this module is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    cli_main()

ms-swift/swift/cli/infer.py ADDED Viewed

	@@ -0,0 +1,5 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+from swift.llm import infer_main
+# Only run inference when this module is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    infer_main()

ms-swift/swift/cli/merge_lora.py ADDED Viewed

	@@ -0,0 +1,14 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+from swift.llm import ExportArguments, SwiftPipeline, merge_lora
+class SwiftMergeLoRA(SwiftPipeline):
+    args_class = ExportArguments
+    args: args_class
+    def run(self):
+        merge_lora(self.args)
+# Only run the merge pipeline when this module is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    SwiftMergeLoRA().main()

ms-swift/swift/cli/rlhf.py ADDED Viewed

	@@ -0,0 +1,5 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+from swift.llm import rlhf_main
+# Only run RLHF training when this module is executed as a script, not when it is imported.
+if __name__ == '__main__':
+    rlhf_main()

ms-swift/swift/hub/constant.py ADDED Viewed

	@@ -0,0 +1,6 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+from modelscope.hub import constants
+# Importing this module overrides the following modelscope hub constants in place.
+# NOTE(review): the timeouts are presumably in seconds — confirm against modelscope.
+constants.API_HTTP_CLIENT_TIMEOUT = 5
+constants.API_FILE_DOWNLOAD_TIMEOUT = 300
+# 1024 * 1024 * 16 = 16,777,216 (presumably bytes, i.e. 16 MiB per chunk — confirm against modelscope).
+constants.API_FILE_DOWNLOAD_CHUNK_SIZE = 1024 * 1024 * 16

ms-swift/swift/hub/hub.py ADDED Viewed

	@@ -0,0 +1,451 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+import os
+import tempfile
+from contextlib import contextmanager
+from functools import partial
+from pathlib import Path
+from typing import List, Literal, Optional, Union
+import huggingface_hub
+from huggingface_hub import RepoUrl
+from huggingface_hub.hf_api import api, future_compatible
+from requests.exceptions import HTTPError
+from transformers import trainer
+from transformers.utils import logging, strtobool
+from swift.utils.env import use_hf_hub
+logger = logging.get_logger(__name__)
+class HubOperation:
+    """Interface shared by the hub backends in this module.
+    Every method except `patch_hub` raises `NotImplementedError` here and must be provided by a subclass.
+    """
+    @classmethod
+    @contextmanager
+    def patch_hub(cls):
+        """Context manager hook; the base implementation does nothing around the wrapped body."""
+        yield
+    @classmethod
+    def try_login(cls, token: Optional[str] = None) -> bool:
+        """Try to log in to the hub.
+        Args:
+            token: The hub token to use
+        Returns:
+            bool: Whether the login was successful
+        """
+        raise NotImplementedError
+    @classmethod
+    def create_model_repo(cls, repo_id: str, token: Optional[str] = None, private: bool = False):
+        """Create a model repo on the hub.
+        Args:
+            repo_id: The model id on the hub
+            token: The hub token to use
+            private: Whether the repo is private
+        """
+        raise NotImplementedError
+    @classmethod
+    def push_to_hub(cls,
+                    repo_id: str,
+                    folder_path: Union[str, Path],
+                    path_in_repo: Optional[str] = None,
+                    commit_message: Optional[str] = None,
+                    commit_description: Optional[str] = None,
+                    token: Union[str, bool, None] = None,
+                    private: bool = False,
+                    revision: Optional[str] = 'master',
+                    ignore_patterns: Optional[Union[List[str], str]] = None,
+                    **kwargs):
+        """Push a model-like folder to the hub.
+        Args:
+            repo_id: The repo id
+            folder_path: The local folder path
+            path_in_repo: The remote folder in which to put the local files
+            commit_message: The git commit message
+            commit_description: The commit description
+            token: The hub token
+            private: Whether the repo is private
+            revision: The revision to push to
+            ignore_patterns: File patterns to ignore
+        """
+        raise NotImplementedError
+    @classmethod
+    def load_dataset(cls,
+                     dataset_id: str,
+                     subset_name: str,
+                     split: str,
+                     streaming: bool = False,
+                     revision: Optional[str] = None):
+        """Load a dataset from the hub.
+        Args:
+            dataset_id: The dataset id
+            subset_name: The subset name of the dataset
+            split: The split info
+            streaming: Whether to use streaming mode
+            revision: The revision of the dataset
+        Returns:
+            The Dataset instance
+        """
+        raise NotImplementedError
+    @classmethod
+    def download_model(cls,
+                       model_id_or_path: Optional[str] = None,
+                       revision: Optional[str] = None,
+                       download_model: bool = True,
+                       ignore_patterns: Optional[List[str]] = None,
+                       **kwargs):
+        """Download a model from the hub.
+        Args:
+            model_id_or_path: The model id
+            revision: The model revision
+            download_model: Whether to download the bin/safetensors files; turning this off is
+                usually useful when only the tokenizer is needed
+            ignore_patterns: Custom ignore patterns
+            **kwargs: Extra keyword arguments; their interpretation is left to the subclass
+        Returns:
+            The local dir
+        """
+        raise NotImplementedError
+class MSHub(HubOperation):
+    ms_token = None
+    @staticmethod
+    def create_repo(repo_id: str, *, token: Union[str, bool, None] = None, private: bool = False, **kwargs) -> RepoUrl:
+        """
+        Create a new repository on the hub.
+        Args:
+            repo_id: The ID of the repository to create.
+            token: The authentication token to use.
+            private: Whether the repository should be private.
+            **kwargs: Additional arguments.
+        Returns:
+            RepoUrl: The URL of the created repository.
+        """
+        hub_model_id = MSHub.create_model_repo(repo_id, token, private)
+        return RepoUrl(url=hub_model_id, )
+    @staticmethod
+    @future_compatible
+    def upload_folder(
+        self,
+        *,
+        repo_id: str,
+        folder_path: Union[str, Path],
+        path_in_repo: Optional[str] = None,
+        commit_message: Optional[str] = None,
+        commit_description: Optional[str] = None,
+        token: Union[str, bool, None] = None,
+        revision: Optional[str] = 'master',
+        ignore_patterns: Optional[Union[List[str], str]] = None,
+        **kwargs,
+    ):
+        from modelscope.utils.repo_utils import CommitInfo
+        MSHub.push_to_hub(repo_id, folder_path, path_in_repo, commit_message, commit_description, token, True, revision,
+                          ignore_patterns)
+        return CommitInfo(
+            commit_url=f'https://www.modelscope.cn/models/{repo_id}/files',
+            commit_message=commit_message,
+            commit_description=commit_description,
+            oid=None,
+        )
+    @classmethod
+    @contextmanager
+    def patch_hub(cls):
+        hub_create_repo = huggingface_hub.create_repo
+        hub_upload_folder = huggingface_hub.upload_folder
+        trainer_create_repo = trainer.create_repo
+        trainer_upload_folder = trainer.upload_folder
+        huggingface_hub.create_repo = cls.create_repo
+        huggingface_hub.upload_folder = partial(cls.upload_folder, api)
+        trainer.create_repo = cls.create_repo
+        trainer.upload_folder = partial(cls.upload_folder, api)
+        try:
+            yield
+        finally:
+            huggingface_hub.create_repo = hub_create_repo
+            huggingface_hub.upload_folder = hub_upload_folder
+            trainer.create_repo = trainer_create_repo
+            trainer.upload_folder = trainer_upload_folder
+    @classmethod
+    def try_login(cls, token: Optional[str] = None) -> bool:
+        from modelscope import HubApi
+        if token is None:
+            token = os.environ.get('MODELSCOPE_API_TOKEN')
+        if token:
+            api = HubApi()
+            api.login(token)
+            return True
+        return False
+    @classmethod
+    def create_model_repo(cls, repo_id: str, token: Optional[str] = None, private: bool = False) -> str:
+        from modelscope import HubApi
+        from modelscope.hub.api import ModelScopeConfig
+        from modelscope.hub.constants import ModelVisibility
+        assert repo_id is not None, 'Please enter a valid hub_model_id'
+        if not cls.try_login(token):
+            raise ValueError('Please specify a token by `--hub_token` or `MODELSCOPE_API_TOKEN=xxx`')
+        cls.ms_token = token
+        visibility = ModelVisibility.PRIVATE if private else ModelVisibility.PUBLIC
+        api = HubApi()
+        if '/' not in repo_id:
+            user_name = ModelScopeConfig.get_user_info()[0]
+            assert isinstance(user_name, str)
+            hub_model_id = f'{user_name}/{repo_id}'
+            logger.info(f"'/' not in hub_model_id, pushing to personal repo {hub_model_id}")
+        try:
+            api.create_model(repo_id, visibility)
+        except HTTPError:
+            # The remote repository has been created
+            pass
+        with tempfile.TemporaryDirectory() as temp_cache_dir:
+            from modelscope.hub.repository import Repository
+            repo = Repository(temp_cache_dir, repo_id)
+            cls.add_patterns_to_gitattributes(repo, ['*.safetensors', '*.bin', '*.pt'])
+            # Add 'runs/' to .gitignore, ignore tensorboard files
+            cls.add_patterns_to_gitignore(repo, ['runs/', 'images/'])
+            cls.add_patterns_to_file(
+                repo,
+                'configuration.json', ['{"framework": "pytorch", "task": "text-generation", "allow_remote": true}'],
+                ignore_push_error=True)
+            # Add '*.sagemaker' to .gitignore if using SageMaker
+            if os.environ.get('SM_TRAINING_ENV'):
+                cls.add_patterns_to_gitignore(repo, ['*.sagemaker-uploading', '*.sagemaker-uploaded'],
+                                              'Add `*.sagemaker` patterns to .gitignore')
+        return repo_id
+    @classmethod
+    def push_to_hub(cls,
+                    repo_id: str,
+                    folder_path: Union[str, Path],
+                    path_in_repo: Optional[str] = None,
+                    commit_message: Optional[str] = None,
+                    commit_description: Optional[str] = None,
+                    token: Union[str, bool, None] = None,
+                    private: bool = False,
+                    revision: Optional[str] = 'master',
+                    ignore_patterns: Optional[Union[List[str], str]] = None,
+                    **kwargs):
+        cls.create_model_repo(repo_id, token, private)
+        from modelscope import push_to_hub
+        commit_message = commit_message or 'Upload folder using api'
+        if commit_description:
+            commit_message = commit_message + '\n' + commit_description
+        if not os.path.exists(os.path.join(folder_path, 'configuration.json')):
+            with open(os.path.join(folder_path, 'configuration.json'), 'w', encoding='utf-8') as f:
+                f.write('{"framework": "pytorch", "task": "text-generation", "allow_remote": true}')
+        if ignore_patterns:
+            ignore_patterns = [p for p in ignore_patterns if p != '_*']
+        if path_in_repo:
+            # We don't support part submit for now
+            path_in_repo = os.path.basename(folder_path)
+            folder_path = os.path.dirname(folder_path)
+            ignore_patterns = []
+        if revision is None or revision == 'main':
+            revision = 'master'
+        push_to_hub(
+            repo_id,
+            folder_path,
+            token or cls.ms_token,
+            private,
+            commit_message=commit_message,
+            ignore_file_pattern=ignore_patterns,
+            revision=revision,
+            tag=path_in_repo)
+    @classmethod
+    def load_dataset(cls,
+                     dataset_id: str,
+                     subset_name: str,
+                     split: str,
+                     streaming: bool = False,
+                     revision: Optional[str] = None,
+                     download_mode: Literal['force_redownload', 'reuse_dataset_if_exists'] = 'reuse_dataset_if_exists',
+                     token: Optional[str] = None,
+                     **kwargs):
+        from modelscope import MsDataset
+        cls.try_login(token)
+        if revision is None or revision == 'main':
+            revision = 'master'
+        return MsDataset.load(
+            dataset_id,
+            subset_name=subset_name,
+            split=split,
+            version=revision,
+            download_mode=download_mode,
+            use_streaming=streaming,
+        )
+    @classmethod
+    def download_model(cls,
+                       model_id_or_path: Optional[str] = None,
+                       revision: Optional[str] = None,
+                       ignore_patterns: Optional[List[str]] = None,
+                       token: Optional[str] = None,
+                       **kwargs):
+        cls.try_login(token)
+        if revision is None or revision == 'main':
+            revision = 'master'
+        logger.info(f'Downloading the model from ModelScope Hub, model_id: {model_id_or_path}')
+        from modelscope import snapshot_download
+        return snapshot_download(model_id_or_path, revision, ignore_patterns=ignore_patterns, **kwargs)
+    @staticmethod
+    def add_patterns_to_file(repo,
+                             file_name: str,
+                             patterns: List[str],
+                             commit_message: Optional[str] = None,
+                             ignore_push_error=False) -> None:
+        if isinstance(patterns, str):
+            patterns = [patterns]
+        if commit_message is None:
+            commit_message = f'Add `{patterns[0]}` patterns to {file_name}'
+        # Get current file content
+        repo_dir = repo.model_dir
+        file_path = os.path.join(repo_dir, file_name)
+        if os.path.exists(file_path):
+            with open(file_path, 'r', encoding='utf-8') as f:
+                current_content = f.read()
+        else:
+            current_content = ''
+        # Add the patterns to file
+        content = current_content
+        for pattern in patterns:
+            if pattern not in content:
+                if len(content) > 0 and not content.endswith('\n'):
+                    content += '\n'
+                content += f'{pattern}\n'
+        # Write the file if it has changed
+        if content != current_content:
+            with open(file_path, 'w', encoding='utf-8') as f:
+                logger.debug(f'Writing {file_name} file. Content: {content}')
+                f.write(content)
+        try:
+            repo.push(commit_message)
+        except Exception as e:
+            if ignore_push_error:
+                pass
+            else:
+                raise e
+    @staticmethod
+    def add_patterns_to_gitignore(repo, patterns: List[str], commit_message: Optional[str] = None) -> None:
+        MSHub.add_patterns_to_file(repo, '.gitignore', patterns, commit_message, ignore_push_error=True)
+    @staticmethod
+    def add_patterns_to_gitattributes(repo, patterns: List[str], commit_message: Optional[str] = None) -> None:
+        new_patterns = []
+        suffix = 'filter=lfs diff=lfs merge=lfs -text'
+        for pattern in patterns:
+            if suffix not in pattern:
+                pattern = f'{pattern} {suffix}'
+            new_patterns.append(pattern)
+        file_name = '.gitattributes'
+        if commit_message is None:
+            commit_message = f'Add `{patterns[0]}` patterns to {file_name}'
+        MSHub.add_patterns_to_file(repo, file_name, new_patterns, commit_message, ignore_push_error=True)
+class HFHub(HubOperation):
+    @classmethod
+    def try_login(cls, token: Optional[str] = None) -> bool:
+        pass
+    @classmethod
+    def create_model_repo(cls, repo_id: str, token: Optional[str] = None, private: bool = False) -> str:
+        return api.create_repo(repo_id, token=token, private=private)
+    @classmethod
+    def push_to_hub(cls,
+                    repo_id: str,
+                    folder_path: Union[str, Path],
+                    path_in_repo: Optional[str] = None,
+                    commit_message: Optional[str] = None,
+                    commit_description: Optional[str] = None,
+                    token: Union[str, bool, None] = None,
+                    private: bool = False,
+                    revision: Optional[str] = 'master',
+                    ignore_patterns: Optional[Union[List[str], str]] = None,
+                    **kwargs):
+        cls.create_model_repo(repo_id, token, private)
+        if revision is None or revision == 'master':
+            revision = 'main'
+        return api.upload_folder(
+            repo_id=repo_id,
+            folder_path=folder_path,
+            path_in_repo=path_in_repo,
+            commit_message=commit_message,
+            commit_description=commit_description,
+            token=token,
+            revision=revision,
+            ignore_patterns=ignore_patterns,
+            **kwargs)
+    @classmethod
+    def load_dataset(cls,
+                     dataset_id: str,
+                     subset_name: str,
+                     split: str,
+                     streaming: bool = False,
+                     revision: Optional[str] = None,
+                     download_mode: Literal['force_redownload', 'reuse_dataset_if_exists'] = 'reuse_dataset_if_exists',
+                     num_proc: Optional[int] = None,
+                     **kwargs):
+        from datasets import load_dataset
+        if revision is None or revision == 'master':
+            revision = 'main'
+        return load_dataset(
+            dataset_id,
+            name=subset_name,
+            split=split,
+            streaming=streaming,
+            revision=revision,
+            download_mode=download_mode,
+            num_proc=num_proc)
+    @classmethod
+    def download_model(cls,
+                       model_id_or_path: Optional[str] = None,
+                       revision: Optional[str] = None,
+                       ignore_patterns: Optional[List[str]] = None,
+                       **kwargs):
+        if revision is None or revision == 'master':
+            revision = 'main'
+        logger.info(f'Downloading the model from HuggingFace Hub, model_id: {model_id_or_path}')
+        use_hf_transfer = strtobool(os.environ.get('USE_HF_TRANSFER', 'False'))
+        if use_hf_transfer:
+            from huggingface_hub import _snapshot_download
+            _snapshot_download.HF_HUB_ENABLE_HF_TRANSFER = True
+        from huggingface_hub import snapshot_download
+        return snapshot_download(
+            model_id_or_path, repo_type='model', revision=revision, ignore_patterns=ignore_patterns, **kwargs)
+def get_hub(use_hf: Optional[bool] = None):
+    if use_hf is None:
+        use_hf = True if use_hf_hub() else False
+    return {True: HFHub, False: MSHub}[use_hf]

ms-swift/swift/llm/__pycache__/utils.cpython-310.pyc ADDED Viewed

Binary file (9.5 kB). View file

ms-swift/swift/llm/app/app.py ADDED Viewed

	@@ -0,0 +1,44 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+from contextlib import nullcontext
+from typing import List, Union
+import gradio
+from packaging import version
+from swift.utils import get_logger
+from ..argument import AppArguments
+from ..base import SwiftPipeline
+from ..infer import run_deploy
+from .build_ui import build_ui
+logger = get_logger()
+class SwiftApp(SwiftPipeline):
+    args_class = AppArguments
+    args: args_class
+    def run(self):
+        args = self.args
+        deploy_context = nullcontext() if args.base_url else run_deploy(args, return_url=True)
+        with deploy_context as base_url:
+            base_url = base_url or args.base_url
+            demo = build_ui(
+                base_url,
+                args.model_suffix,
+                request_config=args.get_request_config(),
+                is_multimodal=args.is_multimodal,
+                studio_title=args.studio_title,
+                lang=args.lang,
+                default_system=args.system)
+            concurrency_count = 1 if args.infer_backend == 'pt' else 16
+            if version.parse(gradio.__version__) < version.parse('4'):
+                queue_kwargs = {'concurrency_count': concurrency_count}
+            else:
+                queue_kwargs = {'default_concurrency_limit': concurrency_count}
+            demo.queue(**queue_kwargs).launch(
+                server_name=args.server_name, server_port=args.server_port, share=args.share)
+def app_main(args: Union[List[str], AppArguments, None] = None):
+    return SwiftApp(args).main()

ms-swift/swift/llm/app/locale.py ADDED Viewed

	@@ -0,0 +1,23 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+# Localized label strings: label name -> language code ('en' / 'zh') -> display text.
+# NOTE(review): presumably consumed by the app UI builder using the `lang` argument — confirm against build_ui.
+locale_mapping = {
+    'modify_system': {
+        'en': '🛠️ Set system and clear history',
+        'zh': '🛠️ 设置system并清空历史'
+    },
+    'clear_history': {
+        'en': '🧹 Clear history',
+        'zh': '🧹 清空历史'
+    },
+    'submit': {
+        'en': '🚀 Send',
+        'zh': '🚀 发送'
+    },
+    'regenerate': {
+        'en': '🤔️ Regenerate',
+        'zh': '🤔️ 重试'
+    },
+    'upload': {
+        'en': '📁 Upload',
+        'zh': '📁 上传'
+    }
+}

ms-swift/swift/llm/argument/__init__.py ADDED Viewed

	@@ -0,0 +1,12 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+from .app_args import AppArguments
+from .base_args import BaseArguments
+from .deploy_args import DeployArguments
+from .eval_args import EvalArguments
+from .export_args import ExportArguments
+from .infer_args import InferArguments
+from .rlhf_args import RLHFArguments
+from .sampling_args import SamplingArguments
+from .train_args import TrainArguments
+from .tuner_args import TunerArguments
+from .webui_args import WebUIArguments

ms-swift/swift/llm/argument/__pycache__/__init__.cpython-310.pyc ADDED Viewed

Binary file (712 Bytes). View file

ms-swift/swift/llm/argument/__pycache__/deploy_args.cpython-310.pyc ADDED Viewed

Binary file (3.41 kB). View file

ms-swift/swift/llm/argument/__pycache__/infer_args.cpython-310.pyc ADDED Viewed

Binary file (7.37 kB). View file

ms-swift/swift/llm/argument/__pycache__/merge_args.cpython-310.pyc ADDED Viewed

Binary file (934 Bytes). View file

ms-swift/swift/llm/argument/base_args/__init__.py ADDED Viewed

	@@ -0,0 +1,3 @@

+# Copyright (c) Alibaba, Inc. and its affiliates.
+from .base_args import BaseArguments
+from .utils import to_abspath