Add files using upload-large-folder tool
Browse filesThis view is limited to 50 files because it contains too many changes.
See raw diff
- ms-swift/examples/train/megatron/multi-node/node1.sh +34 -0
- ms-swift/examples/train/multi-gpu/ddp/train.sh +30 -0
- ms-swift/examples/train/multi-node/deepspeed/host.txt +2 -0
- ms-swift/examples/train/multi-node/swift/train_node2.sh +30 -0
- ms-swift/examples/train/multi-node/torchrun/train_node2.sh +31 -0
- ms-swift/examples/train/multimodal/caption.sh +27 -0
- ms-swift/examples/train/multimodal/lora_llm_full_vit/infer.sh +8 -0
- ms-swift/examples/train/multimodal/lora_llm_full_vit/merge_lora.sh +3 -0
- ms-swift/examples/train/multimodal/omni/sft.sh +39 -0
- ms-swift/examples/train/multimodal/rlhf/kto.sh +32 -0
- ms-swift/examples/train/multimodal/video.sh +31 -0
- ms-swift/examples/train/packing/llm.sh +31 -0
- ms-swift/examples/train/packing/streaming.sh +34 -0
- ms-swift/examples/train/plugins/tuner_phi4_mm.sh +20 -0
- ms-swift/examples/train/predict_with_generate/train.sh +28 -0
- ms-swift/examples/train/qlora/gptq.sh +25 -0
- ms-swift/examples/train/rft/math.json +0 -0
- ms-swift/examples/train/rlhf/README.md +3 -0
- ms-swift/examples/train/rlhf/dpo/lora.sh +25 -0
- ms-swift/examples/train/rlhf/simpo.sh +26 -0
- ms-swift/examples/train/seq_cls/bert/deploy.sh +11 -0
- ms-swift/examples/train/seq_cls/bert/infer.sh +7 -0
- ms-swift/examples/train/seq_cls/qwen2_vl/sft.sh +28 -0
- ms-swift/examples/train/seq_cls/regression/deploy.sh +8 -0
- ms-swift/examples/train/seq_cls/regression/sft.sh +28 -0
- ms-swift/examples/train/think_model/qwen3_demo2.sh +29 -0
- ms-swift/examples/train/tuners/bone/train.sh +16 -0
- ms-swift/examples/train/tuners/lisa/train.sh +17 -0
- ms-swift/examples/train/tuners/longlora/train.sh +16 -0
- ms-swift/ms_swift.egg-info/SOURCES.txt +373 -0
- ms-swift/ms_swift.egg-info/entry_points.txt +3 -0
- ms-swift/ms_swift.egg-info/top_level.txt +2 -0
- ms-swift/requirements/framework.txt +38 -0
- ms-swift/requirements/tests.txt +6 -0
- ms-swift/swift/__pycache__/__init__.cpython-310.pyc +0 -0
- ms-swift/swift/cli/_megatron/main.py +20 -0
- ms-swift/swift/cli/infer.py +5 -0
- ms-swift/swift/cli/merge_lora.py +14 -0
- ms-swift/swift/cli/rlhf.py +5 -0
- ms-swift/swift/hub/constant.py +6 -0
- ms-swift/swift/hub/hub.py +451 -0
- ms-swift/swift/llm/__pycache__/utils.cpython-310.pyc +0 -0
- ms-swift/swift/llm/app/app.py +44 -0
- ms-swift/swift/llm/app/locale.py +23 -0
- ms-swift/swift/llm/argument/__init__.py +12 -0
- ms-swift/swift/llm/argument/__pycache__/__init__.cpython-310.pyc +0 -0
- ms-swift/swift/llm/argument/__pycache__/deploy_args.cpython-310.pyc +0 -0
- ms-swift/swift/llm/argument/__pycache__/infer_args.cpython-310.pyc +0 -0
- ms-swift/swift/llm/argument/__pycache__/merge_args.cpython-310.pyc +0 -0
- ms-swift/swift/llm/argument/base_args/__init__.py +3 -0
ms-swift/examples/train/megatron/multi-node/node1.sh
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# For more information on multi-node training launch methods, refer to:
|
| 2 |
+
# https://github.com/modelscope/ms-swift/tree/main/examples/train/multi-node
|
| 3 |
+
|
| 4 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 \
|
| 5 |
+
NNODES=2 \
|
| 6 |
+
NODE_RANK=0 \
|
| 7 |
+
MASTER_ADDR=127.0.0.1 \
|
| 8 |
+
MASTER_PORT=29500 \
|
| 9 |
+
NPROC_PER_NODE=4 \
|
| 10 |
+
megatron sft \
|
| 11 |
+
--load Qwen2.5-14B-mcore \
|
| 12 |
+
--dataset 'liucong/Chinese-DeepSeek-R1-Distill-data-110k-SFT' \
|
| 13 |
+
--tensor_model_parallel_size 4 \
|
| 14 |
+
--micro_batch_size 1 \
|
| 15 |
+
--global_batch_size 16 \
|
| 16 |
+
--packing true \
|
| 17 |
+
--recompute_granularity selective \
|
| 18 |
+
--train_iters 2000 \
|
| 19 |
+
--eval_iters 50 \
|
| 20 |
+
--finetune true \
|
| 21 |
+
--cross_entropy_loss_fusion true \
|
| 22 |
+
--lr 1e-5 \
|
| 23 |
+
--lr_warmup_iters 100 \
|
| 24 |
+
--min_lr 1e-6 \
|
| 25 |
+
--save megatron_output/Qwen2.5-14B \
|
| 26 |
+
--eval_interval 200 \
|
| 27 |
+
--save_interval 200 \
|
| 28 |
+
--max_length 8192 \
|
| 29 |
+
--num_workers 8 \
|
| 30 |
+
--dataset_num_proc 8 \
|
| 31 |
+
--no_save_optim true \
|
| 32 |
+
--no_save_rng true \
|
| 33 |
+
--sequence_parallel true \
|
| 34 |
+
--use_flash_attn true
|
ms-swift/examples/train/multi-gpu/ddp/train.sh
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 27.5GiB * 2
|
| 2 |
+
nproc_per_node=2
|
| 3 |
+
|
| 4 |
+
CUDA_VISIBLE_DEVICES=0,1 \
|
| 5 |
+
NPROC_PER_NODE=$nproc_per_node \
|
| 6 |
+
swift sft \
|
| 7 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 8 |
+
--train_type lora \
|
| 9 |
+
--torch_dtype bfloat16 \
|
| 10 |
+
--dataset 'swift/self-cognition#1000' \
|
| 11 |
+
--num_train_epochs 1 \
|
| 12 |
+
--per_device_train_batch_size 1 \
|
| 13 |
+
--per_device_eval_batch_size 1 \
|
| 14 |
+
--learning_rate 1e-4 \
|
| 15 |
+
--lora_rank 8 \
|
| 16 |
+
--lora_alpha 32 \
|
| 17 |
+
--target_modules all-linear \
|
| 18 |
+
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
|
| 19 |
+
--eval_steps 100 \
|
| 20 |
+
--save_steps 100 \
|
| 21 |
+
--save_total_limit 2 \
|
| 22 |
+
--logging_steps 5 \
|
| 23 |
+
--max_length 2048 \
|
| 24 |
+
--output_dir output \
|
| 25 |
+
--system 'You are a helpful assistant.' \
|
| 26 |
+
--warmup_ratio 0.05 \
|
| 27 |
+
--dataloader_num_workers 4 \
|
| 28 |
+
--model_author swift \
|
| 29 |
+
--model_name swift-robot \
|
| 30 |
+
--gradient_checkpointing_kwargs '{"use_reentrant": false}'
|
ms-swift/examples/train/multi-node/deepspeed/host.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
worker-0 slots=2
|
| 2 |
+
worker-1 slots=2
|
ms-swift/examples/train/multi-node/swift/train_node2.sh
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nnodes=2
|
| 2 |
+
nproc_per_node=4
|
| 3 |
+
|
| 4 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 \
|
| 5 |
+
NNODES=$nnodes \
|
| 6 |
+
NODE_RANK=1 \
|
| 7 |
+
MASTER_ADDR=xxx.xxx.xxx.xxx \
|
| 8 |
+
MASTER_PORT=29500 \
|
| 9 |
+
NPROC_PER_NODE=$nproc_per_node \
|
| 10 |
+
swift sft \
|
| 11 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 12 |
+
--train_type full \
|
| 13 |
+
--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
|
| 14 |
+
'AI-ModelScope/alpaca-gpt4-data-en#20000' \
|
| 15 |
+
--torch_dtype bfloat16 \
|
| 16 |
+
--num_train_epochs 1 \
|
| 17 |
+
--per_device_train_batch_size 1 \
|
| 18 |
+
--per_device_eval_batch_size 1 \
|
| 19 |
+
--learning_rate 1e-5 \
|
| 20 |
+
--gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
|
| 21 |
+
--eval_steps 100 \
|
| 22 |
+
--save_steps 100 \
|
| 23 |
+
--save_total_limit 2 \
|
| 24 |
+
--logging_steps 5 \
|
| 25 |
+
--max_length 8192 \
|
| 26 |
+
--output_dir output \
|
| 27 |
+
--system 'You are a helpful assistant.' \
|
| 28 |
+
--warmup_ratio 0.05 \
|
| 29 |
+
--dataloader_num_workers 4 \
|
| 30 |
+
--deepspeed zero2
|
ms-swift/examples/train/multi-node/torchrun/train_node2.sh
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
nnodes=2
|
| 2 |
+
nproc_per_node=4
|
| 3 |
+
|
| 4 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 \
|
| 5 |
+
torchrun \
|
| 6 |
+
--master_port 29500 \
|
| 7 |
+
--nproc_per_node=$nproc_per_node \
|
| 8 |
+
--nnodes=$nnodes \
|
| 9 |
+
--node_rank=1 \
|
| 10 |
+
--master_addr=xxx.xxx.xxx.xxx \
|
| 11 |
+
swift/cli/sft.py \
|
| 12 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 13 |
+
--train_type full \
|
| 14 |
+
--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#20000' \
|
| 15 |
+
'AI-ModelScope/alpaca-gpt4-data-en#20000' \
|
| 16 |
+
--torch_dtype bfloat16 \
|
| 17 |
+
--num_train_epochs 1 \
|
| 18 |
+
--per_device_train_batch_size 1 \
|
| 19 |
+
--per_device_eval_batch_size 1 \
|
| 20 |
+
--learning_rate 1e-5 \
|
| 21 |
+
--gradient_accumulation_steps $(expr 32 / $nproc_per_node / $nnodes) \
|
| 22 |
+
--eval_steps 100 \
|
| 23 |
+
--save_steps 100 \
|
| 24 |
+
--save_total_limit 2 \
|
| 25 |
+
--logging_steps 5 \
|
| 26 |
+
--max_length 8192 \
|
| 27 |
+
--output_dir output \
|
| 28 |
+
--system 'You are a helpful assistant.' \
|
| 29 |
+
--warmup_ratio 0.05 \
|
| 30 |
+
--dataloader_num_workers 4 \
|
| 31 |
+
--deepspeed zero2
|
ms-swift/examples/train/multimodal/caption.sh
ADDED
|
@@ -0,0 +1,27 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 22GiB
|
| 2 |
+
# You can refer to `https://github.com/QwenLM/Qwen2.5-VL` for the meaning of the `MAX_PIXELS` parameter.
|
| 3 |
+
# 1003520 = 1280 * 28 * 28
|
| 4 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 5 |
+
MAX_PIXELS=1003520 \
|
| 6 |
+
swift sft \
|
| 7 |
+
--model Qwen/Qwen2.5-VL-7B-Instruct \
|
| 8 |
+
--dataset 'modelscope/coco_2014_caption:validation#20000' \
|
| 9 |
+
--train_type lora \
|
| 10 |
+
--torch_dtype bfloat16 \
|
| 11 |
+
--num_train_epochs 1 \
|
| 12 |
+
--per_device_train_batch_size 1 \
|
| 13 |
+
--per_device_eval_batch_size 1 \
|
| 14 |
+
--learning_rate 1e-4 \
|
| 15 |
+
--lora_rank 8 \
|
| 16 |
+
--lora_alpha 32 \
|
| 17 |
+
--target_modules all-linear \
|
| 18 |
+
--freeze_vit true \
|
| 19 |
+
--gradient_accumulation_steps 16 \
|
| 20 |
+
--eval_steps 100 \
|
| 21 |
+
--save_steps 100 \
|
| 22 |
+
--save_total_limit 2 \
|
| 23 |
+
--logging_steps 5 \
|
| 24 |
+
--max_length 2048 \
|
| 25 |
+
--output_dir output \
|
| 26 |
+
--warmup_ratio 0.05 \
|
| 27 |
+
--dataloader_num_workers 4
|
ms-swift/examples/train/multimodal/lora_llm_full_vit/infer.sh
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# If the weights have been merged, please use `--model`.
|
| 2 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 3 |
+
swift infer \
|
| 4 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 5 |
+
--stream true \
|
| 6 |
+
--load_data_args true \
|
| 7 |
+
--temperature 0 \
|
| 8 |
+
--max_new_tokens 2048
|
ms-swift/examples/train/multimodal/lora_llm_full_vit/merge_lora.sh
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
swift export \
|
| 2 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 3 |
+
--merge_lora true
|
ms-swift/examples/train/multimodal/omni/sft.sh
ADDED
|
@@ -0,0 +1,39 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 4*35GB
|
| 2 |
+
# A demo for four modalities that can be run directly
|
| 3 |
+
pip uninstall transformers
|
| 4 |
+
pip install git+https://github.com/huggingface/transformers
|
| 5 |
+
|
| 6 |
+
nproc_per_node=4
|
| 7 |
+
|
| 8 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 \
|
| 9 |
+
NPROC_PER_NODE=$nproc_per_node \
|
| 10 |
+
VIDEO_MAX_PIXELS=50176 \
|
| 11 |
+
FPS_MAX_FRAMES=12 \
|
| 12 |
+
MAX_PIXELS=1003520 \
|
| 13 |
+
ENABLE_AUDIO_OUTPUT=0 \
|
| 14 |
+
swift sft \
|
| 15 |
+
--model Qwen/Qwen2.5-Omni-7B \
|
| 16 |
+
--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#2000' \
|
| 17 |
+
'AI-ModelScope/LaTeX_OCR:human_handwrite#2000' \
|
| 18 |
+
'speech_asr/speech_asr_aishell1_trainsets:validation#2000' \
|
| 19 |
+
'swift/VideoChatGPT:all#2000' \
|
| 20 |
+
--train_type lora \
|
| 21 |
+
--torch_dtype bfloat16 \
|
| 22 |
+
--num_train_epochs 1 \
|
| 23 |
+
--per_device_train_batch_size 1 \
|
| 24 |
+
--per_device_eval_batch_size 1 \
|
| 25 |
+
--learning_rate 1e-4 \
|
| 26 |
+
--lora_rank 8 \
|
| 27 |
+
--lora_alpha 32 \
|
| 28 |
+
--target_modules all-linear \
|
| 29 |
+
--freeze_vit true \
|
| 30 |
+
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
|
| 31 |
+
--eval_steps 50 \
|
| 32 |
+
--save_steps 50 \
|
| 33 |
+
--save_total_limit 2 \
|
| 34 |
+
--logging_steps 5 \
|
| 35 |
+
--max_length 2048 \
|
| 36 |
+
--output_dir output \
|
| 37 |
+
--warmup_ratio 0.05 \
|
| 38 |
+
--dataloader_num_workers 4 \
|
| 39 |
+
--deepspeed zero2
|
ms-swift/examples/train/multimodal/rlhf/kto.sh
ADDED
|
@@ -0,0 +1,32 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Due to the absence of a multi-modal open-source dataset for kto,
|
| 2 |
+
# we will use a pure text kto dataset as an example here.
|
| 3 |
+
nproc_per_node=2
|
| 4 |
+
|
| 5 |
+
CUDA_VISIBLE_DEVICES=0,1 \
|
| 6 |
+
NPROC_PER_NODE=$nproc_per_node \
|
| 7 |
+
MAX_PIXELS=1003520 \
|
| 8 |
+
swift rlhf \
|
| 9 |
+
--rlhf_type kto \
|
| 10 |
+
--model Qwen/Qwen2.5-VL-7B-Instruct \
|
| 11 |
+
--dataset 'AI-ModelScope/ultrafeedback-binarized-preferences-cleaned-kto#10000' \
|
| 12 |
+
--train_type lora \
|
| 13 |
+
--torch_dtype bfloat16 \
|
| 14 |
+
--num_train_epochs 1 \
|
| 15 |
+
--per_device_train_batch_size 1 \
|
| 16 |
+
--per_device_eval_batch_size 1 \
|
| 17 |
+
--learning_rate 1e-4 \
|
| 18 |
+
--lora_rank 8 \
|
| 19 |
+
--lora_alpha 32 \
|
| 20 |
+
--target_modules all-linear \
|
| 21 |
+
--freeze_vit true \
|
| 22 |
+
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
|
| 23 |
+
--eval_steps 100 \
|
| 24 |
+
--save_steps 100 \
|
| 25 |
+
--save_total_limit 2 \
|
| 26 |
+
--deepspeed zero2 \
|
| 27 |
+
--logging_steps 5 \
|
| 28 |
+
--max_length 4096 \
|
| 29 |
+
--output_dir output \
|
| 30 |
+
--warmup_ratio 0.05 \
|
| 31 |
+
--dataloader_num_workers 4 \
|
| 32 |
+
--dataset_num_proc 4
|
ms-swift/examples/train/multimodal/video.sh
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 4*80GB
|
| 2 |
+
# You can refer to `https://github.com/QwenLM/Qwen2.5-VL` for the meaning of the `VIDEO_MAX_PIXELS` parameter.
|
| 3 |
+
nproc_per_node=4
|
| 4 |
+
|
| 5 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 \
|
| 6 |
+
NPROC_PER_NODE=$nproc_per_node \
|
| 7 |
+
VIDEO_MAX_PIXELS=50176 \
|
| 8 |
+
FPS_MAX_FRAMES=12 \
|
| 9 |
+
swift sft \
|
| 10 |
+
--model Qwen/QVQ-72B-Preview \
|
| 11 |
+
--dataset swift/VideoChatGPT:all \
|
| 12 |
+
--train_type lora \
|
| 13 |
+
--torch_dtype bfloat16 \
|
| 14 |
+
--num_train_epochs 1 \
|
| 15 |
+
--per_device_train_batch_size 1 \
|
| 16 |
+
--per_device_eval_batch_size 1 \
|
| 17 |
+
--learning_rate 1e-4 \
|
| 18 |
+
--lora_rank 8 \
|
| 19 |
+
--lora_alpha 32 \
|
| 20 |
+
--target_modules all-linear \
|
| 21 |
+
--freeze_vit true \
|
| 22 |
+
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
|
| 23 |
+
--eval_steps 50 \
|
| 24 |
+
--save_steps 50 \
|
| 25 |
+
--save_total_limit 2 \
|
| 26 |
+
--logging_steps 5 \
|
| 27 |
+
--max_length 2048 \
|
| 28 |
+
--output_dir output \
|
| 29 |
+
--warmup_ratio 0.05 \
|
| 30 |
+
--dataloader_num_workers 4 \
|
| 31 |
+
--deepspeed zero3
|
ms-swift/examples/train/packing/llm.sh
ADDED
|
@@ -0,0 +1,31 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 22GB
|
| 2 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 3 |
+
swift sft \
|
| 4 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 5 |
+
--train_type lora \
|
| 6 |
+
--packing true \
|
| 7 |
+
--dataset 'AI-ModelScope/alpaca-gpt4-data-zh#500' \
|
| 8 |
+
'AI-ModelScope/alpaca-gpt4-data-en#500' \
|
| 9 |
+
'swift/self-cognition#500' \
|
| 10 |
+
--torch_dtype bfloat16 \
|
| 11 |
+
--num_train_epochs 3 \
|
| 12 |
+
--attn_impl flash_attn \
|
| 13 |
+
--per_device_train_batch_size 1 \
|
| 14 |
+
--per_device_eval_batch_size 1 \
|
| 15 |
+
--learning_rate 1e-4 \
|
| 16 |
+
--lora_rank 8 \
|
| 17 |
+
--lora_alpha 32 \
|
| 18 |
+
--target_modules all-linear \
|
| 19 |
+
--gradient_accumulation_steps 4 \
|
| 20 |
+
--eval_steps 50 \
|
| 21 |
+
--save_steps 50 \
|
| 22 |
+
--save_total_limit 2 \
|
| 23 |
+
--logging_steps 5 \
|
| 24 |
+
--max_length 2048 \
|
| 25 |
+
--output_dir output \
|
| 26 |
+
--system 'You are a helpful assistant.' \
|
| 27 |
+
--warmup_ratio 0.05 \
|
| 28 |
+
--dataloader_num_workers 4 \
|
| 29 |
+
--dataset_num_proc 4 \
|
| 30 |
+
--model_author swift \
|
| 31 |
+
--model_name swift-robot
|
ms-swift/examples/train/packing/streaming.sh
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 4 * 36GB
|
| 2 |
+
# A demo using the Hugging Face dataset
|
| 3 |
+
# The first model weights will be saved around step 70.
|
| 4 |
+
NPROC_PER_NODE=4 \
|
| 5 |
+
MAX_PIXELS=1003520 \
|
| 6 |
+
CUDA_VISIBLE_DEVICES=0,1,2,3 \
|
| 7 |
+
HF_ENDPOINT=https://hf-mirror.com \
|
| 8 |
+
swift sft \
|
| 9 |
+
--model Qwen/Qwen2.5-VL-7B-Instruct \
|
| 10 |
+
--train_type lora \
|
| 11 |
+
--dataset 'HF::linxy/LaTeX_OCR:full#20000' \
|
| 12 |
+
--torch_dtype bfloat16 \
|
| 13 |
+
--attn_impl flash_attn \
|
| 14 |
+
--streaming true \
|
| 15 |
+
--shuffle_buffer_size 1000 \
|
| 16 |
+
--packing true \
|
| 17 |
+
--save_strategy epoch \
|
| 18 |
+
--max_steps 1000 \
|
| 19 |
+
--max_epochs 5 \
|
| 20 |
+
--per_device_train_batch_size 1 \
|
| 21 |
+
--per_device_eval_batch_size 1 \
|
| 22 |
+
--learning_rate 1e-4 \
|
| 23 |
+
--lora_rank 8 \
|
| 24 |
+
--lora_alpha 32 \
|
| 25 |
+
--target_modules all-linear \
|
| 26 |
+
--gradient_accumulation_steps 1 \
|
| 27 |
+
--save_total_limit 2 \
|
| 28 |
+
--logging_steps 5 \
|
| 29 |
+
--max_length 8192 \
|
| 30 |
+
--output_dir output \
|
| 31 |
+
--warmup_ratio 0.05 \
|
| 32 |
+
--dataloader_num_workers 1 \
|
| 33 |
+
--dataset_num_proc 8 \
|
| 34 |
+
--deepspeed zero2
|
ms-swift/examples/train/plugins/tuner_phi4_mm.sh
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# `--train_type dummy`
|
| 2 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 3 |
+
swift sft \
|
| 4 |
+
--model LLM-Research/Phi-4-multimodal-instruct \
|
| 5 |
+
--dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \
|
| 6 |
+
--train_type dummy \
|
| 7 |
+
--torch_dtype bfloat16 \
|
| 8 |
+
--num_train_epochs 1 \
|
| 9 |
+
--per_device_train_batch_size 1 \
|
| 10 |
+
--per_device_eval_batch_size 1 \
|
| 11 |
+
--learning_rate 1e-4 \
|
| 12 |
+
--gradient_accumulation_steps 16 \
|
| 13 |
+
--eval_steps 200 \
|
| 14 |
+
--save_steps 200 \
|
| 15 |
+
--save_total_limit 2 \
|
| 16 |
+
--logging_steps 5 \
|
| 17 |
+
--max_length 2048 \
|
| 18 |
+
--output_dir output \
|
| 19 |
+
--warmup_ratio 0.05 \
|
| 20 |
+
--dataloader_num_workers 4
|
ms-swift/examples/train/predict_with_generate/train.sh
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 20GiB
|
| 2 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 3 |
+
MAX_PIXELS=1003520 \
|
| 4 |
+
swift sft \
|
| 5 |
+
--model Qwen/Qwen2.5-VL-7B-Instruct \
|
| 6 |
+
--dataset 'AI-ModelScope/LaTeX_OCR:human_handwrite#20000' \
|
| 7 |
+
--train_type lora \
|
| 8 |
+
--torch_dtype bfloat16 \
|
| 9 |
+
--num_train_epochs 1 \
|
| 10 |
+
--per_device_train_batch_size 1 \
|
| 11 |
+
--per_device_eval_batch_size 2 \
|
| 12 |
+
--learning_rate 1e-4 \
|
| 13 |
+
--lora_rank 8 \
|
| 14 |
+
--lora_alpha 32 \
|
| 15 |
+
--target_modules all-linear \
|
| 16 |
+
--freeze_vit true \
|
| 17 |
+
--gradient_accumulation_steps 16 \
|
| 18 |
+
--eval_steps 100 \
|
| 19 |
+
--save_steps 100 \
|
| 20 |
+
--save_total_limit 2 \
|
| 21 |
+
--logging_steps 5 \
|
| 22 |
+
--max_length 2048 \
|
| 23 |
+
--output_dir output \
|
| 24 |
+
--warmup_ratio 0.05 \
|
| 25 |
+
--dataloader_num_workers 4 \
|
| 26 |
+
--predict_with_generate true \
|
| 27 |
+
--metric_for_best_model rouge-l \
|
| 28 |
+
--greater_is_better true
|
ms-swift/examples/train/qlora/gptq.sh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 2 * 30GiB
|
| 2 |
+
CUDA_VISIBLE_DEVICES=0,1 \
|
| 3 |
+
MAX_PIXELS=1003520 \
|
| 4 |
+
swift sft \
|
| 5 |
+
--model Qwen/Qwen2.5-VL-72B-Instruct-GPTQ-Int4 \
|
| 6 |
+
--dataset 'modelscope/coco_2014_caption:validation#20000' \
|
| 7 |
+
--train_type lora \
|
| 8 |
+
--torch_dtype bfloat16 \
|
| 9 |
+
--num_train_epochs 1 \
|
| 10 |
+
--per_device_train_batch_size 1 \
|
| 11 |
+
--per_device_eval_batch_size 1 \
|
| 12 |
+
--learning_rate 1e-4 \
|
| 13 |
+
--lora_rank 8 \
|
| 14 |
+
--lora_alpha 32 \
|
| 15 |
+
--target_modules all-linear \
|
| 16 |
+
--freeze_vit true \
|
| 17 |
+
--gradient_accumulation_steps 16 \
|
| 18 |
+
--eval_steps 100 \
|
| 19 |
+
--save_steps 100 \
|
| 20 |
+
--save_total_limit 2 \
|
| 21 |
+
--logging_steps 5 \
|
| 22 |
+
--max_length 2048 \
|
| 23 |
+
--output_dir output \
|
| 24 |
+
--warmup_ratio 0.05 \
|
| 25 |
+
--dataloader_num_workers 4
|
ms-swift/examples/train/rft/math.json
ADDED
|
The diff for this file is too large to render.
See raw diff
|
|
|
ms-swift/examples/train/rlhf/README.md
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# TIPS
|
| 2 |
+
|
| 3 |
+
Multi-modal models' RLHF are also supported! Check the multimodal folder for details.
|
ms-swift/examples/train/rlhf/dpo/lora.sh
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 24GiB
|
| 2 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 3 |
+
swift rlhf \
|
| 4 |
+
--rlhf_type dpo \
|
| 5 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 6 |
+
--train_type lora \
|
| 7 |
+
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
|
| 8 |
+
--torch_dtype bfloat16 \
|
| 9 |
+
--num_train_epochs 1 \
|
| 10 |
+
--per_device_train_batch_size 1 \
|
| 11 |
+
--per_device_eval_batch_size 1 \
|
| 12 |
+
--learning_rate 1e-4 \
|
| 13 |
+
--lora_rank 8 \
|
| 14 |
+
--lora_alpha 32 \
|
| 15 |
+
--target_modules all-linear \
|
| 16 |
+
--gradient_accumulation_steps 16 \
|
| 17 |
+
--eval_steps 100 \
|
| 18 |
+
--save_steps 100 \
|
| 19 |
+
--save_total_limit 2 \
|
| 20 |
+
--logging_steps 5 \
|
| 21 |
+
--max_length 2048 \
|
| 22 |
+
--output_dir output \
|
| 23 |
+
--warmup_ratio 0.05 \
|
| 24 |
+
--dataloader_num_workers 4 \
|
| 25 |
+
--dataset_num_proc 4
|
ms-swift/examples/train/rlhf/simpo.sh
ADDED
|
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 2*50GB
|
| 2 |
+
nproc_per_node=2
|
| 3 |
+
|
| 4 |
+
CUDA_VISIBLE_DEVICES=0,1 \
|
| 5 |
+
NPROC_PER_NODE=$nproc_per_node \
|
| 6 |
+
swift rlhf \
|
| 7 |
+
--rlhf_type simpo \
|
| 8 |
+
--model Qwen/Qwen2.5-3B-Instruct \
|
| 9 |
+
--train_type full \
|
| 10 |
+
--dataset hjh0119/shareAI-Llama3-DPO-zh-en-emoji \
|
| 11 |
+
--torch_dtype bfloat16 \
|
| 12 |
+
--num_train_epochs 1 \
|
| 13 |
+
--per_device_train_batch_size 1 \
|
| 14 |
+
--per_device_eval_batch_size 1 \
|
| 15 |
+
--learning_rate 1e-5 \
|
| 16 |
+
--gradient_accumulation_steps $(expr 16 / $nproc_per_node) \
|
| 17 |
+
--eval_steps 100 \
|
| 18 |
+
--save_steps 100 \
|
| 19 |
+
--save_total_limit 2 \
|
| 20 |
+
--logging_steps 5 \
|
| 21 |
+
--max_length 2048 \
|
| 22 |
+
--output_dir output \
|
| 23 |
+
--warmup_ratio 0.05 \
|
| 24 |
+
--dataloader_num_workers 4 \
|
| 25 |
+
--deepspeed zero2 \
|
| 26 |
+
--dataset_num_proc 4
|
ms-swift/examples/train/seq_cls/bert/deploy.sh
ADDED
|
@@ -0,0 +1,11 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 2 |
+
swift deploy \
|
| 3 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 4 |
+
--served_model_name bert-base-chinese \
|
| 5 |
+
--truncation_strategy right \
|
| 6 |
+
--max_length 512
|
| 7 |
+
|
| 8 |
+
# curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
|
| 9 |
+
# "model": "bert-base-chinese",
|
| 10 |
+
# "messages": [{"role": "user", "content": "包装差,容易被调包。"}]
|
| 11 |
+
# }'
|
ms-swift/examples/train/seq_cls/bert/infer.sh
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 2 |
+
swift infer \
|
| 3 |
+
--adapters output/vx-xxx/checkpoint-xxx \
|
| 4 |
+
--load_data_args true \
|
| 5 |
+
--max_batch_size 16 \
|
| 6 |
+
--truncation_strategy right \
|
| 7 |
+
--max_length 512
|
ms-swift/examples/train/seq_cls/qwen2_vl/sft.sh
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# If `num_labels` is provided, it will be considered a classification task.
|
| 2 |
+
# You can also specify `--model Qwen/Qwen2.5-VL-2B-Instruct --use_chat_template true`.
|
| 3 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 4 |
+
MAX_PIXELS=1003520 \
|
| 5 |
+
swift sft \
|
| 6 |
+
--model Qwen/Qwen2-VL-2B \
|
| 7 |
+
--train_type lora \
|
| 8 |
+
--dataset 'tany0699/garbage265#20000' \
|
| 9 |
+
--torch_dtype bfloat16 \
|
| 10 |
+
--num_train_epochs 1 \
|
| 11 |
+
--per_device_train_batch_size 1 \
|
| 12 |
+
--per_device_eval_batch_size 1 \
|
| 13 |
+
--learning_rate 1e-4 \
|
| 14 |
+
--lora_rank 8 \
|
| 15 |
+
--lora_alpha 32 \
|
| 16 |
+
--target_modules all-linear \
|
| 17 |
+
--gradient_accumulation_steps 16 \
|
| 18 |
+
--eval_steps 50 \
|
| 19 |
+
--save_steps 50 \
|
| 20 |
+
--save_total_limit 2 \
|
| 21 |
+
--logging_steps 5 \
|
| 22 |
+
--max_length 2048 \
|
| 23 |
+
--output_dir output \
|
| 24 |
+
--warmup_ratio 0.05 \
|
| 25 |
+
--dataloader_num_workers 4 \
|
| 26 |
+
--num_labels 265 \
|
| 27 |
+
--task_type seq_cls \
|
| 28 |
+
--use_chat_template false
|
ms-swift/examples/train/seq_cls/regression/deploy.sh
ADDED
|
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 2 |
+
swift deploy \
|
| 3 |
+
--adapters output/vx-xxx/checkpoint-xxx
|
| 4 |
+
|
| 5 |
+
# curl http://localhost:8000/v1/chat/completions -H "Content-Type: application/json" -d '{
|
| 6 |
+
# "model": "Qwen2.5-0.5B",
|
| 7 |
+
# "messages": [{"role": "user", "content": "Task: Based on the given two sentences, provide a similarity score between 0.0 and 1.0.\nSentence 1: The animal is eating.\nSentence 2: A woman is dancing.\nSimilarity score: "}]
|
| 8 |
+
# }'
|
ms-swift/examples/train/seq_cls/regression/sft.sh
ADDED
|
@@ -0,0 +1,28 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 2GB
|
| 2 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 3 |
+
swift sft \
|
| 4 |
+
--model Qwen/Qwen2.5-0.5B \
|
| 5 |
+
--train_type lora \
|
| 6 |
+
--dataset 'sentence-transformers/stsb:reg#20000' \
|
| 7 |
+
--torch_dtype bfloat16 \
|
| 8 |
+
--num_train_epochs 1 \
|
| 9 |
+
--per_device_train_batch_size 16 \
|
| 10 |
+
--per_device_eval_batch_size 16 \
|
| 11 |
+
--learning_rate 1e-4 \
|
| 12 |
+
--lora_rank 8 \
|
| 13 |
+
--lora_alpha 32 \
|
| 14 |
+
--target_modules all-linear \
|
| 15 |
+
--gradient_accumulation_steps 1 \
|
| 16 |
+
--eval_steps 100 \
|
| 17 |
+
--save_steps 100 \
|
| 18 |
+
--save_total_limit 2 \
|
| 19 |
+
--logging_steps 5 \
|
| 20 |
+
--max_length 2048 \
|
| 21 |
+
--output_dir output \
|
| 22 |
+
--warmup_ratio 0.05 \
|
| 23 |
+
--dataloader_num_workers 4 \
|
| 24 |
+
--dataset_num_proc 4 \
|
| 25 |
+
--num_labels 1 \
|
| 26 |
+
--task_type seq_cls \
|
| 27 |
+
--use_chat_template false \
|
| 28 |
+
--problem_type regression
|
ms-swift/examples/train/think_model/qwen3_demo2.sh
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# use `swift/self-cognition:qwen3`
|
| 2 |
+
# Avoid losing the thinking capability by appending `/no_think` to the dataset query.
|
| 3 |
+
# https://github.com/modelscope/ms-swift/blob/77985c2ccdac8ed4037174ee222e79d1f1d5059d/swift/llm/dataset/dataset/llm.py#L835
|
| 4 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 5 |
+
swift sft \
|
| 6 |
+
--model Qwen/Qwen3-8B \
|
| 7 |
+
--train_type lora \
|
| 8 |
+
--dataset 'swift/Qwen3-SFT-Mixin#2000' \
|
| 9 |
+
'swift/self-cognition:qwen3#600' \
|
| 10 |
+
--torch_dtype bfloat16 \
|
| 11 |
+
--num_train_epochs 1 \
|
| 12 |
+
--per_device_train_batch_size 1 \
|
| 13 |
+
--per_device_eval_batch_size 1 \
|
| 14 |
+
--learning_rate 1e-4 \
|
| 15 |
+
--lora_rank 8 \
|
| 16 |
+
--lora_alpha 32 \
|
| 17 |
+
--target_modules all-linear \
|
| 18 |
+
--gradient_accumulation_steps 16 \
|
| 19 |
+
--eval_steps 50 \
|
| 20 |
+
--save_steps 50 \
|
| 21 |
+
--save_total_limit 2 \
|
| 22 |
+
--logging_steps 5 \
|
| 23 |
+
--max_length 2048 \
|
| 24 |
+
--output_dir output \
|
| 25 |
+
--warmup_ratio 0.05 \
|
| 26 |
+
--dataloader_num_workers 4 \
|
| 27 |
+
--use_liger_kernel true \
|
| 28 |
+
--model_author swift \
|
| 29 |
+
--model_name swift-robot
|
ms-swift/examples/train/tuners/bone/train.sh
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 17.3GiB
|
| 2 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 3 |
+
swift sft \
|
| 4 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 5 |
+
--train_type bone \
|
| 6 |
+
--dataset 'swift/self-cognition#1000' \
|
| 7 |
+
--num_train_epochs 1 \
|
| 8 |
+
--per_device_train_batch_size 1 \
|
| 9 |
+
--learning_rate 1e-4 \
|
| 10 |
+
--gradient_accumulation_steps 16 \
|
| 11 |
+
--eval_steps 100 \
|
| 12 |
+
--save_steps 100 \
|
| 13 |
+
--save_total_limit 2 \
|
| 14 |
+
--logging_steps 5 \
|
| 15 |
+
--model_author swift \
|
| 16 |
+
--model_name swift-robot
|
ms-swift/examples/train/tuners/lisa/train.sh
ADDED
|
@@ -0,0 +1,17 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# 29GiB
|
| 2 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 3 |
+
swift sft \
|
| 4 |
+
--model Qwen/Qwen2.5-7B-Instruct \
|
| 5 |
+
--train_type full \
|
| 6 |
+
--dataset 'swift/self-cognition#1000' \
|
| 7 |
+
--lisa_activated_layers 2 \
|
| 8 |
+
--num_train_epochs 1 \
|
| 9 |
+
--per_device_train_batch_size 1 \
|
| 10 |
+
--learning_rate 1e-5 \
|
| 11 |
+
--gradient_accumulation_steps 16 \
|
| 12 |
+
--eval_steps 100 \
|
| 13 |
+
--save_steps 100 \
|
| 14 |
+
--save_total_limit 2 \
|
| 15 |
+
--logging_steps 5 \
|
| 16 |
+
--model_author swift \
|
| 17 |
+
--model_name swift-robot
|
ms-swift/examples/train/tuners/longlora/train.sh
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
CUDA_VISIBLE_DEVICES=0 \
|
| 2 |
+
swift sft \
|
| 3 |
+
--model LLM-Research/Meta-Llama-3.1-8B-Instruct \
|
| 4 |
+
--train_type longlora \
|
| 5 |
+
--dataset 'AI-ModelScope/LongAlpaca-12k#1000' \
|
| 6 |
+
--num_train_epochs 1 \
|
| 7 |
+
--learning_rate 1e-4 \
|
| 8 |
+
--attn_impl flash_attn \
|
| 9 |
+
--gradient_accumulation_steps 16 \
|
| 10 |
+
--lora_rank 8 \
|
| 11 |
+
--lora_alpha 32 \
|
| 12 |
+
--eval_steps 100 \
|
| 13 |
+
--save_steps 100 \
|
| 14 |
+
--max_length 10000 \
|
| 15 |
+
--save_total_limit 2 \
|
| 16 |
+
--logging_steps 5
|
ms-swift/ms_swift.egg-info/SOURCES.txt
ADDED
|
@@ -0,0 +1,373 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
LICENSE
|
| 2 |
+
MANIFEST.in
|
| 3 |
+
README.md
|
| 4 |
+
setup.cfg
|
| 5 |
+
setup.py
|
| 6 |
+
ms_swift.egg-info/PKG-INFO
|
| 7 |
+
ms_swift.egg-info/SOURCES.txt
|
| 8 |
+
ms_swift.egg-info/dependency_links.txt
|
| 9 |
+
ms_swift.egg-info/entry_points.txt
|
| 10 |
+
ms_swift.egg-info/not-zip-safe
|
| 11 |
+
ms_swift.egg-info/requires.txt
|
| 12 |
+
ms_swift.egg-info/top_level.txt
|
| 13 |
+
requirements/docs.txt
|
| 14 |
+
requirements/eval.txt
|
| 15 |
+
requirements/framework.txt
|
| 16 |
+
requirements/seq_parallel.txt
|
| 17 |
+
requirements/swanlab.txt
|
| 18 |
+
requirements/tests.txt
|
| 19 |
+
swift/__init__.py
|
| 20 |
+
swift/version.py
|
| 21 |
+
swift/cli/__init__.py
|
| 22 |
+
swift/cli/app.py
|
| 23 |
+
swift/cli/deploy.py
|
| 24 |
+
swift/cli/eval.py
|
| 25 |
+
swift/cli/export.py
|
| 26 |
+
swift/cli/infer.py
|
| 27 |
+
swift/cli/main.py
|
| 28 |
+
swift/cli/merge_lora.py
|
| 29 |
+
swift/cli/pt.py
|
| 30 |
+
swift/cli/rlhf.py
|
| 31 |
+
swift/cli/rollout.py
|
| 32 |
+
swift/cli/sample.py
|
| 33 |
+
swift/cli/sft.py
|
| 34 |
+
swift/cli/web_ui.py
|
| 35 |
+
swift/cli/_megatron/__init__.py
|
| 36 |
+
swift/cli/_megatron/main.py
|
| 37 |
+
swift/cli/_megatron/pt.py
|
| 38 |
+
swift/cli/_megatron/sft.py
|
| 39 |
+
swift/hub/__init__.py
|
| 40 |
+
swift/hub/constant.py
|
| 41 |
+
swift/hub/hub.py
|
| 42 |
+
swift/llm/__init__.py
|
| 43 |
+
swift/llm/base.py
|
| 44 |
+
swift/llm/data_loader.py
|
| 45 |
+
swift/llm/utils.py
|
| 46 |
+
swift/llm/app/__init__.py
|
| 47 |
+
swift/llm/app/app.py
|
| 48 |
+
swift/llm/app/build_ui.py
|
| 49 |
+
swift/llm/app/locale.py
|
| 50 |
+
swift/llm/argument/__init__.py
|
| 51 |
+
swift/llm/argument/app_args.py
|
| 52 |
+
swift/llm/argument/deploy_args.py
|
| 53 |
+
swift/llm/argument/eval_args.py
|
| 54 |
+
swift/llm/argument/export_args.py
|
| 55 |
+
swift/llm/argument/infer_args.py
|
| 56 |
+
swift/llm/argument/merge_args.py
|
| 57 |
+
swift/llm/argument/rlhf_args.py
|
| 58 |
+
swift/llm/argument/sampling_args.py
|
| 59 |
+
swift/llm/argument/train_args.py
|
| 60 |
+
swift/llm/argument/tuner_args.py
|
| 61 |
+
swift/llm/argument/webui_args.py
|
| 62 |
+
swift/llm/argument/base_args/__init__.py
|
| 63 |
+
swift/llm/argument/base_args/base_args.py
|
| 64 |
+
swift/llm/argument/base_args/data_args.py
|
| 65 |
+
swift/llm/argument/base_args/generation_args.py
|
| 66 |
+
swift/llm/argument/base_args/model_args.py
|
| 67 |
+
swift/llm/argument/base_args/quant_args.py
|
| 68 |
+
swift/llm/argument/base_args/template_args.py
|
| 69 |
+
swift/llm/argument/base_args/utils.py
|
| 70 |
+
swift/llm/dataset/__init__.py
|
| 71 |
+
swift/llm/dataset/loader.py
|
| 72 |
+
swift/llm/dataset/media.py
|
| 73 |
+
swift/llm/dataset/register.py
|
| 74 |
+
swift/llm/dataset/utils.py
|
| 75 |
+
swift/llm/dataset/data/dataset_info.json
|
| 76 |
+
swift/llm/dataset/dataset/__init__.py
|
| 77 |
+
swift/llm/dataset/dataset/llm.py
|
| 78 |
+
swift/llm/dataset/dataset/mllm.py
|
| 79 |
+
swift/llm/dataset/preprocessor/__init__.py
|
| 80 |
+
swift/llm/dataset/preprocessor/core.py
|
| 81 |
+
swift/llm/dataset/preprocessor/extra.py
|
| 82 |
+
swift/llm/ds_config/zero0.json
|
| 83 |
+
swift/llm/ds_config/zero1.json
|
| 84 |
+
swift/llm/ds_config/zero2.json
|
| 85 |
+
swift/llm/ds_config/zero2_offload.json
|
| 86 |
+
swift/llm/ds_config/zero3.json
|
| 87 |
+
swift/llm/ds_config/zero3_offload.json
|
| 88 |
+
swift/llm/eval/__init__.py
|
| 89 |
+
swift/llm/eval/eval.py
|
| 90 |
+
swift/llm/eval/utils.py
|
| 91 |
+
swift/llm/export/__init__.py
|
| 92 |
+
swift/llm/export/export.py
|
| 93 |
+
swift/llm/export/merge_lora.py
|
| 94 |
+
swift/llm/export/ollama.py
|
| 95 |
+
swift/llm/export/quant.py
|
| 96 |
+
swift/llm/infer/__init__.py
|
| 97 |
+
swift/llm/infer/deploy.py
|
| 98 |
+
swift/llm/infer/infer.py
|
| 99 |
+
swift/llm/infer/protocol.py
|
| 100 |
+
swift/llm/infer/rollout.py
|
| 101 |
+
swift/llm/infer/utils.py
|
| 102 |
+
swift/llm/infer/infer_engine/__init__.py
|
| 103 |
+
swift/llm/infer/infer_engine/base.py
|
| 104 |
+
swift/llm/infer/infer_engine/grpo_vllm_engine.py
|
| 105 |
+
swift/llm/infer/infer_engine/infer_client.py
|
| 106 |
+
swift/llm/infer/infer_engine/infer_engine.py
|
| 107 |
+
swift/llm/infer/infer_engine/lmdeploy_engine.py
|
| 108 |
+
swift/llm/infer/infer_engine/patch.py
|
| 109 |
+
swift/llm/infer/infer_engine/pt_engine.py
|
| 110 |
+
swift/llm/infer/infer_engine/utils.py
|
| 111 |
+
swift/llm/infer/infer_engine/vllm_engine.py
|
| 112 |
+
swift/llm/model/__init__.py
|
| 113 |
+
swift/llm/model/constant.py
|
| 114 |
+
swift/llm/model/model_arch.py
|
| 115 |
+
swift/llm/model/patcher.py
|
| 116 |
+
swift/llm/model/register.py
|
| 117 |
+
swift/llm/model/utils.py
|
| 118 |
+
swift/llm/model/model/__init__.py
|
| 119 |
+
swift/llm/model/model/baai.py
|
| 120 |
+
swift/llm/model/model/baichuan.py
|
| 121 |
+
swift/llm/model/model/bert.py
|
| 122 |
+
swift/llm/model/model/codefuse.py
|
| 123 |
+
swift/llm/model/model/deepseek.py
|
| 124 |
+
swift/llm/model/model/gemma.py
|
| 125 |
+
swift/llm/model/model/glm.py
|
| 126 |
+
swift/llm/model/model/internlm.py
|
| 127 |
+
swift/llm/model/model/llama.py
|
| 128 |
+
swift/llm/model/model/llava.py
|
| 129 |
+
swift/llm/model/model/llm.py
|
| 130 |
+
swift/llm/model/model/mamba.py
|
| 131 |
+
swift/llm/model/model/microsoft.py
|
| 132 |
+
swift/llm/model/model/minicpm.py
|
| 133 |
+
swift/llm/model/model/minimax.py
|
| 134 |
+
swift/llm/model/model/mistral.py
|
| 135 |
+
swift/llm/model/model/mllm.py
|
| 136 |
+
swift/llm/model/model/moonshot.py
|
| 137 |
+
swift/llm/model/model/mplug.py
|
| 138 |
+
swift/llm/model/model/openbuddy.py
|
| 139 |
+
swift/llm/model/model/qwen.py
|
| 140 |
+
swift/llm/model/model/skywork.py
|
| 141 |
+
swift/llm/model/model/stepfun.py
|
| 142 |
+
swift/llm/model/model/telechat.py
|
| 143 |
+
swift/llm/model/model/valley.py
|
| 144 |
+
swift/llm/model/model/yi.py
|
| 145 |
+
swift/llm/sampling/__init__.py
|
| 146 |
+
swift/llm/sampling/base.py
|
| 147 |
+
swift/llm/sampling/distill_sampler.py
|
| 148 |
+
swift/llm/sampling/mcts.py
|
| 149 |
+
swift/llm/sampling/sampling.py
|
| 150 |
+
swift/llm/sampling/utils.py
|
| 151 |
+
swift/llm/sampling/vanilla_sampler.py
|
| 152 |
+
swift/llm/template/__init__.py
|
| 153 |
+
swift/llm/template/base.py
|
| 154 |
+
swift/llm/template/constant.py
|
| 155 |
+
swift/llm/template/grounding.py
|
| 156 |
+
swift/llm/template/register.py
|
| 157 |
+
swift/llm/template/template_inputs.py
|
| 158 |
+
swift/llm/template/template_meta.py
|
| 159 |
+
swift/llm/template/utils.py
|
| 160 |
+
swift/llm/template/vision_utils.py
|
| 161 |
+
swift/llm/template/template/__init__.py
|
| 162 |
+
swift/llm/template/template/deepseek.py
|
| 163 |
+
swift/llm/template/template/emu3.py
|
| 164 |
+
swift/llm/template/template/gemma.py
|
| 165 |
+
swift/llm/template/template/glm.py
|
| 166 |
+
swift/llm/template/template/idefics3.py
|
| 167 |
+
swift/llm/template/template/internlm.py
|
| 168 |
+
swift/llm/template/template/internvl.py
|
| 169 |
+
swift/llm/template/template/llama.py
|
| 170 |
+
swift/llm/template/template/llava.py
|
| 171 |
+
swift/llm/template/template/llm.py
|
| 172 |
+
swift/llm/template/template/megrez.py
|
| 173 |
+
swift/llm/template/template/microsoft.py
|
| 174 |
+
swift/llm/template/template/minicpm.py
|
| 175 |
+
swift/llm/template/template/minimax.py
|
| 176 |
+
swift/llm/template/template/mistral.py
|
| 177 |
+
swift/llm/template/template/molmo.py
|
| 178 |
+
swift/llm/template/template/moonshot.py
|
| 179 |
+
swift/llm/template/template/mplug.py
|
| 180 |
+
swift/llm/template/template/openbuddy.py
|
| 181 |
+
swift/llm/template/template/pixtral.py
|
| 182 |
+
swift/llm/template/template/qwen.py
|
| 183 |
+
swift/llm/template/template/stepfun.py
|
| 184 |
+
swift/llm/template/template/utils.py
|
| 185 |
+
swift/llm/template/template/valley.py
|
| 186 |
+
swift/llm/template/template/yi.py
|
| 187 |
+
swift/llm/train/__init__.py
|
| 188 |
+
swift/llm/train/callback.py
|
| 189 |
+
swift/llm/train/kto.py
|
| 190 |
+
swift/llm/train/pt.py
|
| 191 |
+
swift/llm/train/rlhf.py
|
| 192 |
+
swift/llm/train/sft.py
|
| 193 |
+
swift/llm/train/tuner.py
|
| 194 |
+
swift/megatron/__init__.py
|
| 195 |
+
swift/megatron/init.py
|
| 196 |
+
swift/megatron/argument/__init__.py
|
| 197 |
+
swift/megatron/argument/megatron_args.py
|
| 198 |
+
swift/megatron/argument/train_args.py
|
| 199 |
+
swift/megatron/model/__init__.py
|
| 200 |
+
swift/megatron/model/config.py
|
| 201 |
+
swift/megatron/model/constant.py
|
| 202 |
+
swift/megatron/model/register.py
|
| 203 |
+
swift/megatron/model/rope.py
|
| 204 |
+
swift/megatron/model/gpt/__init__.py
|
| 205 |
+
swift/megatron/model/gpt/config.py
|
| 206 |
+
swift/megatron/model/gpt/hf2mcore.py
|
| 207 |
+
swift/megatron/model/gpt/mcore2hf.py
|
| 208 |
+
swift/megatron/model/gpt/model.py
|
| 209 |
+
swift/megatron/train/__init__.py
|
| 210 |
+
swift/megatron/train/patcher.py
|
| 211 |
+
swift/megatron/train/pt.py
|
| 212 |
+
swift/megatron/train/sft.py
|
| 213 |
+
swift/megatron/train/utils.py
|
| 214 |
+
swift/megatron/utils/__init__.py
|
| 215 |
+
swift/megatron/utils/convert.py
|
| 216 |
+
swift/megatron/utils/patcher.py
|
| 217 |
+
swift/plugin/__init__.py
|
| 218 |
+
swift/plugin/callback.py
|
| 219 |
+
swift/plugin/loss.py
|
| 220 |
+
swift/plugin/metric.py
|
| 221 |
+
swift/plugin/multi_turn.py
|
| 222 |
+
swift/plugin/optimizer.py
|
| 223 |
+
swift/plugin/orm.py
|
| 224 |
+
swift/plugin/prm.py
|
| 225 |
+
swift/plugin/rm_plugin.py
|
| 226 |
+
swift/plugin/tuner.py
|
| 227 |
+
swift/plugin/agent_template/__init__.py
|
| 228 |
+
swift/plugin/agent_template/base.py
|
| 229 |
+
swift/plugin/agent_template/extra.py
|
| 230 |
+
swift/plugin/agent_template/glm4.py
|
| 231 |
+
swift/plugin/agent_template/hermes.py
|
| 232 |
+
swift/plugin/agent_template/llama.py
|
| 233 |
+
swift/plugin/agent_template/qwen.py
|
| 234 |
+
swift/plugin/agent_template/react.py
|
| 235 |
+
swift/plugin/agent_template/toolbench.py
|
| 236 |
+
swift/plugin/loss_scale/__init__.py
|
| 237 |
+
swift/plugin/loss_scale/loss_scale.py
|
| 238 |
+
swift/plugin/loss_scale/utils.py
|
| 239 |
+
swift/plugin/loss_scale/config/agentflan.json
|
| 240 |
+
swift/plugin/loss_scale/config/alpha_umi.json
|
| 241 |
+
swift/plugin/loss_scale/config/hermes.json
|
| 242 |
+
swift/plugin/loss_scale/config/ignore_empty_think.json
|
| 243 |
+
swift/plugin/loss_scale/config/qwen.json
|
| 244 |
+
swift/plugin/loss_scale/config/react.json
|
| 245 |
+
swift/trainers/__init__.py
|
| 246 |
+
swift/trainers/arguments.py
|
| 247 |
+
swift/trainers/callback.py
|
| 248 |
+
swift/trainers/mixin.py
|
| 249 |
+
swift/trainers/rlhf_arguments.py
|
| 250 |
+
swift/trainers/torchacc_mixin.py
|
| 251 |
+
swift/trainers/trainer_factory.py
|
| 252 |
+
swift/trainers/trainers.py
|
| 253 |
+
swift/trainers/utils.py
|
| 254 |
+
swift/trainers/optimizers/__init__.py
|
| 255 |
+
swift/trainers/optimizers/galore/__init__.py
|
| 256 |
+
swift/trainers/optimizers/galore/adafactor.py
|
| 257 |
+
swift/trainers/optimizers/galore/adamw.py
|
| 258 |
+
swift/trainers/optimizers/galore/adamw8bit.py
|
| 259 |
+
swift/trainers/optimizers/galore/galore_projector.py
|
| 260 |
+
swift/trainers/optimizers/galore/utils.py
|
| 261 |
+
swift/trainers/rlhf_trainer/__init__.py
|
| 262 |
+
swift/trainers/rlhf_trainer/cpo_trainer.py
|
| 263 |
+
swift/trainers/rlhf_trainer/dpo_trainer.py
|
| 264 |
+
swift/trainers/rlhf_trainer/grpo_trainer.py
|
| 265 |
+
swift/trainers/rlhf_trainer/kto_trainer.py
|
| 266 |
+
swift/trainers/rlhf_trainer/orpo_trainer.py
|
| 267 |
+
swift/trainers/rlhf_trainer/ppo_trainer.py
|
| 268 |
+
swift/trainers/rlhf_trainer/reward_trainer.py
|
| 269 |
+
swift/trainers/rlhf_trainer/rlhf_mixin.py
|
| 270 |
+
swift/trainers/rlhf_trainer/utils.py
|
| 271 |
+
swift/trainers/rlhf_trainer/vllm_client.py
|
| 272 |
+
swift/trainers/sequence_parallel/__init__.py
|
| 273 |
+
swift/trainers/sequence_parallel/base.py
|
| 274 |
+
swift/trainers/sequence_parallel/ulysses.py
|
| 275 |
+
swift/trainers/sequence_parallel/xtuner.py
|
| 276 |
+
swift/tuners/__init__.py
|
| 277 |
+
swift/tuners/adapter.py
|
| 278 |
+
swift/tuners/base.py
|
| 279 |
+
swift/tuners/llamapro.py
|
| 280 |
+
swift/tuners/lora.py
|
| 281 |
+
swift/tuners/lora_layers.py
|
| 282 |
+
swift/tuners/mapping.py
|
| 283 |
+
swift/tuners/neftune.py
|
| 284 |
+
swift/tuners/part.py
|
| 285 |
+
swift/tuners/peft.py
|
| 286 |
+
swift/tuners/prompt.py
|
| 287 |
+
swift/tuners/reft.py
|
| 288 |
+
swift/tuners/restuning.py
|
| 289 |
+
swift/tuners/restuning_components.py
|
| 290 |
+
swift/tuners/side.py
|
| 291 |
+
swift/tuners/utils.py
|
| 292 |
+
swift/tuners/longlora/__init__.py
|
| 293 |
+
swift/tuners/longlora/llama.py
|
| 294 |
+
swift/tuners/longlora/longlora.py
|
| 295 |
+
swift/tuners/scetuning/__init__.py
|
| 296 |
+
swift/tuners/scetuning/scetuning.py
|
| 297 |
+
swift/tuners/scetuning/scetuning_components.py
|
| 298 |
+
swift/ui/__init__.py
|
| 299 |
+
swift/ui/app.py
|
| 300 |
+
swift/ui/base.py
|
| 301 |
+
swift/ui/llm_eval/__init__.py
|
| 302 |
+
swift/ui/llm_eval/eval.py
|
| 303 |
+
swift/ui/llm_eval/llm_eval.py
|
| 304 |
+
swift/ui/llm_eval/model.py
|
| 305 |
+
swift/ui/llm_eval/runtime.py
|
| 306 |
+
swift/ui/llm_export/__init__.py
|
| 307 |
+
swift/ui/llm_export/export.py
|
| 308 |
+
swift/ui/llm_export/llm_export.py
|
| 309 |
+
swift/ui/llm_export/model.py
|
| 310 |
+
swift/ui/llm_export/runtime.py
|
| 311 |
+
swift/ui/llm_infer/__init__.py
|
| 312 |
+
swift/ui/llm_infer/generate.py
|
| 313 |
+
swift/ui/llm_infer/llm_infer.py
|
| 314 |
+
swift/ui/llm_infer/model.py
|
| 315 |
+
swift/ui/llm_infer/runtime.py
|
| 316 |
+
swift/ui/llm_train/__init__.py
|
| 317 |
+
swift/ui/llm_train/advanced.py
|
| 318 |
+
swift/ui/llm_train/dataset.py
|
| 319 |
+
swift/ui/llm_train/galore.py
|
| 320 |
+
swift/ui/llm_train/hyper.py
|
| 321 |
+
swift/ui/llm_train/lisa.py
|
| 322 |
+
swift/ui/llm_train/llamapro.py
|
| 323 |
+
swift/ui/llm_train/llm_train.py
|
| 324 |
+
swift/ui/llm_train/lora.py
|
| 325 |
+
swift/ui/llm_train/model.py
|
| 326 |
+
swift/ui/llm_train/quantization.py
|
| 327 |
+
swift/ui/llm_train/report_to.py
|
| 328 |
+
swift/ui/llm_train/rlhf.py
|
| 329 |
+
swift/ui/llm_train/runtime.py
|
| 330 |
+
swift/ui/llm_train/save.py
|
| 331 |
+
swift/ui/llm_train/self_cog.py
|
| 332 |
+
swift/ui/llm_train/utils.py
|
| 333 |
+
swift/utils/__init__.py
|
| 334 |
+
swift/utils/constants.py
|
| 335 |
+
swift/utils/env.py
|
| 336 |
+
swift/utils/import_utils.py
|
| 337 |
+
swift/utils/io_utils.py
|
| 338 |
+
swift/utils/logger.py
|
| 339 |
+
swift/utils/np_utils.py
|
| 340 |
+
swift/utils/tb_utils.py
|
| 341 |
+
swift/utils/torch_utils.py
|
| 342 |
+
swift/utils/torchacc_utils.py
|
| 343 |
+
swift/utils/utils.py
|
| 344 |
+
tests/__init__.py
|
| 345 |
+
tests/model_tag.py
|
| 346 |
+
tests/run.py
|
| 347 |
+
tests/test_utils.py
|
| 348 |
+
tests/hub/__init__.py
|
| 349 |
+
tests/hub/test_check_model.py
|
| 350 |
+
tests/llm/__init__.py
|
| 351 |
+
tests/llm/load_model.py
|
| 352 |
+
tests/llm/load_template.py
|
| 353 |
+
tests/llm/test_custom.py
|
| 354 |
+
tests/llm/test_dataset.py
|
| 355 |
+
tests/llm/test_ollama_export.py
|
| 356 |
+
tests/llm/test_run.py
|
| 357 |
+
tests/llm/test_run3.py
|
| 358 |
+
tests/llm/test_template.py
|
| 359 |
+
tests/llm/test_utils.py
|
| 360 |
+
tests/tuners/__init__.py
|
| 361 |
+
tests/tuners/test_extra_state_dict.py
|
| 362 |
+
tests/tuners/test_merged_linear.py
|
| 363 |
+
tests/tuners/test_neft.py
|
| 364 |
+
tests/tuners/test_peft.py
|
| 365 |
+
tests/tuners/test_scetuning.py
|
| 366 |
+
tests/tuners/test_swift_base.py
|
| 367 |
+
tests/tuners/test_swift_device_map.py
|
| 368 |
+
tests/tuners/test_swift_restuning.py
|
| 369 |
+
tests/utils/__init__.py
|
| 370 |
+
tests/utils/test_file_utils.py
|
| 371 |
+
tests/utils/test_io_utils.py
|
| 372 |
+
tests/utils/test_split_str_parts_by.py
|
| 373 |
+
tests/utils/test_torch_utils.py
|
ms-swift/ms_swift.egg-info/entry_points.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
[console_scripts]
|
| 2 |
+
megatron = swift.cli._megatron.main:cli_main
|
| 3 |
+
swift = swift.cli.main:cli_main
|
ms-swift/ms_swift.egg-info/top_level.txt
ADDED
|
@@ -0,0 +1,2 @@
|
|
|
|
|
|
|
|
|
|
| 1 |
+
swift
|
| 2 |
+
tests
|
ms-swift/requirements/framework.txt
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
accelerate
|
| 2 |
+
addict
|
| 3 |
+
aiohttp
|
| 4 |
+
attrdict
|
| 5 |
+
binpacking
|
| 6 |
+
charset_normalizer
|
| 7 |
+
cpm_kernels
|
| 8 |
+
dacite
|
| 9 |
+
datasets>=3.0,<3.4
|
| 10 |
+
einops
|
| 11 |
+
fastapi
|
| 12 |
+
gradio>=3.40.0
|
| 13 |
+
importlib_metadata
|
| 14 |
+
jieba
|
| 15 |
+
matplotlib
|
| 16 |
+
modelscope>=1.23
|
| 17 |
+
nltk
|
| 18 |
+
numpy<2.0
|
| 19 |
+
openai
|
| 20 |
+
oss2
|
| 21 |
+
pandas
|
| 22 |
+
peft>=0.11,<0.16
|
| 23 |
+
pillow
|
| 24 |
+
requests
|
| 25 |
+
rouge
|
| 26 |
+
safetensors
|
| 27 |
+
scipy
|
| 28 |
+
sentencepiece
|
| 29 |
+
simplejson>=3.3.0
|
| 30 |
+
sortedcontainers>=1.5.9
|
| 31 |
+
tensorboard
|
| 32 |
+
tiktoken
|
| 33 |
+
tqdm
|
| 34 |
+
transformers>=4.33,<4.53
|
| 35 |
+
transformers_stream_generator
|
| 36 |
+
trl>=0.13,<0.18
|
| 37 |
+
uvicorn
|
| 38 |
+
zstandard
|
ms-swift/requirements/tests.txt
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
expecttest
|
| 2 |
+
flake8
|
| 3 |
+
isort>=4.3.21
|
| 4 |
+
modelscope
|
| 5 |
+
pre-commit
|
| 6 |
+
yapf==0.30.0  # pin to a fixed version to ensure consistent auto-styling
|
ms-swift/swift/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (2.13 kB). View file
|
|
|
ms-swift/swift/cli/_megatron/main.py
ADDED
|
@@ -0,0 +1,20 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from typing import Dict
|
| 3 |
+
|
| 4 |
+
from swift.utils import get_logger
|
| 5 |
+
from ..main import cli_main as swift_cli_main
|
| 6 |
+
|
| 7 |
+
logger = get_logger()
|
| 8 |
+
|
| 9 |
+
ROUTE_MAPPING: Dict[str, str] = {
|
| 10 |
+
'sft': 'swift.cli._megatron.sft',
|
| 11 |
+
'pt': 'swift.cli._megatron.pt',
|
| 12 |
+
}
|
| 13 |
+
|
| 14 |
+
|
| 15 |
+
def cli_main():
|
| 16 |
+
return swift_cli_main(ROUTE_MAPPING)
|
| 17 |
+
|
| 18 |
+
|
| 19 |
+
if __name__ == '__main__':
|
| 20 |
+
cli_main()
|
ms-swift/swift/cli/infer.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from swift.llm import infer_main
|
| 3 |
+
|
| 4 |
+
if __name__ == '__main__':
|
| 5 |
+
infer_main()
|
ms-swift/swift/cli/merge_lora.py
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from swift.llm import ExportArguments, SwiftPipeline, merge_lora
|
| 3 |
+
|
| 4 |
+
|
| 5 |
+
class SwiftMergeLoRA(SwiftPipeline):
|
| 6 |
+
args_class = ExportArguments
|
| 7 |
+
args: args_class
|
| 8 |
+
|
| 9 |
+
def run(self):
|
| 10 |
+
merge_lora(self.args)
|
| 11 |
+
|
| 12 |
+
|
| 13 |
+
if __name__ == '__main__':
|
| 14 |
+
SwiftMergeLoRA().main()
|
ms-swift/swift/cli/rlhf.py
ADDED
|
@@ -0,0 +1,5 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from swift.llm import rlhf_main
|
| 3 |
+
|
| 4 |
+
if __name__ == '__main__':
|
| 5 |
+
rlhf_main()
|
ms-swift/swift/hub/constant.py
ADDED
|
@@ -0,0 +1,6 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from modelscope.hub import constants
|
| 3 |
+
|
| 4 |
+
constants.API_HTTP_CLIENT_TIMEOUT = 5
|
| 5 |
+
constants.API_FILE_DOWNLOAD_TIMEOUT = 300
|
| 6 |
+
constants.API_FILE_DOWNLOAD_CHUNK_SIZE = 1024 * 1024 * 16
|
ms-swift/swift/hub/hub.py
ADDED
|
@@ -0,0 +1,451 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
import os
|
| 3 |
+
import tempfile
|
| 4 |
+
from contextlib import contextmanager
|
| 5 |
+
from functools import partial
|
| 6 |
+
from pathlib import Path
|
| 7 |
+
from typing import List, Literal, Optional, Union
|
| 8 |
+
|
| 9 |
+
import huggingface_hub
|
| 10 |
+
from huggingface_hub import RepoUrl
|
| 11 |
+
from huggingface_hub.hf_api import api, future_compatible
|
| 12 |
+
from requests.exceptions import HTTPError
|
| 13 |
+
from transformers import trainer
|
| 14 |
+
from transformers.utils import logging, strtobool
|
| 15 |
+
|
| 16 |
+
from swift.utils.env import use_hf_hub
|
| 17 |
+
|
| 18 |
+
logger = logging.get_logger(__name__)
|
| 19 |
+
|
| 20 |
+
|
| 21 |
+
class HubOperation:
    """Abstract interface over a model hub backend.

    Concrete subclasses (e.g. a ModelScope-backed implementation) provide
    login, repo creation, folder upload, dataset loading and model download.
    ``patch_hub`` is a no-op context manager by default; subclasses may
    override it to monkeypatch third-party hub entry points.
    """

    @classmethod
    @contextmanager
    def patch_hub(cls):
        # Default: no patching; subclasses override to redirect hub calls.
        yield

    @classmethod
    def try_login(cls, token: Optional[str] = None) -> bool:
        """Try to login to the hub

        Args:
            token: The hub token to use

        Returns:
            bool: Whether login is successful
        """
        raise NotImplementedError

    @classmethod
    def create_model_repo(cls, repo_id: str, token: Optional[str] = None, private: bool = False):
        """Create a model repo on the hub

        Args:
            repo_id: The model id of the hub
            token: The hub token to use
            private: If is a private repo
        """
        raise NotImplementedError

    @classmethod
    def push_to_hub(cls,
                    repo_id: str,
                    folder_path: Union[str, Path],
                    path_in_repo: Optional[str] = None,
                    commit_message: Optional[str] = None,
                    commit_description: Optional[str] = None,
                    token: Union[str, bool, None] = None,
                    private: bool = False,
                    revision: Optional[str] = 'master',
                    ignore_patterns: Optional[Union[List[str], str]] = None,
                    **kwargs):
        """Push a model-like folder to the hub

        Args:
            repo_id: The repo id
            folder_path: The local folder path
            path_in_repo: Which remote folder to put the local files in
            commit_message: The commit message of git
            commit_description: The commit description
            token: The hub token
            private: Private hub or not
            revision: The revision to push to
            ignore_patterns: The ignore file patterns
        """
        raise NotImplementedError

    @classmethod
    def load_dataset(cls,
                     dataset_id: str,
                     subset_name: str,
                     split: str,
                     streaming: bool = False,
                     revision: Optional[str] = None):
        """Load a dataset from the repo

        Args:
            dataset_id: The dataset id
            subset_name: The subset name of the dataset
            split: The split info
            streaming: Streaming mode
            revision: The revision of the dataset

        Returns:
            The Dataset instance
        """
        raise NotImplementedError

    @classmethod
    def download_model(cls,
                       model_id_or_path: Optional[str] = None,
                       revision: Optional[str] = None,
                       download_model: bool = True,
                       ignore_patterns: Optional[List[str]] = None,
                       **kwargs):
        """Download model from the hub

        Args:
            model_id_or_path: The model id
            revision: The model revision
            download_model: Whether downloading bin/safetensors files, this is usually useful when only
                using tokenizer
            ignore_patterns: Custom ignore pattern
            **kwargs:

        Returns:
            The local dir
        """
        raise NotImplementedError
|
| 120 |
+
|
| 121 |
+
|
| 122 |
+
class MSHub(HubOperation):
|
| 123 |
+
ms_token = None
|
| 124 |
+
|
| 125 |
+
@staticmethod
|
| 126 |
+
def create_repo(repo_id: str, *, token: Union[str, bool, None] = None, private: bool = False, **kwargs) -> RepoUrl:
|
| 127 |
+
"""
|
| 128 |
+
Create a new repository on the hub.
|
| 129 |
+
|
| 130 |
+
Args:
|
| 131 |
+
repo_id: The ID of the repository to create.
|
| 132 |
+
token: The authentication token to use.
|
| 133 |
+
private: Whether the repository should be private.
|
| 134 |
+
**kwargs: Additional arguments.
|
| 135 |
+
|
| 136 |
+
Returns:
|
| 137 |
+
RepoUrl: The URL of the created repository.
|
| 138 |
+
"""
|
| 139 |
+
hub_model_id = MSHub.create_model_repo(repo_id, token, private)
|
| 140 |
+
return RepoUrl(url=hub_model_id, )
|
| 141 |
+
|
| 142 |
+
    @staticmethod
    @future_compatible
    def upload_folder(
        self,
        *,
        repo_id: str,
        folder_path: Union[str, Path],
        path_in_repo: Optional[str] = None,
        commit_message: Optional[str] = None,
        commit_description: Optional[str] = None,
        token: Union[str, bool, None] = None,
        revision: Optional[str] = 'master',
        ignore_patterns: Optional[Union[List[str], str]] = None,
        **kwargs,
    ):
        # NOTE: although declared @staticmethod, this takes a `self`
        # parameter so it can stand in for HfApi.upload_folder (callers
        # bind the HfApi instance via functools.partial in patch_hub).
        # @future_compatible mirrors huggingface_hub's async/sync API shim.
        from modelscope.utils.repo_utils import CommitInfo
        # Delegate the actual upload to ModelScope; `True` is the `private`
        # positional argument of push_to_hub.
        MSHub.push_to_hub(repo_id, folder_path, path_in_repo, commit_message, commit_description, token, True, revision,
                          ignore_patterns)
        # Return a CommitInfo so huggingface_hub-style callers keep working.
        return CommitInfo(
            commit_url=f'https://www.modelscope.cn/models/{repo_id}/files',
            commit_message=commit_message,
            commit_description=commit_description,
            oid=None,
        )
|
| 166 |
+
|
| 167 |
+
    @classmethod
    @contextmanager
    def patch_hub(cls):
        """Temporarily redirect huggingface_hub (and transformers.trainer's
        re-exported copies) repo creation/upload to ModelScope.

        The originals are restored on exit even if the body raises.
        """
        # Save the originals so they can be restored in `finally`.
        hub_create_repo = huggingface_hub.create_repo
        hub_upload_folder = huggingface_hub.upload_folder
        trainer_create_repo = trainer.create_repo
        trainer_upload_folder = trainer.upload_folder

        huggingface_hub.create_repo = cls.create_repo
        # upload_folder is written HfApi-style; bind the module-level `api`
        # instance as its first (self) argument.
        huggingface_hub.upload_folder = partial(cls.upload_folder, api)
        trainer.create_repo = cls.create_repo
        trainer.upload_folder = partial(cls.upload_folder, api)
        try:
            yield
        finally:
            huggingface_hub.create_repo = hub_create_repo
            huggingface_hub.upload_folder = hub_upload_folder
            trainer.create_repo = trainer_create_repo
            trainer.upload_folder = trainer_upload_folder
|
| 186 |
+
|
| 187 |
+
@classmethod
def try_login(cls, token: Optional[str] = None) -> bool:
    """Attempt a ModelScope hub login.

    Args:
        token: Explicit API token; falls back to the `MODELSCOPE_API_TOKEN`
            environment variable when omitted.

    Returns:
        True if a token was found and the login call was made, else False.
    """
    from modelscope import HubApi
    token = token if token is not None else os.environ.get('MODELSCOPE_API_TOKEN')
    if not token:
        return False
    HubApi().login(token)
    return True
|
| 197 |
+
|
| 198 |
+
@classmethod
def create_model_repo(cls, repo_id: str, token: Optional[str] = None, private: bool = False) -> str:
    """Ensure a model repo exists on the ModelScope hub and seed it with meta files.

    Args:
        repo_id: Repo id; a bare name (no '/') is qualified with the logged-in
            user's namespace.
        token: ModelScope API token; `try_login` also checks `MODELSCOPE_API_TOKEN`.
        private: Create the repo as private instead of public.

    Returns:
        The repo id actually used (fully qualified as 'user/name' when the input
        was a bare name).

    Raises:
        ValueError: If no usable token is available.
    """
    from modelscope import HubApi
    from modelscope.hub.api import ModelScopeConfig
    from modelscope.hub.constants import ModelVisibility
    assert repo_id is not None, 'Please enter a valid hub_model_id'

    if not cls.try_login(token):
        raise ValueError('Please specify a token by `--hub_token` or `MODELSCOPE_API_TOKEN=xxx`')
    cls.ms_token = token
    visibility = ModelVisibility.PRIVATE if private else ModelVisibility.PUBLIC
    api = HubApi()
    if '/' not in repo_id:
        user_name = ModelScopeConfig.get_user_info()[0]
        assert isinstance(user_name, str)
        # Bug fix: the qualified id was previously computed into a separate
        # variable and never used — the repo was created, seeded and returned
        # under the bare, un-namespaced name. Use the qualified id throughout.
        repo_id = f'{user_name}/{repo_id}'
        logger.info(f"'/' not in hub_model_id, pushing to personal repo {repo_id}")
    try:
        api.create_model(repo_id, visibility)
    except HTTPError:
        # The remote repository has been created
        pass

    with tempfile.TemporaryDirectory() as temp_cache_dir:
        from modelscope.hub.repository import Repository
        repo = Repository(temp_cache_dir, repo_id)
        # Track large weight files with git-lfs.
        cls.add_patterns_to_gitattributes(repo, ['*.safetensors', '*.bin', '*.pt'])
        # Add 'runs/' to .gitignore, ignore tensorboard files
        cls.add_patterns_to_gitignore(repo, ['runs/', 'images/'])
        cls.add_patterns_to_file(
            repo,
            'configuration.json', ['{"framework": "pytorch", "task": "text-generation", "allow_remote": true}'],
            ignore_push_error=True)
        # Add '*.sagemaker' to .gitignore if using SageMaker
        if os.environ.get('SM_TRAINING_ENV'):
            cls.add_patterns_to_gitignore(repo, ['*.sagemaker-uploading', '*.sagemaker-uploaded'],
                                          'Add `*.sagemaker` patterns to .gitignore')
    return repo_id
|
| 236 |
+
|
| 237 |
+
@classmethod
def push_to_hub(cls,
                repo_id: str,
                folder_path: Union[str, Path],
                path_in_repo: Optional[str] = None,
                commit_message: Optional[str] = None,
                commit_description: Optional[str] = None,
                token: Union[str, bool, None] = None,
                private: bool = False,
                revision: Optional[str] = 'master',
                ignore_patterns: Optional[Union[List[str], str]] = None,
                **kwargs):
    """Upload `folder_path` to a ModelScope model repo, creating the repo if needed.

    Args:
        repo_id: Target repo; bare names are qualified by `create_model_repo`.
        folder_path: Local directory to upload.
        path_in_repo: If set, partial submission is not supported; the folder
            itself is uploaded with its basename used as the modelscope `tag`.
        commit_message / commit_description: Joined into one commit message.
        token: API token; falls back to the token cached by `create_model_repo`.
        private: Create the repo as private.
        revision: Target branch; `None`/'main' are mapped to ModelScope's 'master'.
        ignore_patterns: Upload-exclusion patterns ('_*' is always allowed through).
    """
    # Bug fix: use the (possibly user-qualified) repo id returned by
    # create_model_repo instead of discarding it.
    repo_id = cls.create_model_repo(repo_id, token, private)
    from modelscope import push_to_hub
    commit_message = commit_message or 'Upload folder using api'
    if commit_description:
        commit_message = commit_message + '\n' + commit_description
    # ModelScope inference expects a configuration.json; write a default if absent.
    if not os.path.exists(os.path.join(folder_path, 'configuration.json')):
        with open(os.path.join(folder_path, 'configuration.json'), 'w', encoding='utf-8') as f:
            f.write('{"framework": "pytorch", "task": "text-generation", "allow_remote": true}')
    if ignore_patterns:
        ignore_patterns = [p for p in ignore_patterns if p != '_*']
    if path_in_repo:
        # We don't support part submit for now
        path_in_repo = os.path.basename(folder_path)
        folder_path = os.path.dirname(folder_path)
        ignore_patterns = []
    if revision is None or revision == 'main':
        revision = 'master'
    push_to_hub(
        repo_id,
        folder_path,
        token or cls.ms_token,
        private,
        commit_message=commit_message,
        ignore_file_pattern=ignore_patterns,
        revision=revision,
        tag=path_in_repo)
|
| 275 |
+
|
| 276 |
+
@classmethod
def load_dataset(cls,
                 dataset_id: str,
                 subset_name: str,
                 split: str,
                 streaming: bool = False,
                 revision: Optional[str] = None,
                 download_mode: Literal['force_redownload', 'reuse_dataset_if_exists'] = 'reuse_dataset_if_exists',
                 token: Optional[str] = None,
                 **kwargs):
    """Load a dataset from the ModelScope hub via `MsDataset.load`.

    `None`/'main' revisions are translated to ModelScope's default 'master'
    branch; a best-effort login is attempted first so private sets resolve.
    """
    from modelscope import MsDataset
    cls.try_login(token)
    revision = 'master' if revision in (None, 'main') else revision

    return MsDataset.load(
        dataset_id,
        subset_name=subset_name,
        split=split,
        version=revision,
        download_mode=download_mode,
        use_streaming=streaming,
    )
|
| 299 |
+
|
| 300 |
+
@classmethod
def download_model(cls,
                   model_id_or_path: Optional[str] = None,
                   revision: Optional[str] = None,
                   ignore_patterns: Optional[List[str]] = None,
                   token: Optional[str] = None,
                   **kwargs):
    """Snapshot-download a model from the ModelScope hub and return the local path.

    `None`/'main' revisions are mapped to ModelScope's 'master' default branch.
    """
    cls.try_login(token)
    revision = 'master' if revision in (None, 'main') else revision
    logger.info(f'Downloading the model from ModelScope Hub, model_id: {model_id_or_path}')
    from modelscope import snapshot_download
    return snapshot_download(model_id_or_path, revision, ignore_patterns=ignore_patterns, **kwargs)
|
| 313 |
+
|
| 314 |
+
@staticmethod
def add_patterns_to_file(repo,
                         file_name: str,
                         patterns: List[str],
                         commit_message: Optional[str] = None,
                         ignore_push_error=False) -> None:
    """Append any missing *patterns* to *file_name* in *repo* and push the commit.

    Args:
        repo: A modelscope `Repository`-like object exposing `model_dir` and
            `push(commit_message)`.
        file_name: File inside the repo working dir (e.g. '.gitignore').
        patterns: Lines to ensure are present; a bare string is treated as one.
        commit_message: Defaults to a message naming the first pattern.
        ignore_push_error: Swallow any exception raised by `repo.push`.
    """
    if isinstance(patterns, str):
        patterns = [patterns]
    if commit_message is None:
        commit_message = f'Add `{patterns[0]}` patterns to {file_name}'

    # Read whatever is currently in the file, if anything.
    file_path = os.path.join(repo.model_dir, file_name)
    current_content = ''
    if os.path.exists(file_path):
        with open(file_path, 'r', encoding='utf-8') as f:
            current_content = f.read()

    # Append each pattern not already present, keeping one pattern per line.
    content = current_content
    for pattern in patterns:
        if pattern in content:
            continue
        if content and not content.endswith('\n'):
            content += '\n'
        content += f'{pattern}\n'

    # Only rewrite the file when something actually changed.
    if content != current_content:
        with open(file_path, 'w', encoding='utf-8') as f:
            logger.debug(f'Writing {file_name} file. Content: {content}')
            f.write(content)
    try:
        repo.push(commit_message)
    except Exception:
        # Push failures (e.g. nothing to commit) are tolerated on request.
        if not ignore_push_error:
            raise
|
| 353 |
+
|
| 354 |
+
@staticmethod
def add_patterns_to_gitignore(repo, patterns: List[str], commit_message: Optional[str] = None) -> None:
    """Ensure *patterns* are listed in the repo's .gitignore (push errors ignored)."""
    MSHub.add_patterns_to_file(
        repo, '.gitignore', patterns, commit_message=commit_message, ignore_push_error=True)
|
| 357 |
+
|
| 358 |
+
@staticmethod
def add_patterns_to_gitattributes(repo, patterns: List[str], commit_message: Optional[str] = None) -> None:
    """Register *patterns* as git-lfs tracked entries in the repo's .gitattributes."""
    suffix = 'filter=lfs diff=lfs merge=lfs -text'
    # Append the lfs attribute suffix to any pattern that does not carry it yet.
    lfs_patterns = [p if suffix in p else f'{p} {suffix}' for p in patterns]
    file_name = '.gitattributes'
    if commit_message is None:
        commit_message = f'Add `{patterns[0]}` patterns to {file_name}'
    MSHub.add_patterns_to_file(repo, file_name, lfs_patterns, commit_message, ignore_push_error=True)
|
| 370 |
+
|
| 371 |
+
|
| 372 |
+
class HFHub(HubOperation):
    """Hub operations backed by the HuggingFace hub."""

    @classmethod
    def try_login(cls, token: Optional[str] = None) -> bool:
        # No explicit login step for HF: credentials are resolved per-call from
        # the token argument or huggingface_hub's standard credential chain.
        pass

    @classmethod
    def create_model_repo(cls, repo_id: str, token: Optional[str] = None, private: bool = False) -> str:
        """Create (or reuse) *repo_id* on the HF hub."""
        return api.create_repo(repo_id, token=token, private=private)

    @classmethod
    def push_to_hub(cls,
                    repo_id: str,
                    folder_path: Union[str, Path],
                    path_in_repo: Optional[str] = None,
                    commit_message: Optional[str] = None,
                    commit_description: Optional[str] = None,
                    token: Union[str, bool, None] = None,
                    private: bool = False,
                    revision: Optional[str] = 'master',
                    ignore_patterns: Optional[Union[List[str], str]] = None,
                    **kwargs):
        """Upload *folder_path* to *repo_id*, creating the repo first if needed."""
        cls.create_model_repo(repo_id, token, private)
        # Translate the ModelScope-style default branch to HF's 'main'.
        revision = 'main' if revision in (None, 'master') else revision
        return api.upload_folder(
            repo_id=repo_id,
            folder_path=folder_path,
            path_in_repo=path_in_repo,
            commit_message=commit_message,
            commit_description=commit_description,
            token=token,
            revision=revision,
            ignore_patterns=ignore_patterns,
            **kwargs)

    @classmethod
    def load_dataset(cls,
                     dataset_id: str,
                     subset_name: str,
                     split: str,
                     streaming: bool = False,
                     revision: Optional[str] = None,
                     download_mode: Literal['force_redownload', 'reuse_dataset_if_exists'] = 'reuse_dataset_if_exists',
                     num_proc: Optional[int] = None,
                     **kwargs):
        """Load a dataset (optionally streaming) from the HF hub."""
        from datasets import load_dataset
        revision = 'main' if revision in (None, 'master') else revision
        return load_dataset(
            dataset_id,
            name=subset_name,
            split=split,
            streaming=streaming,
            revision=revision,
            download_mode=download_mode,
            num_proc=num_proc)

    @classmethod
    def download_model(cls,
                       model_id_or_path: Optional[str] = None,
                       revision: Optional[str] = None,
                       ignore_patterns: Optional[List[str]] = None,
                       **kwargs):
        """Snapshot-download a model repo and return its local directory."""
        revision = 'main' if revision in (None, 'master') else revision
        logger.info(f'Downloading the model from HuggingFace Hub, model_id: {model_id_or_path}')
        if strtobool(os.environ.get('USE_HF_TRANSFER', 'False')):
            # Opt-in accelerated downloads through the hf_transfer backend.
            from huggingface_hub import _snapshot_download
            _snapshot_download.HF_HUB_ENABLE_HF_TRANSFER = True
        from huggingface_hub import snapshot_download
        return snapshot_download(
            model_id_or_path, repo_type='model', revision=revision, ignore_patterns=ignore_patterns, **kwargs)
|
| 446 |
+
|
| 447 |
+
|
| 448 |
+
def get_hub(use_hf: Optional[bool] = None):
    """Return the hub-operations class for the configured backend.

    Args:
        use_hf: True selects `HFHub`, False selects `MSHub`; when None the
            choice is derived from `use_hf_hub()`.

    Returns:
        The `HFHub` or `MSHub` class (not an instance).
    """
    if use_hf is None:
        # `bool(...)` replaces the redundant `True if ... else False` form.
        use_hf = bool(use_hf_hub())
    return HFHub if use_hf else MSHub
|
ms-swift/swift/llm/__pycache__/utils.cpython-310.pyc
ADDED
|
Binary file (9.5 kB). View file
|
|
|
ms-swift/swift/llm/app/app.py
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from contextlib import nullcontext
|
| 3 |
+
from typing import List, Union
|
| 4 |
+
|
| 5 |
+
import gradio
|
| 6 |
+
from packaging import version
|
| 7 |
+
|
| 8 |
+
from swift.utils import get_logger
|
| 9 |
+
from ..argument import AppArguments
|
| 10 |
+
from ..base import SwiftPipeline
|
| 11 |
+
from ..infer import run_deploy
|
| 12 |
+
from .build_ui import build_ui
|
| 13 |
+
|
| 14 |
+
logger = get_logger()
|
| 15 |
+
|
| 16 |
+
|
| 17 |
+
class SwiftApp(SwiftPipeline):
    """Pipeline that serves a gradio chat UI on top of a deployed model endpoint."""
    args_class = AppArguments
    args: args_class

    def run(self):
        args = self.args
        # Reuse an externally provided endpoint when given; otherwise start a
        # local deployment whose URL is yielded by the context manager.
        deploy_context = run_deploy(args, return_url=True) if not args.base_url else nullcontext()
        with deploy_context as base_url:
            base_url = base_url or args.base_url
            demo = build_ui(
                base_url,
                args.model_suffix,
                request_config=args.get_request_config(),
                is_multimodal=args.is_multimodal,
                studio_title=args.studio_title,
                lang=args.lang,
                default_system=args.system)
            # The 'pt' backend handles one stream at a time; others can fan out.
            concurrency_count = 1 if args.infer_backend == 'pt' else 16
            # gradio renamed the queue concurrency kwarg in version 4.
            is_gradio3 = version.parse(gradio.__version__) < version.parse('4')
            limit_key = 'concurrency_count' if is_gradio3 else 'default_concurrency_limit'
            demo.queue(**{limit_key: concurrency_count}).launch(
                server_name=args.server_name, server_port=args.server_port, share=args.share)
|
| 41 |
+
|
| 42 |
+
|
| 43 |
+
def app_main(args: Union[List[str], AppArguments, None] = None):
    """Entry point for the `swift app` command: build and run the app pipeline."""
    pipeline = SwiftApp(args)
    return pipeline.main()
|
ms-swift/swift/llm/app/locale.py
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
# UI label translations: element id -> {language code ('en'/'zh') -> display text}.
# Presumably consumed by the gradio UI builder to localize buttons — verify at call sites.
locale_mapping = {
    'modify_system': {
        'en': '🛠️ Set system and clear history',
        'zh': '🛠️ 设置system并清空历史'
    },
    'clear_history': {
        'en': '🧹 Clear history',
        'zh': '🧹 清空历史'
    },
    'submit': {
        'en': '🚀 Send',
        'zh': '🚀 发送'
    },
    'regenerate': {
        'en': '🤔️ Regenerate',
        'zh': '🤔️ 重试'
    },
    'upload': {
        'en': '📁 Upload',
        'zh': '📁 上传'
    }
}
|
ms-swift/swift/llm/argument/__init__.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from .app_args import AppArguments
|
| 3 |
+
from .base_args import BaseArguments
|
| 4 |
+
from .deploy_args import DeployArguments
|
| 5 |
+
from .eval_args import EvalArguments
|
| 6 |
+
from .export_args import ExportArguments
|
| 7 |
+
from .infer_args import InferArguments
|
| 8 |
+
from .rlhf_args import RLHFArguments
|
| 9 |
+
from .sampling_args import SamplingArguments
|
| 10 |
+
from .train_args import TrainArguments
|
| 11 |
+
from .tuner_args import TunerArguments
|
| 12 |
+
from .webui_args import WebUIArguments
|
ms-swift/swift/llm/argument/__pycache__/__init__.cpython-310.pyc
ADDED
|
Binary file (712 Bytes). View file
|
|
|
ms-swift/swift/llm/argument/__pycache__/deploy_args.cpython-310.pyc
ADDED
|
Binary file (3.41 kB). View file
|
|
|
ms-swift/swift/llm/argument/__pycache__/infer_args.cpython-310.pyc
ADDED
|
Binary file (7.37 kB). View file
|
|
|
ms-swift/swift/llm/argument/__pycache__/merge_args.cpython-310.pyc
ADDED
|
Binary file (934 Bytes). View file
|
|
|
ms-swift/swift/llm/argument/base_args/__init__.py
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Copyright (c) Alibaba, Inc. and its affiliates.
|
| 2 |
+
from .base_args import BaseArguments
|
| 3 |
+
from .utils import to_abspath
|