import os

# Default to a single visible GPU; individual tests override this as needed.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Shared training hyper-parameters used by every test below.
# NOTE: this dict is module-global — tests that need different values should
# work on a copy (kwargs.copy()) rather than mutating it in place.
kwargs = {
    'per_device_train_batch_size': 2,
    'per_device_eval_batch_size': 2,
    'save_steps': 5,
    'gradient_accumulation_steps': 4,
    'num_train_epochs': 1,
}
| |
|
| |
|
def test_llm_ddp():
    """LoRA-tune Qwen2-7B-Instruct on two GPUs (DDP), then infer from the last checkpoint."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        gradient_checkpointing_kwargs={'use_reentrant': False},
        target_modules=['all-linear', 'all-embedding'],
        modules_to_save=['all-embedding', 'all-norm'],
        **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=checkpoint, load_data_args=True))
| |
|
| |
|
def test_unsloth():
    """Tune Qwen2-0.5B with the unsloth tuner backend, resume from the checkpoint, then infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    ckpt = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            max_steps=5,
            tuner_backend='unsloth',
            **kwargs))['last_model_checkpoint']
    # Resume from the first run's checkpoint and train for more steps.
    resume_args = TrainArguments(resume_from_checkpoint=ckpt, load_data_args=True, max_steps=10)
    ckpt = sft_main(resume_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
| |
|
| |
|
def test_mllm_mp():
    """LoRA-tune the Valley-Eagle multimodal model with model parallelism over four GPUs, then merge and infer."""
    os.environ['MAX_PIXELS'] = '100352'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='bytedance-research/Valley-Eagle-7B',
        dataset=['modelscope/coco_2014_caption:validation#20'],
        train_type='lora',
        target_modules=['all-linear'],
        freeze_aligner=False,
        **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=checkpoint, load_data_args=True, merge_lora=True))
| |
|
| |
|
def test_llm_streaming():
    """Train Qwen2-7B-Instruct on a streaming dataset for a fixed number of steps, then merge and infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct', dataset=['swift/chinese-c4'], streaming=True, max_steps=16, **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=checkpoint, load_data_args=True, merge_lora=True))
| |
|
| |
|
def test_mllm_streaming():
    """Train Qwen2-VL on mixed streaming datasets for a fixed step budget, then merge and infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
        streaming=True,
        max_steps=16,
        **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=checkpoint, load_data_args=True, merge_lora=True))
| |
|
| |
|
def test_mllm_zero3():
    """Train Qwen2-VL with DeepSpeed ZeRO-3 on two GPUs (training only, no inference step)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # Fix: import only what is used — infer_main/InferArguments were imported but never called.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-VL-7B-Instruct',
            dataset=['modelscope/coco_2014_caption:validation#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            deepspeed='zero3',
            **kwargs))
| |
|
| |
|
def test_qwen_vl():
    """Smoke-train Qwen-VL-Chat on small OCR and captioning subsets (training only)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # Fix: import only what is used — infer_main/InferArguments were imported but never called.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen-VL-Chat',
            dataset=['AI-ModelScope/LaTeX_OCR#40', 'modelscope/coco_2014_caption:validation#40'],
            **kwargs))
| |
|
| |
|
def test_qwen2_audio():
    """Full-parameter-train only the audio tower of Qwen2-Audio (everything else frozen)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # Fix: import only what is used — infer_main/InferArguments were imported but never called.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-Audio-7B-Instruct',
            dataset=['speech_asr/speech_asr_aishell1_trainsets:validation#200'],
            # Freeze everything, then re-enable just the audio tower.
            freeze_parameters_ratio=1,
            trainable_parameters=['audio_tower'],
            train_type='full',
            **kwargs))
| |
|
| |
|
def test_llm_gptq():
    """Tune a GPTQ-Int4-quantized Qwen2-7B-Instruct, then infer from the checkpoint."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct-GPTQ-Int4',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=checkpoint, load_data_args=True))
| |
|
| |
|
def test_llm_awq():
    """Tune an AWQ-quantized Qwen2-7B-Instruct, then infer from the checkpoint."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct-AWQ',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=checkpoint, load_data_args=True))
| |
|
| |
|
def test_mllm_streaming_zero3():
    """Train Qwen2-VL on streaming data with DeepSpeed ZeRO-3 (training only)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # Fix: import only what is used — infer_main/InferArguments were imported but never called.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-VL-7B-Instruct',
            dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
            streaming=True,
            max_steps=16,
            deepspeed='zero3',
            **kwargs))
| |
|
| |
|
def test_mllm_streaming_mp_ddp():
    """Train Qwen2-VL on streaming data across four GPUs (MP + DDP; training only)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    # Fix: import only what is used — infer_main/InferArguments were imported but never called.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-VL-7B-Instruct',
            dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
            streaming=True,
            max_steps=16,
            gradient_checkpointing_kwargs={'use_reentrant': False},
            **kwargs))
| |
|
| |
|
def test_llm_hqq():
    """Tune Qwen2-7B-Instruct with on-the-fly 4-bit HQQ quantization, then infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        quant_method='hqq',
        quant_bits=4,
        **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=checkpoint, load_data_args=True))
| |
|
| |
|
def test_llm_bnb():
    """Tune Qwen2-7B-Instruct with on-the-fly 4-bit bitsandbytes quantization, then infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        quant_method='bnb',
        quant_bits=4,
        **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=checkpoint, load_data_args=True))
| |
|
| |
|
def test_moe():
    """Tune a GPTQ-quantized MoE model (Qwen1.5-MoE-A2.7B), then infer with the adapter."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=checkpoint, load_data_args=True))
| |
|
| |
|
def test_resume_from_checkpoint():
    """Train Qwen2-0.5B on streaming data, resume from the checkpoint with more steps, then infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    ckpt = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            max_steps=5,
            streaming=True,
            **kwargs))['last_model_checkpoint']
    # Resume training (optimizer state included) and continue to step 10.
    resume_args = TrainArguments(
        resume_from_checkpoint=ckpt,
        streaming=True,
        load_data_args=True,
        max_steps=10,
    )
    ckpt = sft_main(resume_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
| |
|
| |
|
def test_resume_only_model():
    """Train with save_only_model under ZeRO-3, then resume loading model weights only."""
    # Fix: removed the redundant function-local `import os` — os is already
    # imported at module level.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    result = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#10', 'AI-ModelScope/alpaca-gpt4-data-en#10'],
            max_steps=20,
            save_only_model=True,
            deepspeed='zero3',
            **kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    # resume_only_model=True restores weights without optimizer/scheduler state.
    # (The return value of this second run is intentionally unused.)
    sft_main(
        TrainArguments(
            resume_from_checkpoint=last_model_checkpoint, load_data_args=True, max_steps=20, resume_only_model=True))
| |
|
| |
|
def test_llm_transformers_4_33():
    """Smoke-train Qwen-7B-Chat (compatibility check for older transformers; training only)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    # Fix: import only what is used — infer_main/InferArguments were imported but never called.
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen-7B-Chat',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            **kwargs))
| |
|
| |
|
def test_predict_with_generate():
    """Train with generation-based evaluation (predict_with_generate) on a 50/50 split."""
    # Fix: removed the redundant function-local `import os` (already imported at
    # module level) and the unused infer_main/InferArguments imports.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-7B-Instruct',
            dataset=['AI-ModelScope/alpaca-gpt4-data-en#40'],
            predict_with_generate=True,
            split_dataset_ratio=0.5,
            **kwargs))
| |
|
| |
|
def test_predict_with_generate_zero3():
    """Train Qwen2-VL with predict_with_generate under DeepSpeed ZeRO-3, ViT unfrozen."""
    # Fix: removed the redundant function-local `import os` (already imported at
    # module level) and the unused infer_main/InferArguments imports.
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-VL-7B-Instruct',
            dataset=['AI-ModelScope/LaTeX_OCR#40'],
            predict_with_generate=True,
            freeze_vit=False,
            split_dataset_ratio=0.5,
            deepspeed='zero3',
            **kwargs))
| |
|
| |
|
def test_template():
    """Self-cognition fine-tune of Qwen2-0.5B with a custom model name/author, then merge and infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    # Bug fix: the original declared `global kwargs` and rebound it to the
    # modified copy, so num_train_epochs=3 leaked into every test that ran
    # afterwards. Use a purely local copy instead (same pattern as test_epoch).
    train_kwargs = kwargs.copy()
    train_kwargs['num_train_epochs'] = 3
    result = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['swift/self-cognition#200'],
            model_name=['小黄'],
            model_author=['swift'],
            **train_kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=last_model_checkpoint, load_data_args=True, merge_lora=True))
| |
|
| |
|
def test_emu3_gen():
    """Overfit Emu3-Gen on two TextCaps samples, then run image-generation inference."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    os.environ['max_position_embeddings'] = '10240'
    os.environ['image_area'] = '518400'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    # Bug fix: the original mutated the shared module-level `kwargs` dict in
    # place (num_train_epochs=100), leaking into every test that ran afterwards.
    # Work on a local copy instead.
    train_kwargs = kwargs.copy()
    train_kwargs['num_train_epochs'] = 100
    result = sft_main(TrainArguments(model='BAAI/Emu3-Gen', dataset=['swift/TextCaps#2'], **train_kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    args = InferArguments(
        ckpt_dir=last_model_checkpoint,
        infer_backend='pt',
        stream=False,
        use_chat_template=False,
        top_k=2048,
        max_new_tokens=40960)
    infer_main(args)
| |
|
| |
|
def test_eval_strategy():
    """Train with evaluation disabled (eval_strategy='no'), then infer with the adapter."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        eval_strategy='no',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=checkpoint, load_data_args=True))
| |
|
| |
|
def test_epoch():
    """Train for 3 epochs with per-epoch checkpointing, then infer from the last checkpoint."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments

    # Local copy so the shared module-level kwargs stays untouched.
    epoch_kwargs = dict(kwargs)
    epoch_kwargs['num_train_epochs'] = 3

    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#50', 'AI-ModelScope/alpaca-gpt4-data-en#50'],
        save_strategy='epoch',
        **epoch_kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=checkpoint, load_data_args=True))
| |
|
| |
|
def test_agent():
    """Agent fine-tune on ToolBench with ReAct loss scaling, then infer with the adapter."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments

    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['swift/ToolBench#500'],
        loss_scale='react',
        agent_template='toolbench',
        **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=checkpoint, load_data_args=True))
| |
|
| |
|
def test_grounding():
    """Grounding fine-tune of Qwen2.5-VL on COCO, then streamed inference with the adapter."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments

    train_args = TrainArguments(
        model='Qwen/Qwen2.5-VL-7B-Instruct', dataset=['AI-ModelScope/coco#200'], dataset_num_proc=4, **kwargs)
    checkpoint = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=checkpoint, load_data_args=True, stream=True, max_new_tokens=2048))
| |
|
| |
|
if __name__ == '__main__':
    # Run a single test per invocation; switch the call here to exercise a
    # different scenario (these are manual GPU smoke tests, not pytest-driven).
    test_grounding()
| |
|