# Student0809's picture
# Add files using upload-large-folder tool
# 7feac49 verified
# raw
# history blame
# 15 kB
import os

# Default to GPU 0; individual tests override CUDA_VISIBLE_DEVICES as needed.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Shared training hyper-parameters, splatted into TrainArguments by every test below.
kwargs = dict(
    per_device_train_batch_size=2,
    per_device_eval_batch_size=2,
    save_steps=5,
    gradient_accumulation_steps=4,
    num_train_epochs=1,
)
def test_llm_ddp():
    """Smoke-test SFT under DDP on two GPUs, then run inference on the saved adapters."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        # ddp_find_unused_parameters=False,
        gradient_checkpointing_kwargs={'use_reentrant': False},
        target_modules=['all-linear', 'all-embedding'],
        modules_to_save=['all-embedding', 'all-norm'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
def test_unsloth():
    """Train Qwen2-0.5B with the unsloth tuner backend, resume once, then infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-0.5B',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        max_steps=5,
        tuner_backend='unsloth',
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    # Resume from the first checkpoint and train a few more steps.
    resumed = sft_main(TrainArguments(resume_from_checkpoint=ckpt, load_data_args=True, max_steps=10))
    infer_main(InferArguments(adapters=resumed['last_model_checkpoint'], load_data_args=True))
def test_mllm_mp():
    """LoRA-tune a multimodal model with the vision aligner unfrozen, across 4 GPUs."""
    os.environ['MAX_PIXELS'] = '100352'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='bytedance-research/Valley-Eagle-7B',
        dataset=['modelscope/coco_2014_caption:validation#20'],
        # dataset=['modelscope/coco_2014_caption:validation#20', 'AI-ModelScope/alpaca-gpt4-data-en#20'],
        train_type='lora',
        target_modules=['all-linear'],
        freeze_aligner=False,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True, merge_lora=True))
def test_llm_streaming():
    """Streaming-dataset SFT for a plain LLM, capped at 16 steps, then merged inference."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct', dataset=['swift/chinese-c4'], streaming=True, max_steps=16, **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True, merge_lora=True))
def test_mllm_streaming():
    """Streaming multimodal SFT for Qwen2-VL, then merged-LoRA inference."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
        streaming=True,
        max_steps=16,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True, merge_lora=True))
def test_mllm_zero3():
    """Multimodal SFT with DeepSpeed ZeRO-3 on two GPUs (training only, no inference step)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        dataset=['modelscope/coco_2014_caption:validation#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        deepspeed='zero3',
        **kwargs)
    sft_main(train_args)
def test_qwen_vl():
    """SFT smoke test for Qwen-VL-Chat on OCR and captioning subsets, two GPUs."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen-VL-Chat',
        dataset=['AI-ModelScope/LaTeX_OCR#40', 'modelscope/coco_2014_caption:validation#40'],
        **kwargs)
    sft_main(train_args)
def test_qwen2_audio():
    """Full-parameter training restricted to the audio tower of Qwen2-Audio."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-Audio-7B-Instruct',
        dataset=['speech_asr/speech_asr_aishell1_trainsets:validation#200'],
        # Freeze everything, then re-enable only the audio tower.
        freeze_parameters_ratio=1,
        trainable_parameters=['audio_tower'],
        train_type='full',
        **kwargs)
    sft_main(train_args)
def test_llm_gptq():
    """Tune a GPTQ-Int4 quantized Qwen2 checkpoint, then infer from the result."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct-GPTQ-Int4',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
def test_llm_awq():
    """Tune an AWQ-quantized Qwen2 checkpoint, then infer from the result."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct-AWQ',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
def test_mllm_streaming_zero3():
    """Streaming multimodal SFT combined with DeepSpeed ZeRO-3 on two GPUs."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
        streaming=True,
        max_steps=16,
        deepspeed='zero3',
        **kwargs)
    sft_main(train_args)
def test_mllm_streaming_mp_ddp():
    """Streaming multimodal SFT with model parallel + DDP over four GPUs."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
        streaming=True,
        max_steps=16,
        gradient_checkpointing_kwargs={'use_reentrant': False},
        **kwargs)
    sft_main(train_args)
def test_llm_hqq():
    """On-the-fly HQQ 4-bit quantized training, then inference from the checkpoint."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        quant_method='hqq',
        quant_bits=4,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
def test_llm_bnb():
    """On-the-fly bitsandbytes 4-bit quantized training, then inference."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        quant_method='bnb',
        quant_bits=4,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
def test_moe():
    """Tune a GPTQ-quantized MoE model and infer with the saved adapters."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
def test_resume_from_checkpoint():
    """Train on a streaming dataset, resume from the saved checkpoint, then infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    first = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            max_steps=5,
            streaming=True,
            **kwargs))
    resume_args = TrainArguments(
        resume_from_checkpoint=first['last_model_checkpoint'],
        streaming=True,
        load_data_args=True,
        max_steps=10,
    )
    second = sft_main(resume_args)
    infer_main(InferArguments(adapters=second['last_model_checkpoint'], load_data_args=True))
def test_resume_only_model():
    """Train with ZeRO-3 saving only model weights, then resume weights-only.

    Fixes: the function-local ``import os`` was redundant (``os`` is imported at
    module level) and the second ``sft_main`` result was bound but never used.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    result = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#10', 'AI-ModelScope/alpaca-gpt4-data-en#10'],
            max_steps=20,
            save_only_model=True,  # checkpoint holds model weights only, no optimizer state
            deepspeed='zero3',
            **kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    # resume_only_model=True: restore weights only, since optimizer state was never saved.
    sft_main(
        TrainArguments(
            resume_from_checkpoint=last_model_checkpoint, load_data_args=True, max_steps=20, resume_only_model=True))
def test_llm_transformers_4_33():
    """SFT smoke test for Qwen-7B-Chat (name suggests a transformers-4.33 compatibility check)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen-7B-Chat',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    sft_main(train_args)
def test_predict_with_generate():
    """Train with generation-based evaluation (predict_with_generate=True).

    Fix: dropped the redundant function-local ``import os`` — ``os`` is already
    imported at module level.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    # 'modelscope/coco_2014_caption:validation#100',
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-7B-Instruct',
            dataset=['AI-ModelScope/alpaca-gpt4-data-en#40'],
            predict_with_generate=True,
            split_dataset_ratio=0.5,  # half of the samples go to the eval split
            **kwargs))
def test_predict_with_generate_zero3():
    """Generation-based evaluation combined with DeepSpeed ZeRO-3 on a VL model.

    Fix: dropped the redundant function-local ``import os`` — ``os`` is already
    imported at module level.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    # 'modelscope/coco_2014_caption:validation#100',
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-VL-7B-Instruct',
            dataset=['AI-ModelScope/LaTeX_OCR#40'],
            predict_with_generate=True,
            freeze_vit=False,
            split_dataset_ratio=0.5,  # half of the samples go to the eval split
            deepspeed='zero3',
            **kwargs))
def test_template():
    """Self-cognition fine-tune with a 3-epoch override, then merged-LoRA inference.

    Fix: the original declared ``global kwargs`` and rebound the module-level dict
    to a copy before mutating it, so ``num_train_epochs=3`` leaked into every test
    that ran afterwards. Use a local copy instead (same pattern as ``test_epoch``).
    """
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_kwargs = kwargs.copy()  # do not touch the shared module-level config
    train_kwargs['num_train_epochs'] = 3
    result = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['swift/self-cognition#200'],
            model_name=['小黄'],
            model_author=['swift'],
            **train_kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=last_model_checkpoint, load_data_args=True, merge_lora=True))
def test_emu3_gen():
    """Fine-tune Emu3-Gen for image generation, then run PT-backend inference.

    Fix: the original mutated the shared module-level ``kwargs`` in place
    (``kwargs['num_train_epochs'] = 100``), leaking the override into every test
    that ran afterwards. Use a local copy instead.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    os.environ['max_position_embeddings'] = '10240'
    os.environ['image_area'] = '518400'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_kwargs = kwargs.copy()  # do not touch the shared module-level config
    train_kwargs['num_train_epochs'] = 100
    result = sft_main(TrainArguments(model='BAAI/Emu3-Gen', dataset=['swift/TextCaps#2'], **train_kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    args = InferArguments(
        ckpt_dir=last_model_checkpoint,
        infer_backend='pt',
        stream=False,
        use_chat_template=False,
        top_k=2048,
        max_new_tokens=40960)
    infer_main(args)
def test_eval_strategy():
    """Training with in-loop evaluation disabled (eval_strategy='no')."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        eval_strategy='no',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
def test_epoch():
    """Per-epoch checkpointing over three epochs, using a local copy of the shared config."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_kwargs = dict(kwargs)  # local override; the shared config stays untouched
    train_kwargs['num_train_epochs'] = 3
    # train_kwargs['save_steps'] = 2 # not use
    result = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-7B-Instruct',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#50', 'AI-ModelScope/alpaca-gpt4-data-en#50'],
            save_strategy='epoch',
            **train_kwargs))
    infer_main(InferArguments(adapters=result['last_model_checkpoint'], load_data_args=True))
def test_agent():
    """Agent/tool-calling SFT with react loss scaling and the toolbench agent template."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['swift/ToolBench#500'],
        loss_scale='react',
        agent_template='toolbench',
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
def test_grounding():
    """Grounding-style SFT on COCO with multi-process preprocessing, then streamed inference."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2.5-VL-7B-Instruct', dataset=['AI-ModelScope/coco#200'], dataset_num_proc=4, **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True, stream=True, max_new_tokens=2048))
if __name__ == '__main__':
    # Manual smoke-test entry point: uncomment exactly the test(s) you want to run.
    # Note: some tests expect specific GPU counts (see the CUDA_VISIBLE_DEVICES lines).
    # test_llm_ddp()
    # test_mllm_mp()
    # test_llm_streaming()
    # test_mllm_streaming()
    # test_mllm_zero3()
    # test_llm_gptq()
    # test_llm_awq()
    # test_mllm_streaming_zero3()
    # test_mllm_streaming_mp_ddp()
    # test_llm_bnb()
    # test_llm_hqq()
    # test_moe()
    # test_resume_from_checkpoint()
    # test_resume_only_model()
    # test_llm_transformers_4_33()
    # test_predict_with_generate()
    # test_predict_with_generate_zero3()
    # test_template()
    # test_qwen_vl()
    # test_qwen2_audio()
    # test_emu3_gen()
    # test_unsloth()
    # test_eval_strategy()
    # test_epoch()
    # test_agent()
    test_grounding()