|
|
import os |
|
|
|
|
|
# Default to a single visible GPU; individual tests override this below.
os.environ['CUDA_VISIBLE_DEVICES'] = '0'

# Shared baseline training arguments for every smoke test in this file.
# Deliberately tiny (batch 2, 1 epoch, save every 5 steps) so each run is fast.
kwargs = {
    'per_device_train_batch_size': 2,
    'per_device_eval_batch_size': 2,
    'save_steps': 5,
    'gradient_accumulation_steps': 4,
    'num_train_epochs': 1,
}
|
|
|
|
|
|
|
|
def test_llm_ddp():
    """LoRA SFT with DDP on two GPUs, then inference from the trained adapter."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        gradient_checkpointing_kwargs={'use_reentrant': False},
        target_modules=['all-linear', 'all-embedding'],
        modules_to_save=['all-embedding', 'all-norm'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
|
|
|
|
def test_unsloth():
    """Train with the unsloth tuner backend, resume from the checkpoint, then infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    first_run = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            max_steps=5,
            tuner_backend='unsloth',
            **kwargs))
    # Continue training from the first checkpoint for 5 more steps.
    resumed_run = sft_main(
        TrainArguments(
            resume_from_checkpoint=first_run['last_model_checkpoint'], load_data_args=True, max_steps=10))
    infer_main(InferArguments(adapters=resumed_run['last_model_checkpoint'], load_data_args=True))
|
|
|
|
|
|
|
|
def test_mllm_mp():
    """Multimodal LoRA SFT with model parallelism on 4 GPUs; merge LoRA for inference."""
    os.environ['MAX_PIXELS'] = '100352'
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='bytedance-research/Valley-Eagle-7B',
        dataset=['modelscope/coco_2014_caption:validation#20'],
        train_type='lora',
        target_modules=['all-linear'],
        freeze_aligner=False,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True, merge_lora=True))
|
|
|
|
|
|
|
|
def test_llm_streaming():
    """SFT on a streaming text dataset, capped at a fixed step count, then infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct', dataset=['swift/chinese-c4'], streaming=True, max_steps=16, **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True, merge_lora=True))
|
|
|
|
|
|
|
|
def test_mllm_streaming():
    """Streaming SFT for a vision-language model over a mix of two datasets."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
        streaming=True,
        max_steps=16,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True, merge_lora=True))
|
|
|
|
|
|
|
|
def test_mllm_zero3():
    """Vision-language SFT under DeepSpeed ZeRO-3 on two GPUs (training only)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        dataset=['modelscope/coco_2014_caption:validation#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        deepspeed='zero3',
        **kwargs)
    sft_main(train_args)
|
|
|
|
|
|
|
|
def test_qwen_vl():
    """Smoke-test SFT of the older Qwen-VL-Chat model on two small image datasets."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen-VL-Chat',
        dataset=['AI-ModelScope/LaTeX_OCR#40', 'modelscope/coco_2014_caption:validation#40'],
        **kwargs)
    sft_main(train_args)
|
|
|
|
|
|
|
|
def test_qwen2_audio():
    """Full-parameter training restricted to the audio_tower of Qwen2-Audio."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-Audio-7B-Instruct',
        dataset=['speech_asr/speech_asr_aishell1_trainsets:validation#200'],
        # Freeze everything, then re-enable only the audio tower.
        freeze_parameters_ratio=1,
        trainable_parameters=['audio_tower'],
        train_type='full',
        **kwargs)
    sft_main(train_args)
|
|
|
|
|
|
|
|
def test_llm_gptq():
    """SFT on a GPTQ int4-quantized model, then inference from the checkpoint."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct-GPTQ-Int4',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
|
|
|
|
|
|
|
|
def test_llm_awq():
    """SFT on an AWQ-quantized model, then inference from the checkpoint."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct-AWQ',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
|
|
|
|
|
|
|
|
def test_mllm_streaming_zero3():
    """Streaming vision-language SFT combined with DeepSpeed ZeRO-3 (training only)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
        streaming=True,
        max_steps=16,
        deepspeed='zero3',
        **kwargs)
    sft_main(train_args)
|
|
|
|
|
|
|
|
def test_mllm_streaming_mp_ddp():
    """Streaming VLM SFT on 4 GPUs combining model parallelism with DDP."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1,2,3'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-VL-7B-Instruct',
        dataset=['modelscope/coco_2014_caption:validation', 'AI-ModelScope/alpaca-gpt4-data-en'],
        streaming=True,
        max_steps=16,
        gradient_checkpointing_kwargs={'use_reentrant': False},
        **kwargs)
    sft_main(train_args)
|
|
|
|
|
|
|
|
def test_llm_hqq():
    """SFT with on-the-fly HQQ 4-bit quantization, then inference."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        quant_method='hqq',
        quant_bits=4,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
|
|
|
|
|
|
|
|
def test_llm_bnb():
    """SFT with bitsandbytes 4-bit quantization (QLoRA-style), then inference."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        quant_method='bnb',
        quant_bits=4,
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=ckpt, load_data_args=True))
|
|
|
|
|
|
|
|
def test_moe():
    """SFT of a GPTQ-quantized Mixture-of-Experts model, then adapter inference."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen1.5-MoE-A2.7B-Chat-GPTQ-Int4',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
|
|
|
|
def test_resume_from_checkpoint():
    """Streaming SFT, resume training from the saved checkpoint, then infer."""
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    first_run = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
            max_steps=5,
            streaming=True,
            **kwargs))
    # Resume for another 5 steps (max_steps is an absolute, not incremental, target).
    resumed_run = sft_main(
        TrainArguments(
            resume_from_checkpoint=first_run['last_model_checkpoint'],
            streaming=True,
            load_data_args=True,
            max_steps=10,
        ))
    infer_main(InferArguments(adapters=resumed_run['last_model_checkpoint'], load_data_args=True))
|
|
|
|
|
|
|
|
def test_resume_only_model():
    """Train with save_only_model under ZeRO-3, then resume loading weights only.

    ``save_only_model=True`` skips saving optimizer/scheduler state, so the resume
    run presumably needs ``resume_only_model=True`` to load just the model weights
    (NOTE(review): confirm against swift's resume semantics).

    Fix: dropped the redundant function-local ``import os`` — ``os`` is already
    imported at module level.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    result = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['AI-ModelScope/alpaca-gpt4-data-zh#10', 'AI-ModelScope/alpaca-gpt4-data-en#10'],
            max_steps=20,
            save_only_model=True,
            deepspeed='zero3',
            **kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    result = sft_main(
        TrainArguments(
            resume_from_checkpoint=last_model_checkpoint, load_data_args=True, max_steps=20, resume_only_model=True))
|
|
|
|
|
|
|
|
def test_llm_transformers_4_33():
    """Smoke-test training of Qwen-7B-Chat (an older-transformers-era model)."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen-7B-Chat',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    sft_main(train_args)
|
|
|
|
|
|
|
|
def test_predict_with_generate():
    """SFT with generation-based evaluation on a half train / half eval split.

    Fix: dropped the redundant function-local ``import os`` — ``os`` is already
    imported at module level.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-7B-Instruct',
            dataset=['AI-ModelScope/alpaca-gpt4-data-en#40'],
            predict_with_generate=True,
            split_dataset_ratio=0.5,
            **kwargs))
|
|
|
|
|
|
|
|
def test_predict_with_generate_zero3():
    """VLM SFT with generation-based eval under DeepSpeed ZeRO-3, ViT unfrozen.

    Fix: dropped the redundant function-local ``import os`` — ``os`` is already
    imported at module level.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    sft_main(
        TrainArguments(
            model='Qwen/Qwen2-VL-7B-Instruct',
            dataset=['AI-ModelScope/LaTeX_OCR#40'],
            predict_with_generate=True,
            freeze_vit=False,
            split_dataset_ratio=0.5,
            deepspeed='zero3',
            **kwargs))
|
|
|
|
|
|
|
|
def test_template():
    """Self-cognition fine-tuning exercising the model_name/model_author template fields.

    Bug fix: the original declared ``global kwargs`` and rebound it to a modified
    copy, which permanently replaced the module-level defaults with
    ``num_train_epochs=3`` for every test executed afterwards in the same process.
    A plain local copy achieves the intended per-test override without the leak.
    """
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_kwargs = kwargs.copy()
    train_kwargs['num_train_epochs'] = 3
    result = sft_main(
        TrainArguments(
            model='Qwen/Qwen2-0.5B',
            dataset=['swift/self-cognition#200'],
            model_name=['小黄'],
            model_author=['swift'],
            **train_kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    infer_main(InferArguments(ckpt_dir=last_model_checkpoint, load_data_args=True, merge_lora=True))
|
|
|
|
|
|
|
|
def test_emu3_gen():
    """Image-generation SFT for Emu3-Gen, then non-streaming generation inference.

    Bug fix: the original wrote ``kwargs['num_train_epochs'] = 100`` directly into
    the shared module-level ``kwargs`` dict, leaking a 100-epoch setting into every
    test executed afterwards in the same process. Mutate a local copy instead.
    """
    os.environ['CUDA_VISIBLE_DEVICES'] = '1'
    os.environ['max_position_embeddings'] = '10240'
    os.environ['image_area'] = '518400'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_kwargs = kwargs.copy()
    train_kwargs['num_train_epochs'] = 100
    result = sft_main(TrainArguments(model='BAAI/Emu3-Gen', dataset=['swift/TextCaps#2'], **train_kwargs))
    last_model_checkpoint = result['last_model_checkpoint']
    args = InferArguments(
        ckpt_dir=last_model_checkpoint,
        infer_backend='pt',
        stream=False,
        use_chat_template=False,
        top_k=2048,
        max_new_tokens=40960)
    infer_main(args)
|
|
|
|
|
|
|
|
def test_eval_strategy():
    """SFT with evaluation disabled (eval_strategy='no'), then adapter inference."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        eval_strategy='no',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#100', 'AI-ModelScope/alpaca-gpt4-data-en#100'],
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
|
|
|
|
def test_epoch():
    """Train for 3 epochs with per-epoch checkpointing, then adapter inference."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    # Override the shared defaults without mutating the module-level dict.
    overrides = dict(kwargs, num_train_epochs=3)
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['AI-ModelScope/alpaca-gpt4-data-zh#50', 'AI-ModelScope/alpaca-gpt4-data-en#50'],
        save_strategy='epoch',
        **overrides)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
|
|
|
|
def test_agent():
    """Agent/tool-calling SFT on ToolBench with react loss scaling, then inference."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2-7B-Instruct',
        dataset=['swift/ToolBench#500'],
        loss_scale='react',
        agent_template='toolbench',
        **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True))
|
|
|
|
|
|
|
|
def test_grounding():
    """Grounding SFT for Qwen2.5-VL on COCO, then streaming adapter inference."""
    os.environ['CUDA_VISIBLE_DEVICES'] = '0,1'
    from swift.llm import sft_main, TrainArguments, infer_main, InferArguments
    train_args = TrainArguments(
        model='Qwen/Qwen2.5-VL-7B-Instruct', dataset=['AI-ModelScope/coco#200'], dataset_num_proc=4, **kwargs)
    ckpt = sft_main(train_args)['last_model_checkpoint']
    infer_main(InferArguments(adapters=ckpt, load_data_args=True, stream=True, max_new_tokens=2048))
|
|
|
|
|
|
|
|
# Manual entry point: these are GPU/network integration smoke tests, so only the
# one test currently being exercised is invoked here — edit as needed.
if __name__ == '__main__':
    test_grounding()
|
|
|