Student0809
/

interactSpeech

Model card Files Files and versions

interactSpeech / tests / megatron / test_save.py

Student0809's picture

Add files using upload-large-folder tool

7feac49 verified 5 months ago

2.53 kB

	import os

	# Expose only GPU 0 to this process. The variable is set at module import time,
	# ahead of the `import torch` performed in the `__main__` block of this file.
	os.environ['CUDA_VISIBLE_DEVICES'] = '0'


	def get_mg_model_tokenizer():
	    """Create the Hugging Face model, the Megatron model and the processor for the test.

	    The HF model is loaded from ``Qwen/Qwen2.5-7B`` with ``torch_dtype=torch.float32``,
	    while only the processor (``load_model=False``) is loaded from
	    ``Qwen/Qwen2.5-7B-Instruct``. Megatron is initialised from arguments derived from
	    the processor's HF config, with ``load='Qwen2-7B-Instruct-mcore'`` and
	    ``save='mcore-hf-test'``. Finally ``convert_mcore2hf`` is invoked on both models
	    (presumably copying the Megatron weights into ``hf_model`` -- confirm against
	    ``swift.megatron``).

	    The function relies on module-level names (``torch``, ``get_model_tokenizer``,
	    ``MegatronArguments``, ...) that this file only imports in its ``__main__`` block.

	    Returns:
	        tuple: ``(hf_model, mg_model, processor)``.
	    """
	    from megatron.training.initialize import initialize_megatron

	    processor_source = 'Qwen/Qwen2.5-7B-Instruct'
	    weights_source = 'Qwen/Qwen2.5-7B'

	    set_default_ddp_config()
	    hf_model, _unused_tokenizer = get_model_tokenizer(weights_source, torch_dtype=torch.float32)
	    _unused_model, processor = get_model_tokenizer(processor_source, load_model=False)

	    mg_meta = get_megatron_model_meta(processor.model_meta.model_type)
	    converted_config = mg_meta.convert_hf_config(processor.model_info.config)
	    mg_arguments = MegatronArguments(
	        **converted_config,
	        seq_length=1,
	        use_cpu_initialization=True,
	        no_initialization=True,
	        load='Qwen2-7B-Instruct-mcore',
	        save='mcore-hf-test',
	        no_load_optim=True,
	        no_load_rng=True,
	    )

	    # The tokenizer patch is applied before the arguments are handed to Megatron,
	    # in the same order as the original script.
	    patch_megatron_tokenizer(processor)
	    initialize_megatron(args_defaults=mg_arguments.parse_to_megatron())

	    mg_model = mg_meta.model_provider()
	    mg_meta.convert_mcore2hf(hf_model, mg_model)
	    return hf_model, mg_model, processor


	def test_align(hf_model, mg_model, processor):
	    """Print the mean and maximum absolute logit difference between the two models.

	    A single user message (``'who are you?'``) is encoded with the template named by
	    ``hf_model.model_meta.template``. Both models are moved to CUDA and evaluated on
	    the same ``input_ids`` under ``torch.inference_mode()``. Nothing is asserted or
	    returned; the two difference statistics are only printed.

	    Args:
	        hf_model: Model called as ``hf_model(input_ids)``; its result must expose
	            ``.logits``.
	        mg_model: Model called with ``input_ids``, ``attention_mask`` and
	            ``position_ids`` keyword arguments; its result is used directly as logits.
	        processor: Passed to ``get_template`` together with the template name.
	    """
	    from megatron.training.utils import get_ltor_masks_and_position_ids

	    template = get_template(hf_model.model_meta.template, processor)
	    request = InferRequest(messages=[{'role': 'user', 'content': 'who are you?'}])
	    token_ids = template.encode(request)['input_ids']
	    # `[None]` adds a leading dimension of size 1 before moving the ids to the GPU.
	    input_ids = torch.tensor(token_ids)[None].to('cuda')
	    # NOTE(review): the positional arguments are presumably (eod_token,
	    # reset_position_ids, reset_attention_mask, eod_mask_loss) -- confirm against
	    # the installed Megatron-LM version.
	    attention_mask, _, position_ids = get_ltor_masks_and_position_ids(input_ids, -100, True, True, True)

	    with torch.inference_mode():
	        hf_model.cuda()
	        mg_model.cuda()
	        hf_logits = hf_model(input_ids).logits
	        mg_logits = mg_model(input_ids=input_ids, attention_mask=attention_mask, position_ids=position_ids)

	    abs_diff = (mg_logits - hf_logits).abs()
	    mean_diff = abs_diff.mean().item()
	    max_diff = abs_diff.max().item()
	    print(f'mean_diff: {mean_diff}, max_diff: {max_diff}')

	def test_save():
	    """Build both models via ``get_mg_model_tokenizer`` and pass them to ``test_align``."""
	    models_and_processor = get_mg_model_tokenizer()
	    # The triple is (hf_model, mg_model, processor), matching test_align's parameters.
	    test_align(*models_and_processor)

	# Script entry point: the imports below bind the module-level names (torch,
	# get_model_tokenizer, MegatronArguments, ...) that the functions above look up
	# at call time, then the check is run once via test_save().
	# NOTE(review): in this copy the statements below carry no extra indentation;
	# they are presumably the body of this guard -- confirm against the upstream file.
	if __name__ == '__main__':
	import torch
	from swift.llm import InferRequest, get_model_tokenizer, get_template
	from swift.utils import set_default_ddp_config
	from swift.megatron.argument import MegatronArguments
	from swift.megatron.model import get_megatron_model_meta
	from swift.megatron.utils import patch_megatron_tokenizer
	test_save()