| import os | |
| import shutil | |
| import tempfile | |
| import unittest | |
| import torch | |
| from modelscope import AutoModel, Preprocessor | |
| from peft.utils import WEIGHTS_NAME | |
| from transformers import PreTrainedModel | |
| from swift import LoRAConfig, Swift | |
| from swift.tuners import NEFTuneConfig | |
class TestNEFT(unittest.TestCase):
    """Tests for the NEFTune tuner: standalone, and combined with LoRA.

    NEFTune adds noise to word embeddings only while the model is in train
    mode with the adapter active; it has no trainable weights of its own.
    """

    def setUp(self):
        print('Testing %s.%s' % (type(self).__name__, self._testMethodName))
        # mkdtemp creates the directory atomically and leaves cleanup to us.
        # (The previous TemporaryDirectory().name idiom dropped the object
        # immediately, so its finalizer could remove the directory mid-test
        # and raise ResourceWarning.)
        self.tmp_dir = tempfile.mkdtemp()

    def tearDown(self):
        # ignore_errors: a test that failed before (re)creating the dir
        # should report its own failure, not a FileNotFoundError here.
        shutil.rmtree(self.tmp_dir, ignore_errors=True)
        super().tearDown()

    def _assert_state_dicts_close(self, expected, actual):
        """Assert every tensor in ``expected`` exists in ``actual`` with close values."""
        self.assertTrue(len(expected) > 0)
        for key in expected:
            self.assertIn(key, actual)
            # allclose uses the same default tolerances as isclose.
            self.assertTrue(torch.allclose(expected[key], actual[key]))

    def test_neft(self):
        """Noise is applied in train mode, removable, and survives save/load."""
        model = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased')
        preprocessor = Preprocessor.from_pretrained('damo/nlp_structbert_sentence-similarity_chinese-base')
        inputs = preprocessor('how are you')
        config = NEFTuneConfig()

        t1 = model.embeddings.word_embeddings(inputs['input_ids'])
        model = Swift.prepare_model(model, config)
        model.train()
        t2 = model.embeddings.word_embeddings(inputs['input_ids'])
        model.deactivate_adapter('default')
        t3 = model.embeddings.word_embeddings(inputs['input_ids'])
        # Deactivating restores the original embedding; training added noise.
        self.assertTrue(torch.allclose(t1, t3))
        self.assertFalse(torch.allclose(t1, t2))

        # Round-trip through the transformers save path.
        model.save_pretrained(self.tmp_dir)
        bin_file = os.path.join(self.tmp_dir, 'pytorch_model.bin')
        self.assertTrue(os.path.isfile(bin_file))
        model2 = AutoModel.from_pretrained(self.tmp_dir)
        self._assert_state_dicts_close(model.state_dict(), model2.state_dict())

        shutil.rmtree(self.tmp_dir)
        # Temporarily remove PreTrainedModel.save_pretrained to exercise the
        # Swift fallback save path; restore it even if an assertion fails,
        # otherwise the patch would leak into every subsequent test.
        PreTrainedModel.origin_save_pretrained = PreTrainedModel.save_pretrained
        delattr(PreTrainedModel, 'save_pretrained')
        try:
            model.save_pretrained(self.tmp_dir)
            bin_file = os.path.join(self.tmp_dir, WEIGHTS_NAME)
            self.assertTrue(os.path.isfile(bin_file))
            model_new = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased')
            model_new_2 = Swift.from_pretrained(model_new, self.tmp_dir)
            self._assert_state_dicts_close(model.state_dict(), model_new_2.state_dict())
        finally:
            PreTrainedModel.save_pretrained = PreTrainedModel.origin_save_pretrained

    def test_neft_lora(self):
        """NEFTune + LoRA: only LoRA weights are persisted; noise gates on train mode."""
        model = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased')
        preprocessor = Preprocessor.from_pretrained('damo/nlp_structbert_sentence-similarity_chinese-base')
        inputs = preprocessor('how are you')
        config = NEFTuneConfig()
        config2 = LoRAConfig(target_modules=['query', 'key', 'value'])

        t1 = model.embeddings.word_embeddings(inputs['input_ids'])
        model = Swift.prepare_model(model, {'c1': config, 'c2': config2})
        model.train()
        t2 = model.embeddings.word_embeddings(inputs['input_ids'])
        model.deactivate_adapter('c1')
        t3 = model.embeddings.word_embeddings(inputs['input_ids'])
        self.assertTrue(torch.allclose(t1, t3))
        self.assertFalse(torch.allclose(t1, t2))

        model.save_pretrained(self.tmp_dir)
        # LoRA ('c2') has weights to store; NEFTune ('c1') does not.
        self.assertTrue(os.path.isfile(os.path.join(self.tmp_dir, 'c2', WEIGHTS_NAME)))
        self.assertFalse(os.path.isfile(os.path.join(self.tmp_dir, 'c1', WEIGHTS_NAME)))

        model_new = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased')
        t1 = model_new.embeddings.word_embeddings(inputs['input_ids'])
        model_new = Swift.from_pretrained(model_new, self.tmp_dir)
        model_new.train()
        t2 = model_new.embeddings.word_embeddings(inputs['input_ids'])
        model_new.eval()
        t4 = model_new.embeddings.word_embeddings(inputs['input_ids'])
        model_new.train()
        model_new.deactivate_adapter('c1')
        t3 = model_new.embeddings.word_embeddings(inputs['input_ids'])
        # Noise appears only when training with the NEFTune adapter active.
        self.assertTrue(torch.allclose(t1, t3))
        self.assertTrue(torch.allclose(t1, t4))
        self.assertFalse(torch.allclose(t1, t2))

        state_dict = model.state_dict()
        # The reloaded adapter state is LoRA-only.
        self.assertTrue(len(state_dict) > 0)
        self.assertTrue(all('lora' in key for key in state_dict))
        self._assert_state_dicts_close(state_dict, model_new.state_dict())