| import os |
| import shutil |
| import tempfile |
| import unittest |
|
|
| import torch |
| from modelscope import AutoModel, Preprocessor |
| from peft.utils import WEIGHTS_NAME |
| from transformers import PreTrainedModel |
|
|
| from swift import LoRAConfig, Swift |
| from swift.tuners import NEFTuneConfig |
|
|
|
|
class TestNEFT(unittest.TestCase):
    """Tests for the NEFTune tuner, on its own and combined with a LoRA adapter.

    Each test compares the output of the word-embedding layer before and after
    ``Swift.prepare_model`` and then round-trips the model through
    ``save_pretrained`` / ``from_pretrained`` using a per-test scratch directory.
    """

    def setUp(self):
        """Print the name of the running test and create its scratch directory."""
        print('Testing %s.%s' % (type(self).__name__, self._testMethodName))
        # mkdtemp() creates the directory and leaves its lifetime to the caller,
        # so there is no finalizer that could delete it behind the test's back.
        self.tmp_dir = tempfile.mkdtemp()

    def tearDown(self):
        """Remove the scratch directory created by ``setUp``."""
        # test_neft removes the directory itself part-way through; ignoring a
        # missing directory keeps a failure in that window from being masked
        # by a FileNotFoundError raised during cleanup.
        shutil.rmtree(self.tmp_dir, ignore_errors=True)
        super().tearDown()

    def _assert_state_dicts_match(self, expected, actual):
        """Assert that every entry of ``expected`` exists in ``actual`` with close values.

        Args:
            expected: Non-empty state dict whose keys must all be present in
                ``actual``.
            actual: State dict to check against; it may contain extra keys.
        """
        self.assertTrue(len(expected) > 0)
        for key, tensor in expected.items():
            self.assertIn(key, actual)
            self.assertTrue(
                bool(torch.isclose(tensor, actual[key]).all()),
                'state dict entry %r differs' % key)

    def test_neft(self):
        """Check NEFTune on a BERT model and both save paths of the wrapped model.

        Asserts that the embedding output differs from the baseline while the
        adapter is active in training mode and matches it again once the
        adapter is deactivated, then verifies that saved weights reload to the
        same state dict with and without ``PreTrainedModel.save_pretrained``.
        """
        model = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased')
        preprocessor = Preprocessor.from_pretrained('damo/nlp_structbert_sentence-similarity_chinese-base')
        inputs = preprocessor('how are you')
        config = NEFTuneConfig()

        # t1: baseline embeddings, t2: adapter active in train mode,
        # t3: adapter deactivated.
        t1 = model.embeddings.word_embeddings(inputs['input_ids'])
        model = Swift.prepare_model(model, config)
        model.train()
        t2 = model.embeddings.word_embeddings(inputs['input_ids'])
        model.deactivate_adapter('default')
        t3 = model.embeddings.word_embeddings(inputs['input_ids'])
        self.assertTrue(torch.allclose(t1, t3))
        self.assertFalse(torch.allclose(t1, t2))

        model.save_pretrained(self.tmp_dir)
        bin_file = os.path.join(self.tmp_dir, 'pytorch_model.bin')
        self.assertTrue(os.path.isfile(bin_file))
        model2 = AutoModel.from_pretrained(self.tmp_dir)
        self._assert_state_dicts_match(model.state_dict(), model2.state_dict())

        # Second round: save again with PreTrainedModel.save_pretrained removed
        # (presumably to exercise Swift's own fallback save path -- confirm
        # against swift.tuners). The class is patched globally, so it must be
        # restored even when an assertion below fails.
        shutil.rmtree(self.tmp_dir)
        PreTrainedModel.origin_save_pretrained = PreTrainedModel.save_pretrained
        delattr(PreTrainedModel, 'save_pretrained')
        try:
            model.save_pretrained(self.tmp_dir)
            bin_file = os.path.join(self.tmp_dir, WEIGHTS_NAME)
            self.assertTrue(os.path.isfile(bin_file))
            model_new = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased')
            model_new_2 = Swift.from_pretrained(model_new, self.tmp_dir)
            self._assert_state_dicts_match(model.state_dict(), model_new_2.state_dict())
        finally:
            PreTrainedModel.save_pretrained = PreTrainedModel.origin_save_pretrained
            delattr(PreTrainedModel, 'origin_save_pretrained')

    def test_neft_lora(self):
        """Check NEFTune (adapter ``c1``) combined with LoRA (adapter ``c2``).

        Asserts the same embedding behaviour as ``test_neft`` on the original
        model, that saving writes a weights file for ``c2`` but none for
        ``c1``, and that a freshly loaded model shows the same behaviour,
        including unchanged embeddings in eval mode.
        """
        model = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased')
        preprocessor = Preprocessor.from_pretrained('damo/nlp_structbert_sentence-similarity_chinese-base')
        inputs = preprocessor('how are you')
        config = NEFTuneConfig()
        config2 = LoRAConfig(target_modules=['query', 'key', 'value'])

        t1 = model.embeddings.word_embeddings(inputs['input_ids'])
        model = Swift.prepare_model(model, {'c1': config, 'c2': config2})
        model.train()
        t2 = model.embeddings.word_embeddings(inputs['input_ids'])
        model.deactivate_adapter('c1')
        t3 = model.embeddings.word_embeddings(inputs['input_ids'])
        self.assertTrue(torch.allclose(t1, t3))
        self.assertFalse(torch.allclose(t1, t2))

        model.save_pretrained(self.tmp_dir)
        bin_file = os.path.join(self.tmp_dir, 'c2', WEIGHTS_NAME)
        self.assertTrue(os.path.isfile(bin_file))
        bin_file = os.path.join(self.tmp_dir, 'c1', WEIGHTS_NAME)
        self.assertTrue(not os.path.isfile(bin_file))

        # Repeat the embedding checks on a model restored from disk:
        # t1 baseline, t2 train mode, t4 eval mode, t3 train mode with c1 off.
        model_new = AutoModel.from_pretrained('AI-ModelScope/bert-base-uncased')
        t1 = model_new.embeddings.word_embeddings(inputs['input_ids'])
        model_new = Swift.from_pretrained(model_new, self.tmp_dir)
        model_new.train()
        t2 = model_new.embeddings.word_embeddings(inputs['input_ids'])
        model_new.eval()
        t4 = model_new.embeddings.word_embeddings(inputs['input_ids'])
        model_new.train()
        model_new.deactivate_adapter('c1')
        t3 = model_new.embeddings.word_embeddings(inputs['input_ids'])
        self.assertTrue(torch.allclose(t1, t3))
        self.assertTrue(torch.allclose(t1, t4))
        self.assertFalse(torch.allclose(t1, t2))

        state_dict = model.state_dict()
        state_dict2 = model_new.state_dict()
        # Every entry of the wrapped model's state dict must be a LoRA entry.
        self.assertTrue(len(state_dict) > 0 and all('lora' in key for key in state_dict))
        self._assert_state_dicts_match(state_dict, state_dict2)
|
|