| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | |
| | import json |
| | import os |
| | import shutil |
| | import sys |
| | import tempfile |
| | import unittest |
| | import unittest.mock as mock |
| | import warnings |
| | from pathlib import Path |
| |
|
| | import httpx |
| |
|
| | from transformers import AutoConfig, BertConfig, Florence2Config, GPT2Config |
| | from transformers.configuration_utils import PreTrainedConfig |
| | from transformers.testing_utils import TOKEN, TemporaryHubRepo, is_staging_test, require_torch |
| |
|
| |
|
| | sys.path.append(str(Path(__file__).parent.parent.parent / "utils")) |
| |
|
| | from test_module.custom_configuration import CustomConfig |
| |
|
| |
|
# A non-default value for every "common" `PreTrainedConfig` kwarg.
# `ConfigTestUtils.test_config_common_kwargs_is_complete` asserts two things about
# this dict: (a) it covers every attribute a bare `PreTrainedConfig()` exposes
# (minus an explicit allow-list of private/bookkeeping keys), and (b) every value
# here DIFFERS from the config's default, so round-trip tests that compare against
# these values cannot pass by accident.
config_common_kwargs = {
    "return_dict": False,
    "output_hidden_states": True,
    "output_attentions": True,
    "dtype": "float16",
    "is_decoder": True,
    "cross_attention_hidden_size": 128,
    "add_cross_attention": True,
    "chunk_size_feed_forward": 5,
    "architectures": ["BertModel"],
    "finetuning_task": "translation",
    "id2label": {0: "label"},
    "label2id": {"label": "0"},
    "tokenizer_class": "BertTokenizerFast",
    "prefix": "prefix",
    "bos_token_id": 6,
    "pad_token_id": 7,
    "eos_token_id": 8,
    "sep_token_id": 9,
    "decoder_start_token_id": 10,
    "task_specific_params": {"translation": "some_params"},
    "problem_type": "regression",
}
| |
|
| |
|
@is_staging_test
class ConfigPushToHubTester(unittest.TestCase):
    """Round-trip tests against the staging Hub: push a config, reload it, compare."""

    @classmethod
    def setUpClass(cls):
        cls._token = TOKEN

    def _make_config(self):
        # Tiny BERT config so pushes and downloads stay fast.
        return BertConfig(
            vocab_size=99, hidden_size=32, num_hidden_layers=5, num_attention_heads=4, intermediate_size=37
        )

    def _assert_round_trip(self, config, repo_id):
        """Reload a `BertConfig` from `repo_id` and check every serialized field survived."""
        reloaded = BertConfig.from_pretrained(repo_id)
        for key, value in config.to_dict().items():
            # `transformers_version` is stamped at save time, so it legitimately differs.
            if key != "transformers_version":
                self.assertEqual(value, getattr(reloaded, key))

    def test_push_to_hub(self):
        with TemporaryHubRepo(token=self._token) as tmp_repo:
            config = self._make_config()
            config.push_to_hub(tmp_repo.repo_id, token=self._token)
            self._assert_round_trip(config, tmp_repo.repo_id)

    def test_push_to_hub_via_save_pretrained(self):
        with TemporaryHubRepo(token=self._token) as tmp_repo:
            config = self._make_config()
            # Push via `save_pretrained(..., push_to_hub=True)` instead of `push_to_hub`.
            with tempfile.TemporaryDirectory() as tmp_dir:
                config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
            self._assert_round_trip(config, tmp_repo.repo_id)

    def test_push_to_hub_in_organization(self):
        with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
            config = self._make_config()
            config.push_to_hub(tmp_repo.repo_id, token=self._token)
            self._assert_round_trip(config, tmp_repo.repo_id)

    def test_push_to_hub_in_organization_via_save_pretrained(self):
        with TemporaryHubRepo(namespace="valid_org", token=self._token) as tmp_repo:
            config = self._make_config()
            # Same as above, but pushing into an organization namespace.
            with tempfile.TemporaryDirectory() as tmp_dir:
                config.save_pretrained(tmp_dir, repo_id=tmp_repo.repo_id, push_to_hub=True, token=self._token)
            self._assert_round_trip(config, tmp_repo.repo_id)

    def test_push_to_hub_dynamic_config(self):
        with TemporaryHubRepo(token=self._token) as tmp_repo:
            CustomConfig.register_for_auto_class()
            config = CustomConfig(attribute=42)

            config.push_to_hub(tmp_repo.repo_id, token=self._token)

            # Registering for auto class must record the mapping in `auto_map`.
            self.assertDictEqual(config.auto_map, {"AutoConfig": "custom_configuration.CustomConfig"})

            reloaded = AutoConfig.from_pretrained(tmp_repo.repo_id, trust_remote_code=True)
            # The reloaded class lives in a dynamically created module, so compare by
            # name rather than identity.
            self.assertEqual(reloaded.__class__.__name__, "CustomConfig")
            self.assertEqual(reloaded.attribute, 42)
| |
|
| |
|
class ConfigTestUtils(unittest.TestCase):
    """Unit tests for `PreTrainedConfig` serialization, versioning, caching and helpers."""

    def test_config_from_string(self):
        """`update_from_string` must parse int/float/bool/str values from a CSV string."""
        c = GPT2Config()

        # Build one non-default value per supported scalar type.
        n_embd = c.n_embd + 1  # int
        resid_pdrop = c.resid_pdrop + 1.0  # float
        scale_attn_weights = not c.scale_attn_weights  # bool
        summary_type = c.summary_type + "foo"  # str
        c.update_from_string(
            f"n_embd={n_embd},resid_pdrop={resid_pdrop},scale_attn_weights={scale_attn_weights},summary_type={summary_type}"
        )
        self.assertEqual(n_embd, c.n_embd, "mismatch for key: n_embd")
        self.assertEqual(resid_pdrop, c.resid_pdrop, "mismatch for key: resid_pdrop")
        self.assertEqual(scale_attn_weights, c.scale_attn_weights, "mismatch for key: scale_attn_weights")
        self.assertEqual(summary_type, c.summary_type, "mismatch for key: summary_type")

    def test_config_common_kwargs_is_complete(self):
        """`config_common_kwargs` must cover every base-config attribute with a non-default value."""
        base_config = PreTrainedConfig()
        missing_keys = [key for key in base_config.__dict__ if key not in config_common_kwargs]
        # Private/bookkeeping attributes are deliberately excluded from the common kwargs.
        self.assertListEqual(
            missing_keys,
            [
                "_output_attentions",
                "tie_word_embeddings",
                "is_encoder_decoder",
                "_name_or_path",
                "_commit_hash",
                "_attn_implementation_internal",
                "_experts_implementation_internal",
                "transformers_version",
            ],
        )
        # Every entry must differ from the default, otherwise round-trip tests using
        # `config_common_kwargs` could pass vacuously.
        keys_with_defaults = [key for key, value in config_common_kwargs.items() if value == getattr(base_config, key)]
        if len(keys_with_defaults) > 0:
            raise ValueError(
                "The following keys are set with the default values in"
                " `test_configuration_common.config_common_kwargs` pick another value for them:"
                f" {', '.join(keys_with_defaults)}."
            )

    def test_nested_config_load_from_dict(self):
        """A nested sub-config passed as a plain dict must be promoted to a config object."""
        config = AutoConfig.from_pretrained(
            "hf-internal-testing/tiny-random-CLIPModel", text_config={"num_hidden_layers": 2}
        )
        self.assertNotIsInstance(config.text_config, dict)
        self.assertEqual(config.text_config.__class__.__name__, "CLIPTextConfig")

    def test_from_pretrained_subfolder(self):
        """Loading works both with and without an explicit `subfolder` argument."""
        config = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert-subfolder")
        self.assertIsNotNone(config)

        config = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert-subfolder", subfolder="bert")
        self.assertIsNotNone(config)

    def test_cached_files_are_used_when_internet_is_down(self):
        """With HTTP failing (simulated 500s), a previously cached config must still load."""
        # A mock response for an HTTP HEAD/GET request that always fails.
        response_mock = mock.Mock()
        response_mock.status_code = 500
        response_mock.headers = {}
        response_mock.raise_for_status.side_effect = httpx.HTTPStatusError(
            "failed", request=mock.Mock(), response=mock.Mock()
        )
        response_mock.json.return_value = {}

        # Download this model once so it is in the local cache.
        _ = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert")

        # Under mocked (failing) network requests, loading must fall back to the cache.
        with mock.patch("httpx.Client.request", return_value=response_mock) as mock_head:
            _ = BertConfig.from_pretrained("hf-internal-testing/tiny-random-bert")
            # The network was actually attempted (and failed), proving the cache was used.
            mock_head.assert_called()

    def test_local_versioning(self):
        """Version-suffixed config files are picked based on the installed `transformers` version."""
        configuration = AutoConfig.from_pretrained("google-bert/bert-base-cased")
        configuration.configuration_files = ["config.4.0.0.json"]

        with tempfile.TemporaryDirectory() as tmp_dir:
            configuration.save_pretrained(tmp_dir)
            configuration.hidden_size = 2
            # Fix: the original leaked a file handle via a bare `open()` inside
            # `json.dump`; use a context manager so the file is flushed and closed.
            with open(os.path.join(tmp_dir, "config.4.0.0.json"), "w") as f:
                json.dump(configuration.to_dict(), f)

            # The installed version is > 4.0.0, so the versioned file should win.
            new_configuration = AutoConfig.from_pretrained(tmp_dir)
            self.assertEqual(new_configuration.hidden_size, 2)

            # Will need to be adjusted if we ever reach v42: a file tagged for a FUTURE
            # version (42.0.0) must be ignored in favor of the plain `config.json`.
            configuration.configuration_files = ["config.42.0.0.json"]
            configuration.hidden_size = 768
            configuration.save_pretrained(tmp_dir)
            shutil.move(os.path.join(tmp_dir, "config.4.0.0.json"), os.path.join(tmp_dir, "config.42.0.0.json"))
            new_configuration = AutoConfig.from_pretrained(tmp_dir)
            self.assertEqual(new_configuration.hidden_size, 768)

    def test_repo_versioning_before(self):
        """Same versioning logic as above, but against a Hub repo with two config files."""
        repo = "hf-internal-testing/test-two-configs"

        import transformers as new_transformers

        # Pretend the installed version is exactly 4.0.0: the `config.4.0.0.json`
        # variant (hidden_size=2) should be selected.
        new_transformers.configuration_utils.__version__ = "v4.0.0"
        new_configuration, kwargs = new_transformers.models.auto.AutoConfig.from_pretrained(
            repo, return_unused_kwargs=True
        )
        self.assertEqual(new_configuration.hidden_size, 2)
        # This checks `_configuration_file` was not kept in the resulting kwargs.
        self.assertDictEqual(kwargs, {})

        # Pretend an older version: the plain `config.json` (hidden_size=768) wins.
        import transformers as old_transformers

        old_transformers.configuration_utils.__version__ = "v3.0.0"
        old_configuration = old_transformers.models.auto.AutoConfig.from_pretrained(repo)
        self.assertEqual(old_configuration.hidden_size, 768)

    def test_saving_config_with_custom_generation_kwargs_raises_error(self):
        """Generation-only kwargs on a model config must make `save_pretrained` fail."""
        config = BertConfig()
        config.min_length = 3  # generation parameter, does not belong on the config
        with tempfile.TemporaryDirectory() as tmp_dir:
            with self.assertRaises(ValueError):
                config.save_pretrained(tmp_dir)

    def test_get_generation_parameters(self):
        """`_get_generation_parameters` detects any explicitly set generation kwarg."""
        config = BertConfig()
        self.assertFalse(len(config._get_generation_parameters()) > 0)
        config.min_length = 3
        self.assertTrue(len(config._get_generation_parameters()) > 0)
        # Even a "neutral" value counts once the attribute has been set explicitly.
        config.min_length = 0
        self.assertTrue(len(config._get_generation_parameters()) > 0)

    def test_loading_config_do_not_raise_future_warnings(self):
        """Regression test for https://github.com/huggingface/transformers/issues/31002."""
        # `simplefilter("error")` turns any warning into an exception, failing the test.
        with warnings.catch_warnings():
            warnings.simplefilter("error")
            PreTrainedConfig.from_pretrained("bert-base-uncased")

    def test_get_text_config(self):
        """Tests the `get_text_config` method."""
        # Decoder-only model: the config is its own text config.
        config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-LlamaForCausalLM")
        self.assertEqual(config.get_text_config(), config)
        self.assertEqual(config.get_text_config(decoder=True), config)

        # Composite model: the nested `text_config` is returned.
        config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-LlavaForConditionalGeneration")
        self.assertEqual(config.get_text_config(), config.text_config)
        self.assertEqual(config.get_text_config(decoder=True), config.text_config)

        # Another composite model, constructed from defaults.
        config = Florence2Config()
        self.assertEqual(config.get_text_config(), config.text_config)
        self.assertEqual(config.get_text_config(decoder=True), config.text_config)

        # Encoder-decoder model: `decoder=True` / `encoder=True` return derived configs
        # restricted to the requested side.
        config = AutoConfig.from_pretrained("hf-internal-testing/tiny-random-bart")
        self.assertEqual(config.get_text_config(), config)
        # Sanity check: the base config exposes both sides' layer counts.
        self.assertTrue(getattr(config, "encoder_layers", None) is not None)
        self.assertTrue(getattr(config, "decoder_layers", None) is not None)
        decoder_config = config.get_text_config(decoder=True)
        self.assertNotEqual(decoder_config, config)
        self.assertEqual(decoder_config.num_hidden_layers, config.decoder_layers)
        self.assertTrue(getattr(decoder_config, "encoder_layers", None) is None)
        encoder_config = config.get_text_config(encoder=True)
        self.assertNotEqual(encoder_config, config)
        self.assertEqual(encoder_config.num_hidden_layers, config.encoder_layers)
        self.assertTrue(getattr(encoder_config, "decoder_layers", None) is None)

    @require_torch
    def test_bc_torch_dtype(self):
        """Backward compatibility: legacy `torch_dtype` kwarg maps onto `dtype`."""
        import torch

        config = PreTrainedConfig(dtype="bfloat16")
        self.assertEqual(config.dtype, torch.bfloat16)

        config = PreTrainedConfig(torch_dtype="bfloat16")
        self.assertEqual(config.dtype, torch.bfloat16)

        # When both are provided, the new `dtype` kwarg takes precedence.
        config = PreTrainedConfig(dtype="bfloat16", torch_dtype="float32")
        self.assertEqual(config.dtype, torch.bfloat16)

        with tempfile.TemporaryDirectory() as tmpdirname:
            config.save_pretrained(tmpdirname)

            config = PreTrainedConfig.from_pretrained(tmpdirname)
            self.assertEqual(config.dtype, torch.bfloat16)

            # Explicit kwargs at load time override the serialized value, via either name.
            config = PreTrainedConfig.from_pretrained(tmpdirname, dtype="float32")
            self.assertEqual(config.dtype, "float32")

            config = PreTrainedConfig.from_pretrained(tmpdirname, torch_dtype="float32")
            self.assertEqual(config.dtype, "float32")

    def test_unserializable_json_is_encoded(self):
        """Non-JSON floats (inf/-inf/nan) must be encoded as tagged dicts and decoded back."""

        class NewConfig(PreTrainedConfig):
            def __init__(
                self,
                inf_positive: float = float("inf"),
                inf_negative: float = float("-inf"),
                nan: float = float("nan"),
                **kwargs,
            ):
                self.inf_positive = inf_positive
                self.inf_negative = inf_negative
                self.nan = nan

                super().__init__(**kwargs)

        new_config = NewConfig()

        # Before serialization, the attributes are ordinary floats.
        self.assertIsInstance(new_config.inf_positive, float)
        self.assertIsInstance(new_config.inf_negative, float)
        self.assertIsInstance(new_config.nan, float)

        with tempfile.TemporaryDirectory() as tmpdirname:
            new_config.save_pretrained(tmpdirname)
            config_file = Path(tmpdirname) / "config.json"
            config_contents = json.loads(config_file.read_text())
            new_config_instance = NewConfig.from_pretrained(tmpdirname)

        # On disk, the special floats are stored as `{"__float__": ...}` markers so the
        # JSON stays strictly valid.
        self.assertDictEqual(config_contents["inf_positive"], {"__float__": "Infinity"})
        self.assertDictEqual(config_contents["inf_negative"], {"__float__": "-Infinity"})
        self.assertDictEqual(config_contents["nan"], {"__float__": "NaN"})

        # Fix: removed a redundant second `TemporaryDirectory` block that re-saved the
        # config and discarded the result — no assertion depended on it.

        # After reloading, the markers are decoded back into real floats.
        self.assertIsInstance(new_config_instance.inf_positive, float)
        self.assertIsInstance(new_config_instance.inf_negative, float)
        self.assertIsInstance(new_config_instance.nan, float)
| |
|