| |
| |
| |
| |
| @@ -83,6 +83,8 @@ def pytest_configure(config): |
| config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment") |
| config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate") |
| config.addinivalue_line("markers", "not_device_test: mark the tests always running on cpu") |
| + config.addinivalue_line("markers", "torch_compile_test: mark test which tests torch compile functionality") |
| + config.addinivalue_line("markers", "torch_export_test: mark test which tests torch export functionality") |
| |
| |
| def pytest_collection_modifyitems(items): |
| |
| |
| |
| |
| @@ -2048,6 +2048,7 @@ def test_generate_with_quant_cache(self): |
| model.generate(**generation_kwargs, **inputs_dict) |
| |
| @pytest.mark.generate |
| + @pytest.mark.torch_compile_test |
| @require_torch_greater_or_equal("2.6") # Uses torch.compiler.set_stance |
| def test_generate_compile_model_forward(self): |
| """ |
| @@ -2744,6 +2745,7 @@ def test_speculative_sampling_target_distribution(self): |
| self.assertTrue(last_token_counts[1] > last_token_counts[3] > last_token_counts[7] > 0) |
| self.assertTrue(last_token_counts[8] > last_token_counts[3]) |
| |
| + @pytest.mark.torch_export_test |
| def test_cache_dependant_input_preparation_exporting(self): |
| self.assertFalse( |
| is_torchdynamo_exporting() |
| @@ -4342,6 +4344,7 @@ def test_prepare_inputs_for_generation_encoder_decoder_llm(self): |
| self.assertTrue(model_inputs["encoder_outputs"] == "foo") |
| # See the decoder-only test for more corner cases. The code is the same, so we don't repeat it here. |
| |
| + @pytest.mark.torch_compile_test |
| def test_generate_compile_fullgraph_tiny(self): |
| """ |
| Tests that we can call end-to-end generation with a tiny model (i.e. doesn't crash) |
| @@ -4931,6 +4934,7 @@ def test_cache_device_map_with_vision_layer_device_map(self): |
| _ = model.generate(**inputs, max_new_tokens=2, do_sample=False) |
| |
| @require_torch_accelerator |
| + @pytest.mark.torch_compile_test |
| def test_cpu_offload_doesnt_compile(self): |
| """Test that CPU offload doesn't trigger compilation""" |
| tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM") |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| from packaging import version |
| |
| from transformers import AlbertConfig, AutoTokenizer, is_torch_available |
| @@ -337,6 +338,7 @@ def test_inference_no_head_absolute_embedding(self): |
| torch.testing.assert_close(output[:, 1:4, 1:4], expected_slice, rtol=1e-4, atol=1e-4) |
| |
| @slow |
| + @pytest.mark.torch_export_test |
| def test_export(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| self.skipTest(reason="This test requires torch >= 2.4 to run.") |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| import requests |
| |
| from transformers import ( |
| @@ -211,6 +212,7 @@ def test_training_gradient_checkpointing_use_reentrant_false(self): |
| pass |
| |
| @unittest.skip(reason="Compile not yet supported because in LLava models") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -267,6 +267,7 @@ def test_initialization(self): |
| pass |
| |
| @unittest.skip(reason="Compile not yet supported because in LLava models") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| from datasets import load_dataset |
| |
| from transformers import BeitConfig |
| @@ -285,6 +286,7 @@ def test_feed_forward_chunking(self): |
| pass |
| |
| @unittest.skip(reason="BEiT can't compile dynamic") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -13,6 +13,7 @@ |
| # limitations under the License. |
| import unittest |
| |
| +import pytest |
| from packaging import version |
| |
| from transformers import AutoTokenizer, BertConfig, is_torch_available |
| @@ -722,6 +723,7 @@ def test_sdpa_ignored_mask(self): |
| ) |
| |
| @slow |
| + @pytest.mark.torch_export_test |
| def test_export(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| self.skipTest(reason="This test requires torch >= 2.4 to run.") |
| |
| |
| |
| |
| @@ -19,6 +19,7 @@ |
| import unittest |
| |
| import numpy as np |
| +import pytest |
| import requests |
| from parameterized import parameterized |
| from pytest import mark |
| @@ -708,6 +709,7 @@ def test_sdpa_can_dispatch_on_flash(self): |
| self.skipTest(reason="CLIP text tower has two attention masks: `causal_attention_mask` and `attention_mask`") |
| |
| @require_torch_sdpa |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| self.skipTest(reason="CLIP model can't be compiled dynamic, error in clip_loss`") |
| |
| |
| |
| |
| |
| @@ -225,6 +225,7 @@ def test_model_flash_attn(self): |
| |
| self.assertEqual(output_text, EXPECTED_TEXTS) |
| |
| + @pytest.mark.torch_export_test |
| def test_export_static_cache(self): |
| if version.parse(torch.__version__) < version.parse("2.5.0"): |
| self.skipTest(reason="This test requires torch >= 2.5 to run.") |
| |
| |
| |
| |
| @@ -19,6 +19,7 @@ |
| import unittest |
| from typing import ClassVar |
| |
| +import pytest |
| import torch |
| from datasets import load_dataset |
| |
| @@ -287,6 +288,7 @@ def test_sdpa_can_dispatch_on_flash(self): |
| pass |
| |
| @unittest.skip(reason="Pass because ColPali requires `attention_mask is not None`") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -17,6 +17,7 @@ |
| import unittest |
| from typing import ClassVar |
| |
| +import pytest |
| import torch |
| from datasets import load_dataset |
| |
| @@ -277,6 +278,7 @@ def test_sdpa_can_dispatch_on_flash(self): |
| pass |
| |
| @unittest.skip(reason="Pass because ColQwen2 requires `attention_mask is not None`") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -15,6 +15,8 @@ |
| |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import Data2VecVisionConfig |
| from transformers.testing_utils import ( |
| require_torch, |
| @@ -214,6 +216,7 @@ def test_config(self): |
| @unittest.skip( |
| reason="Will fix only if requested by the community: it fails with `torch._dynamo.exc.InternalTorchDynamoError: IndexError: list index out of range`. Without compile, the test pass." |
| ) |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -16,6 +16,8 @@ |
| |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import BitsAndBytesConfig, Cache, DeepseekV2Config, is_torch_available |
| from transformers.testing_utils import require_read_token, require_torch, require_torch_accelerator, slow, torch_device |
| |
| @@ -173,10 +175,12 @@ def _check_past_key_values_for_generate(self, batch_size, decoder_past_key_value |
| self.assertEqual(layer.values.shape, expected_value_shape) |
| |
| @unittest.skip("Deepseek-V2 uses MLA which has a special head dim and is not compatible with StaticCache shape") |
| + @pytest.mark.torch_compile_test |
| def test_generate_compilation_all_outputs(self): |
| pass |
| |
| @unittest.skip("Deepseek-V2 uses MLA which has a special head dim and is not compatible with StaticCache shape") |
| + @pytest.mark.torch_compile_test |
| def test_generate_compile_model_forward(self): |
| pass |
| |
| @@ -185,10 +189,12 @@ def test_generate_from_inputs_embeds_with_static_cache(self): |
| pass |
| |
| @unittest.skip("Deepseek-V2 uses MLA which has a special head dim and is not compatible with StaticCache shape") |
| + @pytest.mark.torch_compile_test |
| def test_generate_with_static_cache(self): |
| pass |
| |
| @unittest.skip("Dynamic control flow in MoE") |
| + @pytest.mark.torch_compile_test |
| def test_torch_compile_for_training(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| from packaging import version |
| from parameterized import parameterized |
| |
| @@ -311,6 +312,7 @@ def test_generate_compilation_all_outputs(self): |
| pass |
| |
| @unittest.skip("Deepseek-V3 uses MLA so it is not compatible with the standard cache format") |
| + @pytest.mark.torch_compile_test |
| def test_generate_compile_model_forward(self): |
| pass |
| |
| @@ -533,6 +535,7 @@ def tearDown(self): |
| |
| @slow |
| @require_torch_accelerator |
| + @pytest.mark.torch_compile_test |
| @require_read_token |
| def test_compile_static_cache(self): |
| # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2 |
| |
| |
| |
| |
| @@ -15,6 +15,8 @@ |
| |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import DepthAnythingConfig, Dinov2Config |
| from transformers.file_utils import is_torch_available, is_vision_available |
| from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4 |
| @@ -286,6 +288,7 @@ def test_inference(self): |
| |
| torch.testing.assert_close(predicted_depth[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4) |
| |
| + @pytest.mark.torch_export_test |
| def test_export(self): |
| for strict in [False, True]: |
| with self.subTest(strict=strict): |
| |
| |
| |
| |
| @@ -15,6 +15,8 @@ |
| |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import DepthProConfig |
| from transformers.file_utils import is_torch_available, is_vision_available |
| from transformers.testing_utils import require_torch, require_vision, slow, torch_device |
| @@ -221,6 +223,7 @@ def test_config(self): |
| self.config_tester.run_common_tests() |
| |
| @unittest.skip(reason="Inductor error: name 'OpaqueUnaryFn_log2' is not defined") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -570,6 +570,7 @@ def tearDown(self): |
| @slow |
| @require_torch_accelerator |
| @require_read_token |
| + @pytest.mark.torch_compile_test |
| def test_compile_static_cache(self): |
| # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2 |
| # work as intended. See https://github.com/pytorch/pytorch/issues/121943 |
| |
| |
| |
| |
| @@ -399,6 +399,7 @@ def test_inference_no_head_absolute_embedding(self): |
| |
| torch.testing.assert_close(output[:, 1:4, 1:4], expected_slice, rtol=1e-4, atol=1e-4) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export(self): |
| if not is_torch_greater_or_equal_than_2_4: |
| |
| |
| |
| |
| @@ -96,6 +96,7 @@ def test_generate_compilation_all_outputs(self): |
| pass |
| |
| @unittest.skip("dots.llm1's moe is not compatible `token_indices, weight_indices = torch.where(mask)`") |
| + @pytest.mark.torch_compile_test |
| def test_generate_compile_model_forward(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -15,6 +15,8 @@ |
| |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import DPTConfig |
| from transformers.file_utils import is_torch_available, is_vision_available |
| from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4 |
| @@ -255,6 +257,7 @@ def test_training_gradient_checkpointing_use_reentrant_false(self): |
| pass |
| |
| @unittest.skip(reason="Inductor error for dynamic shape") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| @@ -420,6 +423,7 @@ def test_post_processing_depth_estimation(self): |
| self.assertTrue(output_enlarged.shape == expected_shape) |
| torch.testing.assert_close(predicted_depth_l, output_enlarged, atol=1e-3, rtol=1e-3) |
| |
| + @pytest.mark.torch_export_test |
| def test_export(self): |
| for strict in [True, False]: |
| with self.subTest(strict=strict): |
| |
| |
| |
| |
| @@ -354,6 +354,7 @@ def test_model_generation_beyond_sliding_window(self): |
| del model |
| cleanup(torch_device, gc_collect=True) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export_static_cache(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| |
| |
| |
| |
| @@ -17,6 +17,8 @@ |
| import unittest |
| from unittest.util import safe_repr |
| |
| +import pytest |
| + |
| from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, FalconMambaConfig, is_torch_available |
| from transformers.testing_utils import ( |
| Expectations, |
| @@ -487,6 +489,7 @@ def test_generation_4bit(self): |
| "Hello today Iava,\n\nI'm sorry to hear that you're having trouble with the ", |
| ) |
| |
| + @pytest.mark.torch_compile_test |
| def test_generation_torch_compile(self): |
| model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.float16).to(torch_device) |
| model = torch.compile(model) |
| |
| |
| |
| |
| @@ -356,6 +356,7 @@ def test_model_7b_4bit(self): |
| |
| @slow |
| @require_torch_accelerator |
| + @pytest.mark.torch_compile_test |
| @require_read_token |
| def test_compile_static_cache(self): |
| # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2 |
| @@ -394,6 +395,7 @@ def test_compile_static_cache(self): |
| static_compiled_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True) |
| self.assertEqual(EXPECTED_TEXT_COMPLETION, static_compiled_text) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| @require_read_token |
| def test_export_static_cache(self): |
| |
| |
| |
| |
| @@ -306,6 +306,7 @@ def test_model_9b_flash_attn(self): |
| |
| self.assertEqual(output_text, EXPECTED_TEXTS) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| @require_read_token |
| def test_export_static_cache(self): |
| @@ -379,6 +380,7 @@ def test_export_static_cache(self): |
| @slow |
| @require_read_token |
| @require_large_cpu_ram |
| + @pytest.mark.torch_export_test |
| def test_export_hybrid_cache(self): |
| from transformers.integrations.executorch import TorchExportableModuleForDecoderOnlyLM |
| from transformers.pytorch_utils import is_torch_greater_or_equal |
| |
| |
| |
| |
| @@ -819,6 +819,7 @@ def test_generation_beyond_sliding_window(self, attn_implementation: str): |
| EXPECTED_COMPLETIONS = [" and I'm going to take a walk.\n\nI really enjoy the scenery, and I'", ", green, yellow, orange, purple, brown, black, white, gray.\n\nI'"] # fmt: skip |
| self.assertEqual(output_text, EXPECTED_COMPLETIONS) |
| |
| + @pytest.mark.torch_export_test |
| def test_export_text_only_with_hybrid_cache(self): |
| if not is_torch_greater_or_equal("2.6.0"): |
| self.skipTest(reason="This test requires torch >= 2.6 to run.") |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| import torch |
| from packaging import version |
| |
| @@ -93,6 +94,7 @@ def tearDown(self): |
| |
| @slow |
| @require_torch_accelerator |
| + @pytest.mark.torch_compile_test |
| @require_read_token |
| def test_compile_static_cache(self): |
| # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2 |
| |
| |
| |
| |
| @@ -195,6 +195,7 @@ def test_flash_attn_2_inference_padding_right(self): |
| pass |
| |
| @unittest.skip(reason="Compile not yet supported in idefics3 models") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| @@ -379,6 +380,7 @@ def test_eager_matches_sdpa_generate(self): |
| pass |
| |
| @unittest.skip(reason="Compile not yet supported in Idefics3 models end-to-end") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -17,6 +17,7 @@ |
| import unittest |
| from io import BytesIO |
| |
| +import pytest |
| import requests |
| |
| from transformers import ( |
| @@ -216,6 +217,7 @@ def test_initialization(self): |
| ) |
| |
| @unittest.skip(reason="Compile not yet supported because in LLava models") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -20,6 +20,7 @@ |
| from functools import reduce |
| |
| import numpy as np |
| +import pytest |
| import requests |
| |
| from transformers import ( |
| @@ -294,6 +295,7 @@ def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=No |
| pass |
| |
| @unittest.skip("There are recompilations in Janus") # TODO (joao, raushan): fix me |
| + @pytest.mark.torch_compile_test |
| def test_generate_compile_model_forward(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -14,6 +14,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| import requests |
| from packaging import version |
| |
| @@ -202,6 +203,7 @@ def test_slow_fast_equivalence_batched(self): |
| @slow |
| @require_torch_accelerator |
| @require_vision |
| + @pytest.mark.torch_compile_test |
| def test_can_compile_fast_image_processor(self): |
| if self.fast_image_processing_class is None: |
| self.skipTest("Skipping compilation test as fast image processor is not defined") |
| |
| |
| |
| |
| @@ -15,6 +15,8 @@ |
| |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import is_torch_available |
| from transformers.testing_utils import ( |
| require_read_token, |
| @@ -88,6 +90,7 @@ def test_contrastive_generate_low_memory(self): |
| @unittest.skip( |
| "Lfm2 has a special cache format which is not compatible with compile as it has static address for conv cache" |
| ) |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| from packaging import version |
| |
| from transformers import AutoTokenizer, StaticCache, is_torch_available |
| @@ -256,6 +257,7 @@ def test_model_7b_dola_generation(self): |
| |
| @slow |
| @require_torch_accelerator |
| + @pytest.mark.torch_compile_test |
| def test_compile_static_cache(self): |
| # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2 |
| # work as intended. See https://github.com/pytorch/pytorch/issues/121943 |
| @@ -296,6 +298,7 @@ def test_compile_static_cache(self): |
| self.assertEqual(EXPECTED_TEXT_COMPLETION, static_text) |
| |
| @slow |
| + @pytest.mark.torch_export_test |
| def test_export_static_cache(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| self.skipTest(reason="This test requires torch >= 2.4 to run.") |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| import unittest |
| |
| import numpy as np |
| +import pytest |
| |
| from transformers.image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, ChannelDimension |
| from transformers.testing_utils import require_torch, require_vision |
| @@ -246,6 +247,7 @@ def test_multi_images(self): |
| @unittest.skip( |
| reason="LlavaOnevisionImageProcessorFast doesn't compile (infinitely) when using class transforms" |
| ) # FIXME yoni |
| + @pytest.mark.torch_compile_test |
| def test_can_compile_fast_image_processor(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -17,6 +17,7 @@ |
| import unittest |
| from unittest.util import safe_repr |
| |
| +import pytest |
| from parameterized import parameterized |
| |
| from transformers import AutoTokenizer, MambaConfig, is_torch_available |
| @@ -518,6 +519,7 @@ def test_simple_generate_cuda_kernels_big(self, device): |
| self.assertEqual(output_sentence, expected_output) |
| |
| @slow |
| + @pytest.mark.torch_compile_test |
| def test_compile_mamba_cache(self): |
| expected_output = "Hello my name is John and I am a\n\nI am a single father of a beautiful daughter. I am a" |
| |
| |
| |
| |
| |
| @@ -16,6 +16,7 @@ |
| import unittest |
| |
| import numpy as np |
| +import pytest |
| |
| from tests.test_modeling_common import floats_tensor |
| from transformers import AutoModelForImageClassification, Mask2FormerConfig, is_torch_available, is_vision_available |
| @@ -576,6 +577,7 @@ def test_with_segmentation_maps_and_loss(self): |
| |
| self.assertTrue(outputs.loss is not None) |
| |
| + @pytest.mark.torch_export_test |
| def test_export(self): |
| if not is_torch_greater_or_equal_than_2_4: |
| self.skipTest(reason="This test requires torch >= 2.4 to run.") |
| |
| |
| |
| |
| @@ -19,6 +19,7 @@ |
| import unittest |
| |
| import numpy as np |
| +import pytest |
| from datasets import Audio, load_dataset |
| from pytest import mark |
| |
| @@ -446,6 +447,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self): |
| pass |
| |
| @unittest.skip(reason="The MimiModel does not have support dynamic compile yet") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -278,6 +278,7 @@ def test_speculative_generation(self): |
| text = tokenizer.decode(generated_ids[0], skip_special_tokens=True) |
| self.assertEqual(EXPECTED_TEXT_COMPLETION, text) |
| |
| + @pytest.mark.torch_compile_test |
| @slow |
| def test_compile_static_cache(self): |
| # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2 |
| |
| |
| |
| |
| @@ -16,6 +16,7 @@ |
| import unittest |
| |
| import accelerate |
| +import pytest |
| |
| from transformers import ( |
| AutoProcessor, |
| @@ -207,6 +208,7 @@ def test_initialization(self): |
| ) |
| |
| @unittest.skip(reason="Compile not yet supported because in LLava models") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -352,6 +352,7 @@ def test_generate_with_quant_cache(self): |
| pass |
| |
| @unittest.skip("For some unknown reasons the tests fails in CrossAttention layer when doing torch.sdpa(). ") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| from packaging import version |
| |
| from transformers import AutoTokenizer, MobileBertConfig, MobileBertForMaskedLM, is_torch_available |
| @@ -386,6 +387,7 @@ def test_inference_no_head(self): |
| |
| self.assertTrue(lower_bound and upper_bound) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| |
| |
| |
| |
| @@ -390,6 +390,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self): |
| def test_flash_attn_2_conversion(self): |
| self.skipTest(reason="ModernBert doesn't use the ModernBertFlashAttention2 class method.") |
| |
| + @pytest.mark.torch_compile_test |
| def test_saved_config_excludes_reference_compile(self): |
| config = ModernBertConfig(reference_compile=True) |
| with tempfile.TemporaryDirectory() as tmpdirname: |
| @@ -501,6 +502,7 @@ def test_inference_sequence_classification(self): |
| expected = torch.tensor([[1.6466, 4.5662]]) |
| torch.testing.assert_close(output, expected, rtol=1e-4, atol=1e-4) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| |
| |
| |
| |
| @@ -178,6 +178,7 @@ def setUp(self): |
| ) |
| |
| @unittest.skip(reason="The MoshiModel does not have support dynamic compile yet") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| @@ -636,6 +637,7 @@ def test_eager_matches_sdpa_inference( |
| pass |
| |
| @unittest.skip(reason="The Moshi model does not have support dynamic compile yet") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -20,6 +20,7 @@ |
| import unittest |
| |
| import numpy as np |
| +import pytest |
| from pytest import mark |
| |
| from transformers import ( |
| @@ -1235,6 +1236,7 @@ def test_generation_tester_mixin_inheritance(self): |
| pass |
| |
| @unittest.skip(reason=("MusicGen has a set of composite models which might not have SDPA themselves, e.g. T5.")) |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -20,6 +20,7 @@ |
| import unittest |
| |
| import numpy as np |
| +import pytest |
| from pytest import mark |
| |
| from transformers import ( |
| @@ -1236,6 +1237,7 @@ def test_generation_tester_mixin_inheritance(self): |
| pass |
| |
| @unittest.skip(reason=("MusicGen has a set of composite models which might not have SDPA themselves, e.g. T5.")) |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| from packaging import version |
| from parameterized import parameterized |
| |
| @@ -327,6 +328,7 @@ def test_simple_encode_decode(self): |
| |
| self.assertEqual(rust_tokenizer.encode(" Hello"), [24387]) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export_static_cache(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| from packaging import version |
| from parameterized import parameterized |
| |
| @@ -327,6 +328,7 @@ def test_simple_encode_decode(self): |
| |
| self.assertEqual(rust_tokenizer.encode(" Hello"), [22691]) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export_static_cache(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| |
| |
| |
| |
| @@ -318,6 +318,7 @@ def test_generate_with_static_cache(self): |
| pass |
| |
| @pytest.mark.generate |
| + @pytest.mark.torch_compile_test |
| @is_flaky |
| def test_generate_compile_model_forward(self): |
| super().test_generate_compile_model_forward() |
| |
| |
| |
| |
| @@ -16,6 +16,8 @@ |
| |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import Phi3Config, StaticCache, is_torch_available |
| from transformers.models.auto.configuration_auto import AutoConfig |
| from transformers.testing_utils import ( |
| @@ -342,6 +344,7 @@ def test_phi3_mini_4k_sliding_window(self): |
| |
| self.assertListEqual(output_text, EXPECTED_OUTPUT) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export_static_cache(self): |
| from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4 |
| |
| |
| |
| |
| @@ -20,6 +20,7 @@ |
| import warnings |
| |
| import numpy as np |
| +import pytest |
| from packaging import version |
| |
| from transformers.testing_utils import require_torch, require_vision, slow, torch_device |
| @@ -288,6 +289,7 @@ def test_image_processor_preprocess_arguments(self): |
| self.skipTest(reason="No validation found for `preprocess` method") |
| |
| @slow |
| + @pytest.mark.torch_compile_test |
| def test_can_compile_fast_image_processor(self): |
| if self.fast_image_processing_class is None: |
| self.skipTest("Skipping compilation test as fast image processor is not defined") |
| |
| |
| |
| |
| @@ -14,6 +14,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| import requests |
| from parameterized import parameterized |
| |
| @@ -253,6 +254,7 @@ def test_generate_compilation_all_outputs(self): |
| @unittest.skip( |
| reason="Supported only for text-only inputs (otherwise dynamic control flows for multimodal inputs)" |
| ) |
| + @pytest.mark.torch_compile_test |
| def test_generate_compile_model_forward(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| import unittest |
| |
| import numpy as np |
| +import pytest |
| import requests |
| from packaging import version |
| |
| @@ -263,6 +264,7 @@ def test_slow_fast_equivalence_batched(self): |
| @slow |
| @require_torch_gpu |
| @require_vision |
| + @pytest.mark.torch_compile_test |
| def test_can_compile_fast_image_processor(self): |
| if self.fast_image_processing_class is None: |
| self.skipTest("Skipping compilation test as fast image processor is not defined") |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| |
| import unittest |
| |
| +import pytest |
| import requests |
| |
| from transformers import Dinov2Config, PromptDepthAnythingConfig |
| @@ -284,6 +285,7 @@ def test_inference(self): |
| |
| self.assertTrue(torch.allclose(predicted_depth[0, :3, :3], expected_slice, atol=1e-3)) |
| |
| + @pytest.mark.torch_export_test |
| def test_export(self): |
| for strict in [False, True]: |
| if strict and get_torch_major_and_minor_version() == "2.7": |
| |
| |
| |
| |
| @@ -239,6 +239,7 @@ def test_speculative_generation(self): |
| backend_empty_cache(torch_device) |
| gc.collect() |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export_static_cache(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| |
| |
| |
| |
| @@ -21,6 +21,7 @@ |
| from urllib.request import urlopen |
| |
| import librosa |
| +import pytest |
| import requests |
| |
| from transformers import ( |
| @@ -281,6 +282,7 @@ def test_correct_missing_keys(self): |
| pass |
| |
| @unittest.skip(reason="Compile not yet supported because in QwenOmniThinker models") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| @@ -444,6 +446,7 @@ def test_generate_from_inputs_embeds_with_static_cache(self): |
| # TODO (joao, raushan): there are multiple standardization issues in this model that prevent this test from |
| # passing, fix me |
| @unittest.skip("Cannot handle 4D attention mask") |
| + @pytest.mark.torch_compile_test |
| def test_generate_compile_model_forward(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -19,6 +19,7 @@ |
| from urllib.request import urlopen |
| |
| import librosa |
| +import pytest |
| |
| from transformers import ( |
| AutoProcessor, |
| @@ -148,6 +149,7 @@ def setUp(self): |
| self.config_tester = ConfigTester(self, config_class=Qwen2AudioConfig, has_text_modality=False) |
| |
| @unittest.skip(reason="Compile not yet supported because in Qwen2Audio models") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -231,6 +231,7 @@ def test_speculative_generation(self): |
| |
| self.assertEqual(EXPECTED_TEXT_COMPLETION, text) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export_static_cache(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| |
| |
| |
| |
| @@ -15,6 +15,8 @@ |
| |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import AutoTokenizer, RobertaConfig, is_torch_available |
| from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device |
| |
| @@ -575,6 +577,7 @@ def test_inference_classification_head(self): |
| |
| torch.testing.assert_close(output, expected_tensor, rtol=1e-4, atol=1e-4) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export(self): |
| if not is_torch_greater_or_equal_than_2_4: |
| |
| |
| |
| |
| @@ -16,6 +16,7 @@ |
| import tempfile |
| import unittest |
| |
| +import pytest |
| import requests |
| |
| from transformers import SamConfig, SamMaskDecoderConfig, SamPromptEncoderConfig, SamVisionConfig, pipeline |
| @@ -257,6 +258,7 @@ def test_hidden_states_output(self): |
| pass |
| |
| @require_torch_sdpa |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| self.skipTest(reason="SAM model can't be compiled dynamic yet") |
| |
| @@ -658,6 +660,7 @@ def test_model_from_pretrained(self): |
| self.assertIsNotNone(model) |
| |
| @require_torch_sdpa |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| self.skipTest(reason="SAM model can't be compiled dynamic yet") |
| |
| |
| |
| |
| |
| @@ -17,6 +17,7 @@ |
| import tempfile |
| import unittest |
| |
| +import pytest |
| import requests |
| |
| from transformers import ( |
| @@ -265,6 +266,7 @@ def test_hidden_states_output(self): |
| pass |
| |
| @require_torch_sdpa |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| self.skipTest(reason="SAM model can't be compiled dynamic yet") |
| |
| @@ -706,6 +708,7 @@ def test_model_from_pretrained(self): |
| self.assertIsNotNone(model) |
| |
| @require_torch_sdpa |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| self.skipTest(reason="SamHQModel can't be compiled dynamic yet") |
| |
| |
| |
| |
| |
| @@ -172,6 +172,7 @@ def test_model_3b_long_prompt(self): |
| backend_empty_cache(torch_device) |
| gc.collect() |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export_static_cache(self): |
| if version.parse(torch.__version__) < version.parse("2.4.0"): |
| |
| |
| |
| |
| @@ -186,6 +186,7 @@ def test_flash_attn_2_inference_padding_right(self): |
| pass |
| |
| @unittest.skip(reason="Compile not yet supported in SmolVLM models") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| @@ -387,6 +388,7 @@ def test_generate_with_static_cache(self): |
| pass |
| |
| @unittest.skip(reason="Compile not yet supported in SmolVLM models") |
| + @pytest.mark.torch_compile_test |
| def test_sdpa_can_compile_dynamic(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -19,6 +19,8 @@ |
| import tempfile |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import T5Config, is_torch_available |
| from transformers.models.auto.modeling_auto import MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES |
| from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4 |
| @@ -1610,6 +1612,7 @@ def test_contrastive_search_t5(self): |
| |
| @slow |
| @require_torch_accelerator |
| + @pytest.mark.torch_compile_test |
| def test_compile_static_cache(self): |
| NUM_TOKENS_TO_GENERATE = 40 |
| EXPECTED_TEXT_COMPLETION = [ |
| @@ -1650,6 +1653,7 @@ def test_compile_static_cache(self): |
| |
| @slow |
| @require_torch_accelerator |
| + @pytest.mark.torch_compile_test |
| def test_compile_static_cache_encoder(self): |
| prompts = [ |
| "summarize: Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial " |
| @@ -1668,6 +1672,7 @@ def test_compile_static_cache_encoder(self): |
| logits_compiled = model(**inputs) |
| torch.testing.assert_close(logits[0][:, -3:, -3], logits_compiled[0][:, -3:, -3], rtol=1e-5, atol=1e-5) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export_encoder(self): |
| """Test exporting T5EncoderModel to torch export format.""" |
| @@ -1704,6 +1709,7 @@ def test_export_encoder(self): |
| # Verify outputs are close enough |
| self.assertTrue(torch.allclose(original_output, exported_output, atol=1e-5)) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export_decoder(self): |
| """Test exporting T5 decoder with static cache to torch export format.""" |
| @@ -1765,6 +1771,7 @@ def test_export_decoder(self): |
| # Verify cache buffers are 3D |
| self.assertEqual(buffer.shape[2], max_cache_len) |
| |
| + @pytest.mark.torch_export_test |
| @slow |
| def test_export_t5_summarization(self): |
| """Test composing exported T5 encoder and decoder for summarization.""" |
| |
| |
| |
| |
| @@ -18,6 +18,7 @@ |
| import warnings |
| |
| import numpy as np |
| +import pytest |
| import requests |
| from packaging import version |
| |
| @@ -340,6 +341,7 @@ def test_slow_fast_equivalence_batched(self): |
| @slow |
| @require_torch_accelerator |
| @require_vision |
| + @pytest.mark.torch_compile_test |
| def test_can_compile_fast_image_processor(self): |
| # override as trimaps are needed for the image processor |
| if self.fast_image_processing_class is None: |
| |
| |
| |
| |
| @@ -16,6 +16,8 @@ |
| import inspect |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import VitPoseBackboneConfig |
| from transformers.testing_utils import require_torch, torch_device |
| from transformers.utils import is_torch_available |
| @@ -193,6 +195,7 @@ def test_forward_signature(self): |
| expected_arg_names = ["pixel_values"] |
| self.assertListEqual(arg_names[:1], expected_arg_names) |
| |
| + @pytest.mark.torch_export_test |
| def test_torch_export(self): |
| # Dense architecture |
| super().test_torch_export() |
| |
| |
| |
| |
| @@ -1420,6 +1420,7 @@ def test_labels_sequence_max_length_error_after_changing_config(self): |
| |
| # TODO (joao, eustache): fix me :) The model is not returning a `Cache` by default |
| @unittest.skip(reason="Whisper's custom generate is not consistent regarding the cache return types") |
| + @pytest.mark.torch_compile_test |
| def test_generate_compile_model_forward(self): |
| pass |
| |
| |
| |
| |
| |
| @@ -18,6 +18,7 @@ |
| import unittest |
| from unittest import skip |
| |
| +import pytest |
| from packaging import version |
| |
| from transformers import AqlmConfig, AutoConfig, AutoModelForCausalLM, AutoTokenizer, OPTForCausalLM, StaticCache |
| @@ -198,6 +199,7 @@ def test_quantized_model_multi_gpu(self): |
| is_aqlm_available() and version.parse(importlib.metadata.version("aqlm")) >= version.parse("1.0.3"), |
| "test requires `aqlm>=1.0.3`", |
| ) |
| + @pytest.mark.torch_compile_test |
| def test_quantized_model_compile(self): |
| """ |
| Simple test that checks if the quantized model is working properly |
| |
| |
| |
| |
| @@ -16,6 +16,7 @@ |
| import tempfile |
| import unittest |
| |
| +import pytest |
| from packaging import version |
| |
| from transformers import ( |
| @@ -849,6 +850,7 @@ def setUp(self): |
| self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) |
| self.model_4bit = AutoModelForCausalLM.from_pretrained(self.model_name, load_in_4bit=True) |
| |
| + @pytest.mark.torch_compile_test |
| def test_generate_compile(self): |
| encoded_input = self.tokenizer(self.input_text, return_tensors="pt") |
| |
| |
| |
| |
| |
| @@ -16,6 +16,7 @@ |
| import tempfile |
| import unittest |
| |
| +import pytest |
| from packaging import version |
| |
| from transformers import ( |
| @@ -996,6 +997,7 @@ def setUp(self): |
| self.tokenizer = AutoTokenizer.from_pretrained(self.model_name) |
| self.model_8bit = AutoModelForCausalLM.from_pretrained(self.model_name, load_in_8bit=True) |
| |
| + @pytest.mark.torch_compile_test |
| def test_generate_compile(self): |
| encoded_input = self.tokenizer(self.input_text, return_tensors="pt") |
| |
| |
| |
| |
| |
| @@ -16,6 +16,8 @@ |
| import tempfile |
| import unittest |
| |
| +import pytest |
| + |
| from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, SpQRConfig, StaticCache |
| from transformers.testing_utils import ( |
| backend_empty_cache, |
| @@ -179,6 +181,7 @@ def test_quantized_model_multi_gpu(self): |
| |
| self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT) |
| |
| + @pytest.mark.torch_compile_test |
| def test_quantized_model_compile(self): |
| """ |
| Simple test that checks if the quantized model is working properly |
| |
| |
| |
| |
| @@ -22,6 +22,7 @@ |
| from copy import deepcopy |
| |
| import numpy as np |
| +import pytest |
| import requests |
| from packaging import version |
| |
| @@ -614,6 +615,7 @@ def test_override_instance_attributes_does_not_affect_other_instances(self): |
| @slow |
| @require_torch_accelerator |
| @require_vision |
| + @pytest.mark.torch_compile_test |
| def test_can_compile_fast_image_processor(self): |
| if self.fast_image_processing_class is None: |
| self.skipTest("Skipping compilation test as fast image processor is not defined") |
| |
| |
| |
| |
| @@ -27,6 +27,7 @@ |
| from contextlib import contextmanager |
| |
| import numpy as np |
| +import pytest |
| from packaging import version |
| from parameterized import parameterized |
| from pytest import mark |
| @@ -3866,6 +3867,7 @@ def test_sdpa_can_dispatch_on_flash(self): |
| |
| @require_torch_sdpa |
| @require_torch_accelerator |
| + @pytest.mark.torch_compile_test |
| @slow |
| def test_sdpa_can_compile_dynamic(self): |
| if not self.has_attentions: |
| @@ -4114,6 +4116,7 @@ def test_flash_attn_2_fp32_ln(self): |
| @require_flash_attn |
| @require_torch_gpu |
| @mark.flash_attn_test |
| + @pytest.mark.torch_compile_test |
| @slow |
| def test_flash_attn_2_can_compile_with_attention_mask_None_without_graph_break(self): |
| if version.parse(torch.__version__) < version.parse("2.3"): |
| @@ -4581,6 +4584,7 @@ def test_custom_4d_attention_mask(self): |
| |
| @slow |
| @require_torch_accelerator |
| + @pytest.mark.torch_compile_test |
| def test_torch_compile_for_training(self): |
| if version.parse(torch.__version__) < version.parse("2.3"): |
| self.skipTest(reason="This test requires torch >= 2.3 to run.") |
| @@ -4653,6 +4657,7 @@ def test_forward_with_logits_to_keep(self): |
| |
| @slow |
| @require_torch_greater_or_equal("2.5") |
| + @pytest.mark.torch_export_test |
| def test_torch_export(self, config=None, inputs_dict=None, tolerance=1e-4): |
| """ |
| Test if model can be exported with torch.export.export() |
| |
| |
| |
| |
| @@ -21,6 +21,7 @@ |
| from copy import deepcopy |
| |
| import numpy as np |
| +import pytest |
| from packaging import version |
| |
| from transformers import AutoVideoProcessor |
| @@ -168,6 +169,7 @@ def test_init_without_params(self): |
| @slow |
| @require_torch_accelerator |
| @require_vision |
| + @pytest.mark.torch_compile_test |
| def test_can_compile_fast_video_processor(self): |
| if self.fast_video_processing_class is None: |
| self.skipTest("Skipping compilation test as fast video processor is not defined") |
| |
| |
| |
| |
| @@ -31,6 +31,7 @@ |
| from unittest.mock import Mock, patch |
| |
| import numpy as np |
| +import pytest |
| from huggingface_hub import HfFolder, ModelCard, create_branch, list_repo_commits, list_repo_files |
| from packaging import version |
| from parameterized import parameterized |
| @@ -1358,6 +1359,7 @@ def test_number_of_steps_in_training(self): |
| train_output = trainer.train() |
| self.assertEqual(train_output.global_step, 10) |
| |
| + @pytest.mark.torch_compile_test |
| def test_torch_compile_loss_func_compatibility(self): |
| config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4) |
| tiny_llama = LlamaForCausalLM(config) |
| @@ -1377,6 +1379,7 @@ def test_torch_compile_loss_func_compatibility(self): |
| |
| @require_peft |
| @require_bitsandbytes |
| + @pytest.mark.torch_compile_test |
| def test_bnb_compile(self): |
| from peft import LoraConfig, get_peft_model |
| |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| import copy |
| import unittest |
| |
| +import pytest |
| from packaging import version |
| from parameterized import parameterized |
| |
| @@ -594,6 +595,7 @@ def test_cache_gptj_model(self, cache_implementation): |
| class CacheExportIntegrationTest(unittest.TestCase): |
| """Cache tests that rely on `torch.export()` and model loading""" |
| |
| + @pytest.mark.torch_export_test |
| def test_dynamic_cache_exportability(self): |
| model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM") |
| model = model.eval() |
| @@ -635,6 +637,7 @@ def test_dynamic_cache_exportability(self): |
| self.assertTrue(torch.allclose(l1.keys, l2.keys, atol=1e-5)) |
| self.assertTrue(torch.allclose(l1.values, l2.values, atol=1e-5)) |
| |
| + @pytest.mark.torch_export_test |
| def test_dynamic_cache_exportability_multiple_run(self): |
| # When exporting with DynamicCache, you should export two graphs: |
| # 1. A graph without cache |
| @@ -730,6 +733,7 @@ def test_dynamic_cache_exportability_multiple_run(self): |
| self.assertTrue(torch.allclose(l1.values, l2.values, atol=1e-5)) |
| |
| @unittest.skip("Runs on my machine locally, passed, no idea why it does not online") |
| + @pytest.mark.torch_export_test |
| def test_static_cache_exportability(self): |
| """ |
| Tests that static cache works with `torch.export()` |
| @@ -808,6 +812,7 @@ def test_static_cache_exportability(self): |
| strict=strict, |
| ) |
| |
| + @pytest.mark.torch_export_test |
| def test_hybrid_cache_exportability(self): |
| """ |
| Tests that static cache works with `torch.export()` |
| |
| |
| |
| |
| @@ -15,6 +15,7 @@ |
| import unittest |
| import warnings |
| |
| +import pytest |
| from parameterized import parameterized |
| |
| from transformers import __version__, is_torch_available |
| @@ -174,6 +175,7 @@ def dummy_function(new_name=None, **kwargs): |
| result = dummy_function(deprecated_name="old_value", new_name="new_value") |
| self.assertEqual(result, "new_value") |
| |
| + @pytest.mark.torch_compile_test |
| @require_torch_accelerator |
| def test_compile_safe(self): |
| @deprecate_kwarg("deprecated_factor", new_name="new_factor", version=INFINITE_VERSION) |
| |
| |
| |
| |
| @@ -16,6 +16,7 @@ |
| import warnings |
| |
| import numpy as np |
| +import pytest |
| |
| from transformers.configuration_utils import PretrainedConfig |
| from transformers.modeling_outputs import BaseModelOutput |
| @@ -261,6 +262,7 @@ def test_decorator_eager(self): |
| message = f"output should be a {expected_type.__name__} when config.use_return_dict={config_return_dict} and return_dict={return_dict}" |
| self.assertIsInstance(output, expected_type, message) |
| |
| + @pytest.mark.torch_compile_test |
| def test_decorator_compiled(self): |
| """Test that the can_return_tuple decorator works with compiled mode.""" |
| config = PretrainedConfig() |
| @@ -277,6 +279,7 @@ def test_decorator_compiled(self): |
| output = compiled_model(torch.tensor(10), return_dict=False) |
| self.assertIsInstance(output, tuple) |
| |
| + @pytest.mark.torch_export_test |
| def test_decorator_torch_export(self): |
| """Test that the can_return_tuple decorator works with torch.export.""" |
| config = PretrainedConfig() |
| |
| |
| |
| |
| @@ -17,6 +17,8 @@ |
| from dataclasses import dataclass |
| from typing import Optional |
| |
| +import pytest |
| + |
| from transformers import AlbertForMaskedLM |
| from transformers.testing_utils import require_torch |
| from transformers.utils import ModelOutput, is_torch_available |
| @@ -160,6 +162,7 @@ def test_torch_pytree(self): |
| # TODO: @ydshieh |
| @unittest.skip(reason="CPU OOM") |
| @require_torch |
| + @pytest.mark.torch_export_test |
| def test_export_serialization(self): |
| if not is_torch_greater_or_equal_than_2_2: |
| self.skipTest(reason="Export serialization requires torch >= 2.2.0") |
| |
| |
| |
| |
| @@ -27,6 +27,7 @@ |
| import warnings |
| from pathlib import Path |
| |
| +import pytest |
| import requests |
| from huggingface_hub import HfApi, HfFolder |
| from parameterized import parameterized |
| @@ -2541,6 +2542,7 @@ def test_causal_mask_sliding(self): |
| # non auto-regressive case |
| self.check_to_causal(mask_converter, q_len=7, kv_len=7) |
| |
| + @pytest.mark.torch_compile_test |
| def test_torch_compile_fullgraph(self): |
| model = Prepare4dCausalAttentionMaskModel() |
| |
|
|