Spaces:

HF-slyfox
/

harness

Running

File size: 64,758 Bytes

dfefe0b

diff --git a/conftest.py b/conftest.py
index 2134dceb84b9..7a8344ea5056 100644
--- a/conftest.py
+++ b/conftest.py
@@ -83,6 +83,8 @@ def pytest_configure(config):
     config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
     config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")
     config.addinivalue_line("markers", "not_device_test: mark the tests always running on cpu")
+    config.addinivalue_line("markers", "torch_compile_test: mark test which tests torch compile functionality")
+    config.addinivalue_line("markers", "torch_export_test: mark test which tests torch export functionality")
 
 
 def pytest_collection_modifyitems(items):
diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py
index c9d20e692e92..76161928f6ba 100644
--- a/tests/generation/test_utils.py
+++ b/tests/generation/test_utils.py
@@ -2048,6 +2048,7 @@ def test_generate_with_quant_cache(self):
                 model.generate(**generation_kwargs, **inputs_dict)
 
     @pytest.mark.generate
+    @pytest.mark.torch_compile_test
     @require_torch_greater_or_equal("2.6")  # Uses torch.compiler.set_stance
     def test_generate_compile_model_forward(self):
         """
@@ -2744,6 +2745,7 @@ def test_speculative_sampling_target_distribution(self):
         self.assertTrue(last_token_counts[1] > last_token_counts[3] > last_token_counts[7] > 0)
         self.assertTrue(last_token_counts[8] > last_token_counts[3])
 
+    @pytest.mark.torch_export_test
     def test_cache_dependant_input_preparation_exporting(self):
         self.assertFalse(
             is_torchdynamo_exporting()
@@ -4342,6 +4344,7 @@ def test_prepare_inputs_for_generation_encoder_decoder_llm(self):
         self.assertTrue(model_inputs["encoder_outputs"] == "foo")
         # See the decoder-only test for more corner cases. The code is the same, so we don't repeat it here.
 
+    @pytest.mark.torch_compile_test
     def test_generate_compile_fullgraph_tiny(self):
         """
         Tests that we can call end-to-end generation with a tiny model (i.e. doesn't crash)
@@ -4931,6 +4934,7 @@ def test_cache_device_map_with_vision_layer_device_map(self):
         _ = model.generate(**inputs, max_new_tokens=2, do_sample=False)
 
     @require_torch_accelerator
+    @pytest.mark.torch_compile_test
     def test_cpu_offload_doesnt_compile(self):
         """Test that CPU offload doesn't trigger compilation"""
         tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM")
diff --git a/tests/models/albert/test_modeling_albert.py b/tests/models/albert/test_modeling_albert.py
index f0440fb349d6..eb857f3383c9 100644
--- a/tests/models/albert/test_modeling_albert.py
+++ b/tests/models/albert/test_modeling_albert.py
@@ -15,6 +15,7 @@
 
 import unittest
 
+import pytest
 from packaging import version
 
 from transformers import AlbertConfig, AutoTokenizer, is_torch_available
@@ -337,6 +338,7 @@ def test_inference_no_head_absolute_embedding(self):
         torch.testing.assert_close(output[:, 1:4, 1:4], expected_slice, rtol=1e-4, atol=1e-4)
 
     @slow
+    @pytest.mark.torch_export_test
     def test_export(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
             self.skipTest(reason="This test requires torch >= 2.4 to run.")
diff --git a/tests/models/aria/test_modeling_aria.py b/tests/models/aria/test_modeling_aria.py
index 94dafdc2d49c..c5829fb97ffb 100644
--- a/tests/models/aria/test_modeling_aria.py
+++ b/tests/models/aria/test_modeling_aria.py
@@ -15,6 +15,7 @@
 
 import unittest
 
+import pytest
 import requests
 
 from transformers import (
@@ -211,6 +212,7 @@ def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass
 
     @unittest.skip(reason="Compile not yet supported because in LLava models")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/aya_vision/test_modeling_aya_vision.py b/tests/models/aya_vision/test_modeling_aya_vision.py
index d472e0eb90f1..f6f33d39ea78 100644
--- a/tests/models/aya_vision/test_modeling_aya_vision.py
+++ b/tests/models/aya_vision/test_modeling_aya_vision.py
@@ -267,6 +267,7 @@ def test_initialization(self):
         pass
 
     @unittest.skip(reason="Compile not yet supported because in LLava models")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/beit/test_modeling_beit.py b/tests/models/beit/test_modeling_beit.py
index af8fd48e00c0..ff696f8cf607 100644
--- a/tests/models/beit/test_modeling_beit.py
+++ b/tests/models/beit/test_modeling_beit.py
@@ -15,6 +15,7 @@
 
 import unittest
 
+import pytest
 from datasets import load_dataset
 
 from transformers import BeitConfig
@@ -285,6 +286,7 @@ def test_feed_forward_chunking(self):
         pass
 
     @unittest.skip(reason="BEiT can't compile dynamic")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/bert/test_modeling_bert.py b/tests/models/bert/test_modeling_bert.py
index 97bca3317ae2..9cc0a8be2437 100644
--- a/tests/models/bert/test_modeling_bert.py
+++ b/tests/models/bert/test_modeling_bert.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 import unittest
 
+import pytest
 from packaging import version
 
 from transformers import AutoTokenizer, BertConfig, is_torch_available
@@ -722,6 +723,7 @@ def test_sdpa_ignored_mask(self):
             )
 
     @slow
+    @pytest.mark.torch_export_test
     def test_export(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
             self.skipTest(reason="This test requires torch >= 2.4 to run.")
diff --git a/tests/models/clip/test_modeling_clip.py b/tests/models/clip/test_modeling_clip.py
index ad7d817f962c..b506a442b9eb 100644
--- a/tests/models/clip/test_modeling_clip.py
+++ b/tests/models/clip/test_modeling_clip.py
@@ -19,6 +19,7 @@
 import unittest
 
 import numpy as np
+import pytest
 import requests
 from parameterized import parameterized
 from pytest import mark
@@ -708,6 +709,7 @@ def test_sdpa_can_dispatch_on_flash(self):
         self.skipTest(reason="CLIP text tower has two attention masks: `causal_attention_mask` and `attention_mask`")
 
     @require_torch_sdpa
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         self.skipTest(reason="CLIP model can't be compiled dynamic, error in clip_loss`")
 
diff --git a/tests/models/cohere2/test_modeling_cohere2.py b/tests/models/cohere2/test_modeling_cohere2.py
index 71335c37075e..1cf2a8424b54 100644
--- a/tests/models/cohere2/test_modeling_cohere2.py
+++ b/tests/models/cohere2/test_modeling_cohere2.py
@@ -225,6 +225,7 @@ def test_model_flash_attn(self):
 
         self.assertEqual(output_text, EXPECTED_TEXTS)
 
+    @pytest.mark.torch_export_test
     def test_export_static_cache(self):
         if version.parse(torch.__version__) < version.parse("2.5.0"):
             self.skipTest(reason="This test requires torch >= 2.5 to run.")
diff --git a/tests/models/colpali/test_modeling_colpali.py b/tests/models/colpali/test_modeling_colpali.py
index c6c9892d1737..62f87af46007 100644
--- a/tests/models/colpali/test_modeling_colpali.py
+++ b/tests/models/colpali/test_modeling_colpali.py
@@ -19,6 +19,7 @@
 import unittest
 from typing import ClassVar
 
+import pytest
 import torch
 from datasets import load_dataset
 
@@ -287,6 +288,7 @@ def test_sdpa_can_dispatch_on_flash(self):
         pass
 
     @unittest.skip(reason="Pass because ColPali requires `attention_mask is not None`")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/colqwen2/test_modeling_colqwen2.py b/tests/models/colqwen2/test_modeling_colqwen2.py
index 53a37108da83..3bf5b8a6c496 100644
--- a/tests/models/colqwen2/test_modeling_colqwen2.py
+++ b/tests/models/colqwen2/test_modeling_colqwen2.py
@@ -17,6 +17,7 @@
 import unittest
 from typing import ClassVar
 
+import pytest
 import torch
 from datasets import load_dataset
 
@@ -277,6 +278,7 @@ def test_sdpa_can_dispatch_on_flash(self):
         pass
 
     @unittest.skip(reason="Pass because ColQwen2 requires `attention_mask is not None`")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/data2vec/test_modeling_data2vec_vision.py b/tests/models/data2vec/test_modeling_data2vec_vision.py
index a1dde3d31b96..709436ce1419 100644
--- a/tests/models/data2vec/test_modeling_data2vec_vision.py
+++ b/tests/models/data2vec/test_modeling_data2vec_vision.py
@@ -15,6 +15,8 @@
 
 import unittest
 
+import pytest
+
 from transformers import Data2VecVisionConfig
 from transformers.testing_utils import (
     require_torch,
@@ -214,6 +216,7 @@ def test_config(self):
     @unittest.skip(
         reason="Will fix only if requested by the community: it fails with `torch._dynamo.exc.InternalTorchDynamoError: IndexError: list index out of range`. Without compile, the test pass."
     )
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/deepseek_v2/test_modeling_deepseek_v2.py b/tests/models/deepseek_v2/test_modeling_deepseek_v2.py
index 0bdc6884590f..f1c6cf6786f1 100644
--- a/tests/models/deepseek_v2/test_modeling_deepseek_v2.py
+++ b/tests/models/deepseek_v2/test_modeling_deepseek_v2.py
@@ -16,6 +16,8 @@
 
 import unittest
 
+import pytest
+
 from transformers import BitsAndBytesConfig, Cache, DeepseekV2Config, is_torch_available
 from transformers.testing_utils import require_read_token, require_torch, require_torch_accelerator, slow, torch_device
 
@@ -173,10 +175,12 @@ def _check_past_key_values_for_generate(self, batch_size, decoder_past_key_value
                 self.assertEqual(layer.values.shape, expected_value_shape)
 
     @unittest.skip("Deepseek-V2 uses MLA which has a special head dim and is not compatible with StaticCache shape")
+    @pytest.mark.torch_compile_test
     def test_generate_compilation_all_outputs(self):
         pass
 
     @unittest.skip("Deepseek-V2 uses MLA which has a special head dim and is not compatible with StaticCache shape")
+    @pytest.mark.torch_compile_test
     def test_generate_compile_model_forward(self):
         pass
 
@@ -185,10 +189,12 @@ def test_generate_from_inputs_embeds_with_static_cache(self):
         pass
 
     @unittest.skip("Deepseek-V2 uses MLA which has a special head dim and is not compatible with StaticCache shape")
+    @pytest.mark.torch_compile_test
     def test_generate_with_static_cache(self):
         pass
 
     @unittest.skip("Dynamic control flow in MoE")
+    @pytest.mark.torch_compile_test
     def test_torch_compile_for_training(self):
         pass
 
diff --git a/tests/models/deepseek_v3/test_modeling_deepseek_v3.py b/tests/models/deepseek_v3/test_modeling_deepseek_v3.py
index 87f7b2abb0e9..3e1dc11998c4 100644
--- a/tests/models/deepseek_v3/test_modeling_deepseek_v3.py
+++ b/tests/models/deepseek_v3/test_modeling_deepseek_v3.py
@@ -15,6 +15,7 @@
 
 import unittest
 
+import pytest
 from packaging import version
 from parameterized import parameterized
 
@@ -311,6 +312,7 @@ def test_generate_compilation_all_outputs(self):
         pass
 
     @unittest.skip("Deepseek-V3 uses MLA so it is not compatible with the standard cache format")
+    @pytest.mark.torch_compile_test
     def test_generate_compile_model_forward(self):
         pass
 
@@ -533,6 +535,7 @@ def tearDown(self):
 
     @slow
     @require_torch_accelerator
+    @pytest.mark.torch_compile_test
     @require_read_token
     def test_compile_static_cache(self):
         # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
diff --git a/tests/models/depth_anything/test_modeling_depth_anything.py b/tests/models/depth_anything/test_modeling_depth_anything.py
index 3527e1d6b8cf..f0c638a76f22 100644
--- a/tests/models/depth_anything/test_modeling_depth_anything.py
+++ b/tests/models/depth_anything/test_modeling_depth_anything.py
@@ -15,6 +15,8 @@
 
 import unittest
 
+import pytest
+
 from transformers import DepthAnythingConfig, Dinov2Config
 from transformers.file_utils import is_torch_available, is_vision_available
 from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
@@ -286,6 +288,7 @@ def test_inference(self):
 
         torch.testing.assert_close(predicted_depth[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)
 
+    @pytest.mark.torch_export_test
     def test_export(self):
         for strict in [False, True]:
             with self.subTest(strict=strict):
diff --git a/tests/models/depth_pro/test_modeling_depth_pro.py b/tests/models/depth_pro/test_modeling_depth_pro.py
index 50ac4cd1d28f..0e644c7c1892 100644
--- a/tests/models/depth_pro/test_modeling_depth_pro.py
+++ b/tests/models/depth_pro/test_modeling_depth_pro.py
@@ -15,6 +15,8 @@
 
 import unittest
 
+import pytest
+
 from transformers import DepthProConfig
 from transformers.file_utils import is_torch_available, is_vision_available
 from transformers.testing_utils import require_torch, require_vision, slow, torch_device
@@ -221,6 +223,7 @@ def test_config(self):
         self.config_tester.run_common_tests()
 
     @unittest.skip(reason="Inductor error: name 'OpaqueUnaryFn_log2' is not defined")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/diffllama/test_modeling_diffllama.py b/tests/models/diffllama/test_modeling_diffllama.py
index f376fab87e14..10675b1681df 100644
--- a/tests/models/diffllama/test_modeling_diffllama.py
+++ b/tests/models/diffllama/test_modeling_diffllama.py
@@ -570,6 +570,7 @@ def tearDown(self):
     @slow
     @require_torch_accelerator
     @require_read_token
+    @pytest.mark.torch_compile_test
     def test_compile_static_cache(self):
         # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
         # work as intended. See https://github.com/pytorch/pytorch/issues/121943
diff --git a/tests/models/distilbert/test_modeling_distilbert.py b/tests/models/distilbert/test_modeling_distilbert.py
index 3124689b0eaf..871b32b3af70 100644
--- a/tests/models/distilbert/test_modeling_distilbert.py
+++ b/tests/models/distilbert/test_modeling_distilbert.py
@@ -399,6 +399,7 @@ def test_inference_no_head_absolute_embedding(self):
 
         torch.testing.assert_close(output[:, 1:4, 1:4], expected_slice, rtol=1e-4, atol=1e-4)
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export(self):
         if not is_torch_greater_or_equal_than_2_4:
diff --git a/tests/models/dots1/test_modeling_dots1.py b/tests/models/dots1/test_modeling_dots1.py
index 2df3fd965446..9426ff7d300a 100644
--- a/tests/models/dots1/test_modeling_dots1.py
+++ b/tests/models/dots1/test_modeling_dots1.py
@@ -96,6 +96,7 @@ def test_generate_compilation_all_outputs(self):
         pass
 
     @unittest.skip("dots.llm1's moe is not compatible `token_indices, weight_indices = torch.where(mask)`")
+    @pytest.mark.torch_compile_test
     def test_generate_compile_model_forward(self):
         pass
 
diff --git a/tests/models/dpt/test_modeling_dpt.py b/tests/models/dpt/test_modeling_dpt.py
index dac36338beb3..1d693e7f408c 100644
--- a/tests/models/dpt/test_modeling_dpt.py
+++ b/tests/models/dpt/test_modeling_dpt.py
@@ -15,6 +15,8 @@
 
 import unittest
 
+import pytest
+
 from transformers import DPTConfig
 from transformers.file_utils import is_torch_available, is_vision_available
 from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
@@ -255,6 +257,7 @@ def test_training_gradient_checkpointing_use_reentrant_false(self):
         pass
 
     @unittest.skip(reason="Inductor error for dynamic shape")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
@@ -420,6 +423,7 @@ def test_post_processing_depth_estimation(self):
         self.assertTrue(output_enlarged.shape == expected_shape)
         torch.testing.assert_close(predicted_depth_l, output_enlarged, atol=1e-3, rtol=1e-3)
 
+    @pytest.mark.torch_export_test
     def test_export(self):
         for strict in [True, False]:
             with self.subTest(strict=strict):
diff --git a/tests/models/exaone4/test_modeling_exaone4.py b/tests/models/exaone4/test_modeling_exaone4.py
index 4ac87ce900b5..7bd98b1850fc 100644
--- a/tests/models/exaone4/test_modeling_exaone4.py
+++ b/tests/models/exaone4/test_modeling_exaone4.py
@@ -354,6 +354,7 @@ def test_model_generation_beyond_sliding_window(self):
         del model
         cleanup(torch_device, gc_collect=True)
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export_static_cache(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
diff --git a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
index 9686b1660fdb..eafebbcb5365 100644
--- a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
+++ b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
@@ -17,6 +17,8 @@
 import unittest
 from unittest.util import safe_repr
 
+import pytest
+
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, FalconMambaConfig, is_torch_available
 from transformers.testing_utils import (
     Expectations,
@@ -487,6 +489,7 @@ def test_generation_4bit(self):
             "Hello today Iava,\n\nI'm sorry to hear that you're having trouble with the ",
         )
 
+    @pytest.mark.torch_compile_test
     def test_generation_torch_compile(self):
         model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.float16).to(torch_device)
         model = torch.compile(model)
diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py
index 8a1e2ea9eb7f..4b5d939359ac 100644
--- a/tests/models/gemma/test_modeling_gemma.py
+++ b/tests/models/gemma/test_modeling_gemma.py
@@ -356,6 +356,7 @@ def test_model_7b_4bit(self):
 
     @slow
     @require_torch_accelerator
+    @pytest.mark.torch_compile_test
     @require_read_token
     def test_compile_static_cache(self):
         # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
@@ -394,6 +395,7 @@ def test_compile_static_cache(self):
         static_compiled_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
         self.assertEqual(EXPECTED_TEXT_COMPLETION, static_compiled_text)
 
+    @pytest.mark.torch_export_test
     @slow
     @require_read_token
     def test_export_static_cache(self):
diff --git a/tests/models/gemma2/test_modeling_gemma2.py b/tests/models/gemma2/test_modeling_gemma2.py
index 5d778d8cb2ec..b8bfcaad43c1 100644
--- a/tests/models/gemma2/test_modeling_gemma2.py
+++ b/tests/models/gemma2/test_modeling_gemma2.py
@@ -306,6 +306,7 @@ def test_model_9b_flash_attn(self):
 
         self.assertEqual(output_text, EXPECTED_TEXTS)
 
+    @pytest.mark.torch_export_test
     @slow
     @require_read_token
     def test_export_static_cache(self):
@@ -379,6 +380,7 @@ def test_export_static_cache(self):
     @slow
     @require_read_token
     @require_large_cpu_ram
+    @pytest.mark.torch_export_test
     def test_export_hybrid_cache(self):
         from transformers.integrations.executorch import TorchExportableModuleForDecoderOnlyLM
         from transformers.pytorch_utils import is_torch_greater_or_equal
diff --git a/tests/models/gemma3/test_modeling_gemma3.py b/tests/models/gemma3/test_modeling_gemma3.py
index f9a19646e7e0..eb87743b2a8a 100644
--- a/tests/models/gemma3/test_modeling_gemma3.py
+++ b/tests/models/gemma3/test_modeling_gemma3.py
@@ -819,6 +819,7 @@ def test_generation_beyond_sliding_window(self, attn_implementation: str):
         EXPECTED_COMPLETIONS = [" and I'm going to take a walk.\n\nI really enjoy the scenery, and I'", ", green, yellow, orange, purple, brown, black, white, gray.\n\nI'"]  # fmt: skip
         self.assertEqual(output_text, EXPECTED_COMPLETIONS)
 
+    @pytest.mark.torch_export_test
     def test_export_text_only_with_hybrid_cache(self):
         if not is_torch_greater_or_equal("2.6.0"):
             self.skipTest(reason="This test requires torch >= 2.6 to run.")
diff --git a/tests/models/glm4_moe/test_modeling_glm4_moe.py b/tests/models/glm4_moe/test_modeling_glm4_moe.py
index 59631fb37228..3d3582cb2435 100644
--- a/tests/models/glm4_moe/test_modeling_glm4_moe.py
+++ b/tests/models/glm4_moe/test_modeling_glm4_moe.py
@@ -15,6 +15,7 @@
 
 import unittest
 
+import pytest
 import torch
 from packaging import version
 
@@ -93,6 +94,7 @@ def tearDown(self):
 
     @slow
     @require_torch_accelerator
+    @pytest.mark.torch_compile_test
     @require_read_token
     def test_compile_static_cache(self):
         # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
diff --git a/tests/models/idefics3/test_modeling_idefics3.py b/tests/models/idefics3/test_modeling_idefics3.py
index 5cf06a50be10..49d940f6fb0a 100644
--- a/tests/models/idefics3/test_modeling_idefics3.py
+++ b/tests/models/idefics3/test_modeling_idefics3.py
@@ -195,6 +195,7 @@ def test_flash_attn_2_inference_padding_right(self):
         pass
 
     @unittest.skip(reason="Compile not yet supported in idefics3 models")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
@@ -379,6 +380,7 @@ def test_eager_matches_sdpa_generate(self):
         pass
 
     @unittest.skip(reason="Compile not yet supported in Idefics3 models end-to-end")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/internvl/test_modeling_internvl.py b/tests/models/internvl/test_modeling_internvl.py
index 4317cb36825e..a4dd976fa8ab 100644
--- a/tests/models/internvl/test_modeling_internvl.py
+++ b/tests/models/internvl/test_modeling_internvl.py
@@ -17,6 +17,7 @@
 import unittest
 from io import BytesIO
 
+import pytest
 import requests
 
 from transformers import (
@@ -216,6 +217,7 @@ def test_initialization(self):
                     )
 
     @unittest.skip(reason="Compile not yet supported because in LLava models")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/janus/test_modeling_janus.py b/tests/models/janus/test_modeling_janus.py
index 2fa257bc5c42..9c334b609175 100644
--- a/tests/models/janus/test_modeling_janus.py
+++ b/tests/models/janus/test_modeling_janus.py
@@ -20,6 +20,7 @@
 from functools import reduce
 
 import numpy as np
+import pytest
 import requests
 
 from transformers import (
@@ -294,6 +295,7 @@ def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=No
                             pass
 
     @unittest.skip("There are recompilations in Janus")  # TODO (joao, raushan): fix me
+    @pytest.mark.torch_compile_test
     def test_generate_compile_model_forward(self):
         pass
 
diff --git a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
index f574f6751104..9c2a3eee735d 100644
--- a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
+++ b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
@@ -14,6 +14,7 @@
 
 import unittest
 
+import pytest
 import requests
 from packaging import version
 
@@ -202,6 +203,7 @@ def test_slow_fast_equivalence_batched(self):
     @slow
     @require_torch_accelerator
     @require_vision
+    @pytest.mark.torch_compile_test
     def test_can_compile_fast_image_processor(self):
         if self.fast_image_processing_class is None:
             self.skipTest("Skipping compilation test as fast image processor is not defined")
diff --git a/tests/models/lfm2/test_modeling_lfm2.py b/tests/models/lfm2/test_modeling_lfm2.py
index 7921fcbf1560..4603f54dc7f7 100644
--- a/tests/models/lfm2/test_modeling_lfm2.py
+++ b/tests/models/lfm2/test_modeling_lfm2.py
@@ -15,6 +15,8 @@
 
 import unittest
 
+import pytest
+
 from transformers import is_torch_available
 from transformers.testing_utils import (
     require_read_token,
@@ -88,6 +90,7 @@ def test_contrastive_generate_low_memory(self):
     @unittest.skip(
         "Lfm2 has a special cache format which is not compatible with compile as it has static address for conv cache"
     )
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py
index 26be82b9da82..5be6e9803e05 100644
--- a/tests/models/llama/test_modeling_llama.py
+++ b/tests/models/llama/test_modeling_llama.py
@@ -15,6 +15,7 @@
 
 import unittest
 
+import pytest
 from packaging import version
 
 from transformers import AutoTokenizer, StaticCache, is_torch_available
@@ -256,6 +257,7 @@ def test_model_7b_dola_generation(self):
 
     @slow
     @require_torch_accelerator
+    @pytest.mark.torch_compile_test
     def test_compile_static_cache(self):
         # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
         # work as intended. See https://github.com/pytorch/pytorch/issues/121943
@@ -296,6 +298,7 @@ def test_compile_static_cache(self):
         self.assertEqual(EXPECTED_TEXT_COMPLETION, static_text)
 
     @slow
+    @pytest.mark.torch_export_test
     def test_export_static_cache(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
             self.skipTest(reason="This test requires torch >= 2.4 to run.")
diff --git a/tests/models/llava_onevision/test_image_processing_llava_onevision.py b/tests/models/llava_onevision/test_image_processing_llava_onevision.py
index 4aba232c9dfb..29f20cc7124e 100644
--- a/tests/models/llava_onevision/test_image_processing_llava_onevision.py
+++ b/tests/models/llava_onevision/test_image_processing_llava_onevision.py
@@ -15,6 +15,7 @@
 import unittest
 
 import numpy as np
+import pytest
 
 from transformers.image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, ChannelDimension
 from transformers.testing_utils import require_torch, require_vision
@@ -246,6 +247,7 @@ def test_multi_images(self):
     @unittest.skip(
         reason="LlavaOnevisionImageProcessorFast doesn't compile (infinitely) when using class transforms"
     )  # FIXME yoni
+    @pytest.mark.torch_compile_test
     def test_can_compile_fast_image_processor(self):
         pass
 
diff --git a/tests/models/mamba/test_modeling_mamba.py b/tests/models/mamba/test_modeling_mamba.py
index aff98558d52b..2fbe6ef81b37 100644
--- a/tests/models/mamba/test_modeling_mamba.py
+++ b/tests/models/mamba/test_modeling_mamba.py
@@ -17,6 +17,7 @@
 import unittest
 from unittest.util import safe_repr
 
+import pytest
 from parameterized import parameterized
 
 from transformers import AutoTokenizer, MambaConfig, is_torch_available
@@ -518,6 +519,7 @@ def test_simple_generate_cuda_kernels_big(self, device):
         self.assertEqual(output_sentence, expected_output)
 
     @slow
+    @pytest.mark.torch_compile_test
     def test_compile_mamba_cache(self):
         expected_output = "Hello my name is John and I am a\n\nI am a single father of a beautiful daughter. I am a"
 
diff --git a/tests/models/mask2former/test_modeling_mask2former.py b/tests/models/mask2former/test_modeling_mask2former.py
index 160dc553be9a..de0efc0410d9 100644
--- a/tests/models/mask2former/test_modeling_mask2former.py
+++ b/tests/models/mask2former/test_modeling_mask2former.py
@@ -16,6 +16,7 @@
 import unittest
 
 import numpy as np
+import pytest
 
 from tests.test_modeling_common import floats_tensor
 from transformers import AutoModelForImageClassification, Mask2FormerConfig, is_torch_available, is_vision_available
@@ -576,6 +577,7 @@ def test_with_segmentation_maps_and_loss(self):
 
         self.assertTrue(outputs.loss is not None)
 
+    @pytest.mark.torch_export_test
     def test_export(self):
         if not is_torch_greater_or_equal_than_2_4:
             self.skipTest(reason="This test requires torch >= 2.4 to run.")
diff --git a/tests/models/mimi/test_modeling_mimi.py b/tests/models/mimi/test_modeling_mimi.py
index 2123e93c698b..1ca50b33560d 100644
--- a/tests/models/mimi/test_modeling_mimi.py
+++ b/tests/models/mimi/test_modeling_mimi.py
@@ -19,6 +19,7 @@
 import unittest
 
 import numpy as np
+import pytest
 from datasets import Audio, load_dataset
 from pytest import mark
 
@@ -446,6 +447,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):
         pass
 
     @unittest.skip(reason="The MimiModel does not have support dynamic compile yet")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py
index a17f464370b8..dce2f756119e 100644
--- a/tests/models/mistral/test_modeling_mistral.py
+++ b/tests/models/mistral/test_modeling_mistral.py
@@ -278,6 +278,7 @@ def test_speculative_generation(self):
         text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
         self.assertEqual(EXPECTED_TEXT_COMPLETION, text)
 
+    @pytest.mark.torch_compile_test
     @slow
     def test_compile_static_cache(self):
         # `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
diff --git a/tests/models/mistral3/test_modeling_mistral3.py b/tests/models/mistral3/test_modeling_mistral3.py
index 666997d4a5e1..99b2394037fc 100644
--- a/tests/models/mistral3/test_modeling_mistral3.py
+++ b/tests/models/mistral3/test_modeling_mistral3.py
@@ -16,6 +16,7 @@
 import unittest
 
 import accelerate
+import pytest
 
 from transformers import (
     AutoProcessor,
@@ -207,6 +208,7 @@ def test_initialization(self):
                     )
 
     @unittest.skip(reason="Compile not yet supported because in LLava models")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/mllama/test_modeling_mllama.py b/tests/models/mllama/test_modeling_mllama.py
index f2b6969c4109..0c623d5fa396 100644
--- a/tests/models/mllama/test_modeling_mllama.py
+++ b/tests/models/mllama/test_modeling_mllama.py
@@ -352,6 +352,7 @@ def test_generate_with_quant_cache(self):
         pass
 
     @unittest.skip("For some unknown reasons the tests fails in CrossAttention layer when doing torch.sdpa(). ")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/mobilebert/test_modeling_mobilebert.py b/tests/models/mobilebert/test_modeling_mobilebert.py
index 126631fd9ce4..72bc842ec9c8 100644
--- a/tests/models/mobilebert/test_modeling_mobilebert.py
+++ b/tests/models/mobilebert/test_modeling_mobilebert.py
@@ -15,6 +15,7 @@
 
 import unittest
 
+import pytest
 from packaging import version
 
 from transformers import AutoTokenizer, MobileBertConfig, MobileBertForMaskedLM, is_torch_available
@@ -386,6 +387,7 @@ def test_inference_no_head(self):
 
         self.assertTrue(lower_bound and upper_bound)
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
diff --git a/tests/models/modernbert/test_modeling_modernbert.py b/tests/models/modernbert/test_modeling_modernbert.py
index 7e60d51d5f91..2a9c63089819 100644
--- a/tests/models/modernbert/test_modeling_modernbert.py
+++ b/tests/models/modernbert/test_modeling_modernbert.py
@@ -390,6 +390,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):
     def test_flash_attn_2_conversion(self):
         self.skipTest(reason="ModernBert doesn't use the ModernBertFlashAttention2 class method.")
 
+    @pytest.mark.torch_compile_test
     def test_saved_config_excludes_reference_compile(self):
         config = ModernBertConfig(reference_compile=True)
         with tempfile.TemporaryDirectory() as tmpdirname:
@@ -501,6 +502,7 @@ def test_inference_sequence_classification(self):
         expected = torch.tensor([[1.6466, 4.5662]])
         torch.testing.assert_close(output, expected, rtol=1e-4, atol=1e-4)
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
diff --git a/tests/models/moshi/test_modeling_moshi.py b/tests/models/moshi/test_modeling_moshi.py
index f750f35cf3c9..286f93e84805 100644
--- a/tests/models/moshi/test_modeling_moshi.py
+++ b/tests/models/moshi/test_modeling_moshi.py
@@ -178,6 +178,7 @@ def setUp(self):
         )
 
     @unittest.skip(reason="The MoshiModel does not have support dynamic compile yet")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
@@ -636,6 +637,7 @@ def test_eager_matches_sdpa_inference(
         pass
 
     @unittest.skip(reason="The Moshi model does not have support dynamic compile yet")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py
index bd4290f05ce1..4098c45ba218 100644
--- a/tests/models/musicgen/test_modeling_musicgen.py
+++ b/tests/models/musicgen/test_modeling_musicgen.py
@@ -20,6 +20,7 @@
 import unittest
 
 import numpy as np
+import pytest
 from pytest import mark
 
 from transformers import (
@@ -1235,6 +1236,7 @@ def test_generation_tester_mixin_inheritance(self):
         pass
 
     @unittest.skip(reason=("MusicGen has a set of composite models which might not have SDPA themselves, e.g. T5."))
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py
index 73af767e04f2..180436e6268c 100644
--- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py
+++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py
@@ -20,6 +20,7 @@
 import unittest
 
 import numpy as np
+import pytest
 from pytest import mark
 
 from transformers import (
@@ -1236,6 +1237,7 @@ def test_generation_tester_mixin_inheritance(self):
         pass
 
     @unittest.skip(reason=("MusicGen has a set of composite models which might not have SDPA themselves, e.g. T5."))
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/olmo/test_modeling_olmo.py b/tests/models/olmo/test_modeling_olmo.py
index ea23f4e96fda..38395fbbbaa3 100644
--- a/tests/models/olmo/test_modeling_olmo.py
+++ b/tests/models/olmo/test_modeling_olmo.py
@@ -15,6 +15,7 @@
 
 import unittest
 
+import pytest
 from packaging import version
 from parameterized import parameterized
 
@@ -327,6 +328,7 @@ def test_simple_encode_decode(self):
 
         self.assertEqual(rust_tokenizer.encode(" Hello"), [24387])
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export_static_cache(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
diff --git a/tests/models/olmo2/test_modeling_olmo2.py b/tests/models/olmo2/test_modeling_olmo2.py
index 20b0c49d3f0b..b980d10a0f0e 100644
--- a/tests/models/olmo2/test_modeling_olmo2.py
+++ b/tests/models/olmo2/test_modeling_olmo2.py
@@ -15,6 +15,7 @@
 
 import unittest
 
+import pytest
 from packaging import version
 from parameterized import parameterized
 
@@ -327,6 +328,7 @@ def test_simple_encode_decode(self):
 
         self.assertEqual(rust_tokenizer.encode(" Hello"), [22691])
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export_static_cache(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
diff --git a/tests/models/paligemma2/test_modeling_paligemma2.py b/tests/models/paligemma2/test_modeling_paligemma2.py
index 7523f83fd96e..17735b89c6e9 100644
--- a/tests/models/paligemma2/test_modeling_paligemma2.py
+++ b/tests/models/paligemma2/test_modeling_paligemma2.py
@@ -318,6 +318,7 @@ def test_generate_with_static_cache(self):
         pass
 
     @pytest.mark.generate
+    @pytest.mark.torch_compile_test
     @is_flaky
     def test_generate_compile_model_forward(self):
         super().test_generate_compile_model_forward()
diff --git a/tests/models/phi3/test_modeling_phi3.py b/tests/models/phi3/test_modeling_phi3.py
index f80015eeeb56..71b99e6786c3 100644
--- a/tests/models/phi3/test_modeling_phi3.py
+++ b/tests/models/phi3/test_modeling_phi3.py
@@ -16,6 +16,8 @@
 
 import unittest
 
+import pytest
+
 from transformers import Phi3Config, StaticCache, is_torch_available
 from transformers.models.auto.configuration_auto import AutoConfig
 from transformers.testing_utils import (
@@ -342,6 +344,7 @@ def test_phi3_mini_4k_sliding_window(self):
 
         self.assertListEqual(output_text, EXPECTED_OUTPUT)
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export_static_cache(self):
         from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
diff --git a/tests/models/phi4_multimodal/test_image_processing_phi4_multimodal.py b/tests/models/phi4_multimodal/test_image_processing_phi4_multimodal.py
index 3ad87b5780db..25a5ef9f3c93 100644
--- a/tests/models/phi4_multimodal/test_image_processing_phi4_multimodal.py
+++ b/tests/models/phi4_multimodal/test_image_processing_phi4_multimodal.py
@@ -20,6 +20,7 @@
 import warnings
 
 import numpy as np
+import pytest
 from packaging import version
 
 from transformers.testing_utils import require_torch, require_vision, slow, torch_device
@@ -288,6 +289,7 @@ def test_image_processor_preprocess_arguments(self):
             self.skipTest(reason="No validation found for `preprocess` method")
 
     @slow
+    @pytest.mark.torch_compile_test
     def test_can_compile_fast_image_processor(self):
         if self.fast_image_processing_class is None:
             self.skipTest("Skipping compilation test as fast image processor is not defined")
diff --git a/tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py b/tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py
index 07fd24577bf8..497d6ae08cfa 100644
--- a/tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py
+++ b/tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py
@@ -14,6 +14,7 @@
 
 import unittest
 
+import pytest
 import requests
 from parameterized import parameterized
 
@@ -253,6 +254,7 @@ def test_generate_compilation_all_outputs(self):
     @unittest.skip(
         reason="Supported only for text-only inputs (otherwise dynamic control flows for multimodal inputs)"
     )
+    @pytest.mark.torch_compile_test
     def test_generate_compile_model_forward(self):
         pass
 
diff --git a/tests/models/pixtral/test_image_processing_pixtral.py b/tests/models/pixtral/test_image_processing_pixtral.py
index 43ee7cfc273a..b2763a348a9d 100644
--- a/tests/models/pixtral/test_image_processing_pixtral.py
+++ b/tests/models/pixtral/test_image_processing_pixtral.py
@@ -15,6 +15,7 @@
 import unittest
 
 import numpy as np
+import pytest
 import requests
 from packaging import version
 
@@ -263,6 +264,7 @@ def test_slow_fast_equivalence_batched(self):
     @slow
     @require_torch_gpu
     @require_vision
+    @pytest.mark.torch_compile_test
     def test_can_compile_fast_image_processor(self):
         if self.fast_image_processing_class is None:
             self.skipTest("Skipping compilation test as fast image processor is not defined")
diff --git a/tests/models/prompt_depth_anything/test_modeling_prompt_depth_anything.py b/tests/models/prompt_depth_anything/test_modeling_prompt_depth_anything.py
index 697557b6ac01..e0aad3d5d9ef 100644
--- a/tests/models/prompt_depth_anything/test_modeling_prompt_depth_anything.py
+++ b/tests/models/prompt_depth_anything/test_modeling_prompt_depth_anything.py
@@ -15,6 +15,7 @@
 
 import unittest
 
+import pytest
 import requests
 
 from transformers import Dinov2Config, PromptDepthAnythingConfig
@@ -284,6 +285,7 @@ def test_inference(self):
 
         self.assertTrue(torch.allclose(predicted_depth[0, :3, :3], expected_slice, atol=1e-3))
 
+    @pytest.mark.torch_export_test
     def test_export(self):
         for strict in [False, True]:
             if strict and get_torch_major_and_minor_version() == "2.7":
diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py
index 51bd943cf916..d520b593f638 100644
--- a/tests/models/qwen2/test_modeling_qwen2.py
+++ b/tests/models/qwen2/test_modeling_qwen2.py
@@ -239,6 +239,7 @@ def test_speculative_generation(self):
         backend_empty_cache(torch_device)
         gc.collect()
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export_static_cache(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
diff --git a/tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py b/tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py
index 28be4eba3f85..b930aef695bb 100644
--- a/tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py
+++ b/tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py
@@ -21,6 +21,7 @@
 from urllib.request import urlopen
 
 import librosa
+import pytest
 import requests
 
 from transformers import (
@@ -281,6 +282,7 @@ def test_correct_missing_keys(self):
         pass
 
     @unittest.skip(reason="Compile not yet supported because in QwenOmniThinker models")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
@@ -444,6 +446,7 @@ def test_generate_from_inputs_embeds_with_static_cache(self):
     # TODO (joao, raushan): there are multiple standardization issues in this model that prevent this test from
     # passing, fix me
     @unittest.skip("Cannot handle 4D attention mask")
+    @pytest.mark.torch_compile_test
     def test_generate_compile_model_forward(self):
         pass
 
diff --git a/tests/models/qwen2_audio/test_modeling_qwen2_audio.py b/tests/models/qwen2_audio/test_modeling_qwen2_audio.py
index 4533fbbf99d8..1c4aa2c09387 100644
--- a/tests/models/qwen2_audio/test_modeling_qwen2_audio.py
+++ b/tests/models/qwen2_audio/test_modeling_qwen2_audio.py
@@ -19,6 +19,7 @@
 from urllib.request import urlopen
 
 import librosa
+import pytest
 
 from transformers import (
     AutoProcessor,
@@ -148,6 +149,7 @@ def setUp(self):
         self.config_tester = ConfigTester(self, config_class=Qwen2AudioConfig, has_text_modality=False)
 
     @unittest.skip(reason="Compile not yet supported because in Qwen2Audio models")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/qwen3/test_modeling_qwen3.py b/tests/models/qwen3/test_modeling_qwen3.py
index 205228073e19..223112f24a0f 100644
--- a/tests/models/qwen3/test_modeling_qwen3.py
+++ b/tests/models/qwen3/test_modeling_qwen3.py
@@ -231,6 +231,7 @@ def test_speculative_generation(self):
 
         self.assertEqual(EXPECTED_TEXT_COMPLETION, text)
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export_static_cache(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
diff --git a/tests/models/roberta/test_modeling_roberta.py b/tests/models/roberta/test_modeling_roberta.py
index 4f4d93b07f4d..5001438e4c6e 100644
--- a/tests/models/roberta/test_modeling_roberta.py
+++ b/tests/models/roberta/test_modeling_roberta.py
@@ -15,6 +15,8 @@
 
 import unittest
 
+import pytest
+
 from transformers import AutoTokenizer, RobertaConfig, is_torch_available
 from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device
 
@@ -575,6 +577,7 @@ def test_inference_classification_head(self):
 
         torch.testing.assert_close(output, expected_tensor, rtol=1e-4, atol=1e-4)
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export(self):
         if not is_torch_greater_or_equal_than_2_4:
diff --git a/tests/models/sam/test_modeling_sam.py b/tests/models/sam/test_modeling_sam.py
index 7f411c41117a..4b6f1df1451a 100644
--- a/tests/models/sam/test_modeling_sam.py
+++ b/tests/models/sam/test_modeling_sam.py
@@ -16,6 +16,7 @@
 import tempfile
 import unittest
 
+import pytest
 import requests
 
 from transformers import SamConfig, SamMaskDecoderConfig, SamPromptEncoderConfig, SamVisionConfig, pipeline
@@ -257,6 +258,7 @@ def test_hidden_states_output(self):
         pass
 
     @require_torch_sdpa
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         self.skipTest(reason="SAM model can't be compiled dynamic yet")
 
@@ -658,6 +660,7 @@ def test_model_from_pretrained(self):
         self.assertIsNotNone(model)
 
     @require_torch_sdpa
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         self.skipTest(reason="SAM model can't be compiled dynamic yet")
 
diff --git a/tests/models/sam_hq/test_modeling_sam_hq.py b/tests/models/sam_hq/test_modeling_sam_hq.py
index 192f7c8b02d0..98a7f5a45256 100644
--- a/tests/models/sam_hq/test_modeling_sam_hq.py
+++ b/tests/models/sam_hq/test_modeling_sam_hq.py
@@ -17,6 +17,7 @@
 import tempfile
 import unittest
 
+import pytest
 import requests
 
 from transformers import (
@@ -265,6 +266,7 @@ def test_hidden_states_output(self):
         pass
 
     @require_torch_sdpa
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         self.skipTest(reason="SAM model can't be compiled dynamic yet")
 
@@ -706,6 +708,7 @@ def test_model_from_pretrained(self):
         self.assertIsNotNone(model)
 
     @require_torch_sdpa
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         self.skipTest(reason="SamHQModel can't be compiled dynamic yet")
 
diff --git a/tests/models/smollm3/test_modeling_smollm3.py b/tests/models/smollm3/test_modeling_smollm3.py
index f855e0b36a5f..cb58cca5d49b 100644
--- a/tests/models/smollm3/test_modeling_smollm3.py
+++ b/tests/models/smollm3/test_modeling_smollm3.py
@@ -172,6 +172,7 @@ def test_model_3b_long_prompt(self):
         backend_empty_cache(torch_device)
         gc.collect()
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export_static_cache(self):
         if version.parse(torch.__version__) < version.parse("2.4.0"):
diff --git a/tests/models/smolvlm/test_modeling_smolvlm.py b/tests/models/smolvlm/test_modeling_smolvlm.py
index d1140b6ec114..495c0d346aa5 100644
--- a/tests/models/smolvlm/test_modeling_smolvlm.py
+++ b/tests/models/smolvlm/test_modeling_smolvlm.py
@@ -186,6 +186,7 @@ def test_flash_attn_2_inference_padding_right(self):
         pass
 
     @unittest.skip(reason="Compile not yet supported in SmolVLM models")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
@@ -387,6 +388,7 @@ def test_generate_with_static_cache(self):
         pass
 
     @unittest.skip(reason="Compile not yet supported in SmolVLM models")
+    @pytest.mark.torch_compile_test
     def test_sdpa_can_compile_dynamic(self):
         pass
 
diff --git a/tests/models/t5/test_modeling_t5.py b/tests/models/t5/test_modeling_t5.py
index 97b8cc2511b2..535008a1a02f 100644
--- a/tests/models/t5/test_modeling_t5.py
+++ b/tests/models/t5/test_modeling_t5.py
@@ -19,6 +19,8 @@
 import tempfile
 import unittest
 
+import pytest
+
 from transformers import T5Config, is_torch_available
 from transformers.models.auto.modeling_auto import MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES
 from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
@@ -1610,6 +1612,7 @@ def test_contrastive_search_t5(self):
 
     @slow
     @require_torch_accelerator
+    @pytest.mark.torch_compile_test
     def test_compile_static_cache(self):
         NUM_TOKENS_TO_GENERATE = 40
         EXPECTED_TEXT_COMPLETION = [
@@ -1650,6 +1653,7 @@ def test_compile_static_cache(self):
 
     @slow
     @require_torch_accelerator
+    @pytest.mark.torch_compile_test
     def test_compile_static_cache_encoder(self):
         prompts = [
             "summarize: Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
@@ -1668,6 +1672,7 @@ def test_compile_static_cache_encoder(self):
         logits_compiled = model(**inputs)
         torch.testing.assert_close(logits[0][:, -3:, -3], logits_compiled[0][:, -3:, -3], rtol=1e-5, atol=1e-5)
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export_encoder(self):
         """Test exporting T5EncoderModel to torch export format."""
@@ -1704,6 +1709,7 @@ def test_export_encoder(self):
         # Verify outputs are close enough
         self.assertTrue(torch.allclose(original_output, exported_output, atol=1e-5))
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export_decoder(self):
         """Test exporting T5 decoder with static cache to torch export format."""
@@ -1765,6 +1771,7 @@ def test_export_decoder(self):
             # Verify cache buffers are 3D
             self.assertEqual(buffer.shape[2], max_cache_len)
 
+    @pytest.mark.torch_export_test
     @slow
     def test_export_t5_summarization(self):
         """Test composing exported T5 encoder and decoder for summarization."""
diff --git a/tests/models/vitmatte/test_image_processing_vitmatte.py b/tests/models/vitmatte/test_image_processing_vitmatte.py
index a22ab5ee0cf5..a76953920897 100644
--- a/tests/models/vitmatte/test_image_processing_vitmatte.py
+++ b/tests/models/vitmatte/test_image_processing_vitmatte.py
@@ -18,6 +18,7 @@
 import warnings
 
 import numpy as np
+import pytest
 import requests
 from packaging import version
 
@@ -340,6 +341,7 @@ def test_slow_fast_equivalence_batched(self):
     @slow
     @require_torch_accelerator
     @require_vision
+    @pytest.mark.torch_compile_test
     def test_can_compile_fast_image_processor(self):
         # override as trimaps are needed for the image processor
         if self.fast_image_processing_class is None:
diff --git a/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py b/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py
index a95d6ca1fa58..5a35795a7495 100644
--- a/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py
+++ b/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py
@@ -16,6 +16,8 @@
 import inspect
 import unittest
 
+import pytest
+
 from transformers import VitPoseBackboneConfig
 from transformers.testing_utils import require_torch, torch_device
 from transformers.utils import is_torch_available
@@ -193,6 +195,7 @@ def test_forward_signature(self):
             expected_arg_names = ["pixel_values"]
             self.assertListEqual(arg_names[:1], expected_arg_names)
 
+    @pytest.mark.torch_export_test
     def test_torch_export(self):
         # Dense architecture
         super().test_torch_export()
diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
index a1b010435588..b1a57bcc564f 100644
--- a/tests/models/whisper/test_modeling_whisper.py
+++ b/tests/models/whisper/test_modeling_whisper.py
@@ -1420,6 +1420,7 @@ def test_labels_sequence_max_length_error_after_changing_config(self):
 
     # TODO (joao, eustache): fix me :) The model is not returning a `Cache` by default
     @unittest.skip(reason="Whisper's custom generate is not consistent regarding the cache return types")
+    @pytest.mark.torch_compile_test
     def test_generate_compile_model_forward(self):
         pass
 
diff --git a/tests/quantization/aqlm_integration/test_aqlm.py b/tests/quantization/aqlm_integration/test_aqlm.py
index 2fbc4595f302..9d935e9f7623 100644
--- a/tests/quantization/aqlm_integration/test_aqlm.py
+++ b/tests/quantization/aqlm_integration/test_aqlm.py
@@ -18,6 +18,7 @@
 import unittest
 from unittest import skip
 
+import pytest
 from packaging import version
 
 from transformers import AqlmConfig, AutoConfig, AutoModelForCausalLM, AutoTokenizer, OPTForCausalLM, StaticCache
@@ -198,6 +199,7 @@ def test_quantized_model_multi_gpu(self):
         is_aqlm_available() and version.parse(importlib.metadata.version("aqlm")) >= version.parse("1.0.3"),
         "test requires `aqlm>=1.0.3`",
     )
+    @pytest.mark.torch_compile_test
     def test_quantized_model_compile(self):
         """
         Simple test that checks if the quantized model is working properly
diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py
index a5ba1a861fe3..b49fd43f1793 100644
--- a/tests/quantization/bnb/test_4bit.py
+++ b/tests/quantization/bnb/test_4bit.py
@@ -16,6 +16,7 @@
 import tempfile
 import unittest
 
+import pytest
 from packaging import version
 
 from transformers import (
@@ -849,6 +850,7 @@ def setUp(self):
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         self.model_4bit = AutoModelForCausalLM.from_pretrained(self.model_name, load_in_4bit=True)
 
+    @pytest.mark.torch_compile_test
     def test_generate_compile(self):
         encoded_input = self.tokenizer(self.input_text, return_tensors="pt")
 
diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py
index 304d97879f29..8a3bcb84af33 100644
--- a/tests/quantization/bnb/test_mixed_int8.py
+++ b/tests/quantization/bnb/test_mixed_int8.py
@@ -16,6 +16,7 @@
 import tempfile
 import unittest
 
+import pytest
 from packaging import version
 
 from transformers import (
@@ -996,6 +997,7 @@ def setUp(self):
         self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
         self.model_8bit = AutoModelForCausalLM.from_pretrained(self.model_name, load_in_8bit=True)
 
+    @pytest.mark.torch_compile_test
     def test_generate_compile(self):
         encoded_input = self.tokenizer(self.input_text, return_tensors="pt")
 
diff --git a/tests/quantization/spqr_integration/test_spqr.py b/tests/quantization/spqr_integration/test_spqr.py
index 443b687d54a8..973ecd6e7db4 100644
--- a/tests/quantization/spqr_integration/test_spqr.py
+++ b/tests/quantization/spqr_integration/test_spqr.py
@@ -16,6 +16,8 @@
 import tempfile
 import unittest
 
+import pytest
+
 from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, SpQRConfig, StaticCache
 from transformers.testing_utils import (
     backend_empty_cache,
@@ -179,6 +181,7 @@ def test_quantized_model_multi_gpu(self):
 
         self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)
 
+    @pytest.mark.torch_compile_test
     def test_quantized_model_compile(self):
         """
         Simple test that checks if the quantized model is working properly
diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py
index c8b75c73d984..635d6a35dc85 100644
--- a/tests/test_image_processing_common.py
+++ b/tests/test_image_processing_common.py
@@ -22,6 +22,7 @@
 from copy import deepcopy
 
 import numpy as np
+import pytest
 import requests
 from packaging import version
 
@@ -614,6 +615,7 @@ def test_override_instance_attributes_does_not_affect_other_instances(self):
     @slow
     @require_torch_accelerator
     @require_vision
+    @pytest.mark.torch_compile_test
     def test_can_compile_fast_image_processor(self):
         if self.fast_image_processing_class is None:
             self.skipTest("Skipping compilation test as fast image processor is not defined")
diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
index ef8a1712530e..50d7b2724d5e 100755
--- a/tests/test_modeling_common.py
+++ b/tests/test_modeling_common.py
@@ -27,6 +27,7 @@
 from contextlib import contextmanager
 
 import numpy as np
+import pytest
 from packaging import version
 from parameterized import parameterized
 from pytest import mark
@@ -3866,6 +3867,7 @@ def test_sdpa_can_dispatch_on_flash(self):
 
     @require_torch_sdpa
     @require_torch_accelerator
+    @pytest.mark.torch_compile_test
     @slow
     def test_sdpa_can_compile_dynamic(self):
         if not self.has_attentions:
@@ -4114,6 +4116,7 @@ def test_flash_attn_2_fp32_ln(self):
     @require_flash_attn
     @require_torch_gpu
     @mark.flash_attn_test
+    @pytest.mark.torch_compile_test
     @slow
     def test_flash_attn_2_can_compile_with_attention_mask_None_without_graph_break(self):
         if version.parse(torch.__version__) < version.parse("2.3"):
@@ -4581,6 +4584,7 @@ def test_custom_4d_attention_mask(self):
 
     @slow
     @require_torch_accelerator
+    @pytest.mark.torch_compile_test
     def test_torch_compile_for_training(self):
         if version.parse(torch.__version__) < version.parse("2.3"):
             self.skipTest(reason="This test requires torch >= 2.3 to run.")
@@ -4653,6 +4657,7 @@ def test_forward_with_logits_to_keep(self):
 
     @slow
     @require_torch_greater_or_equal("2.5")
+    @pytest.mark.torch_export_test
     def test_torch_export(self, config=None, inputs_dict=None, tolerance=1e-4):
         """
         Test if model can be exported with torch.export.export()
diff --git a/tests/test_video_processing_common.py b/tests/test_video_processing_common.py
index 8507108163ca..5f8f378c12cc 100644
--- a/tests/test_video_processing_common.py
+++ b/tests/test_video_processing_common.py
@@ -21,6 +21,7 @@
 from copy import deepcopy
 
 import numpy as np
+import pytest
 from packaging import version
 
 from transformers import AutoVideoProcessor
@@ -168,6 +169,7 @@ def test_init_without_params(self):
     @slow
     @require_torch_accelerator
     @require_vision
+    @pytest.mark.torch_compile_test
     def test_can_compile_fast_video_processor(self):
         if self.fast_video_processing_class is None:
             self.skipTest("Skipping compilation test as fast video processor is not defined")
diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
index 6ee9d23e35ec..4c2b64f0d07f 100644
--- a/tests/trainer/test_trainer.py
+++ b/tests/trainer/test_trainer.py
@@ -31,6 +31,7 @@
 from unittest.mock import Mock, patch
 
 import numpy as np
+import pytest
 from huggingface_hub import HfFolder, ModelCard, create_branch, list_repo_commits, list_repo_files
 from packaging import version
 from parameterized import parameterized
@@ -1358,6 +1359,7 @@ def test_number_of_steps_in_training(self):
         train_output = trainer.train()
         self.assertEqual(train_output.global_step, 10)
 
+    @pytest.mark.torch_compile_test
     def test_torch_compile_loss_func_compatibility(self):
         config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4)
         tiny_llama = LlamaForCausalLM(config)
@@ -1377,6 +1379,7 @@ def test_torch_compile_loss_func_compatibility(self):
 
     @require_peft
     @require_bitsandbytes
+    @pytest.mark.torch_compile_test
     def test_bnb_compile(self):
         from peft import LoraConfig, get_peft_model
 
diff --git a/tests/utils/test_cache_utils.py b/tests/utils/test_cache_utils.py
index 37d15452c7ed..54a7dc24cf63 100644
--- a/tests/utils/test_cache_utils.py
+++ b/tests/utils/test_cache_utils.py
@@ -15,6 +15,7 @@
 import copy
 import unittest
 
+import pytest
 from packaging import version
 from parameterized import parameterized
 
@@ -594,6 +595,7 @@ def test_cache_gptj_model(self, cache_implementation):
 class CacheExportIntegrationTest(unittest.TestCase):
     """Cache tests that rely on `torch.export()` and model loading"""
 
+    @pytest.mark.torch_export_test
     def test_dynamic_cache_exportability(self):
         model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM")
         model = model.eval()
@@ -635,6 +637,7 @@ def test_dynamic_cache_exportability(self):
             self.assertTrue(torch.allclose(l1.keys, l2.keys, atol=1e-5))
             self.assertTrue(torch.allclose(l1.values, l2.values, atol=1e-5))
 
+    @pytest.mark.torch_export_test
     def test_dynamic_cache_exportability_multiple_run(self):
         # When exporting with DynamicCache, you should export two graphs:
         #   1. A graph without cache
@@ -730,6 +733,7 @@ def test_dynamic_cache_exportability_multiple_run(self):
             self.assertTrue(torch.allclose(l1.values, l2.values, atol=1e-5))
 
     @unittest.skip("Runs on my machine locally, passed, no idea why it does not online")
+    @pytest.mark.torch_export_test
     def test_static_cache_exportability(self):
         """
         Tests that static cache works with `torch.export()`
@@ -808,6 +812,7 @@ def test_static_cache_exportability(self):
             strict=strict,
         )
 
+    @pytest.mark.torch_export_test
     def test_hybrid_cache_exportability(self):
         """
         Tests that static cache works with `torch.export()`
diff --git a/tests/utils/test_deprecation.py b/tests/utils/test_deprecation.py
index 81b46af37eb4..f09c42101941 100644
--- a/tests/utils/test_deprecation.py
+++ b/tests/utils/test_deprecation.py
@@ -15,6 +15,7 @@
 import unittest
 import warnings
 
+import pytest
 from parameterized import parameterized
 
 from transformers import __version__, is_torch_available
@@ -174,6 +175,7 @@ def dummy_function(new_name=None, **kwargs):
             result = dummy_function(deprecated_name="old_value", new_name="new_value")
         self.assertEqual(result, "new_value")
 
+    @pytest.mark.torch_compile_test
     @require_torch_accelerator
     def test_compile_safe(self):
         @deprecate_kwarg("deprecated_factor", new_name="new_factor", version=INFINITE_VERSION)
diff --git a/tests/utils/test_generic.py b/tests/utils/test_generic.py
index e08c26fd02e8..77e7cdba7c2c 100644
--- a/tests/utils/test_generic.py
+++ b/tests/utils/test_generic.py
@@ -16,6 +16,7 @@
 import warnings
 
 import numpy as np
+import pytest
 
 from transformers.configuration_utils import PretrainedConfig
 from transformers.modeling_outputs import BaseModelOutput
@@ -261,6 +262,7 @@ def test_decorator_eager(self):
                 message = f"output should be a {expected_type.__name__} when config.use_return_dict={config_return_dict} and return_dict={return_dict}"
                 self.assertIsInstance(output, expected_type, message)
 
+    @pytest.mark.torch_compile_test
     def test_decorator_compiled(self):
         """Test that the can_return_tuple decorator works with compiled mode."""
         config = PretrainedConfig()
@@ -277,6 +279,7 @@ def test_decorator_compiled(self):
         output = compiled_model(torch.tensor(10), return_dict=False)
         self.assertIsInstance(output, tuple)
 
+    @pytest.mark.torch_export_test
     def test_decorator_torch_export(self):
         """Test that the can_return_tuple decorator works with torch.export."""
         config = PretrainedConfig()
diff --git a/tests/utils/test_model_output.py b/tests/utils/test_model_output.py
index 00473e878600..eef5feb014f5 100644
--- a/tests/utils/test_model_output.py
+++ b/tests/utils/test_model_output.py
@@ -17,6 +17,8 @@
 from dataclasses import dataclass
 from typing import Optional
 
+import pytest
+
 from transformers import AlbertForMaskedLM
 from transformers.testing_utils import require_torch
 from transformers.utils import ModelOutput, is_torch_available
@@ -160,6 +162,7 @@ def test_torch_pytree(self):
     # TODO: @ydshieh
     @unittest.skip(reason="CPU OOM")
     @require_torch
+    @pytest.mark.torch_export_test
     def test_export_serialization(self):
         if not is_torch_greater_or_equal_than_2_2:
             self.skipTest(reason="Export serialization requires torch >= 2.2.0")
diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py
index a1b8b0c35a73..a7837d5d2336 100644
--- a/tests/utils/test_modeling_utils.py
+++ b/tests/utils/test_modeling_utils.py
@@ -27,6 +27,7 @@
 import warnings
 from pathlib import Path
 
+import pytest
 import requests
 from huggingface_hub import HfApi, HfFolder
 from parameterized import parameterized
@@ -2541,6 +2542,7 @@ def test_causal_mask_sliding(self):
         # non auto-regressive case
         self.check_to_causal(mask_converter, q_len=7, kv_len=7)
 
+    @pytest.mark.torch_compile_test
     def test_torch_compile_fullgraph(self):
         model = Prepare4dCausalAttentionMaskModel()