Spaces:

HF-slyfox
/

harness

Running

App Files Files Community

harness / diffs /39950.patch

ArthurZ HF Staff

Initial harness: 100 perf tasks + Gradio browser

dfefe0b verified 7 days ago

raw

history blame contribute delete

64.8 kB

	diff --git a/conftest.py b/conftest.py
	index 2134dceb84b9..7a8344ea5056 100644
	--- a/conftest.py
	+++ b/conftest.py
	@@ -83,6 +83,8 @@ def pytest_configure(config):
	config.addinivalue_line("markers", "is_staging_test: mark test to run only in the staging environment")
	config.addinivalue_line("markers", "accelerate_tests: mark test that require accelerate")
	config.addinivalue_line("markers", "not_device_test: mark the tests always running on cpu")
	+ config.addinivalue_line("markers", "torch_compile_test: mark test which tests torch compile functionality")
	+ config.addinivalue_line("markers", "torch_export_test: mark test which tests torch export functionality")


	def pytest_collection_modifyitems(items):
	diff --git a/tests/generation/test_utils.py b/tests/generation/test_utils.py
	index c9d20e692e92..76161928f6ba 100644
	--- a/tests/generation/test_utils.py
	+++ b/tests/generation/test_utils.py
	@@ -2048,6 +2048,7 @@ def test_generate_with_quant_cache(self):
	model.generate(generation_kwargs, inputs_dict)

	@pytest.mark.generate
	+ @pytest.mark.torch_compile_test
	@require_torch_greater_or_equal("2.6") # Uses torch.compiler.set_stance
	def test_generate_compile_model_forward(self):
	"""
	@@ -2744,6 +2745,7 @@ def test_speculative_sampling_target_distribution(self):
	self.assertTrue(last_token_counts[1] > last_token_counts[3] > last_token_counts[7] > 0)
	self.assertTrue(last_token_counts[8] > last_token_counts[3])

	+ @pytest.mark.torch_export_test
	def test_cache_dependant_input_preparation_exporting(self):
	self.assertFalse(
	is_torchdynamo_exporting()
	@@ -4342,6 +4344,7 @@ def test_prepare_inputs_for_generation_encoder_decoder_llm(self):
	self.assertTrue(model_inputs["encoder_outputs"] == "foo")
	# See the decoder-only test for more corner cases. The code is the same, so we don't repeat it here.

	+ @pytest.mark.torch_compile_test
	def test_generate_compile_fullgraph_tiny(self):
	"""
	Tests that we can call end-to-end generation with a tiny model (i.e. doesn't crash)
	@@ -4931,6 +4934,7 @@ def test_cache_device_map_with_vision_layer_device_map(self):
	_ = model.generate(**inputs, max_new_tokens=2, do_sample=False)

	@require_torch_accelerator
	+ @pytest.mark.torch_compile_test
	def test_cpu_offload_doesnt_compile(self):
	"""Test that CPU offload doesn't trigger compilation"""
	tokenizer = AutoTokenizer.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM")
	diff --git a/tests/models/albert/test_modeling_albert.py b/tests/models/albert/test_modeling_albert.py
	index f0440fb349d6..eb857f3383c9 100644
	--- a/tests/models/albert/test_modeling_albert.py
	+++ b/tests/models/albert/test_modeling_albert.py
	@@ -15,6 +15,7 @@

	import unittest

	+import pytest
	from packaging import version

	from transformers import AlbertConfig, AutoTokenizer, is_torch_available
	@@ -337,6 +338,7 @@ def test_inference_no_head_absolute_embedding(self):
	torch.testing.assert_close(output[:, 1:4, 1:4], expected_slice, rtol=1e-4, atol=1e-4)

	@slow
	+ @pytest.mark.torch_export_test
	def test_export(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	self.skipTest(reason="This test requires torch >= 2.4 to run.")
	diff --git a/tests/models/aria/test_modeling_aria.py b/tests/models/aria/test_modeling_aria.py
	index 94dafdc2d49c..c5829fb97ffb 100644
	--- a/tests/models/aria/test_modeling_aria.py
	+++ b/tests/models/aria/test_modeling_aria.py
	@@ -15,6 +15,7 @@

	import unittest

	+import pytest
	import requests

	from transformers import (
	@@ -211,6 +212,7 @@ def test_training_gradient_checkpointing_use_reentrant_false(self):
	pass

	@unittest.skip(reason="Compile not yet supported because in LLava models")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/aya_vision/test_modeling_aya_vision.py b/tests/models/aya_vision/test_modeling_aya_vision.py
	index d472e0eb90f1..f6f33d39ea78 100644
	--- a/tests/models/aya_vision/test_modeling_aya_vision.py
	+++ b/tests/models/aya_vision/test_modeling_aya_vision.py
	@@ -267,6 +267,7 @@ def test_initialization(self):
	pass

	@unittest.skip(reason="Compile not yet supported because in LLava models")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/beit/test_modeling_beit.py b/tests/models/beit/test_modeling_beit.py
	index af8fd48e00c0..ff696f8cf607 100644
	--- a/tests/models/beit/test_modeling_beit.py
	+++ b/tests/models/beit/test_modeling_beit.py
	@@ -15,6 +15,7 @@

	import unittest

	+import pytest
	from datasets import load_dataset

	from transformers import BeitConfig
	@@ -285,6 +286,7 @@ def test_feed_forward_chunking(self):
	pass

	@unittest.skip(reason="BEiT can't compile dynamic")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/bert/test_modeling_bert.py b/tests/models/bert/test_modeling_bert.py
	index 97bca3317ae2..9cc0a8be2437 100644
	--- a/tests/models/bert/test_modeling_bert.py
	+++ b/tests/models/bert/test_modeling_bert.py
	@@ -13,6 +13,7 @@
	# limitations under the License.
	import unittest

	+import pytest
	from packaging import version

	from transformers import AutoTokenizer, BertConfig, is_torch_available
	@@ -722,6 +723,7 @@ def test_sdpa_ignored_mask(self):
	)

	@slow
	+ @pytest.mark.torch_export_test
	def test_export(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	self.skipTest(reason="This test requires torch >= 2.4 to run.")
	diff --git a/tests/models/clip/test_modeling_clip.py b/tests/models/clip/test_modeling_clip.py
	index ad7d817f962c..b506a442b9eb 100644
	--- a/tests/models/clip/test_modeling_clip.py
	+++ b/tests/models/clip/test_modeling_clip.py
	@@ -19,6 +19,7 @@
	import unittest

	import numpy as np
	+import pytest
	import requests
	from parameterized import parameterized
	from pytest import mark
	@@ -708,6 +709,7 @@ def test_sdpa_can_dispatch_on_flash(self):
	self.skipTest(reason="CLIP text tower has two attention masks: `causal_attention_mask` and `attention_mask`")

	@require_torch_sdpa
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	self.skipTest(reason="CLIP model can't be compiled dynamic, error in clip_loss`")

	diff --git a/tests/models/cohere2/test_modeling_cohere2.py b/tests/models/cohere2/test_modeling_cohere2.py
	index 71335c37075e..1cf2a8424b54 100644
	--- a/tests/models/cohere2/test_modeling_cohere2.py
	+++ b/tests/models/cohere2/test_modeling_cohere2.py
	@@ -225,6 +225,7 @@ def test_model_flash_attn(self):

	self.assertEqual(output_text, EXPECTED_TEXTS)

	+ @pytest.mark.torch_export_test
	def test_export_static_cache(self):
	if version.parse(torch.__version__) < version.parse("2.5.0"):
	self.skipTest(reason="This test requires torch >= 2.5 to run.")
	diff --git a/tests/models/colpali/test_modeling_colpali.py b/tests/models/colpali/test_modeling_colpali.py
	index c6c9892d1737..62f87af46007 100644
	--- a/tests/models/colpali/test_modeling_colpali.py
	+++ b/tests/models/colpali/test_modeling_colpali.py
	@@ -19,6 +19,7 @@
	import unittest
	from typing import ClassVar

	+import pytest
	import torch
	from datasets import load_dataset

	@@ -287,6 +288,7 @@ def test_sdpa_can_dispatch_on_flash(self):
	pass

	@unittest.skip(reason="Pass because ColPali requires `attention_mask is not None`")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/colqwen2/test_modeling_colqwen2.py b/tests/models/colqwen2/test_modeling_colqwen2.py
	index 53a37108da83..3bf5b8a6c496 100644
	--- a/tests/models/colqwen2/test_modeling_colqwen2.py
	+++ b/tests/models/colqwen2/test_modeling_colqwen2.py
	@@ -17,6 +17,7 @@
	import unittest
	from typing import ClassVar

	+import pytest
	import torch
	from datasets import load_dataset

	@@ -277,6 +278,7 @@ def test_sdpa_can_dispatch_on_flash(self):
	pass

	@unittest.skip(reason="Pass because ColQwen2 requires `attention_mask is not None`")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/data2vec/test_modeling_data2vec_vision.py b/tests/models/data2vec/test_modeling_data2vec_vision.py
	index a1dde3d31b96..709436ce1419 100644
	--- a/tests/models/data2vec/test_modeling_data2vec_vision.py
	+++ b/tests/models/data2vec/test_modeling_data2vec_vision.py
	@@ -15,6 +15,8 @@

	import unittest

	+import pytest
	+
	from transformers import Data2VecVisionConfig
	from transformers.testing_utils import (
	require_torch,
	@@ -214,6 +216,7 @@ def test_config(self):
	@unittest.skip(
	reason="Will fix only if requested by the community: it fails with `torch._dynamo.exc.InternalTorchDynamoError: IndexError: list index out of range`. Without compile, the test pass."
	)
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/deepseek_v2/test_modeling_deepseek_v2.py b/tests/models/deepseek_v2/test_modeling_deepseek_v2.py
	index 0bdc6884590f..f1c6cf6786f1 100644
	--- a/tests/models/deepseek_v2/test_modeling_deepseek_v2.py
	+++ b/tests/models/deepseek_v2/test_modeling_deepseek_v2.py
	@@ -16,6 +16,8 @@

	import unittest

	+import pytest
	+
	from transformers import BitsAndBytesConfig, Cache, DeepseekV2Config, is_torch_available
	from transformers.testing_utils import require_read_token, require_torch, require_torch_accelerator, slow, torch_device

	@@ -173,10 +175,12 @@ def _check_past_key_values_for_generate(self, batch_size, decoder_past_key_value
	self.assertEqual(layer.values.shape, expected_value_shape)

	@unittest.skip("Deepseek-V2 uses MLA which has a special head dim and is not compatible with StaticCache shape")
	+ @pytest.mark.torch_compile_test
	def test_generate_compilation_all_outputs(self):
	pass

	@unittest.skip("Deepseek-V2 uses MLA which has a special head dim and is not compatible with StaticCache shape")
	+ @pytest.mark.torch_compile_test
	def test_generate_compile_model_forward(self):
	pass

	@@ -185,10 +189,12 @@ def test_generate_from_inputs_embeds_with_static_cache(self):
	pass

	@unittest.skip("Deepseek-V2 uses MLA which has a special head dim and is not compatible with StaticCache shape")
	+ @pytest.mark.torch_compile_test
	def test_generate_with_static_cache(self):
	pass

	@unittest.skip("Dynamic control flow in MoE")
	+ @pytest.mark.torch_compile_test
	def test_torch_compile_for_training(self):
	pass

	diff --git a/tests/models/deepseek_v3/test_modeling_deepseek_v3.py b/tests/models/deepseek_v3/test_modeling_deepseek_v3.py
	index 87f7b2abb0e9..3e1dc11998c4 100644
	--- a/tests/models/deepseek_v3/test_modeling_deepseek_v3.py
	+++ b/tests/models/deepseek_v3/test_modeling_deepseek_v3.py
	@@ -15,6 +15,7 @@

	import unittest

	+import pytest
	from packaging import version
	from parameterized import parameterized

	@@ -311,6 +312,7 @@ def test_generate_compilation_all_outputs(self):
	pass

	@unittest.skip("Deepseek-V3 uses MLA so it is not compatible with the standard cache format")
	+ @pytest.mark.torch_compile_test
	def test_generate_compile_model_forward(self):
	pass

	@@ -533,6 +535,7 @@ def tearDown(self):

	@slow
	@require_torch_accelerator
	+ @pytest.mark.torch_compile_test
	@require_read_token
	def test_compile_static_cache(self):
	# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
	diff --git a/tests/models/depth_anything/test_modeling_depth_anything.py b/tests/models/depth_anything/test_modeling_depth_anything.py
	index 3527e1d6b8cf..f0c638a76f22 100644
	--- a/tests/models/depth_anything/test_modeling_depth_anything.py
	+++ b/tests/models/depth_anything/test_modeling_depth_anything.py
	@@ -15,6 +15,8 @@

	import unittest

	+import pytest
	+
	from transformers import DepthAnythingConfig, Dinov2Config
	from transformers.file_utils import is_torch_available, is_vision_available
	from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
	@@ -286,6 +288,7 @@ def test_inference(self):

	torch.testing.assert_close(predicted_depth[0, :3, :3], expected_slice, rtol=1e-4, atol=1e-4)

	+ @pytest.mark.torch_export_test
	def test_export(self):
	for strict in [False, True]:
	with self.subTest(strict=strict):
	diff --git a/tests/models/depth_pro/test_modeling_depth_pro.py b/tests/models/depth_pro/test_modeling_depth_pro.py
	index 50ac4cd1d28f..0e644c7c1892 100644
	--- a/tests/models/depth_pro/test_modeling_depth_pro.py
	+++ b/tests/models/depth_pro/test_modeling_depth_pro.py
	@@ -15,6 +15,8 @@

	import unittest

	+import pytest
	+
	from transformers import DepthProConfig
	from transformers.file_utils import is_torch_available, is_vision_available
	from transformers.testing_utils import require_torch, require_vision, slow, torch_device
	@@ -221,6 +223,7 @@ def test_config(self):
	self.config_tester.run_common_tests()

	@unittest.skip(reason="Inductor error: name 'OpaqueUnaryFn_log2' is not defined")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/diffllama/test_modeling_diffllama.py b/tests/models/diffllama/test_modeling_diffllama.py
	index f376fab87e14..10675b1681df 100644
	--- a/tests/models/diffllama/test_modeling_diffllama.py
	+++ b/tests/models/diffllama/test_modeling_diffllama.py
	@@ -570,6 +570,7 @@ def tearDown(self):
	@slow
	@require_torch_accelerator
	@require_read_token
	+ @pytest.mark.torch_compile_test
	def test_compile_static_cache(self):
	# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
	# work as intended. See https://github.com/pytorch/pytorch/issues/121943
	diff --git a/tests/models/distilbert/test_modeling_distilbert.py b/tests/models/distilbert/test_modeling_distilbert.py
	index 3124689b0eaf..871b32b3af70 100644
	--- a/tests/models/distilbert/test_modeling_distilbert.py
	+++ b/tests/models/distilbert/test_modeling_distilbert.py
	@@ -399,6 +399,7 @@ def test_inference_no_head_absolute_embedding(self):

	torch.testing.assert_close(output[:, 1:4, 1:4], expected_slice, rtol=1e-4, atol=1e-4)

	+ @pytest.mark.torch_export_test
	@slow
	def test_export(self):
	if not is_torch_greater_or_equal_than_2_4:
	diff --git a/tests/models/dots1/test_modeling_dots1.py b/tests/models/dots1/test_modeling_dots1.py
	index 2df3fd965446..9426ff7d300a 100644
	--- a/tests/models/dots1/test_modeling_dots1.py
	+++ b/tests/models/dots1/test_modeling_dots1.py
	@@ -96,6 +96,7 @@ def test_generate_compilation_all_outputs(self):
	pass

	@unittest.skip("dots.llm1's moe is not compatible `token_indices, weight_indices = torch.where(mask)`")
	+ @pytest.mark.torch_compile_test
	def test_generate_compile_model_forward(self):
	pass

	diff --git a/tests/models/dpt/test_modeling_dpt.py b/tests/models/dpt/test_modeling_dpt.py
	index dac36338beb3..1d693e7f408c 100644
	--- a/tests/models/dpt/test_modeling_dpt.py
	+++ b/tests/models/dpt/test_modeling_dpt.py
	@@ -15,6 +15,8 @@

	import unittest

	+import pytest
	+
	from transformers import DPTConfig
	from transformers.file_utils import is_torch_available, is_vision_available
	from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
	@@ -255,6 +257,7 @@ def test_training_gradient_checkpointing_use_reentrant_false(self):
	pass

	@unittest.skip(reason="Inductor error for dynamic shape")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	@@ -420,6 +423,7 @@ def test_post_processing_depth_estimation(self):
	self.assertTrue(output_enlarged.shape == expected_shape)
	torch.testing.assert_close(predicted_depth_l, output_enlarged, atol=1e-3, rtol=1e-3)

	+ @pytest.mark.torch_export_test
	def test_export(self):
	for strict in [True, False]:
	with self.subTest(strict=strict):
	diff --git a/tests/models/exaone4/test_modeling_exaone4.py b/tests/models/exaone4/test_modeling_exaone4.py
	index 4ac87ce900b5..7bd98b1850fc 100644
	--- a/tests/models/exaone4/test_modeling_exaone4.py
	+++ b/tests/models/exaone4/test_modeling_exaone4.py
	@@ -354,6 +354,7 @@ def test_model_generation_beyond_sliding_window(self):
	del model
	cleanup(torch_device, gc_collect=True)

	+ @pytest.mark.torch_export_test
	@slow
	def test_export_static_cache(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	diff --git a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
	index 9686b1660fdb..eafebbcb5365 100644
	--- a/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
	+++ b/tests/models/falcon_mamba/test_modeling_falcon_mamba.py
	@@ -17,6 +17,8 @@
	import unittest
	from unittest.util import safe_repr

	+import pytest
	+
	from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig, FalconMambaConfig, is_torch_available
	from transformers.testing_utils import (
	Expectations,
	@@ -487,6 +489,7 @@ def test_generation_4bit(self):
	"Hello today Iava,\n\nI'm sorry to hear that you're having trouble with the ",
	)

	+ @pytest.mark.torch_compile_test
	def test_generation_torch_compile(self):
	model = AutoModelForCausalLM.from_pretrained(self.model_id, torch_dtype=torch.float16).to(torch_device)
	model = torch.compile(model)
	diff --git a/tests/models/gemma/test_modeling_gemma.py b/tests/models/gemma/test_modeling_gemma.py
	index 8a1e2ea9eb7f..4b5d939359ac 100644
	--- a/tests/models/gemma/test_modeling_gemma.py
	+++ b/tests/models/gemma/test_modeling_gemma.py
	@@ -356,6 +356,7 @@ def test_model_7b_4bit(self):

	@slow
	@require_torch_accelerator
	+ @pytest.mark.torch_compile_test
	@require_read_token
	def test_compile_static_cache(self):
	# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
	@@ -394,6 +395,7 @@ def test_compile_static_cache(self):
	static_compiled_text = tokenizer.batch_decode(generated_ids, skip_special_tokens=True)
	self.assertEqual(EXPECTED_TEXT_COMPLETION, static_compiled_text)

	+ @pytest.mark.torch_export_test
	@slow
	@require_read_token
	def test_export_static_cache(self):
	diff --git a/tests/models/gemma2/test_modeling_gemma2.py b/tests/models/gemma2/test_modeling_gemma2.py
	index 5d778d8cb2ec..b8bfcaad43c1 100644
	--- a/tests/models/gemma2/test_modeling_gemma2.py
	+++ b/tests/models/gemma2/test_modeling_gemma2.py
	@@ -306,6 +306,7 @@ def test_model_9b_flash_attn(self):

	self.assertEqual(output_text, EXPECTED_TEXTS)

	+ @pytest.mark.torch_export_test
	@slow
	@require_read_token
	def test_export_static_cache(self):
	@@ -379,6 +380,7 @@ def test_export_static_cache(self):
	@slow
	@require_read_token
	@require_large_cpu_ram
	+ @pytest.mark.torch_export_test
	def test_export_hybrid_cache(self):
	from transformers.integrations.executorch import TorchExportableModuleForDecoderOnlyLM
	from transformers.pytorch_utils import is_torch_greater_or_equal
	diff --git a/tests/models/gemma3/test_modeling_gemma3.py b/tests/models/gemma3/test_modeling_gemma3.py
	index f9a19646e7e0..eb87743b2a8a 100644
	--- a/tests/models/gemma3/test_modeling_gemma3.py
	+++ b/tests/models/gemma3/test_modeling_gemma3.py
	@@ -819,6 +819,7 @@ def test_generation_beyond_sliding_window(self, attn_implementation: str):
	EXPECTED_COMPLETIONS = [" and I'm going to take a walk.\n\nI really enjoy the scenery, and I'", ", green, yellow, orange, purple, brown, black, white, gray.\n\nI'"] # fmt: skip
	self.assertEqual(output_text, EXPECTED_COMPLETIONS)

	+ @pytest.mark.torch_export_test
	def test_export_text_only_with_hybrid_cache(self):
	if not is_torch_greater_or_equal("2.6.0"):
	self.skipTest(reason="This test requires torch >= 2.6 to run.")
	diff --git a/tests/models/glm4_moe/test_modeling_glm4_moe.py b/tests/models/glm4_moe/test_modeling_glm4_moe.py
	index 59631fb37228..3d3582cb2435 100644
	--- a/tests/models/glm4_moe/test_modeling_glm4_moe.py
	+++ b/tests/models/glm4_moe/test_modeling_glm4_moe.py
	@@ -15,6 +15,7 @@

	import unittest

	+import pytest
	import torch
	from packaging import version

	@@ -93,6 +94,7 @@ def tearDown(self):

	@slow
	@require_torch_accelerator
	+ @pytest.mark.torch_compile_test
	@require_read_token
	def test_compile_static_cache(self):
	# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
	diff --git a/tests/models/idefics3/test_modeling_idefics3.py b/tests/models/idefics3/test_modeling_idefics3.py
	index 5cf06a50be10..49d940f6fb0a 100644
	--- a/tests/models/idefics3/test_modeling_idefics3.py
	+++ b/tests/models/idefics3/test_modeling_idefics3.py
	@@ -195,6 +195,7 @@ def test_flash_attn_2_inference_padding_right(self):
	pass

	@unittest.skip(reason="Compile not yet supported in idefics3 models")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	@@ -379,6 +380,7 @@ def test_eager_matches_sdpa_generate(self):
	pass

	@unittest.skip(reason="Compile not yet supported in Idefics3 models end-to-end")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/internvl/test_modeling_internvl.py b/tests/models/internvl/test_modeling_internvl.py
	index 4317cb36825e..a4dd976fa8ab 100644
	--- a/tests/models/internvl/test_modeling_internvl.py
	+++ b/tests/models/internvl/test_modeling_internvl.py
	@@ -17,6 +17,7 @@
	import unittest
	from io import BytesIO

	+import pytest
	import requests

	from transformers import (
	@@ -216,6 +217,7 @@ def test_initialization(self):
	)

	@unittest.skip(reason="Compile not yet supported because in LLava models")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/janus/test_modeling_janus.py b/tests/models/janus/test_modeling_janus.py
	index 2fa257bc5c42..9c334b609175 100644
	--- a/tests/models/janus/test_modeling_janus.py
	+++ b/tests/models/janus/test_modeling_janus.py
	@@ -20,6 +20,7 @@
	from functools import reduce

	import numpy as np
	+import pytest
	import requests

	from transformers import (
	@@ -294,6 +295,7 @@ def check_training_gradient_checkpointing(self, gradient_checkpointing_kwargs=No
	pass

	@unittest.skip("There are recompilations in Janus") # TODO (joao, raushan): fix me
	+ @pytest.mark.torch_compile_test
	def test_generate_compile_model_forward(self):
	pass

	diff --git a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
	index f574f6751104..9c2a3eee735d 100644
	--- a/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
	+++ b/tests/models/layoutlmv2/test_image_processing_layoutlmv2.py
	@@ -14,6 +14,7 @@

	import unittest

	+import pytest
	import requests
	from packaging import version

	@@ -202,6 +203,7 @@ def test_slow_fast_equivalence_batched(self):
	@slow
	@require_torch_accelerator
	@require_vision
	+ @pytest.mark.torch_compile_test
	def test_can_compile_fast_image_processor(self):
	if self.fast_image_processing_class is None:
	self.skipTest("Skipping compilation test as fast image processor is not defined")
	diff --git a/tests/models/lfm2/test_modeling_lfm2.py b/tests/models/lfm2/test_modeling_lfm2.py
	index 7921fcbf1560..4603f54dc7f7 100644
	--- a/tests/models/lfm2/test_modeling_lfm2.py
	+++ b/tests/models/lfm2/test_modeling_lfm2.py
	@@ -15,6 +15,8 @@

	import unittest

	+import pytest
	+
	from transformers import is_torch_available
	from transformers.testing_utils import (
	require_read_token,
	@@ -88,6 +90,7 @@ def test_contrastive_generate_low_memory(self):
	@unittest.skip(
	"Lfm2 has a special cache format which is not compatible with compile as it has static address for conv cache"
	)
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/llama/test_modeling_llama.py b/tests/models/llama/test_modeling_llama.py
	index 26be82b9da82..5be6e9803e05 100644
	--- a/tests/models/llama/test_modeling_llama.py
	+++ b/tests/models/llama/test_modeling_llama.py
	@@ -15,6 +15,7 @@

	import unittest

	+import pytest
	from packaging import version

	from transformers import AutoTokenizer, StaticCache, is_torch_available
	@@ -256,6 +257,7 @@ def test_model_7b_dola_generation(self):

	@slow
	@require_torch_accelerator
	+ @pytest.mark.torch_compile_test
	def test_compile_static_cache(self):
	# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
	# work as intended. See https://github.com/pytorch/pytorch/issues/121943
	@@ -296,6 +298,7 @@ def test_compile_static_cache(self):
	self.assertEqual(EXPECTED_TEXT_COMPLETION, static_text)

	@slow
	+ @pytest.mark.torch_export_test
	def test_export_static_cache(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	self.skipTest(reason="This test requires torch >= 2.4 to run.")
	diff --git a/tests/models/llava_onevision/test_image_processing_llava_onevision.py b/tests/models/llava_onevision/test_image_processing_llava_onevision.py
	index 4aba232c9dfb..29f20cc7124e 100644
	--- a/tests/models/llava_onevision/test_image_processing_llava_onevision.py
	+++ b/tests/models/llava_onevision/test_image_processing_llava_onevision.py
	@@ -15,6 +15,7 @@
	import unittest

	import numpy as np
	+import pytest

	from transformers.image_utils import OPENAI_CLIP_MEAN, OPENAI_CLIP_STD, ChannelDimension
	from transformers.testing_utils import require_torch, require_vision
	@@ -246,6 +247,7 @@ def test_multi_images(self):
	@unittest.skip(
	reason="LlavaOnevisionImageProcessorFast doesn't compile (infinitely) when using class transforms"
	) # FIXME yoni
	+ @pytest.mark.torch_compile_test
	def test_can_compile_fast_image_processor(self):
	pass

	diff --git a/tests/models/mamba/test_modeling_mamba.py b/tests/models/mamba/test_modeling_mamba.py
	index aff98558d52b..2fbe6ef81b37 100644
	--- a/tests/models/mamba/test_modeling_mamba.py
	+++ b/tests/models/mamba/test_modeling_mamba.py
	@@ -17,6 +17,7 @@
	import unittest
	from unittest.util import safe_repr

	+import pytest
	from parameterized import parameterized

	from transformers import AutoTokenizer, MambaConfig, is_torch_available
	@@ -518,6 +519,7 @@ def test_simple_generate_cuda_kernels_big(self, device):
	self.assertEqual(output_sentence, expected_output)

	@slow
	+ @pytest.mark.torch_compile_test
	def test_compile_mamba_cache(self):
	expected_output = "Hello my name is John and I am a\n\nI am a single father of a beautiful daughter. I am a"

	diff --git a/tests/models/mask2former/test_modeling_mask2former.py b/tests/models/mask2former/test_modeling_mask2former.py
	index 160dc553be9a..de0efc0410d9 100644
	--- a/tests/models/mask2former/test_modeling_mask2former.py
	+++ b/tests/models/mask2former/test_modeling_mask2former.py
	@@ -16,6 +16,7 @@
	import unittest

	import numpy as np
	+import pytest

	from tests.test_modeling_common import floats_tensor
	from transformers import AutoModelForImageClassification, Mask2FormerConfig, is_torch_available, is_vision_available
	@@ -576,6 +577,7 @@ def test_with_segmentation_maps_and_loss(self):

	self.assertTrue(outputs.loss is not None)

	+ @pytest.mark.torch_export_test
	def test_export(self):
	if not is_torch_greater_or_equal_than_2_4:
	self.skipTest(reason="This test requires torch >= 2.4 to run.")
	diff --git a/tests/models/mimi/test_modeling_mimi.py b/tests/models/mimi/test_modeling_mimi.py
	index 2123e93c698b..1ca50b33560d 100644
	--- a/tests/models/mimi/test_modeling_mimi.py
	+++ b/tests/models/mimi/test_modeling_mimi.py
	@@ -19,6 +19,7 @@
	import unittest

	import numpy as np
	+import pytest
	from datasets import Audio, load_dataset
	from pytest import mark

	@@ -446,6 +447,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):
	pass

	@unittest.skip(reason="The MimiModel does not have support dynamic compile yet")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/mistral/test_modeling_mistral.py b/tests/models/mistral/test_modeling_mistral.py
	index a17f464370b8..dce2f756119e 100644
	--- a/tests/models/mistral/test_modeling_mistral.py
	+++ b/tests/models/mistral/test_modeling_mistral.py
	@@ -278,6 +278,7 @@ def test_speculative_generation(self):
	text = tokenizer.decode(generated_ids[0], skip_special_tokens=True)
	self.assertEqual(EXPECTED_TEXT_COMPLETION, text)

	+ @pytest.mark.torch_compile_test
	@slow
	def test_compile_static_cache(self):
	# `torch==2.2` will throw an error on this test (as in other compilation tests), but torch==2.1.2 and torch>2.2
	diff --git a/tests/models/mistral3/test_modeling_mistral3.py b/tests/models/mistral3/test_modeling_mistral3.py
	index 666997d4a5e1..99b2394037fc 100644
	--- a/tests/models/mistral3/test_modeling_mistral3.py
	+++ b/tests/models/mistral3/test_modeling_mistral3.py
	@@ -16,6 +16,7 @@
	import unittest

	import accelerate
	+import pytest

	from transformers import (
	AutoProcessor,
	@@ -207,6 +208,7 @@ def test_initialization(self):
	)

	@unittest.skip(reason="Compile not yet supported because in LLava models")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/mllama/test_modeling_mllama.py b/tests/models/mllama/test_modeling_mllama.py
	index f2b6969c4109..0c623d5fa396 100644
	--- a/tests/models/mllama/test_modeling_mllama.py
	+++ b/tests/models/mllama/test_modeling_mllama.py
	@@ -352,6 +352,7 @@ def test_generate_with_quant_cache(self):
	pass

	@unittest.skip("For some unknown reasons the tests fails in CrossAttention layer when doing torch.sdpa(). ")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/mobilebert/test_modeling_mobilebert.py b/tests/models/mobilebert/test_modeling_mobilebert.py
	index 126631fd9ce4..72bc842ec9c8 100644
	--- a/tests/models/mobilebert/test_modeling_mobilebert.py
	+++ b/tests/models/mobilebert/test_modeling_mobilebert.py
	@@ -15,6 +15,7 @@

	import unittest

	+import pytest
	from packaging import version

	from transformers import AutoTokenizer, MobileBertConfig, MobileBertForMaskedLM, is_torch_available
	@@ -386,6 +387,7 @@ def test_inference_no_head(self):

	self.assertTrue(lower_bound and upper_bound)

	+ @pytest.mark.torch_export_test
	@slow
	def test_export(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	diff --git a/tests/models/modernbert/test_modeling_modernbert.py b/tests/models/modernbert/test_modeling_modernbert.py
	index 7e60d51d5f91..2a9c63089819 100644
	--- a/tests/models/modernbert/test_modeling_modernbert.py
	+++ b/tests/models/modernbert/test_modeling_modernbert.py
	@@ -390,6 +390,7 @@ def test_flash_attn_2_inference_equivalence_right_padding(self):
	def test_flash_attn_2_conversion(self):
	self.skipTest(reason="ModernBert doesn't use the ModernBertFlashAttention2 class method.")

	+ @pytest.mark.torch_compile_test
	def test_saved_config_excludes_reference_compile(self):
	config = ModernBertConfig(reference_compile=True)
	with tempfile.TemporaryDirectory() as tmpdirname:
	@@ -501,6 +502,7 @@ def test_inference_sequence_classification(self):
	expected = torch.tensor([[1.6466, 4.5662]])
	torch.testing.assert_close(output, expected, rtol=1e-4, atol=1e-4)

	+ @pytest.mark.torch_export_test
	@slow
	def test_export(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	diff --git a/tests/models/moshi/test_modeling_moshi.py b/tests/models/moshi/test_modeling_moshi.py
	index f750f35cf3c9..286f93e84805 100644
	--- a/tests/models/moshi/test_modeling_moshi.py
	+++ b/tests/models/moshi/test_modeling_moshi.py
	@@ -178,6 +178,7 @@ def setUp(self):
	)

	@unittest.skip(reason="The MoshiModel does not have support dynamic compile yet")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	@@ -636,6 +637,7 @@ def test_eager_matches_sdpa_inference(
	pass

	@unittest.skip(reason="The Moshi model does not have support dynamic compile yet")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/musicgen/test_modeling_musicgen.py b/tests/models/musicgen/test_modeling_musicgen.py
	index bd4290f05ce1..4098c45ba218 100644
	--- a/tests/models/musicgen/test_modeling_musicgen.py
	+++ b/tests/models/musicgen/test_modeling_musicgen.py
	@@ -20,6 +20,7 @@
	import unittest

	import numpy as np
	+import pytest
	from pytest import mark

	from transformers import (
	@@ -1235,6 +1236,7 @@ def test_generation_tester_mixin_inheritance(self):
	pass

	@unittest.skip(reason=("MusicGen has a set of composite models which might not have SDPA themselves, e.g. T5."))
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py
	index 73af767e04f2..180436e6268c 100644
	--- a/tests/models/musicgen_melody/test_modeling_musicgen_melody.py
	+++ b/tests/models/musicgen_melody/test_modeling_musicgen_melody.py
	@@ -20,6 +20,7 @@
	import unittest

	import numpy as np
	+import pytest
	from pytest import mark

	from transformers import (
	@@ -1236,6 +1237,7 @@ def test_generation_tester_mixin_inheritance(self):
	pass

	@unittest.skip(reason=("MusicGen has a set of composite models which might not have SDPA themselves, e.g. T5."))
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/olmo/test_modeling_olmo.py b/tests/models/olmo/test_modeling_olmo.py
	index ea23f4e96fda..38395fbbbaa3 100644
	--- a/tests/models/olmo/test_modeling_olmo.py
	+++ b/tests/models/olmo/test_modeling_olmo.py
	@@ -15,6 +15,7 @@

	import unittest

	+import pytest
	from packaging import version
	from parameterized import parameterized

	@@ -327,6 +328,7 @@ def test_simple_encode_decode(self):

	self.assertEqual(rust_tokenizer.encode(" Hello"), [24387])

	+ @pytest.mark.torch_export_test
	@slow
	def test_export_static_cache(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	diff --git a/tests/models/olmo2/test_modeling_olmo2.py b/tests/models/olmo2/test_modeling_olmo2.py
	index 20b0c49d3f0b..b980d10a0f0e 100644
	--- a/tests/models/olmo2/test_modeling_olmo2.py
	+++ b/tests/models/olmo2/test_modeling_olmo2.py
	@@ -15,6 +15,7 @@

	import unittest

	+import pytest
	from packaging import version
	from parameterized import parameterized

	@@ -327,6 +328,7 @@ def test_simple_encode_decode(self):

	self.assertEqual(rust_tokenizer.encode(" Hello"), [22691])

	+ @pytest.mark.torch_export_test
	@slow
	def test_export_static_cache(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	diff --git a/tests/models/paligemma2/test_modeling_paligemma2.py b/tests/models/paligemma2/test_modeling_paligemma2.py
	index 7523f83fd96e..17735b89c6e9 100644
	--- a/tests/models/paligemma2/test_modeling_paligemma2.py
	+++ b/tests/models/paligemma2/test_modeling_paligemma2.py
	@@ -318,6 +318,7 @@ def test_generate_with_static_cache(self):
	pass

	@pytest.mark.generate
	+ @pytest.mark.torch_compile_test
	@is_flaky
	def test_generate_compile_model_forward(self):
	super().test_generate_compile_model_forward()
	diff --git a/tests/models/phi3/test_modeling_phi3.py b/tests/models/phi3/test_modeling_phi3.py
	index f80015eeeb56..71b99e6786c3 100644
	--- a/tests/models/phi3/test_modeling_phi3.py
	+++ b/tests/models/phi3/test_modeling_phi3.py
	@@ -16,6 +16,8 @@

	import unittest

	+import pytest
	+
	from transformers import Phi3Config, StaticCache, is_torch_available
	from transformers.models.auto.configuration_auto import AutoConfig
	from transformers.testing_utils import (
	@@ -342,6 +344,7 @@ def test_phi3_mini_4k_sliding_window(self):

	self.assertListEqual(output_text, EXPECTED_OUTPUT)

	+ @pytest.mark.torch_export_test
	@slow
	def test_export_static_cache(self):
	from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
	diff --git a/tests/models/phi4_multimodal/test_image_processing_phi4_multimodal.py b/tests/models/phi4_multimodal/test_image_processing_phi4_multimodal.py
	index 3ad87b5780db..25a5ef9f3c93 100644
	--- a/tests/models/phi4_multimodal/test_image_processing_phi4_multimodal.py
	+++ b/tests/models/phi4_multimodal/test_image_processing_phi4_multimodal.py
	@@ -20,6 +20,7 @@
	import warnings

	import numpy as np
	+import pytest
	from packaging import version

	from transformers.testing_utils import require_torch, require_vision, slow, torch_device
	@@ -288,6 +289,7 @@ def test_image_processor_preprocess_arguments(self):
	self.skipTest(reason="No validation found for `preprocess` method")

	@slow
	+ @pytest.mark.torch_compile_test
	def test_can_compile_fast_image_processor(self):
	if self.fast_image_processing_class is None:
	self.skipTest("Skipping compilation test as fast image processor is not defined")
	diff --git a/tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py b/tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py
	index 07fd24577bf8..497d6ae08cfa 100644
	--- a/tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py
	+++ b/tests/models/phi4_multimodal/test_modeling_phi4_multimodal.py
	@@ -14,6 +14,7 @@

	import unittest

	+import pytest
	import requests
	from parameterized import parameterized

	@@ -253,6 +254,7 @@ def test_generate_compilation_all_outputs(self):
	@unittest.skip(
	reason="Supported only for text-only inputs (otherwise dynamic control flows for multimodal inputs)"
	)
	+ @pytest.mark.torch_compile_test
	def test_generate_compile_model_forward(self):
	pass

	diff --git a/tests/models/pixtral/test_image_processing_pixtral.py b/tests/models/pixtral/test_image_processing_pixtral.py
	index 43ee7cfc273a..b2763a348a9d 100644
	--- a/tests/models/pixtral/test_image_processing_pixtral.py
	+++ b/tests/models/pixtral/test_image_processing_pixtral.py
	@@ -15,6 +15,7 @@
	import unittest

	import numpy as np
	+import pytest
	import requests
	from packaging import version

	@@ -263,6 +264,7 @@ def test_slow_fast_equivalence_batched(self):
	@slow
	@require_torch_gpu
	@require_vision
	+ @pytest.mark.torch_compile_test
	def test_can_compile_fast_image_processor(self):
	if self.fast_image_processing_class is None:
	self.skipTest("Skipping compilation test as fast image processor is not defined")
	diff --git a/tests/models/prompt_depth_anything/test_modeling_prompt_depth_anything.py b/tests/models/prompt_depth_anything/test_modeling_prompt_depth_anything.py
	index 697557b6ac01..e0aad3d5d9ef 100644
	--- a/tests/models/prompt_depth_anything/test_modeling_prompt_depth_anything.py
	+++ b/tests/models/prompt_depth_anything/test_modeling_prompt_depth_anything.py
	@@ -15,6 +15,7 @@

	import unittest

	+import pytest
	import requests

	from transformers import Dinov2Config, PromptDepthAnythingConfig
	@@ -284,6 +285,7 @@ def test_inference(self):

	self.assertTrue(torch.allclose(predicted_depth[0, :3, :3], expected_slice, atol=1e-3))

	+ @pytest.mark.torch_export_test
	def test_export(self):
	for strict in [False, True]:
	if strict and get_torch_major_and_minor_version() == "2.7":
	diff --git a/tests/models/qwen2/test_modeling_qwen2.py b/tests/models/qwen2/test_modeling_qwen2.py
	index 51bd943cf916..d520b593f638 100644
	--- a/tests/models/qwen2/test_modeling_qwen2.py
	+++ b/tests/models/qwen2/test_modeling_qwen2.py
	@@ -239,6 +239,7 @@ def test_speculative_generation(self):
	backend_empty_cache(torch_device)
	gc.collect()

	+ @pytest.mark.torch_export_test
	@slow
	def test_export_static_cache(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	diff --git a/tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py b/tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py
	index 28be4eba3f85..b930aef695bb 100644
	--- a/tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py
	+++ b/tests/models/qwen2_5_omni/test_modeling_qwen2_5_omni.py
	@@ -21,6 +21,7 @@
	from urllib.request import urlopen

	import librosa
	+import pytest
	import requests

	from transformers import (
	@@ -281,6 +282,7 @@ def test_correct_missing_keys(self):
	pass

	@unittest.skip(reason="Compile not yet supported because in QwenOmniThinker models")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	@@ -444,6 +446,7 @@ def test_generate_from_inputs_embeds_with_static_cache(self):
	# TODO (joao, raushan): there are multiple standardization issues in this model that prevent this test from
	# passing, fix me
	@unittest.skip("Cannot handle 4D attention mask")
	+ @pytest.mark.torch_compile_test
	def test_generate_compile_model_forward(self):
	pass

	diff --git a/tests/models/qwen2_audio/test_modeling_qwen2_audio.py b/tests/models/qwen2_audio/test_modeling_qwen2_audio.py
	index 4533fbbf99d8..1c4aa2c09387 100644
	--- a/tests/models/qwen2_audio/test_modeling_qwen2_audio.py
	+++ b/tests/models/qwen2_audio/test_modeling_qwen2_audio.py
	@@ -19,6 +19,7 @@
	from urllib.request import urlopen

	import librosa
	+import pytest

	from transformers import (
	AutoProcessor,
	@@ -148,6 +149,7 @@ def setUp(self):
	self.config_tester = ConfigTester(self, config_class=Qwen2AudioConfig, has_text_modality=False)

	@unittest.skip(reason="Compile not yet supported because in Qwen2Audio models")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/qwen3/test_modeling_qwen3.py b/tests/models/qwen3/test_modeling_qwen3.py
	index 205228073e19..223112f24a0f 100644
	--- a/tests/models/qwen3/test_modeling_qwen3.py
	+++ b/tests/models/qwen3/test_modeling_qwen3.py
	@@ -231,6 +231,7 @@ def test_speculative_generation(self):

	self.assertEqual(EXPECTED_TEXT_COMPLETION, text)

	+ @pytest.mark.torch_export_test
	@slow
	def test_export_static_cache(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	diff --git a/tests/models/roberta/test_modeling_roberta.py b/tests/models/roberta/test_modeling_roberta.py
	index 4f4d93b07f4d..5001438e4c6e 100644
	--- a/tests/models/roberta/test_modeling_roberta.py
	+++ b/tests/models/roberta/test_modeling_roberta.py
	@@ -15,6 +15,8 @@

	import unittest

	+import pytest
	+
	from transformers import AutoTokenizer, RobertaConfig, is_torch_available
	from transformers.testing_utils import TestCasePlus, require_torch, slow, torch_device

	@@ -575,6 +577,7 @@ def test_inference_classification_head(self):

	torch.testing.assert_close(output, expected_tensor, rtol=1e-4, atol=1e-4)

	+ @pytest.mark.torch_export_test
	@slow
	def test_export(self):
	if not is_torch_greater_or_equal_than_2_4:
	diff --git a/tests/models/sam/test_modeling_sam.py b/tests/models/sam/test_modeling_sam.py
	index 7f411c41117a..4b6f1df1451a 100644
	--- a/tests/models/sam/test_modeling_sam.py
	+++ b/tests/models/sam/test_modeling_sam.py
	@@ -16,6 +16,7 @@
	import tempfile
	import unittest

	+import pytest
	import requests

	from transformers import SamConfig, SamMaskDecoderConfig, SamPromptEncoderConfig, SamVisionConfig, pipeline
	@@ -257,6 +258,7 @@ def test_hidden_states_output(self):
	pass

	@require_torch_sdpa
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	self.skipTest(reason="SAM model can't be compiled dynamic yet")

	@@ -658,6 +660,7 @@ def test_model_from_pretrained(self):
	self.assertIsNotNone(model)

	@require_torch_sdpa
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	self.skipTest(reason="SAM model can't be compiled dynamic yet")

	diff --git a/tests/models/sam_hq/test_modeling_sam_hq.py b/tests/models/sam_hq/test_modeling_sam_hq.py
	index 192f7c8b02d0..98a7f5a45256 100644
	--- a/tests/models/sam_hq/test_modeling_sam_hq.py
	+++ b/tests/models/sam_hq/test_modeling_sam_hq.py
	@@ -17,6 +17,7 @@
	import tempfile
	import unittest

	+import pytest
	import requests

	from transformers import (
	@@ -265,6 +266,7 @@ def test_hidden_states_output(self):
	pass

	@require_torch_sdpa
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	self.skipTest(reason="SAM model can't be compiled dynamic yet")

	@@ -706,6 +708,7 @@ def test_model_from_pretrained(self):
	self.assertIsNotNone(model)

	@require_torch_sdpa
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	self.skipTest(reason="SamHQModel can't be compiled dynamic yet")

	diff --git a/tests/models/smollm3/test_modeling_smollm3.py b/tests/models/smollm3/test_modeling_smollm3.py
	index f855e0b36a5f..cb58cca5d49b 100644
	--- a/tests/models/smollm3/test_modeling_smollm3.py
	+++ b/tests/models/smollm3/test_modeling_smollm3.py
	@@ -172,6 +172,7 @@ def test_model_3b_long_prompt(self):
	backend_empty_cache(torch_device)
	gc.collect()

	+ @pytest.mark.torch_export_test
	@slow
	def test_export_static_cache(self):
	if version.parse(torch.__version__) < version.parse("2.4.0"):
	diff --git a/tests/models/smolvlm/test_modeling_smolvlm.py b/tests/models/smolvlm/test_modeling_smolvlm.py
	index d1140b6ec114..495c0d346aa5 100644
	--- a/tests/models/smolvlm/test_modeling_smolvlm.py
	+++ b/tests/models/smolvlm/test_modeling_smolvlm.py
	@@ -186,6 +186,7 @@ def test_flash_attn_2_inference_padding_right(self):
	pass

	@unittest.skip(reason="Compile not yet supported in SmolVLM models")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	@@ -387,6 +388,7 @@ def test_generate_with_static_cache(self):
	pass

	@unittest.skip(reason="Compile not yet supported in SmolVLM models")
	+ @pytest.mark.torch_compile_test
	def test_sdpa_can_compile_dynamic(self):
	pass

	diff --git a/tests/models/t5/test_modeling_t5.py b/tests/models/t5/test_modeling_t5.py
	index 97b8cc2511b2..535008a1a02f 100644
	--- a/tests/models/t5/test_modeling_t5.py
	+++ b/tests/models/t5/test_modeling_t5.py
	@@ -19,6 +19,8 @@
	import tempfile
	import unittest

	+import pytest
	+
	from transformers import T5Config, is_torch_available
	from transformers.models.auto.modeling_auto import MODEL_FOR_SEQUENCE_CLASSIFICATION_MAPPING_NAMES
	from transformers.pytorch_utils import is_torch_greater_or_equal_than_2_4
	@@ -1610,6 +1612,7 @@ def test_contrastive_search_t5(self):

	@slow
	@require_torch_accelerator
	+ @pytest.mark.torch_compile_test
	def test_compile_static_cache(self):
	NUM_TOKENS_TO_GENERATE = 40
	EXPECTED_TEXT_COMPLETION = [
	@@ -1650,6 +1653,7 @@ def test_compile_static_cache(self):

	@slow
	@require_torch_accelerator
	+ @pytest.mark.torch_compile_test
	def test_compile_static_cache_encoder(self):
	prompts = [
	"summarize: Simply put, the theory of relativity states that 1) the speed of light is constant in all inertial "
	@@ -1668,6 +1672,7 @@ def test_compile_static_cache_encoder(self):
	logits_compiled = model(**inputs)
	torch.testing.assert_close(logits[0][:, -3:, -3], logits_compiled[0][:, -3:, -3], rtol=1e-5, atol=1e-5)

	+ @pytest.mark.torch_export_test
	@slow
	def test_export_encoder(self):
	"""Test exporting T5EncoderModel to torch export format."""
	@@ -1704,6 +1709,7 @@ def test_export_encoder(self):
	# Verify outputs are close enough
	self.assertTrue(torch.allclose(original_output, exported_output, atol=1e-5))

	+ @pytest.mark.torch_export_test
	@slow
	def test_export_decoder(self):
	"""Test exporting T5 decoder with static cache to torch export format."""
	@@ -1765,6 +1771,7 @@ def test_export_decoder(self):
	# Verify cache buffers are 3D
	self.assertEqual(buffer.shape[2], max_cache_len)

	+ @pytest.mark.torch_export_test
	@slow
	def test_export_t5_summarization(self):
	"""Test composing exported T5 encoder and decoder for summarization."""
	diff --git a/tests/models/vitmatte/test_image_processing_vitmatte.py b/tests/models/vitmatte/test_image_processing_vitmatte.py
	index a22ab5ee0cf5..a76953920897 100644
	--- a/tests/models/vitmatte/test_image_processing_vitmatte.py
	+++ b/tests/models/vitmatte/test_image_processing_vitmatte.py
	@@ -18,6 +18,7 @@
	import warnings

	import numpy as np
	+import pytest
	import requests
	from packaging import version

	@@ -340,6 +341,7 @@ def test_slow_fast_equivalence_batched(self):
	@slow
	@require_torch_accelerator
	@require_vision
	+ @pytest.mark.torch_compile_test
	def test_can_compile_fast_image_processor(self):
	# override as trimaps are needed for the image processor
	if self.fast_image_processing_class is None:
	diff --git a/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py b/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py
	index a95d6ca1fa58..5a35795a7495 100644
	--- a/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py
	+++ b/tests/models/vitpose_backbone/test_modeling_vitpose_backbone.py
	@@ -16,6 +16,8 @@
	import inspect
	import unittest

	+import pytest
	+
	from transformers import VitPoseBackboneConfig
	from transformers.testing_utils import require_torch, torch_device
	from transformers.utils import is_torch_available
	@@ -193,6 +195,7 @@ def test_forward_signature(self):
	expected_arg_names = ["pixel_values"]
	self.assertListEqual(arg_names[:1], expected_arg_names)

	+ @pytest.mark.torch_export_test
	def test_torch_export(self):
	# Dense architecture
	super().test_torch_export()
	diff --git a/tests/models/whisper/test_modeling_whisper.py b/tests/models/whisper/test_modeling_whisper.py
	index a1b010435588..b1a57bcc564f 100644
	--- a/tests/models/whisper/test_modeling_whisper.py
	+++ b/tests/models/whisper/test_modeling_whisper.py
	@@ -1420,6 +1420,7 @@ def test_labels_sequence_max_length_error_after_changing_config(self):

	# TODO (joao, eustache): fix me :) The model is not returning a `Cache` by default
	@unittest.skip(reason="Whisper's custom generate is not consistent regarding the cache return types")
	+ @pytest.mark.torch_compile_test
	def test_generate_compile_model_forward(self):
	pass

	diff --git a/tests/quantization/aqlm_integration/test_aqlm.py b/tests/quantization/aqlm_integration/test_aqlm.py
	index 2fbc4595f302..9d935e9f7623 100644
	--- a/tests/quantization/aqlm_integration/test_aqlm.py
	+++ b/tests/quantization/aqlm_integration/test_aqlm.py
	@@ -18,6 +18,7 @@
	import unittest
	from unittest import skip

	+import pytest
	from packaging import version

	from transformers import AqlmConfig, AutoConfig, AutoModelForCausalLM, AutoTokenizer, OPTForCausalLM, StaticCache
	@@ -198,6 +199,7 @@ def test_quantized_model_multi_gpu(self):
	is_aqlm_available() and version.parse(importlib.metadata.version("aqlm")) >= version.parse("1.0.3"),
	"test requires `aqlm>=1.0.3`",
	)
	+ @pytest.mark.torch_compile_test
	def test_quantized_model_compile(self):
	"""
	Simple test that checks if the quantized model is working properly
	diff --git a/tests/quantization/bnb/test_4bit.py b/tests/quantization/bnb/test_4bit.py
	index a5ba1a861fe3..b49fd43f1793 100644
	--- a/tests/quantization/bnb/test_4bit.py
	+++ b/tests/quantization/bnb/test_4bit.py
	@@ -16,6 +16,7 @@
	import tempfile
	import unittest

	+import pytest
	from packaging import version

	from transformers import (
	@@ -849,6 +850,7 @@ def setUp(self):
	self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
	self.model_4bit = AutoModelForCausalLM.from_pretrained(self.model_name, load_in_4bit=True)

	+ @pytest.mark.torch_compile_test
	def test_generate_compile(self):
	encoded_input = self.tokenizer(self.input_text, return_tensors="pt")

	diff --git a/tests/quantization/bnb/test_mixed_int8.py b/tests/quantization/bnb/test_mixed_int8.py
	index 304d97879f29..8a3bcb84af33 100644
	--- a/tests/quantization/bnb/test_mixed_int8.py
	+++ b/tests/quantization/bnb/test_mixed_int8.py
	@@ -16,6 +16,7 @@
	import tempfile
	import unittest

	+import pytest
	from packaging import version

	from transformers import (
	@@ -996,6 +997,7 @@ def setUp(self):
	self.tokenizer = AutoTokenizer.from_pretrained(self.model_name)
	self.model_8bit = AutoModelForCausalLM.from_pretrained(self.model_name, load_in_8bit=True)

	+ @pytest.mark.torch_compile_test
	def test_generate_compile(self):
	encoded_input = self.tokenizer(self.input_text, return_tensors="pt")

	diff --git a/tests/quantization/spqr_integration/test_spqr.py b/tests/quantization/spqr_integration/test_spqr.py
	index 443b687d54a8..973ecd6e7db4 100644
	--- a/tests/quantization/spqr_integration/test_spqr.py
	+++ b/tests/quantization/spqr_integration/test_spqr.py
	@@ -16,6 +16,8 @@
	import tempfile
	import unittest

	+import pytest
	+
	from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer, SpQRConfig, StaticCache
	from transformers.testing_utils import (
	backend_empty_cache,
	@@ -179,6 +181,7 @@ def test_quantized_model_multi_gpu(self):

	self.assertEqual(self.tokenizer.decode(output[0], skip_special_tokens=True), self.EXPECTED_OUTPUT)

	+ @pytest.mark.torch_compile_test
	def test_quantized_model_compile(self):
	"""
	Simple test that checks if the quantized model is working properly
	diff --git a/tests/test_image_processing_common.py b/tests/test_image_processing_common.py
	index c8b75c73d984..635d6a35dc85 100644
	--- a/tests/test_image_processing_common.py
	+++ b/tests/test_image_processing_common.py
	@@ -22,6 +22,7 @@
	from copy import deepcopy

	import numpy as np
	+import pytest
	import requests
	from packaging import version

	@@ -614,6 +615,7 @@ def test_override_instance_attributes_does_not_affect_other_instances(self):
	@slow
	@require_torch_accelerator
	@require_vision
	+ @pytest.mark.torch_compile_test
	def test_can_compile_fast_image_processor(self):
	if self.fast_image_processing_class is None:
	self.skipTest("Skipping compilation test as fast image processor is not defined")
	diff --git a/tests/test_modeling_common.py b/tests/test_modeling_common.py
	index ef8a1712530e..50d7b2724d5e 100755
	--- a/tests/test_modeling_common.py
	+++ b/tests/test_modeling_common.py
	@@ -27,6 +27,7 @@
	from contextlib import contextmanager

	import numpy as np
	+import pytest
	from packaging import version
	from parameterized import parameterized
	from pytest import mark
	@@ -3866,6 +3867,7 @@ def test_sdpa_can_dispatch_on_flash(self):

	@require_torch_sdpa
	@require_torch_accelerator
	+ @pytest.mark.torch_compile_test
	@slow
	def test_sdpa_can_compile_dynamic(self):
	if not self.has_attentions:
	@@ -4114,6 +4116,7 @@ def test_flash_attn_2_fp32_ln(self):
	@require_flash_attn
	@require_torch_gpu
	@mark.flash_attn_test
	+ @pytest.mark.torch_compile_test
	@slow
	def test_flash_attn_2_can_compile_with_attention_mask_None_without_graph_break(self):
	if version.parse(torch.__version__) < version.parse("2.3"):
	@@ -4581,6 +4584,7 @@ def test_custom_4d_attention_mask(self):

	@slow
	@require_torch_accelerator
	+ @pytest.mark.torch_compile_test
	def test_torch_compile_for_training(self):
	if version.parse(torch.__version__) < version.parse("2.3"):
	self.skipTest(reason="This test requires torch >= 2.3 to run.")
	@@ -4653,6 +4657,7 @@ def test_forward_with_logits_to_keep(self):

	@slow
	@require_torch_greater_or_equal("2.5")
	+ @pytest.mark.torch_export_test
	def test_torch_export(self, config=None, inputs_dict=None, tolerance=1e-4):
	"""
	Test if model can be exported with torch.export.export()
	diff --git a/tests/test_video_processing_common.py b/tests/test_video_processing_common.py
	index 8507108163ca..5f8f378c12cc 100644
	--- a/tests/test_video_processing_common.py
	+++ b/tests/test_video_processing_common.py
	@@ -21,6 +21,7 @@
	from copy import deepcopy

	import numpy as np
	+import pytest
	from packaging import version

	from transformers import AutoVideoProcessor
	@@ -168,6 +169,7 @@ def test_init_without_params(self):
	@slow
	@require_torch_accelerator
	@require_vision
	+ @pytest.mark.torch_compile_test
	def test_can_compile_fast_video_processor(self):
	if self.fast_video_processing_class is None:
	self.skipTest("Skipping compilation test as fast video processor is not defined")
	diff --git a/tests/trainer/test_trainer.py b/tests/trainer/test_trainer.py
	index 6ee9d23e35ec..4c2b64f0d07f 100644
	--- a/tests/trainer/test_trainer.py
	+++ b/tests/trainer/test_trainer.py
	@@ -31,6 +31,7 @@
	from unittest.mock import Mock, patch

	import numpy as np
	+import pytest
	from huggingface_hub import HfFolder, ModelCard, create_branch, list_repo_commits, list_repo_files
	from packaging import version
	from parameterized import parameterized
	@@ -1358,6 +1359,7 @@ def test_number_of_steps_in_training(self):
	train_output = trainer.train()
	self.assertEqual(train_output.global_step, 10)

	+ @pytest.mark.torch_compile_test
	def test_torch_compile_loss_func_compatibility(self):
	config = LlamaConfig(vocab_size=100, hidden_size=32, num_hidden_layers=3, num_attention_heads=4)
	tiny_llama = LlamaForCausalLM(config)
	@@ -1377,6 +1379,7 @@ def test_torch_compile_loss_func_compatibility(self):

	@require_peft
	@require_bitsandbytes
	+ @pytest.mark.torch_compile_test
	def test_bnb_compile(self):
	from peft import LoraConfig, get_peft_model

	diff --git a/tests/utils/test_cache_utils.py b/tests/utils/test_cache_utils.py
	index 37d15452c7ed..54a7dc24cf63 100644
	--- a/tests/utils/test_cache_utils.py
	+++ b/tests/utils/test_cache_utils.py
	@@ -15,6 +15,7 @@
	import copy
	import unittest

	+import pytest
	from packaging import version
	from parameterized import parameterized

	@@ -594,6 +595,7 @@ def test_cache_gptj_model(self, cache_implementation):
	class CacheExportIntegrationTest(unittest.TestCase):
	"""Cache tests that rely on `torch.export()` and model loading"""

	+ @pytest.mark.torch_export_test
	def test_dynamic_cache_exportability(self):
	model = AutoModelForCausalLM.from_pretrained("hf-internal-testing/tiny-random-MistralForCausalLM")
	model = model.eval()
	@@ -635,6 +637,7 @@ def test_dynamic_cache_exportability(self):
	self.assertTrue(torch.allclose(l1.keys, l2.keys, atol=1e-5))
	self.assertTrue(torch.allclose(l1.values, l2.values, atol=1e-5))

	+ @pytest.mark.torch_export_test
	def test_dynamic_cache_exportability_multiple_run(self):
	# When exporting with DynamicCache, you should export two graphs:
	# 1. A graph without cache
	@@ -730,6 +733,7 @@ def test_dynamic_cache_exportability_multiple_run(self):
	self.assertTrue(torch.allclose(l1.values, l2.values, atol=1e-5))

	@unittest.skip("Runs on my machine locally, passed, no idea why it does not online")
	+ @pytest.mark.torch_export_test
	def test_static_cache_exportability(self):
	"""
	Tests that static cache works with `torch.export()`
	@@ -808,6 +812,7 @@ def test_static_cache_exportability(self):
	strict=strict,
	)

	+ @pytest.mark.torch_export_test
	def test_hybrid_cache_exportability(self):
	"""
	Tests that static cache works with `torch.export()`
	diff --git a/tests/utils/test_deprecation.py b/tests/utils/test_deprecation.py
	index 81b46af37eb4..f09c42101941 100644
	--- a/tests/utils/test_deprecation.py
	+++ b/tests/utils/test_deprecation.py
	@@ -15,6 +15,7 @@
	import unittest
	import warnings

	+import pytest
	from parameterized import parameterized

	from transformers import __version__, is_torch_available
	@@ -174,6 +175,7 @@ def dummy_function(new_name=None, **kwargs):
	result = dummy_function(deprecated_name="old_value", new_name="new_value")
	self.assertEqual(result, "new_value")

	+ @pytest.mark.torch_compile_test
	@require_torch_accelerator
	def test_compile_safe(self):
	@deprecate_kwarg("deprecated_factor", new_name="new_factor", version=INFINITE_VERSION)
	diff --git a/tests/utils/test_generic.py b/tests/utils/test_generic.py
	index e08c26fd02e8..77e7cdba7c2c 100644
	--- a/tests/utils/test_generic.py
	+++ b/tests/utils/test_generic.py
	@@ -16,6 +16,7 @@
	import warnings

	import numpy as np
	+import pytest

	from transformers.configuration_utils import PretrainedConfig
	from transformers.modeling_outputs import BaseModelOutput
	@@ -261,6 +262,7 @@ def test_decorator_eager(self):
	message = f"output should be a {expected_type.__name__} when config.use_return_dict={config_return_dict} and return_dict={return_dict}"
	self.assertIsInstance(output, expected_type, message)

	+ @pytest.mark.torch_compile_test
	def test_decorator_compiled(self):
	"""Test that the can_return_tuple decorator works with compiled mode."""
	config = PretrainedConfig()
	@@ -277,6 +279,7 @@ def test_decorator_compiled(self):
	output = compiled_model(torch.tensor(10), return_dict=False)
	self.assertIsInstance(output, tuple)

	+ @pytest.mark.torch_export_test
	def test_decorator_torch_export(self):
	"""Test that the can_return_tuple decorator works with torch.export."""
	config = PretrainedConfig()
	diff --git a/tests/utils/test_model_output.py b/tests/utils/test_model_output.py
	index 00473e878600..eef5feb014f5 100644
	--- a/tests/utils/test_model_output.py
	+++ b/tests/utils/test_model_output.py
	@@ -17,6 +17,8 @@
	from dataclasses import dataclass
	from typing import Optional

	+import pytest
	+
	from transformers import AlbertForMaskedLM
	from transformers.testing_utils import require_torch
	from transformers.utils import ModelOutput, is_torch_available
	@@ -160,6 +162,7 @@ def test_torch_pytree(self):
	# TODO: @ydshieh
	@unittest.skip(reason="CPU OOM")
	@require_torch
	+ @pytest.mark.torch_export_test
	def test_export_serialization(self):
	if not is_torch_greater_or_equal_than_2_2:
	self.skipTest(reason="Export serialization requires torch >= 2.2.0")
	diff --git a/tests/utils/test_modeling_utils.py b/tests/utils/test_modeling_utils.py
	index a1b8b0c35a73..a7837d5d2336 100644
	--- a/tests/utils/test_modeling_utils.py
	+++ b/tests/utils/test_modeling_utils.py
	@@ -27,6 +27,7 @@
	import warnings
	from pathlib import Path

	+import pytest
	import requests
	from huggingface_hub import HfApi, HfFolder
	from parameterized import parameterized
	@@ -2541,6 +2542,7 @@ def test_causal_mask_sliding(self):
	# non auto-regressive case
	self.check_to_causal(mask_converter, q_len=7, kv_len=7)

	+ @pytest.mark.torch_compile_test
	def test_torch_compile_fullgraph(self):
	model = Prepare4dCausalAttentionMaskModel()