Spaces:

HF-slyfox
/

harness

Running

App Files Files Community

harness / diffs /37449.patch

ArthurZ HF Staff

Initial harness: 100 perf tasks + Gradio browser

dfefe0b verified 7 days ago

raw

history blame contribute delete

11.9 kB

	diff --git a/docker/transformers-all-latest-gpu/Dockerfile b/docker/transformers-all-latest-gpu/Dockerfile
	index 1eb50ee4ad7f..72853d4ca4d6 100644
	--- a/docker/transformers-all-latest-gpu/Dockerfile
	+++ b/docker/transformers-all-latest-gpu/Dockerfile
	@@ -14,6 +14,8 @@ ARG PYTORCH='2.6.0'
	ARG INTEL_TORCH_EXT='2.3.0'
	# Example: `cu102`, `cu113`, etc.
	ARG CUDA='cu121'
	+# Disable kernel mapping for now until all tests pass
	+ENV DISABLE_KERNEL_MAPPING=1

	RUN apt update
	RUN apt install -y git libsndfile1-dev tesseract-ocr espeak-ng python3 python3-pip ffmpeg git-lfs
	diff --git a/src/transformers/models/aria/modeling_aria.py b/src/transformers/models/aria/modeling_aria.py
	index d64e2746d492..fdb825cad370 100644
	--- a/src/transformers/models/aria/modeling_aria.py
	+++ b/src/transformers/models/aria/modeling_aria.py
	@@ -228,7 +228,6 @@ def forward(self, key_value_states: torch.Tensor, attn_mask: Optional[torch.Tens
	return out


	-@use_kernel_forward_from_hub("MLP")
	class AriaSharedExpertsMLP(nn.Module):
	"""
	Shared Expert MLP for shared experts.
	diff --git a/src/transformers/models/bamba/modeling_bamba.py b/src/transformers/models/bamba/modeling_bamba.py
	index 0cf23edb7510..8fd2483bcd60 100644
	--- a/src/transformers/models/bamba/modeling_bamba.py
	+++ b/src/transformers/models/bamba/modeling_bamba.py
	@@ -882,7 +882,6 @@ def forward(
	return self.torch_forward(hidden_states, cache_params, cache_position, attention_mask)


	-@use_kernel_forward_from_hub("MLP")
	class BambaMLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/cohere/modeling_cohere.py b/src/transformers/models/cohere/modeling_cohere.py
	index fd888c38d7fd..8cbb7128c734 100644
	--- a/src/transformers/models/cohere/modeling_cohere.py
	+++ b/src/transformers/models/cohere/modeling_cohere.py
	@@ -36,7 +36,6 @@
	from ...activations import ACT2FN
	from ...cache_utils import Cache, DynamicCache, StaticCache
	from ...generation import GenerationMixin
	-from ...integrations import use_kernel_forward_from_hub
	from ...modeling_attn_mask_utils import AttentionMaskConverter
	from ...modeling_flash_attention_utils import FlashAttentionKwargs
	from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
	@@ -118,7 +117,6 @@ def forward(self, x, position_ids):
	return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)


	-@use_kernel_forward_from_hub("MLP")
	class CohereMLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/cohere2/modeling_cohere2.py b/src/transformers/models/cohere2/modeling_cohere2.py
	index e419379969d3..18a3a50ac157 100644
	--- a/src/transformers/models/cohere2/modeling_cohere2.py
	+++ b/src/transformers/models/cohere2/modeling_cohere2.py
	@@ -28,7 +28,6 @@
	from ...activations import ACT2FN
	from ...cache_utils import Cache, HybridCache, StaticCache
	from ...generation import GenerationMixin
	-from ...integrations import use_kernel_forward_from_hub
	from ...modeling_flash_attention_utils import FlashAttentionKwargs
	from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
	from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
	@@ -268,7 +267,6 @@ def forward(
	return attn_output, attn_weights


	-@use_kernel_forward_from_hub("MLP")
	class Cohere2MLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/diffllama/modeling_diffllama.py b/src/transformers/models/diffllama/modeling_diffllama.py
	index ed536cbebaf3..e7fecb4be6a9 100644
	--- a/src/transformers/models/diffllama/modeling_diffllama.py
	+++ b/src/transformers/models/diffllama/modeling_diffllama.py
	@@ -74,7 +74,6 @@
	_CONFIG_FOR_DOC = "DiffLlamaConfig"


	-@use_kernel_forward_from_hub("MLP")
	class DiffLlamaMLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/emu3/modeling_emu3.py b/src/transformers/models/emu3/modeling_emu3.py
	index 4646b9f9bdee..fcc55b67d153 100644
	--- a/src/transformers/models/emu3/modeling_emu3.py
	+++ b/src/transformers/models/emu3/modeling_emu3.py
	@@ -84,7 +84,6 @@ def extra_repr(self):
	return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"


	-@use_kernel_forward_from_hub("MLP")
	class Emu3MLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/gemma/modeling_gemma.py b/src/transformers/models/gemma/modeling_gemma.py
	index 679bc0869855..40497433284a 100644
	--- a/src/transformers/models/gemma/modeling_gemma.py
	+++ b/src/transformers/models/gemma/modeling_gemma.py
	@@ -27,7 +27,6 @@
	from ...activations import ACT2FN
	from ...cache_utils import Cache, DynamicCache, StaticCache
	from ...generation import GenerationMixin
	-from ...integrations import use_kernel_forward_from_hub
	from ...modeling_attn_mask_utils import AttentionMaskConverter
	from ...modeling_flash_attention_utils import FlashAttentionKwargs
	from ...modeling_outputs import (
	@@ -85,7 +84,6 @@ def extra_repr(self):
	return f"{tuple(self.weight.shape)}, eps={self.eps}"


	-@use_kernel_forward_from_hub("MLP")
	class GemmaMLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/gemma2/modeling_gemma2.py b/src/transformers/models/gemma2/modeling_gemma2.py
	index c7040de011ba..144a94ef33e9 100644
	--- a/src/transformers/models/gemma2/modeling_gemma2.py
	+++ b/src/transformers/models/gemma2/modeling_gemma2.py
	@@ -28,7 +28,6 @@
	from ...activations import ACT2FN
	from ...cache_utils import Cache, HybridCache, StaticCache
	from ...generation import GenerationMixin
	-from ...integrations import use_kernel_forward_from_hub
	from ...modeling_flash_attention_utils import FlashAttentionKwargs
	from ...modeling_outputs import (
	BaseModelOutputWithPast,
	@@ -78,7 +77,6 @@ def extra_repr(self):
	return f"{tuple(self.weight.shape)}, eps={self.eps}"


	-@use_kernel_forward_from_hub("MLP")
	class Gemma2MLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/gemma3/modeling_gemma3.py b/src/transformers/models/gemma3/modeling_gemma3.py
	index 23f28281a1de..0988e2692aa4 100644
	--- a/src/transformers/models/gemma3/modeling_gemma3.py
	+++ b/src/transformers/models/gemma3/modeling_gemma3.py
	@@ -31,7 +31,6 @@
	from ...activations import ACT2FN
	from ...cache_utils import Cache, HybridCache, StaticCache
	from ...generation import GenerationMixin
	-from ...integrations import use_kernel_forward_from_hub
	from ...modeling_flash_attention_utils import FlashAttentionKwargs
	from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast, ModelOutput
	from ...modeling_rope_utils import ROPE_INIT_FUNCTIONS, dynamic_rope_update
	@@ -107,7 +106,6 @@ def forward(self, input_ids: torch.Tensor):
	return super().forward(input_ids) * self.embed_scale.to(self.weight.dtype)


	-@use_kernel_forward_from_hub("MLP")
	class Gemma3MLP(nn.Module):
	def __init__(self, config: Gemma3TextConfig):
	super().__init__()
	diff --git a/src/transformers/models/granite/modeling_granite.py b/src/transformers/models/granite/modeling_granite.py
	index 6f15f9ca095a..80d3ad696dc0 100644
	--- a/src/transformers/models/granite/modeling_granite.py
	+++ b/src/transformers/models/granite/modeling_granite.py
	@@ -228,7 +228,6 @@ def extra_repr(self):
	return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"


	-@use_kernel_forward_from_hub("MLP")
	class GraniteMLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/helium/modeling_helium.py b/src/transformers/models/helium/modeling_helium.py
	index 2597ce27fa94..d565af9e27f1 100644
	--- a/src/transformers/models/helium/modeling_helium.py
	+++ b/src/transformers/models/helium/modeling_helium.py
	@@ -29,7 +29,6 @@
	from ...activations import ACT2FN
	from ...cache_utils import Cache, DynamicCache, StaticCache
	from ...generation import GenerationMixin
	-from ...integrations import use_kernel_forward_from_hub
	from ...modeling_attn_mask_utils import AttentionMaskConverter
	from ...modeling_flash_attention_utils import FlashAttentionKwargs
	from ...modeling_outputs import (
	@@ -118,7 +117,6 @@ def forward(self, x, position_ids):
	return cos.to(dtype=x.dtype), sin.to(dtype=x.dtype)


	-@use_kernel_forward_from_hub("MLP")
	class HeliumMLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/llama/modeling_llama.py b/src/transformers/models/llama/modeling_llama.py
	index e8dd13952661..d36fb1b6a47e 100644
	--- a/src/transformers/models/llama/modeling_llama.py
	+++ b/src/transformers/models/llama/modeling_llama.py
	@@ -160,7 +160,6 @@ def apply_rotary_pos_emb(q, k, cos, sin, position_ids=None, unsqueeze_dim=1):
	return q_embed, k_embed


	-@use_kernel_forward_from_hub("MLP")
	class LlamaMLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/mistral/modeling_mistral.py b/src/transformers/models/mistral/modeling_mistral.py
	index 8f1b416d5b16..7f88b8d8570c 100644
	--- a/src/transformers/models/mistral/modeling_mistral.py
	+++ b/src/transformers/models/mistral/modeling_mistral.py
	@@ -45,7 +45,6 @@
	_CONFIG_FOR_DOC = "MistralConfig"


	-@use_kernel_forward_from_hub("MLP")
	class MistralMLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/olmo/modeling_olmo.py b/src/transformers/models/olmo/modeling_olmo.py
	index 8b8783d1ad8d..5b6ca9f4b356 100644
	--- a/src/transformers/models/olmo/modeling_olmo.py
	+++ b/src/transformers/models/olmo/modeling_olmo.py
	@@ -14,7 +14,6 @@
	from ...activations import ACT2FN
	from ...cache_utils import Cache, DynamicCache, StaticCache
	from ...generation import GenerationMixin
	-from ...integrations import use_kernel_forward_from_hub
	from ...modeling_attn_mask_utils import AttentionMaskConverter
	from ...modeling_flash_attention_utils import FlashAttentionKwargs
	from ...modeling_outputs import BaseModelOutputWithPast, CausalLMOutputWithPast
	@@ -58,7 +57,6 @@ def forward(self, hidden_states: torch.Tensor) -> torch.Tensor:
	)


	-@use_kernel_forward_from_hub("MLP")
	class OlmoMLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/olmo2/modeling_olmo2.py b/src/transformers/models/olmo2/modeling_olmo2.py
	index bcf990ccda60..4046dc582673 100644
	--- a/src/transformers/models/olmo2/modeling_olmo2.py
	+++ b/src/transformers/models/olmo2/modeling_olmo2.py
	@@ -218,7 +218,6 @@ def forward(
	return attn_output, attn_weights


	-@use_kernel_forward_from_hub("MLP")
	class Olmo2MLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/qwen2/modeling_qwen2.py b/src/transformers/models/qwen2/modeling_qwen2.py
	index d3180b35b3a4..7b62632bd8e4 100644
	--- a/src/transformers/models/qwen2/modeling_qwen2.py
	+++ b/src/transformers/models/qwen2/modeling_qwen2.py
	@@ -45,7 +45,6 @@
	_CONFIG_FOR_DOC = "Qwen2Config"


	-@use_kernel_forward_from_hub("MLP")
	class Qwen2MLP(nn.Module):
	def __init__(self, config):
	super().__init__()
	diff --git a/src/transformers/models/qwen3/modeling_qwen3.py b/src/transformers/models/qwen3/modeling_qwen3.py
	index 5852470d1c23..15773b4516ae 100644
	--- a/src/transformers/models/qwen3/modeling_qwen3.py
	+++ b/src/transformers/models/qwen3/modeling_qwen3.py
	@@ -81,7 +81,6 @@ def extra_repr(self):
	return f"{tuple(self.weight.shape)}, eps={self.variance_epsilon}"


	-@use_kernel_forward_from_hub("MLP")
	class Qwen3MLP(nn.Module):
	def __init__(self, config):
	super().__init__()