Commit: "Upload processor"
File changed: modeling_yangjian.py (+3 −3)
@@ -226,9 +226,9 @@ class OptimizedCrossAttention(nn.Module):
         # q, k, v: [batch_size, num_heads, seq_len, head_dim]

         # 选择 attention 实现
-        attention_interface: Callable =
-        if hasattr(self.config, '_attn_implementation') and self.config._attn_implementation != "eager":
-
+        attention_interface: Callable = ALL_ATTENTION_FUNCTIONS["sdpa"]
+        # if hasattr(self.config, '_attn_implementation') and self.config._attn_implementation != "eager":
+        #     attention_interface = ALL_ATTENTION_FUNCTIONS[self.config._attn_implementation]

         # 构造 cu_seqlens 参数(FlashAttention 必需)
         cu_seqlens_q = torch.arange(0, (batch_size*self.num_heads + 1) * seq_len_q, step=seq_len_q, dtype=torch.int32, device=q.device)

NOTE(extraction): the three removed ("-") lines above are incompletely recovered from the page
scrape — the original line 229 is cut off after "Callable =" and the body of original line 231
was lost entirely; the "+" side is complete. The change hard-codes the SDPA attention
implementation via ALL_ATTENTION_FUNCTIONS["sdpa"] and comments out the previous
config-driven (`_attn_implementation`) selection.