specsGuy
/

Deepseek-ocr

Image-Text-to-Text

feature-extraction

vision-language

Model card Files Files and versions

specsGuy committed on Nov 10, 2025

Commit

4199e49

·

verified ·

1 Parent(s): c9d64d3

Update modeling_deepseekv2.py

Files changed (1) hide show

modeling_deepseekv2.py +11 -4

modeling_deepseekv2.py CHANGED Viewed

@@ -34,10 +34,17 @@ from torch.nn import BCEWithLogitsLoss, CrossEntropyLoss, MSELoss
 from transformers.activations import ACT2FN
 from transformers.cache_utils import Cache, DynamicCache
 from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
-from transformers.models.llama.modeling_llama import (
-    LlamaAttention,
-    LlamaFlashAttention2
-)
 from transformers.modeling_outputs import (
     BaseModelOutputWithPast,
     CausalLMOutputWithPast,

 from transformers.activations import ACT2FN
 from transformers.cache_utils import Cache, DynamicCache
 from transformers.modeling_attn_mask_utils import _prepare_4d_causal_attention_mask
+try:
+    from transformers.models.llama.modeling_llama import (
+        LlamaFlashAttention2,
+        LlamaSdpaAttention,
+        LlamaAttention,
+    )
+except ImportError:
+    # Fallback for CPU or environments without flash-attn, and for transformers versions that do not export these classes
+    from transformers.models.llama.modeling_llama import LlamaAttention
+    LlamaFlashAttention2 = None
+    LlamaSdpaAttention = LlamaAttention
 from transformers.modeling_outputs import (
     BaseModelOutputWithPast,
     CausalLMOutputWithPast,