Nsan05 commited on
Commit
49eccdf
·
verified ·
1 Parent(s): d8c8924

Fix num_heads=0 error and missing WHISPER_ATTENTION_CLASSES import for the tiny model

Browse files
Files changed (1) hide show
  1. modeling_minicpmo.py +6 -2
modeling_minicpmo.py CHANGED
@@ -56,7 +56,11 @@ from transformers.cache_utils import StaticCache
56
  from transformers.modeling_outputs import BaseModelOutputWithPast
57
  from transformers.modeling_outputs import ModelOutput
58
  from transformers.models.whisper.modeling_whisper import ACT2FN
59
- from transformers.models.whisper.modeling_whisper import WHISPER_ATTENTION_CLASSES
 
 
 
 
60
  from transformers.models.whisper.modeling_whisper import WhisperConfig
61
  from transformers.models.whisper.modeling_whisper import WhisperEncoder
62
 
@@ -206,7 +210,7 @@ class MiniCPMO(MiniCPMOPreTrainedModel):
206
  return Resampler(
207
  num_queries=self.config.query_num,
208
  embed_dim=embed_dim,
209
- num_heads=embed_dim // 128,
210
  kv_dim=vision_dim,
211
  adaptive=True,
212
  )
 
56
  from transformers.modeling_outputs import BaseModelOutputWithPast
57
  from transformers.modeling_outputs import ModelOutput
58
  from transformers.models.whisper.modeling_whisper import ACT2FN
59
+ try:
60
+ from transformers.models.whisper.modeling_whisper import WHISPER_ATTENTION_CLASSES
61
+ except ImportError:
62
+ from transformers.models.whisper.modeling_whisper import WhisperAttention
63
+ WHISPER_ATTENTION_CLASSES = {"sdpa": WhisperAttention, "eager": WhisperAttention, "flash_attention_2": WhisperAttention}
64
  from transformers.models.whisper.modeling_whisper import WhisperConfig
65
  from transformers.models.whisper.modeling_whisper import WhisperEncoder
66
 
 
210
  return Resampler(
211
  num_queries=self.config.query_num,
212
  embed_dim=embed_dim,
213
+ num_heads=max(1, embed_dim // 128),
214
  kv_dim=vision_dim,
215
  adaptive=True,
216
  )