Add patch to handle a None attention_mask in task_heads.py
task_heads.py (+14, -0)
@@ -154,6 +154,20 @@ class SharedSpaceDecoderForCausalLM(SharedSpaceDecoderPreTrainedModel):
             - loss: Cross-entropy loss if labels provided, else None
             - hidden_states: Final layer hidden states [batch_size, seq_len, hidden_size]
         """
+
+        # Adding a patch for when attention_mask is None
+        # ---------------------------
+        # >>> PATCH: ensure a mask if none is provided
+        # ---------------------------
+        if attention_mask is None and input_ids is not None:
+            # Create an all-ones mask (no padding) so SDPA mask prep won't crash.
+            # dtype long/bool are both accepted by HF mask utils; long is common.
+            attention_mask = torch.ones(
+                (input_ids.size(0), input_ids.size(1)),
+                dtype=torch.long,
+                device=input_ids.device,
+            )
+        # ---------------------------
 
         # Run the base decoder model
         # This applies all the transformer layers with causal attention
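For reference, the fallback the patch applies can be sketched as a standalone, runnable snippet. The helper name _default_attention_mask is hypothetical and not part of task_heads.py; it just mirrors the logic added above:

import torch

def _default_attention_mask(input_ids, attention_mask=None):
    # Mirror of the patch: when no mask is supplied, assume no padding
    # and let every token attend (all-ones mask of shape [batch, seq]).
    if attention_mask is None and input_ids is not None:
        attention_mask = torch.ones(
            (input_ids.size(0), input_ids.size(1)),
            dtype=torch.long,
            device=input_ids.device,
        )
    return attention_mask

# Example: a batch of 2 sequences of 5 token ids, no mask passed in.
input_ids = torch.randint(0, 1000, (2, 5))
mask = _default_attention_mask(input_ids)
print(mask.shape, mask.dtype)  # torch.Size([2, 5]) torch.int64

Note that an all-ones default is only correct for unpadded batches; batches containing padding tokens still need the real mask produced by the tokenizer.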