microsoft
/

Phi-4-multimodal-instruct

@@ -2134,7 +2134,10 @@ class Phi4MMForCausalLM(Phi4MMPreTrainedModel, GenerationMixin):
         hidden_states = outputs[0]
         # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
-        logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :])
         loss = None
         if labels is not None:

         hidden_states = outputs[0]
         # Only compute necessary logits, and do not upcast them to float if we are not computing the loss
+        if num_logits_to_keep:
+            logits = self.lm_head(hidden_states[:, -num_logits_to_keep:, :])
+        else:
+            logits = self.lm_head(hidden_states)
         loss = None
         if labels is not None: