Charlie81
/

LoRE

Charlie81 committed on Jul 10, 2025

Commit

1ec67ec

1 Parent(s): c306fa9

aux to torch tensor

Files changed (1) hide show

myolmoe/modeling_myolmoe.py CHANGED Viewed

@@ -1037,7 +1037,8 @@ class MyOlmoeForCausalLM(OlmoePreTrainedModel, GenerationMixin):
         if labels is not None:
             loss = self.loss_function(logits, labels, self.vocab_size, **loss_kwargs)
         #
-        total_aux_loss = 0
         if output_router_logits and outputs.router_logits is not None:
             # Regular load balancing loss
             total_aux_loss += load_balancing_loss_func(

         if labels is not None:
             loss = self.loss_function(logits, labels, self.vocab_size, **loss_kwargs)
         #
+        total_aux_loss = torch.tensor(0.0, device=loss.device, dtype=loss.dtype)
         if output_router_logits and outputs.router_logits is not None:
             # Regular load balancing loss
             total_aux_loss += load_balancing_loss_func(