hrezaei
/

flan-t5la-small

Feature Extraction

Generated from Trainer

Eval Results (legacy)

Model card Files Files and versions

Metrics Training metrics Community

hrezaei committed on Oct 28, 2025

Commit

91aef84

·

verified ·

1 Parent(s): c586e86

Fix the loss computation

Files changed (1) hide show

t5la_modeling.py +2 -2

t5la_modeling.py CHANGED Viewed

@@ -327,14 +327,14 @@ class T5LaForConditionalGeneration(T5ForConditionalGeneration):
                     lookahead_targets.view(-1),
                     # vocab_size=self.config.vocab_size,
                 )
-                """if self.config.lookahead_type == "la":
                     # If we simply add, the loss will be larger than a non-LA T5 model because
                     # in a normal T5, the number of tokens are much lower:
                     loss = (loss + lookahead_loss) / (1 + self.config.lookahead_size)
                 else:
                     loss = (loss * lm_logits.shape[1] + lookahead_loss * self.config.lookahead_size) / (
                         lm_logits.shape[1] + self.config.lookahead_size
-                    )"""
         if not return_dict:
             output = (lm_logits,) + decoder_outputs[1:] + encoder_outputs

                     lookahead_targets.view(-1),
                     # vocab_size=self.config.vocab_size,
                 )
+                if self.config.lookahead_type == "la":
                     # If we simply add, the loss will be larger than a non-LA T5 model because
                     # in a normal T5, the number of tokens are much lower:
                     loss = (loss + lookahead_loss) / (1 + self.config.lookahead_size)
                 else:
                     loss = (loss * lm_logits.shape[1] + lookahead_loss * self.config.lookahead_size) / (
                         lm_logits.shape[1] + self.config.lookahead_size
+                    )
         if not return_dict:
             output = (lm_logits,) + decoder_outputs[1:] + encoder_outputs