Commit: Update modeling_auristream.py

Changed file: modeling_auristream.py (+4 lines, -0 lines)
@@ -369,6 +369,9 @@ class AuriStreamModel(AuriStreamPreTrainedModel):

Before (lines 369-374):

    369        all_logits = [logits] if output_logits else None
    370
    371        # Compute future head logits
    372        if self.future_heads is not None:
    373            for i, head in enumerate(self.future_heads):
    374                future_logits = head(x[:, :-(i + 1)])
@@ -378,6 +381,7 @@ class AuriStreamModel(AuriStreamPreTrainedModel):

Before (lines 378-383):

    378        # Compute loss if labels provided
    379        loss = None
    380        if labels is not None:
    381            loss = F.cross_entropy(
    382                logits.reshape(-1, self.config.vocab_size),
    383                labels.reshape(-1),
After (lines 369-377; three comment lines added):

    369        all_logits = [logits] if output_logits else None
    370
    371        # Compute future head logits
    372  +     # lm_head is the first "standard" lm head which predicts token i+1 (as all GPT models have)
    373  +     # self.future_heads holds all the other "MTP" future prediction heads, so self.future_heads[0]
    374  +     # corresponds to the head that predicts token i+2 - aka the "second head"
    375        if self.future_heads is not None:
    376            for i, head in enumerate(self.future_heads):
    377                future_logits = head(x[:, :-(i + 1)])
After (lines 381-387; one comment line added):

    381        # Compute loss if labels provided
    382        loss = None
    383        if labels is not None:
    384  +         # compute loss from the first "standard" lm head
    385            loss = F.cross_entropy(
    386                logits.reshape(-1, self.config.vocab_size),
    387                labels.reshape(-1),