Allow Output subclasses in contrastive search
Hi,
I'm trying to use contrastive search with a custom multimodal model, and I found that it overrides the model outputs with the default type (e.g. CausalLMOutputWithPast). This is a problem for models that rely on extra attributes in their output class. I think we can simply replace the attributes we want on the existing output object rather than recreating it: we achieve the same thing, and it's a bit cleaner in my opinion. Happy to discuss if I'm missing something! A minimal sketch of the difference is below.
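To illustrate, here's a minimal sketch with a hypothetical custom output class (the class name and its extra field are stand-ins I made up, not from this PR):

```python
import torch
from dataclasses import dataclass, replace

from transformers.modeling_outputs import CausalLMOutputWithPast


# Hypothetical custom output with one extra attribute, as a multimodal model might define.
@dataclass
class MyMultimodalOutput(CausalLMOutputWithPast):
    image_hidden_states: torch.FloatTensor = None


out = MyMultimodalOutput(
    logits=torch.zeros(1, 1, 10),
    image_hidden_states=torch.zeros(1, 4, 8),
)

# Current behaviour: rebuilding the output with the default class drops the
# subclass, and every extra attribute with it.
rebuilt = CausalLMOutputWithPast(logits=out.logits)
assert not hasattr(rebuilt, "image_hidden_states")

# With dataclasses.replace: the fields we pass are swapped in, while the class
# and all other fields are kept.
updated = replace(out, logits=out.logits * 2)
assert isinstance(updated, MyMultimodalOutput)
assert updated.image_hidden_states is not None
```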
I'm also happy to add assertions to check that the outputs inherit from the expected class, if you want.
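For example, something along these lines (the helper name and its placement before the replace() calls are my assumption, just to show the shape of the check):

```python
from transformers.modeling_outputs import CausalLMOutputWithPast, Seq2SeqLMOutput
from transformers.utils import ModelOutput


def _check_output_type(outputs: ModelOutput, is_encoder_decoder: bool) -> None:
    # Subclasses pass the check; only unrelated output types are rejected.
    expected_cls = Seq2SeqLMOutput if is_encoder_decoder else CausalLMOutputWithPast
    if not isinstance(outputs, expected_cls):
        raise TypeError(
            f"Contrastive search expects model outputs inheriting from "
            f"{expected_cls.__name__}, got {type(outputs).__name__}."
        )
```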
custom_generate/generate.py (CHANGED)

@@ -1,3 +1,4 @@
+from dataclasses import replace
 import logging
 from typing import TYPE_CHECKING, Optional, Union
 
@@ -14,7 +15,6 @@ from transformers.generation.utils import (
     GenerateNonBeamOutput,
     GenerationMixin,
 )
-from transformers.modeling_outputs import CausalLMOutputWithPast, Seq2SeqLMOutput
 from transformers.utils import ModelOutput
 
 
@@ -414,7 +414,8 @@ def _contrastive_search(
                 for layer in outputs.decoder_attentions:
                     layer = torch.stack(torch.split(layer, top_k, dim=0))[range(batch_size), selected_idx, ...]
                     next_step_decoder_attentions += (layer,)
-            outputs = Seq2SeqLMOutput(
+            outputs = replace(
+                outputs,
                 past_key_values=next_past_key_values,
                 decoder_hidden_states=next_decoder_hidden_states,
                 decoder_attentions=next_step_decoder_attentions or None,
@@ -426,11 +427,13 @@
                 for layer in outputs.attentions:
                     layer = torch.stack(torch.split(layer, top_k, dim=0))[range(batch_size), selected_idx, ...]
                     next_step_attentions += (layer,)
-            outputs = CausalLMOutputWithPast(
+            outputs = replace(
+                outputs,
                 past_key_values=next_past_key_values,
                 hidden_states=next_decoder_hidden_states,
                 attentions=next_step_attentions or None,
             )
+
         # contrastive_search main logic end
 
         # synced_gpus: don't waste resources running the code we don't need; kwargs must be updated before skipping