now fixed
custom_generate/generate.py
@@ -297,12 +297,11 @@ def _contrastive_search(
         for i in range(top_k):
             # compute the candidate tokens by the language model and collect their hidden_states
             next_model_inputs = model.prepare_inputs_for_generation(top_k_ids[:, i].view(-1, 1), **model_kwargs)
+            next_model_inputs['output_hidden_states'] = True

             outputs = model(
                 **next_model_inputs,
                 return_dict=True,
-                output_hidden_states=True,
-                output_attentions=output_attentions,
             )
             # Remove past K-V from output since we don't need to stack later
             outputs["past_key_values"] = None
@@ -316,12 +315,11 @@ def _contrastive_search(
         # compute the candidate tokens by the language model and collect their hidden_states
         # assembles top_k_ids into batch of size k
         next_model_inputs = model.prepare_inputs_for_generation(top_k_ids.view(-1, 1), **model_kwargs)
+        next_model_inputs['output_hidden_states'] = True

         outputs = model(
             **next_model_inputs,
             return_dict=True,
-            output_hidden_states=True,
-            output_attentions=output_attentions,
         )

         # This is essential to avoid having a last reference to the big past K-V and double the necessary memory
@@ -385,8 +383,6 @@ def _contrastive_search(
     selected_outputs = model(
         **next_model_input,
         return_dict=True,
-        output_hidden_states=False,
-        output_attentions=False,
     )
     next_past_key_values = selected_outputs["past_key_values"]

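For context, a minimal sketch of the call pattern this commit moves to, assuming a Hugging Face transformers-style model (the gpt2 checkpoint, prompt, and model_kwargs below are illustrative, not part of the diff). Setting output_hidden_states on the dict returned by prepare_inputs_for_generation, instead of passing it as a second keyword to model(...), keeps the forward call free of duplicate keyword arguments when the prepared inputs already contain that key, which is a plausible reading of what "now fixed" refers to.

# Minimal sketch of the new call pattern; checkpoint and prompt are
# illustrative assumptions, not taken from the diff.
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("gpt2")
model = AutoModelForCausalLM.from_pretrained("gpt2")

inputs = tokenizer("Contrastive search picks", return_tensors="pt")
model_kwargs = {"attention_mask": inputs["attention_mask"], "use_cache": True}

# Set the flag on the prepared-inputs dict rather than passing it as an
# extra keyword to model(...); if prepare_inputs_for_generation already
# returns that key, a second keyword would raise
# "got multiple values for keyword argument 'output_hidden_states'".
next_model_inputs = model.prepare_inputs_for_generation(
    inputs["input_ids"], **model_kwargs
)
next_model_inputs["output_hidden_states"] = True

with torch.no_grad():
    outputs = model(**next_model_inputs, return_dict=True)

print(outputs.hidden_states[-1].shape)  # hidden states populated via the flag

The same pattern applies to both changed call sites: the flag travels inside next_model_inputs, so the model(...) invocation itself only needs return_dict=True.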