oweller2 committed on
Commit ·
1f61dbc
1
Parent(s): 3cd62e1
update
Browse files- modeling_flexbert.py +19 -22
modeling_flexbert.py
CHANGED
|
@@ -1708,28 +1708,25 @@ class FlexBertForCausalLM(FlexBertPreTrainedModel):
|
|
| 1708 |
attentions=None,
|
| 1709 |
)
|
| 1710 |
|
| 1711 |
-
def prepare_inputs_for_generation(
|
| 1712 |
-
|
| 1713 |
-
|
| 1714 |
-
|
| 1715 |
-
|
| 1716 |
-
|
| 1717 |
-
|
| 1718 |
-
|
| 1719 |
-
|
| 1720 |
-
|
| 1721 |
-
|
| 1722 |
-
|
| 1723 |
-
|
| 1724 |
-
|
| 1725 |
-
|
| 1726 |
-
|
| 1727 |
-
|
| 1728 |
-
|
| 1729 |
-
|
| 1730 |
-
input_ids = torch.cat([input_ids, dummy_token], dim=1)
|
| 1731 |
-
|
| 1732 |
-
return {"input_ids": input_ids, "attention_mask": attention_mask}
|
| 1733 |
|
| 1734 |
def get_number_parameters(self, count_embeddings: bool = True, trainable: bool = True) -> int:
|
| 1735 |
"""Returns the number of parameters in the model.
|
|
|
|
| 1708 |
attentions=None,
|
| 1709 |
)
|
| 1710 |
|
| 1711 |
+
def prepare_inputs_for_generation(
    self,
    input_ids: torch.Tensor,
    past_key_values: Optional[torch.FloatTensor] = None,
    attention_mask: Optional[torch.Tensor] = None,
    **kwargs
) -> dict:
    """Assemble the model inputs for a single decoding step.

    When a KV cache (``past_key_values``) is supplied, only the most recent
    token — and the matching last column of ``attention_mask`` — is forwarded,
    since earlier positions are already represented in the cache. Without a
    cache, the inputs are passed through unchanged.
    """
    have_cache = past_key_values is not None
    if have_cache:
        # The cache covers every earlier position, so keep just the newest token.
        input_ids = input_ids[:, -1].unsqueeze(-1)
        if attention_mask is not None:
            attention_mask = attention_mask[:, -1:]

    model_inputs = {
        "input_ids": input_ids,
        "past_key_values": past_key_values,
        "use_cache": kwargs.get("use_cache", True),
        "attention_mask": attention_mask,
    }
    return model_inputs
|
|
|
|
|
|
|
|
|
|
| 1730 |
|
| 1731 |
def get_number_parameters(self, count_embeddings: bool = True, trainable: bool = True) -> int:
|
| 1732 |
"""Returns the number of parameters in the model.
|