ZeroVision-rxxiang Todokete committed on
Commit
3c269af
·
verified ·
1 Parent(s): c3314a5

Fix: Handle cache_position argument for newer Transformers (#1)

Browse files

- Fix: Handle cache_position argument for newer Transformers (a044327e935386945273905b74c1c7d61e7234d0)


Co-authored-by: Todokete <Todokete@users.noreply.huggingface.co>

Files changed (1) hide show
  1. modeling_mixsense_llama.py +4 -1
modeling_mixsense_llama.py CHANGED
@@ -1115,6 +1115,7 @@ class MixsenseLlamaForCausalLM(LlamaForCausalLM, MixsenseMetaForCausalLM):
1115
  images: Optional[torch.FloatTensor] = None,
1116
  image_sizes: Optional[List[List[int]]] = None,
1117
  return_dict: Optional[bool] = None,
 
1118
  ) -> Union[Tuple, CausalLMOutputWithPast]:
1119
  if inputs_embeds is None:
1120
  (
@@ -1144,6 +1145,7 @@ class MixsenseLlamaForCausalLM(LlamaForCausalLM, MixsenseMetaForCausalLM):
1144
  output_attentions=output_attentions,
1145
  output_hidden_states=output_hidden_states,
1146
  return_dict=return_dict,
 
1147
  )
1148
 
1149
  @torch.no_grad()
@@ -1181,7 +1183,7 @@ class MixsenseLlamaForCausalLM(LlamaForCausalLM, MixsenseMetaForCausalLM):
1181
  return output
1182
 
1183
  def prepare_inputs_for_generation(
1184
- self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs
1185
  ):
1186
  images = kwargs.pop("images", None)
1187
  image_sizes = kwargs.pop("image_sizes", None)
@@ -1189,6 +1191,7 @@ class MixsenseLlamaForCausalLM(LlamaForCausalLM, MixsenseMetaForCausalLM):
1189
  input_ids,
1190
  past_key_values=past_key_values,
1191
  inputs_embeds=inputs_embeds,
 
1192
  **kwargs,
1193
  )
1194
  if images is not None:
 
1115
  images: Optional[torch.FloatTensor] = None,
1116
  image_sizes: Optional[List[List[int]]] = None,
1117
  return_dict: Optional[bool] = None,
1118
+ cache_position: Optional[torch.LongTensor] = None,
1119
  ) -> Union[Tuple, CausalLMOutputWithPast]:
1120
  if inputs_embeds is None:
1121
  (
 
1145
  output_attentions=output_attentions,
1146
  output_hidden_states=output_hidden_states,
1147
  return_dict=return_dict,
1148
+ cache_position=cache_position,
1149
  )
1150
 
1151
  @torch.no_grad()
 
1183
  return output
1184
 
1185
  def prepare_inputs_for_generation(
1186
+ self, input_ids, past_key_values=None, inputs_embeds=None, cache_position=None, **kwargs
1187
  ):
1188
  images = kwargs.pop("images", None)
1189
  image_sizes = kwargs.pop("image_sizes", None)
 
1191
  input_ids,
1192
  past_key_values=past_key_values,
1193
  inputs_embeds=inputs_embeds,
1194
+ cache_position=cache_position,
1195
  **kwargs,
1196
  )
1197
  if images is not None: