Fix: Handle cache_position argument for newer Transformers (#1)
Browse files
- Fix: Handle cache_position argument for newer Transformers (a044327e935386945273905b74c1c7d61e7234d0)
Co-authored-by: Todokete <Todokete@users.noreply.huggingface.co>
modeling_mixsense_llama.py
CHANGED
|
@@ -1115,6 +1115,7 @@ class MixsenseLlamaForCausalLM(LlamaForCausalLM, MixsenseMetaForCausalLM):
         images: Optional[torch.FloatTensor] = None,
         image_sizes: Optional[List[List[int]]] = None,
         return_dict: Optional[bool] = None,
+        cache_position: Optional[torch.LongTensor] = None,
     ) -> Union[Tuple, CausalLMOutputWithPast]:
         if inputs_embeds is None:
             (
@@ -1144,6 +1145,7 @@ class MixsenseLlamaForCausalLM(LlamaForCausalLM, MixsenseMetaForCausalLM):
             output_attentions=output_attentions,
             output_hidden_states=output_hidden_states,
             return_dict=return_dict,
+            cache_position=cache_position,
         )

     @torch.no_grad()
@@ -1181,7 +1183,7 @@ class MixsenseLlamaForCausalLM(LlamaForCausalLM, MixsenseMetaForCausalLM):
         return output

     def prepare_inputs_for_generation(
-        self, input_ids, past_key_values=None, inputs_embeds=None, **kwargs
+        self, input_ids, past_key_values=None, inputs_embeds=None, cache_position=None, **kwargs
     ):
         images = kwargs.pop("images", None)
         image_sizes = kwargs.pop("image_sizes", None)
@@ -1189,6 +1191,7 @@ class MixsenseLlamaForCausalLM(LlamaForCausalLM, MixsenseMetaForCausalLM):
             input_ids,
             past_key_values=past_key_values,
             inputs_embeds=inputs_embeds,
+            cache_position=cache_position,
             **kwargs,
         )
         if images is not None: