Update modeling_phi3.py
Browse files- modeling_phi3.py +1 -5
modeling_phi3.py
CHANGED
|
@@ -1294,10 +1294,6 @@ class Phi3ForCausalLM(Phi3PreTrainedModel):
|
|
| 1294 |
cache_length = past_length = past_key_values[0][0].shape[2]
|
| 1295 |
max_cache_length = None
|
| 1296 |
|
| 1297 |
-
else:
|
| 1298 |
-
cache_length = past_length = past_key_values[0][0].shape[2]
|
| 1299 |
-
max_cache_length = None
|
| 1300 |
-
|
| 1301 |
# Keep only the unprocessed tokens:
|
| 1302 |
# 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
|
| 1303 |
# some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
|
|
@@ -1564,4 +1560,4 @@ class Phi3ForTokenClassification(Phi3PreTrainedModel):
|
|
| 1564 |
logits=logits,
|
| 1565 |
hidden_states=model_outputs.hidden_states,
|
| 1566 |
attentions=model_outputs.attentions,
|
| 1567 |
-
)
|
|
|
|
| 1294 |
cache_length = past_length = past_key_values[0][0].shape[2]
|
| 1295 |
max_cache_length = None
|
| 1296 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1297 |
# Keep only the unprocessed tokens:
|
| 1298 |
# 1 - If the length of the attention_mask exceeds the length of input_ids, then we are in a setting where
|
| 1299 |
# some of the inputs are exclusively passed as part of the cache (e.g. when passing input_embeds as
|
|
|
|
| 1560 |
logits=logits,
|
| 1561 |
hidden_states=model_outputs.hidden_states,
|
| 1562 |
attentions=model_outputs.attentions,
|
| 1563 |
+
)
|