fix
Browse files — modeling_aria.py (+2 −3)
modeling_aria.py
CHANGED
|
@@ -356,8 +356,7 @@ class AriaModel(AriaPreTrainedModel):
|
|
| 356 |
base=500000,
|
| 357 |
dtype=hidden_states.dtype,
|
| 358 |
).to(input_ids.device)
|
| 359 | - freqs_cis = self.freqs_cis[
| 360 | -     cache_position]   [NOTE(review): this removed line's content was lost in extraction; reconstructed from the added line `freqs_cis = self.freqs_cis[cache_position]` and the diff stat (+2 −3) — confirm against the original commit]
| 361 |   kwargs = {
|
| 362 |
"position_ids": position_ids,
|
| 363 |
"past_key_values": past_key_values,
|
|
@@ -475,7 +474,7 @@ class AriaModel(AriaPreTrainedModel):
|
|
| 475 |
target_length = (
|
| 476 |
attention_mask.shape[-1]
|
| 477 |
if isinstance(attention_mask, torch.Tensor)
|
| 478 | - else past_seen_tokens + sequence_length
|
| 479 |
)
|
| 480 |
|
| 481 |
# In case the provided `attention` mask is 2D, we generate a causal mask here (4D).
|
|
|
|
| 356 |
base=500000,
|
| 357 |
dtype=hidden_states.dtype,
|
| 358 |
).to(input_ids.device)
|
| 359 | + freqs_cis = self.freqs_cis[cache_position]
|
|
|
|
| 360 |
kwargs = {
|
| 361 |
"position_ids": position_ids,
|
| 362 |
"past_key_values": past_key_values,
|
|
|
|
| 474 |
target_length = (
|
| 475 |
attention_mask.shape[-1]
|
| 476 |
if isinstance(attention_mask, torch.Tensor)
|
| 477 | + else past_seen_tokens + sequence_length
|
| 478 |
)
|
| 479 |
|
| 480 |
# In case the provided `attention` mask is 2D, we generate a causal mask here (4D).
|