Commit
·
72802bf
1 Parent(s):
9a53f41
Update modeling_aquila.py
Browse files
- modeling_aquila.py +1 -1
modeling_aquila.py
CHANGED
|
@@ -346,7 +346,7 @@ class AquilaAttention(nn.Module):
|
|
| 346 |
value_states = repeat_kv(value_states, self.num_key_value_groups)
|
| 347 |
|
| 348 |
attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
|
| 349 |
-
|
| 350 |
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
|
| 351 |
raise ValueError(
|
| 352 |
f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
|
|
|
|
| 346 |
value_states = repeat_kv(value_states, self.num_key_value_groups)
|
| 347 |
|
| 348 |
attn_weights = torch.matmul(query_states, key_states.transpose(2, 3)) / math.sqrt(self.head_dim)
|
| 349 |
+ attn_weights = torch.clamp(attn_weights, min=-1024., max=1024.)
|
| 350 |
if attn_weights.size() != (bsz, self.num_heads, q_len, kv_seq_len):
|
| 351 |
raise ValueError(
|
| 352 |
f"Attention weights should be of size {(bsz, self.num_heads, q_len, kv_seq_len)}, but is"
|