Merge branch 'main' of https://huggingface.co/Qwen/Qwen-7B-Chat-Int4 into pr/6
Browse files
modeling_qwen.py (+1 / -1)
modeling_qwen.py
|
@@ -544,7 +544,7 @@ class QWenAttention(nn.Module):
|
|
| 544 |
-1, -1, causal_mask.size(2), -1
|
| 545 |
)
|
| 546 |
if causal_mask is not None:
|
| 547 |
-
attention_mask. [removed line truncated in page extraction — full original content not recoverable from this view]
|
| 548 |
else:
|
| 549 |
attention_mask = causal_mask
|
| 550 |
attn_output = F.scaled_dot_product_attention(
|
|
|
|
| 544 |
-1, -1, causal_mask.size(2), -1
|
| 545 |
)
|
| 546 |
if causal_mask is not None:
|
| 547 |
+
attention_mask.masked_fill_(~causal_mask, torch.finfo(query.dtype).min)
|
| 548 |
else:
|
| 549 |
attention_mask = causal_mask
|
| 550 |
attn_output = F.scaled_dot_product_attention(
|