Commit ·
08c8530
1
Parent(s): 4690395
Pull updates from branch 'main' of https://huggingface.co/Qwen/Qwen-7B-Chat-Int4.
Browse files
- README.md +1 -1
- assets/wechat.png +0 -0
- modeling_qwen.py +2 -4
README.md
CHANGED
|
@@ -18,7 +18,7 @@ inference: false
|
|
| 18 |
<p align="center">
|
| 19 |
🤗 <a href="https://huggingface.co/Qwen">Hugging Face</a>   |   🤖 <a href="https://modelscope.cn/organization/qwen">ModelScope</a>   |    📑 <a href="https://arxiv.org/abs/2309.16609">Paper</a>    |   🖥️ <a href="https://modelscope.cn/studios/qwen/Qwen-7B-Chat-Demo/summary">Demo</a>
|
| 20 |
<br>
|
| 21 |
-
<a href="assets/wechat.png">WeChat (微信)</a>   |   <a href="https://discord.gg/z3GAxXZ9Ce">Discord</a>   |   <a href="https://dashscope.aliyun.com">API</a>
|
| 22 |
</p>
|
| 23 |
<br>
|
| 24 |
|
|
|
|
| 18 |
<p align="center">
|
| 19 |
🤗 <a href="https://huggingface.co/Qwen">Hugging Face</a>   |   🤖 <a href="https://modelscope.cn/organization/qwen">ModelScope</a>   |    📑 <a href="https://arxiv.org/abs/2309.16609">Paper</a>    |   🖥️ <a href="https://modelscope.cn/studios/qwen/Qwen-7B-Chat-Demo/summary">Demo</a>
|
| 20 |
<br>
|
| 21 |
+
<a href="https://github.com/QwenLM/Qwen/blob/main/assets/wechat.png">WeChat (微信)</a>   |   <a href="https://discord.gg/z3GAxXZ9Ce">Discord</a>   |   <a href="https://dashscope.aliyun.com">API</a>
|
| 22 |
</p>
|
| 23 |
<br>
|
| 24 |
|
assets/wechat.png
CHANGED
|
|
modeling_qwen.py
CHANGED
|
@@ -540,9 +540,7 @@ class QWenAttention(nn.Module):
|
|
| 540 |
|
| 541 |
if not self.use_cache_quantization and SUPPORT_TORCH2:
|
| 542 |
if attention_mask is not None:
|
| 543 |
-
attention_mask = attention_mask.expand(
|
| 544 |
-
-1, -1, causal_mask.size(2), -1
|
| 545 |
-
)
|
| 546 |
if causal_mask is not None:
|
| 547 |
attention_mask = attention_mask.masked_fill(~causal_mask, torch.finfo(query.dtype).min)
|
| 548 |
else:
|
|
@@ -1356,7 +1354,7 @@ def apply_rotary_pos_emb(t, freqs):
|
|
| 1356 |
t (tensor(batch_size, seq_len, n_head, head_dim)):
|
| 1357 |
the input embedding/hidden states
|
| 1358 |
freqs (list[tensor(1, seq_len, 1, rotary_dim), tensor(1, seq_len, 1, rotary_dim)]):
|
| 1359 |
-
the cached cos/sin position embeddings
|
| 1360 |
"""
|
| 1361 |
rot_dim = freqs[0].shape[-1]
|
| 1362 |
cos, sin = freqs
|
|
|
|
| 540 |
|
| 541 |
if not self.use_cache_quantization and SUPPORT_TORCH2:
|
| 542 |
if attention_mask is not None:
|
| 543 |
+
attention_mask = attention_mask.expand(-1, -1, query.size(2), -1)
|
|
|
|
|
|
|
| 544 |
if causal_mask is not None:
|
| 545 |
attention_mask = attention_mask.masked_fill(~causal_mask, torch.finfo(query.dtype).min)
|
| 546 |
else:
|
|
|
|
| 1354 |
t (tensor(batch_size, seq_len, n_head, head_dim)):
|
| 1355 |
the input embedding/hidden states
|
| 1356 |
freqs (list[tensor(1, seq_len, 1, rotary_dim), tensor(1, seq_len, 1, rotary_dim)]):
|
| 1357 |
+
the cached cos/sin position embeddings
|
| 1358 |
"""
|
| 1359 |
rot_dim = freqs[0].shape[-1]
|
| 1360 |
cos, sin = freqs
|