Update resampler.py
#9
by
Axongim
- opened
- resampler.py +31 -29
resampler.py
CHANGED
|
@@ -1,3 +1,5 @@
|
|
|
|
|
|
|
|
| 1 |
from functools import partial
|
| 2 |
from typing import Optional, Tuple
|
| 3 |
import numpy as np
|
|
@@ -725,11 +727,11 @@ def _in_projection_packed(
|
|
| 725 |
kv_proj = kv_proj.unflatten(-1, (2, E)).unsqueeze(0).transpose(0, -2).squeeze(-2).contiguous()
|
| 726 |
return (q_proj, kv_proj[0], kv_proj[1])
|
| 727 |
else:
|
| 728 |
-
w_q, w_k, w_v = w.chunk(3)
|
| 729 |
-
|
| 730 |
-
b_q = b_k = b_v = None
|
| 731 |
-
|
| 732 |
-
b_q, b_k, b_v = b.chunk(3)
|
| 733 |
return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
|
| 734 |
|
| 735 |
|
|
@@ -740,34 +742,34 @@ def _in_projection(
|
|
| 740 |
w_q: Tensor,
|
| 741 |
w_k: Tensor,
|
| 742 |
w_v: Tensor,
|
| 743 |
-
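b_q: Optional[Tensor] = None,
|
| 744 |
-
b_k: Optional[Tensor] = None,
|
| 745 |
-
b_v: Optional[Tensor] = None,
|
| 746 |
-
) -> Tuple[Tensor, Tensor, Tensor]: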
|
| 747 |
r"""
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
q, k, v:
|
| 754 |
-
w_q, w_k, w_v:
|
| 755 |
-
b_q, b_k, b_v:
|
| 756 |
-
|
| 757 |
-
|
| 758 |
-
- q: :math:`(Qdims..., Eq)`
|
| 759 |
-
|
| 760 |
-
- k: :math:`(Kdims..., Ek)`
|
| 761 |
-
|
| 762 |
-
- v: :math:`(Vdims..., Ev)`
|
| 763 |
-
|
| 764 |
- w_q: :math:`(Eq, Eq)`
|
| 765 |
- w_k: :math:`(Eq, Ek)`
|
| 766 |
- w_v: :math:`(Eq, Ev)`
|
| 767 |
- b_q: :math:`(Eq)`
|
| 768 |
- b_k: :math:`(Eq)`
|
| 769 |
- b_v: :math:`(Eq)`
|
| 770 |
-
|
| 771 |
- q': :math:`[Qdims..., Eq]`
|
| 772 |
- k': :math:`[Kdims..., Eq]`
|
| 773 |
- v': :math:`[Vdims..., Eq]`
|
|
@@ -777,6 +779,6 @@ def _in_projection(
|
|
| 777 |
assert w_k.shape == (Eq, Ek), f"expecting key weights shape of {(Eq, Ek)}, but got {w_k.shape}"
|
| 778 |
assert w_v.shape == (Eq, Ev), f"expecting value weights shape of {(Eq, Ev)}, but got {w_v.shape}"
|
| 779 |
assert b_q is None or b_q.shape == (Eq,), f"expecting query bias shape of {(Eq,)}, but got {b_q.shape}"
|
| 780 |
-
|
| 781 |
-
|
| 782 |
-
|
|
|
|
| 1 |
+
from typing import List
|
| 2 |
+
from torch import Tensor
|
| 3 |
from functools import partial
|
| 4 |
from typing import Optional, Tuple
|
| 5 |
import numpy as np
|
|
|
|
| 727 |
kv_proj = kv_proj.unflatten(-1, (2, E)).unsqueeze(0).transpose(0, -2).squeeze(-2).contiguous()
|
| 728 |
return (q_proj, kv_proj[0], kv_proj[1])
|
| 729 |
else:
|
| 730 |
+
w_q, w_k, w_v = w.chunk(3)
|
| 731 |
+
if b is None:
|
| 732 |
+
b_q = b_k = b_v = None
|
| 733 |
+
else:
|
| 734 |
+
b_q, b_k, b_v = b.chunk(3)
|
| 735 |
return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
|
| 736 |
|
| 737 |
|
|
|
|
| 742 |
w_q: Tensor,
|
| 743 |
w_k: Tensor,
|
| 744 |
w_v: Tensor,
|
| 745 |
+
b_q: Optional[Tensor] = None,
|
| 746 |
+
b_k: Optional[Tensor] = None,
|
| 747 |
+
b_v: Optional[Tensor] = None,
|
| 748 |
+
) -> Tuple[Tensor, Tensor, Tensor]:
|
| 749 |
r"""
|
| 750 |
+
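Performs the in-projection step of the attention operation. This is simply
|
| 751 |
+
a triple of linear projections, with shape constraints on the weights which
|
| 752 |
+
ensure embedding dimension uniformity in the projected outputs.
|
| 753 |
+
Output is a triple containing projection tensors for query, key and value.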
|
| 754 |
+
Args:
|
| 755 |
+
q, k, v: query, key and value tensors to be projected.
|
| 756 |
+
w_q, w_k, w_v: weights for q, k and v, respectively.
|
| 757 |
+
b_q, b_k, b_v: optional biases for q, k and v, respectively.
|
| 758 |
+
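Shape:
|
| 759 |
+
Inputs:
|
| 760 |
+
- q: :math:`(Qdims..., Eq)` where Eq is the query embedding dimension and Qdims are any
|
| 761 |
+
number of leading dimensions.
|
| 762 |
+
- k: :math:`(Kdims..., Ek)` where Ek is the key embedding dimension and Kdims are any
|
| 763 |
+
number of leading dimensions.
|
| 764 |
+
- v: :math:`(Vdims..., Ev)` where Ev is the value embedding dimension and Vdims are any
|
| 765 |
+
number of leading dimensions.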
|
| 766 |
- w_q: :math:`(Eq, Eq)`
|
| 767 |
- w_k: :math:`(Eq, Ek)`
|
| 768 |
- w_v: :math:`(Eq, Ev)`
|
| 769 |
- b_q: :math:`(Eq)`
|
| 770 |
- b_k: :math:`(Eq)`
|
| 771 |
- b_v: :math:`(Eq)`
|
| 772 |
+
Output: in output triple :math:`(q', k', v')`,
|
| 773 |
- q': :math:`[Qdims..., Eq]`
|
| 774 |
- k': :math:`[Kdims..., Eq]`
|
| 775 |
- v': :math:`[Vdims..., Eq]`
|
|
|
|
| 779 |
assert w_k.shape == (Eq, Ek), f"expecting key weights shape of {(Eq, Ek)}, but got {w_k.shape}"
|
| 780 |
assert w_v.shape == (Eq, Ev), f"expecting value weights shape of {(Eq, Ev)}, but got {w_v.shape}"
|
| 781 |
assert b_q is None or b_q.shape == (Eq,), f"expecting query bias shape of {(Eq,)}, but got {b_q.shape}"
|
| 782 |
+
assert b_k is None or b_k.shape == (Eq,), f"expecting key bias shape of {(Eq,)}, but got {b_k.shape}"
|
| 783 |
+
assert b_v is None or b_v.shape == (Eq,), f"expecting value bias shape of {(Eq,)}, but got {b_v.shape}"
|
| 784 |
+
return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
|