Update resampler.py

#9
by Axongim - opened
Files changed (1) hide show
  1. resampler.py +31 -29
resampler.py CHANGED
@@ -1,3 +1,5 @@
 
 
1
  from functools import partial
2
  from typing import Optional, Tuple
3
  import numpy as np
@@ -725,11 +727,11 @@ def _in_projection_packed(
725
  kv_proj = kv_proj.unflatten(-1, (2, E)).unsqueeze(0).transpose(0, -2).squeeze(-2).contiguous()
726
  return (q_proj, kv_proj[0], kv_proj[1])
727
  else:
728
- w_q, w_k, w_v = w.chunk(3)
729
- if b is None:
730
- b_q = b_k = b_v = None
731
- else:
732
- b_q, b_k, b_v = b.chunk(3)
733
  return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
734
 
735
 
@@ -740,34 +742,34 @@ def _in_projection(
740
  w_q: Tensor,
741
  w_k: Tensor,
742
  w_v: Tensor,
743
- b_q: Optional[Tensor] = None,
744
- b_k: Optional[Tensor] = None,
745
- b_v: Optional[Tensor] = None,
746
- ) -> Tuple[Tensor, Tensor, Tensor]:
747
  r"""
748
- Performs the in-projection step of the attention operation. This is simply
749
- a triple of linear projections, with shape constraints on the weights which
750
- ensure embedding dimension uniformity in the projected outputs.
751
- Output is a triple containing projection tensors for query, key and value.
752
- Args:
753
- q, k, v: query, key and value tensors to be projected.
754
- w_q, w_k, w_v: weights for q, k and v, respectively.
755
- b_q, b_k, b_v: optional biases for q, k and v, respectively.
756
- Shape:
757
- Inputs:
758
- - q: :math:`(Qdims..., Eq)` where Eq is the query embedding dimension and Qdims are any
759
- number of leading dimensions.
760
- - k: :math:`(Kdims..., Ek)` where Ek is the key embedding dimension and Kdims are any
761
- number of leading dimensions.
762
- - v: :math:`(Vdims..., Ev)` where Ev is the value embedding dimension and Vdims are any
763
- number of leading dimensions.
764
  - w_q: :math:`(Eq, Eq)`
765
  - w_k: :math:`(Eq, Ek)`
766
  - w_v: :math:`(Eq, Ev)`
767
  - b_q: :math:`(Eq)`
768
  - b_k: :math:`(Eq)`
769
  - b_v: :math:`(Eq)`
770
- Output: in output triple :math:`(q', k', v')`,
771
  - q': :math:`[Qdims..., Eq]`
772
  - k': :math:`[Kdims..., Eq]`
773
  - v': :math:`[Vdims..., Eq]`
@@ -777,6 +779,6 @@ def _in_projection(
777
  assert w_k.shape == (Eq, Ek), f"expecting key weights shape of {(Eq, Ek)}, but got {w_k.shape}"
778
  assert w_v.shape == (Eq, Ev), f"expecting value weights shape of {(Eq, Ev)}, but got {w_v.shape}"
779
  assert b_q is None or b_q.shape == (Eq,), f"expecting query bias shape of {(Eq,)}, but got {b_q.shape}"
780
- assert b_k is None or b_k.shape == (Eq,), f"expecting key bias shape of {(Eq,)}, but got {b_k.shape}"
781
- assert b_v is None or b_v.shape == (Eq,), f"expecting value bias shape of {(Eq,)}, but got {b_v.shape}"
782
- return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
 
1
+ from typing import List
2
+ from torch import Tensor
3
  from functools import partial
4
  from typing import Optional, Tuple
5
  import numpy as np
 
727
  kv_proj = kv_proj.unflatten(-1, (2, E)).unsqueeze(0).transpose(0, -2).squeeze(-2).contiguous()
728
  return (q_proj, kv_proj[0], kv_proj[1])
729
  else:
730
+ w_q, w_k, w_v = w.chunk(3)
731
+ if b is None:
732
+ b_q = b_k = b_v = None
733
+ else:
734
+ b_q, b_k, b_v = b.chunk(3)
735
  return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)
736
 
737
 
 
742
  w_q: Tensor,
743
  w_k: Tensor,
744
  w_v: Tensor,
745
+ b_q: Optional[Tensor] = None,
746
+ b_k: Optional[Tensor] = None,
747
+ b_v: Optional[Tensor] = None,
748
+ ) -> Tuple[Tensor, Tensor, Tensor]:
749
  r"""
750
+ 执行注意力操作的输入投影步骤。这仅仅是
751
+ 一个三重线性投影,对权重施加形状约束,使得
752
+ 确保投影输出的嵌入维度一致性。
753
+ 输出是一个包含查询、键和值的投影张量的三元组。
754
+ 参数:
755
+ q, k, v: 要投影的查询、键和值张量。
756
+ w_q, w_k, w_v: 分别用于qkv的权重。
757
+ b_q, b_k, b_v: 分别为qkv的可选偏置。
758
+ 形状:
759
+ 输入:
760
+ - q: :math:`(Qdims..., Eq)` 其中 Eq 是查询嵌入维度,Qdims 是任意的
761
+ 领先维度的数量。
762
+ - k: :math:`(Kdims..., Ek)` 其中 Ek 是键嵌入维度,Kdims 可以是任何
763
+ 领先维度的数量。
764
+ - v: :math:`(Vdims..., Ev)` 其中 Ev 是值嵌入维度,Vdims 是任何
765
+ 领先维度的数量。
766
  - w_q: :math:`(Eq, Eq)`
767
  - w_k: :math:`(Eq, Ek)`
768
  - w_v: :math:`(Eq, Ev)`
769
  - b_q: :math:`(Eq)`
770
  - b_k: :math:`(Eq)`
771
  - b_v: :math:`(Eq)`
772
+ 输出:在输出三元组 :math:`(q', k', v')` 中,
773
  - q': :math:`[Qdims..., Eq]`
774
  - k': :math:`[Kdims..., Eq]`
775
  - v': :math:`[Vdims..., Eq]`
 
779
  assert w_k.shape == (Eq, Ek), f"expecting key weights shape of {(Eq, Ek)}, but got {w_k.shape}"
780
  assert w_v.shape == (Eq, Ev), f"expecting value weights shape of {(Eq, Ev)}, but got {w_v.shape}"
781
  assert b_q is None or b_q.shape == (Eq,), f"expecting query bias shape of {(Eq,)}, but got {b_q.shape}"
782
+ assert b_k is None or b_k.shape == (Eq,), f"expecting key bias shape of {(Eq,)}, but got {b_k.shape}"
783
+ assert b_v is None or b_v.shape == (Eq,), f"expecting value bias shape of {(Eq,)}, but got {b_v.shape}"
784
+ return linear(q, w_q, b_q), linear(k, w_k, b_k), linear(v, w_v, b_v)