Updated: add the missing closing parentheses to the four [DEBUG] print calls
Browse files
- modeling_hymba.py +4 -4
modeling_hymba.py
CHANGED
|
@@ -1601,16 +1601,16 @@ class HymbaBlock(nn.Module):
|
|
| 1601 |
if self.reuse_kv:
|
| 1602 |
query_states, hidden_states = hidden_states.tensor_split((self.attn_hidden_size,), dim=1)
|
| 1603 |
query_states = query_states.transpose(1,2)
|
| 1604 |
-
print("[DEBUG] query_states shape: ", query_states
|
| 1605 |
else:
|
| 1606 |
query_states, key_states, value_states, hidden_states = hidden_states.tensor_split((self.attn_hidden_size, self.attn_hidden_size + self.k_hidden_size, self.attn_hidden_size + self.k_hidden_size + self.v_hidden_size), dim=1)
|
| 1607 |
|
| 1608 |
query_states = query_states.transpose(1,2)
|
| 1609 |
key_states = key_states.transpose(1,2)
|
| 1610 |
value_states = value_states.transpose(1,2)
|
| 1611 |
-
print("[DEBUG] query_states shape: ", query_states
|
| 1612 |
-
print("[DEBUG] key_states shape: ", key_states
|
| 1613 |
-
print("[DEBUG] value_states shape: ", value_states
|
| 1614 |
|
| 1615 |
if use_precomputed_states:
|
| 1616 |
hidden_states = causal_conv1d_update(
|
|
|
|
| 1601 |
if self.reuse_kv:
|
| 1602 |
query_states, hidden_states = hidden_states.tensor_split((self.attn_hidden_size,), dim=1)
|
| 1603 |
query_states = query_states.transpose(1,2)
|
| 1604 |
+
print("[DEBUG] query_states shape: ", query_states)
|
| 1605 |
else:
|
| 1606 |
query_states, key_states, value_states, hidden_states = hidden_states.tensor_split((self.attn_hidden_size, self.attn_hidden_size + self.k_hidden_size, self.attn_hidden_size + self.k_hidden_size + self.v_hidden_size), dim=1)
|
| 1607 |
|
| 1608 |
query_states = query_states.transpose(1,2)
|
| 1609 |
key_states = key_states.transpose(1,2)
|
| 1610 |
value_states = value_states.transpose(1,2)
|
| 1611 |
+
print("[DEBUG] query_states shape: ", query_states)
|
| 1612 |
+
print("[DEBUG] key_states shape: ", key_states)
|
| 1613 |
+
print("[DEBUG] value_states shape: ", value_states)
|
| 1614 |
|
| 1615 |
if use_precomputed_states:
|
| 1616 |
hidden_states = causal_conv1d_update(
|