fix: negative experts are never activated
Browse files- modeling_tcmoe.py +0 -1
modeling_tcmoe.py
CHANGED
|
@@ -334,7 +334,6 @@ class TCMoEBlock(nn.Module):
|
|
| 334 |
current_hidden_states = expert_layer(current_state) * weights[top_x, idx, None]
|
| 335 |
else:
|
| 336 |
current_hidden_states = expert_layer(current_state) * weights[top_x, idx, None] * -1.0
|
| 337 |
-
current_hidden_states = expert_layer(current_state) * weights[top_x, idx, None]
|
| 338 |
|
| 339 |
# However `index_add_` only support torch tensors for indexing so we'll use
|
| 340 |
# the `top_x` tensor here.
|
|
|
|
| 334 |
current_hidden_states = expert_layer(current_state) * weights[top_x, idx, None]
|
| 335 |
else:
|
| 336 |
current_hidden_states = expert_layer(current_state) * weights[top_x, idx, None] * -1.0
|
|
|
|
| 337 |
|
| 338 |
# However `index_add_` only support torch tensors for indexing so we'll use
|
| 339 |
# the `top_x` tensor here.
|