Update modeling_gemmoe.py
Browse files
- modeling_gemmoe.py +0 -12
modeling_gemmoe.py
CHANGED
|
@@ -743,22 +743,12 @@ class GemmoeDecoderLayer(nn.Module):
|
|
| 743 |
output_attentions=output_attentions,
|
| 744 |
use_cache=use_cache,
|
| 745 |
)
|
| 746 |
-
|
| 747 |
-
# Check if the tensor sizes match before adding residual
|
| 748 |
-
if hidden_states.size() != residual.size():
|
| 749 |
-
hidden_states = hidden_states[:, -residual.size(1):, :]
|
| 750 |
-
|
| 751 |
hidden_states = residual + hidden_states
|
| 752 |
|
| 753 |
# Fully Connected
|
| 754 |
residual = hidden_states
|
| 755 |
hidden_states = self.post_attention_layernorm(hidden_states)
|
| 756 |
hidden_states, router_logits = self.block_sparse_moe(hidden_states)
|
| 757 |
-
|
| 758 |
-
# Check if the tensor sizes match before adding residual
|
| 759 |
-
if hidden_states.size() != residual.size():
|
| 760 |
-
hidden_states = hidden_states[:, -residual.size(1):, :]
|
| 761 |
-
|
| 762 |
hidden_states = residual + hidden_states
|
| 763 |
|
| 764 |
outputs = (hidden_states,)
|
|
@@ -775,8 +765,6 @@ class GemmoeDecoderLayer(nn.Module):
|
|
| 775 |
return outputs
|
| 776 |
|
| 777 |
|
| 778 |
-
|
| 779 |
-
|
| 780 |
GEMMOE_START_DOCSTRING = r"""
|
| 781 |
This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
|
| 782 |
library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
|
|
|
|
| 743 |
output_attentions=output_attentions,
|
| 744 |
use_cache=use_cache,
|
| 745 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 746 |
hidden_states = residual + hidden_states
|
| 747 |
|
| 748 |
# Fully Connected
|
| 749 |
residual = hidden_states
|
| 750 |
hidden_states = self.post_attention_layernorm(hidden_states)
|
| 751 |
hidden_states, router_logits = self.block_sparse_moe(hidden_states)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 752 |
hidden_states = residual + hidden_states
|
| 753 |
|
| 754 |
outputs = (hidden_states,)
|
|
|
|
| 765 |
return outputs
|
| 766 |
|
| 767 |
|
|
|
|
|
|
|
| 768 |
GEMMOE_START_DOCSTRING = r"""
|
| 769 |
This model inherits from [`PreTrainedModel`]. Check the superclass documentation for the generic methods the
|
| 770 |
library implements for all its model (such as downloading or saving, resizing the input embeddings, pruning heads
|