fix: MLP layer names.
modeling_falcon.py  (+4 -4)  CHANGED
@@ -763,18 +763,18 @@ class FalconMLP(nn.Module):
         super().__init__()
         hidden_size = config.hidden_size
 
-        self.
+        self.dense_h_to_4h = FalconLinear(
             hidden_size, config.ff_factor * hidden_size, bias=config.bias
         )
         self.act = nn.GELU()
-        self.
+        self.dense_4h_to_h = FalconLinear(
             config.ff_factor * hidden_size, hidden_size, bias=config.bias
         )
         self.hidden_dropout = config.hidden_dropout
 
     def forward(self, x: torch.Tensor) -> torch.Tensor:
-        x = self.act(self.
-        x = self.
+        x = self.act(self.dense_h_to_4h(x))
+        x = self.dense_4h_to_h(x)
         return x
 
 
 FALCON_ATTENTION_CLASSES = {
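In PyTorch, the attribute names of submodules determine the keys under which their weights appear in the module's state_dict, so the names in FalconMLP have to match the keys stored in the released checkpoint for loading to work. A minimal sketch of that behaviour, assuming only standard PyTorch (the TinyMLP class and its sizes below are hypothetical and not part of this repository):

import torch.nn as nn

# Hypothetical toy module: the attribute names, not the tensor shapes,
# decide the state_dict keys a checkpoint must match.
class TinyMLP(nn.Module):
    def __init__(self, hidden=4, factor=2):
        super().__init__()
        self.dense_h_to_4h = nn.Linear(hidden, factor * hidden)
        self.dense_4h_to_h = nn.Linear(factor * hidden, hidden)

print(list(TinyMLP().state_dict().keys()))
# ['dense_h_to_4h.weight', 'dense_h_to_4h.bias',
#  'dense_4h_to_h.weight', 'dense_4h_to_h.bias']

Renaming either attribute changes these keys, so load_state_dict on an existing checkpoint would report missing/unexpected keys.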