layer id init: pass layer_idx to OlmoeSparseMoeBlock in OlmoeDecoderLayer
Browse files
myolmoe/modeling_myolmoe.py
CHANGED
|
@@ -530,7 +530,7 @@ class OlmoeDecoderLayer(nn.Module):
|
|
| 530 |
self.self_attn = OLMOE_ATTENTION_CLASSES[config._attn_implementation](
|
| 531 |
config=config, layer_idx=layer_idx
|
| 532 |
)
|
| 533 |
-
self.mlp = OlmoeSparseMoeBlock(config)
|
| 534 |
self.input_layernorm = OlmoeRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 535 |
self.post_attention_layernorm = OlmoeRMSNorm(
|
| 536 |
config.hidden_size, eps=config.rms_norm_eps
|
|
|
|
| 530 |
self.self_attn = OLMOE_ATTENTION_CLASSES[config._attn_implementation](
|
| 531 |
config=config, layer_idx=layer_idx
|
| 532 |
)
|
| 533 |
+
self.mlp = OlmoeSparseMoeBlock(config, layer_idx)
|
| 534 |
self.input_layernorm = OlmoeRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
|
| 535 |
self.post_attention_layernorm = OlmoeRMSNorm(
|
| 536 |
config.hidden_size, eps=config.rms_norm_eps
|