Charlie81 committed on
Commit
4f3261b
·
1 Parent(s): 4091143

layer id init

Browse files
Files changed (1) hide show
  1. myolmoe/modeling_myolmoe.py +1 -1
myolmoe/modeling_myolmoe.py CHANGED
@@ -530,7 +530,7 @@ class OlmoeDecoderLayer(nn.Module):
530
  self.self_attn = OLMOE_ATTENTION_CLASSES[config._attn_implementation](
531
  config=config, layer_idx=layer_idx
532
  )
533
- self.mlp = OlmoeSparseMoeBlock(config)
534
  self.input_layernorm = OlmoeRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
535
  self.post_attention_layernorm = OlmoeRMSNorm(
536
  config.hidden_size, eps=config.rms_norm_eps
 
530
  self.self_attn = OLMOE_ATTENTION_CLASSES[config._attn_implementation](
531
  config=config, layer_idx=layer_idx
532
  )
533
+ self.mlp = OlmoeSparseMoeBlock(config, layer_idx)
534
  self.input_layernorm = OlmoeRMSNorm(config.hidden_size, eps=config.rms_norm_eps)
535
  self.post_attention_layernorm = OlmoeRMSNorm(
536
  config.hidden_size, eps=config.rms_norm_eps