katuni4ka committed on
Commit
b9ab571
·
verified ·
1 Parent(s): 13aef71

Update modeling_deepseek_v32.py

Browse files
Files changed (1) hide show
  1. modeling_deepseek_v32.py +1 -1
modeling_deepseek_v32.py CHANGED
@@ -663,7 +663,7 @@ class DeepseekV32DecoderLayer(GradientCheckpointingLayer):
663
  self.hidden_size = config.hidden_size
664
  self.self_attn = DeepseekV32Attention(config, layer_idx)
665
 
666
- if config.mlp_layer_types[layer_idx] == "sparse":
667
  self.mlp = DeepseekV32MoE(config)
668
  else:
669
  self.mlp = DeepseekV32MLP(config)
 
663
  self.hidden_size = config.hidden_size
664
  self.self_attn = DeepseekV32Attention(config, layer_idx)
665
 
666
+ if config.first_k_dense_replace < layer_idx:
667
  self.mlp = DeepseekV32MoE(config)
668
  else:
669
  self.mlp = DeepseekV32MLP(config)