Update modeling_deepseek_v32.py
Browse files
- modeling_deepseek_v32.py +1 -1
modeling_deepseek_v32.py
CHANGED
|
@@ -663,7 +663,7 @@ class DeepseekV32DecoderLayer(GradientCheckpointingLayer):
|
|
| 663 |
self.hidden_size = config.hidden_size
|
| 664 |
self.self_attn = DeepseekV32Attention(config, layer_idx)
|
| 665 |
|
| 666 |
-
if config.
|
| 667 |
self.mlp = DeepseekV32MoE(config)
|
| 668 |
else:
|
| 669 |
self.mlp = DeepseekV32MLP(config)
|
|
|
|
| 663 |
self.hidden_size = config.hidden_size
|
| 664 |
self.self_attn = DeepseekV32Attention(config, layer_idx)
|
| 665 |
|
| 666 |
+
if config.first_k_dense_replace < layer_idx:
|
| 667 |
self.mlp = DeepseekV32MoE(config)
|
| 668 |
else:
|
| 669 |
self.mlp = DeepseekV32MLP(config)
|