autoprogrammer committed on
Commit
1b31893
·
verified ·
1 Parent(s): 66d473d

Update modeling_deepseek.py (comment out the `if not self.training:` guard before the `moe_infer` call in DeepseekV3MoE)

Browse files
Files changed (1) hide show
  1. modeling_deepseek.py +2 -2
modeling_deepseek.py CHANGED
@@ -527,8 +527,8 @@ class DeepseekV3MoE(nn.Module):
527
  topk_idx, topk_weight = self.gate(hidden_states)
528
  hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
529
  flat_topk_idx = topk_idx.view(-1)
530
- if not self.training:
531
- y = self.moe_infer(hidden_states, topk_idx, topk_weight).view(*orig_shape)
532
  if self.config.n_shared_experts is not None:
533
  y = y + self.shared_experts(identity)
534
  return y
 
527
  topk_idx, topk_weight = self.gate(hidden_states)
528
  hidden_states = hidden_states.view(-1, hidden_states.shape[-1])
529
  flat_topk_idx = topk_idx.view(-1)
530
+ #if not self.training:
531
+ y = self.moe_infer(hidden_states, topk_idx, topk_weight).view(*orig_shape)
532
  if self.config.n_shared_experts is not None:
533
  y = y + self.shared_experts(identity)
534
  return y