if001
/

DeepSeek-R1-0528-tiny

Text Generation

text-generation-inference

Model card Files Files and versions

if001 committed on Aug 15

Commit

f08d3a2

·

verified ·

1 Parent(s): e47b14c

upload

Files changed (1) hide show

modeling_deepseek.py +2 -1

modeling_deepseek.py CHANGED Viewed

@@ -435,7 +435,7 @@ class MoEGate(nn.Module):
         ### select top-k experts
         if self.topk_method == "noaux_tc":
-            assert not self.training
             scores_for_choice = scores.view(bsz * seq_len, -1) + self.e_score_correction_bias.unsqueeze(0)
             group_scores = (
                 scores_for_choice.view(bsz * seq_len, self.n_group, -1).topk(2, dim=-1)[0].sum(dim = -1)
@@ -537,6 +537,7 @@ class DeepseekV3MoE(nn.Module):
         """
         self.ep_size = 1の想定
         """
         cnts = topk_ids.new_zeros((topk_ids.shape[0], len(self.experts)))
         cnts.scatter_(1, topk_ids, 1)
         tokens_per_expert = cnts.sum(dim=0)

         ### select top-k experts
         if self.topk_method == "noaux_tc":
+            # assert not self.training ## for lora training
             scores_for_choice = scores.view(bsz * seq_len, -1) + self.e_score_correction_bias.unsqueeze(0)
             group_scores = (
                 scores_for_choice.view(bsz * seq_len, self.n_group, -1).topk(2, dim=-1)[0].sum(dim = -1)
         """
         self.ep_size = 1の想定
         """
+        assert self.ep_size == 1
         cnts = topk_ids.new_zeros((topk_ids.shape[0], len(self.experts)))
         cnts.scatter_(1, topk_ids, 1)
         tokens_per_expert = cnts.sum(dim=0)