SequentialLearning
/

SuperLinear

mixture-of-experts

Model card · Files and versions

razmars committed on Apr 28, 2025

Commit

6ab5ff7

·

verified ·

1 Parent(s): 57fb83a

Update modeling_super_linear.py

Files changed (1) hide show

modeling_super_linear.py +3 -3

modeling_super_linear.py CHANGED Viewed

@@ -362,10 +362,10 @@ class SparseNoisyMoE(nn.Module):
         # Get expert probabilities and convert to numpy
         probs_np = expert_probs.detach().cpu().numpy()
         # Create a nicer figure with a modern style
         plt.style.use('ggplot')
-        fig, ax = plt.figure(figsize=(12, 8), dpi=120)
         ax = plt.subplot(111)
         # Create color gradient based on probability values
@@ -374,7 +374,6 @@ class SparseNoisyMoE(nn.Module):
         # Plot bars with more attractive styling
         bars = plt.bar(range(len(probs_np)), probs_np, color=colors, width=0.6,
                        edgecolor='black', linewidth=0.5, alpha=0.85)
         # Add value annotations on top of each bar
         for i, bar in enumerate(bars):
             height = bar.get_height()
@@ -420,6 +419,7 @@ class SparseNoisyMoE(nn.Module):
         plt.close()
         print(expert_probs.shape)
         if get_prob:
             expert_probs = F.softmax(self.gate_outputs, dim=1)
             print(expert_probs.shape)

         # Get expert probabilities and convert to numpy
         probs_np = expert_probs.detach().cpu().numpy()
         # Create a nicer figure with a modern style
         plt.style.use('ggplot')
+        plt.figure(figsize=(12, 8), dpi=120)
+        plt.subplot(111)
         ax = plt.subplot(111)
         # Create color gradient based on probability values
         # Plot bars with more attractive styling
         bars = plt.bar(range(len(probs_np)), probs_np, color=colors, width=0.6,
                        edgecolor='black', linewidth=0.5, alpha=0.85)
         # Add value annotations on top of each bar
         for i, bar in enumerate(bars):
             height = bar.get_height()
         plt.close()
         print(expert_probs.shape)
         if get_prob:
             expert_probs = F.softmax(self.gate_outputs, dim=1)
             print(expert_probs.shape)