SequentialLearning
/

SuperLinear

@@ -355,28 +355,66 @@ class SparseNoisyMoE(nn.Module):
         load_balancing_loss = self.calculate_load_balancing_loss(self.gate_outputs, batch_size)
-        expert_probs = F.softmax(self.gate_outputs, dim=1)
         expert_probs = expert_probs[1,:]
         # Plot the expert probabilities
         import matplotlib.pyplot as plt
-        plt.figure(figsize=(10, 6))
-        plt.bar(range(len(expert_probs)), expert_probs.detach().cpu().numpy())
-        plt.xlabel('Expert Index')
-        plt.ylabel('Probability')
-        plt.title('Expert Selection Probabilities')
-        # Create more descriptive x-axis labels
         if hasattr(self, 'experts') and isinstance(getattr(self, 'experts', None), dict):
             # If experts are stored in a dictionary with meaningful keys
             expert_names = list(self.experts.keys())
-            plt.xticks(range(len(expert_probs)), expert_names, rotation=45)
         else:
             # Default numbering if expert names aren't available
-            plt.xticks(range(len(expert_probs)), [f'Expert {i}' for i in range(len(expert_probs))])
         plt.tight_layout()
-        plt.savefig('expert_probabilities.png')
         plt.close()
         print(expert_probs.shape)

         load_balancing_loss = self.calculate_load_balancing_loss(self.gate_outputs, batch_size)
         expert_probs = expert_probs[1,:]
         # Plot the expert probabilities
         import matplotlib.pyplot as plt
+        # Get expert probabilities and convert to numpy
+        probs_np = expert_probs.detach().cpu().numpy()
+        # Create a nicer figure with a modern style
+        plt.style.use('ggplot')
+        fig, ax = plt.figure(figsize=(12, 8), dpi=120)
+        ax = plt.subplot(111)
+        # Create color gradient based on probability values
+        colors = plt.cm.viridis(probs_np)
+        # Plot bars with more attractive styling
+        bars = plt.bar(range(len(probs_np)), probs_np, color=colors, width=0.6,
+                       edgecolor='black', linewidth=0.5, alpha=0.85)
+        # Add value annotations on top of each bar
+        for i, bar in enumerate(bars):
+            height = bar.get_height()
+            plt.text(bar.get_x() + bar.get_width()/2., height + 0.01,
+                     f'{height:.3f}', ha='center', va='bottom', fontsize=9,
+                     rotation=0, fontweight='bold')
+        # Add expert names to x-axis
         if hasattr(self, 'experts') and isinstance(getattr(self, 'experts', None), dict):
             # If experts are stored in a dictionary with meaningful keys
             expert_names = list(self.experts.keys())
+            plt.xticks(range(len(probs_np)), expert_names, rotation=45, ha='right')
         else:
             # Default numbering if expert names aren't available
+            plt.xticks(range(len(probs_np)), [f'Expert {i}' for i in range(len(probs_np))])
+        # Add grid for better readability
+        plt.grid(axis='y', linestyle='--', alpha=0.7)
+        # Add timestamp to title
+        timestamp = datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        plt.title(f'Expert Selection Probabilities\n{timestamp}', fontsize=14, fontweight='bold')
+        plt.xlabel('Expert Models', fontsize=12)
+        plt.ylabel('Selection Probability', fontsize=12)
+        # Highlight the most probable expert
+        max_idx = np.argmax(probs_np)
+        bars[max_idx].set_color('orangered')
+        bars[max_idx].set_edgecolor('black')
+        bars[max_idx].set_linewidth(1.5)
+        # Add stats in a text box
+        textstr = f'Max: {probs_np.max():.4f} (Expert {max_idx})\n'
+        textstr += f'Min: {probs_np.min():.4f}\n'
+        textstr += f'Mean: {probs_np.mean():.4f}'
+        props = dict(boxstyle='round', facecolor='wheat', alpha=0.5)
+        plt.text(0.02, 0.97, textstr, transform=plt.gca().transAxes, fontsize=10,
+                 verticalalignment='top', bbox=props)
         plt.tight_layout()
+        plt.savefig('expert_probabilities.png', bbox_inches='tight')
         plt.close()
         print(expert_probs.shape)