Update modeling_super_linear.py
Browse files: modeling_super_linear.py (+11 -0)
modeling_super_linear.py
CHANGED
|
@@ -356,6 +356,17 @@ class SparseNoisyMoE(nn.Module):
|
|
| 356 |
load_balancing_loss = self.calculate_load_balancing_loss(self.gate_outputs, batch_size)
|
| 357 |
|
| 358 |
expert_probs = F.softmax(self.gate_outputs, dim=1)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 359 |
print(expert_probs.shape)
|
| 360 |
|
| 361 |
if get_prob:
|
|
|
|
| 356 |
load_balancing_loss = self.calculate_load_balancing_loss(self.gate_outputs, batch_size)
|
| 357 |
|
| 358 |
expert_probs = F.softmax(self.gate_outputs, dim=1)
|
| 359 |
+
import matplotlib.pyplot as plt
|
| 360 |
+
|
| 361 |
+
plt.figure(figsize=(10, 6))
|
| 362 |
+
plt.bar(range(len(expert_probs)), expert_probs.detach().cpu().numpy())
|
| 363 |
+
plt.xlabel('Expert Index')
|
| 364 |
+
plt.ylabel('Probability')
|
| 365 |
+
plt.title('Expert Selection Probabilities')
|
| 366 |
+
plt.xticks(range(len(expert_probs)), [f'Expert {i}' for i in range(len(expert_probs))])
|
| 367 |
+
plt.tight_layout()
|
| 368 |
+
plt.savefig('expert_probabilities.png')
|
| 369 |
+
plt.close()
|
| 370 |
print(expert_probs.shape)
|
| 371 |
|
| 372 |
if get_prob:
|