Update modeling_super_linear.py
Browse files
- modeling_super_linear.py (+3 -3)
modeling_super_linear.py
CHANGED
|
@@ -362,10 +362,10 @@ class SparseNoisyMoE(nn.Module):
|
|
| 362 |
|
| 363 |
# Get expert probabilities and convert to numpy
|
| 364 |
probs_np = expert_probs.detach().cpu().numpy()
|
| 365 |
-
|
| 366 |
# Create a nicer figure with a modern style
|
| 367 |
plt.style.use('ggplot')
|
| 368 |
-
|
|
|
|
| 369 |
ax = plt.subplot(111)
|
| 370 |
|
| 371 |
# Create color gradient based on probability values
|
|
@@ -374,7 +374,6 @@ class SparseNoisyMoE(nn.Module):
|
|
| 374 |
# Plot bars with more attractive styling
|
| 375 |
bars = plt.bar(range(len(probs_np)), probs_np, color=colors, width=0.6,
|
| 376 |
edgecolor='black', linewidth=0.5, alpha=0.85)
|
| 377 |
-
|
| 378 |
# Add value annotations on top of each bar
|
| 379 |
for i, bar in enumerate(bars):
|
| 380 |
height = bar.get_height()
|
|
@@ -420,6 +419,7 @@ class SparseNoisyMoE(nn.Module):
|
|
| 420 |
plt.close()
|
| 421 |
print(expert_probs.shape)
|
| 422 |
|
|
|
|
| 423 |
if get_prob:
|
| 424 |
expert_probs = F.softmax(self.gate_outputs, dim=1)
|
| 425 |
print(expert_probs.shape)
|
|
|
|
| 362 |
|
| 363 |
# Get expert probabilities and convert to numpy
|
| 364 |
probs_np = expert_probs.detach().cpu().numpy()
|
|
|
|
| 365 |
# Create a nicer figure with a modern style
|
| 366 |
plt.style.use('ggplot')
|
| 367 |
+
plt.figure(figsize=(12, 8), dpi=120)
|
| 368 |
+
plt.subplot(111)
|
| 369 |
ax = plt.subplot(111)
|
| 370 |
|
| 371 |
# Create color gradient based on probability values
|
|
|
|
| 374 |
# Plot bars with more attractive styling
|
| 375 |
bars = plt.bar(range(len(probs_np)), probs_np, color=colors, width=0.6,
|
| 376 |
edgecolor='black', linewidth=0.5, alpha=0.85)
|
|
|
|
| 377 |
# Add value annotations on top of each bar
|
| 378 |
for i, bar in enumerate(bars):
|
| 379 |
height = bar.get_height()
|
|
|
|
| 419 |
plt.close()
|
| 420 |
print(expert_probs.shape)
|
| 421 |
|
| 422 |
+
|
| 423 |
if get_prob:
|
| 424 |
expert_probs = F.softmax(self.gate_outputs, dim=1)
|
| 425 |
print(expert_probs.shape)
|