Move everything into the ExpertTracking class
Browse files- scripts/evalexperts.py +98 -3
scripts/evalexperts.py
CHANGED
|
@@ -77,7 +77,7 @@ class ExpertTrackingHFLM(HFLM):
|
|
| 77 |
)
|
| 78 |
|
| 79 |
# Update statistics
|
| 80 |
-
self.update_expert_stats(
|
| 81 |
layer_idx=layer_idx,
|
| 82 |
topk_experts=topk_experts,
|
| 83 |
topk_probs=topk_probs,
|
|
@@ -89,7 +89,7 @@ class ExpertTrackingHFLM(HFLM):
|
|
| 89 |
|
| 90 |
return expert_hook
|
| 91 |
|
| 92 |
-
def update_expert_stats(self, layer_idx: int, topk_experts: torch.Tensor,
|
| 93 |
topk_probs: torch.Tensor, num_regular_experts: int,
|
| 94 |
num_small_experts: int, batch_size: int, seq_len: int):
|
| 95 |
"""Update expert usage statistics."""
|
|
@@ -143,7 +143,102 @@ class ExpertTrackingHFLM(HFLM):
|
|
| 143 |
if expert_idx not in self.expert_stats['small_expert_usage']:
|
| 144 |
self.expert_stats['small_expert_usage'][expert_idx] = 0
|
| 145 |
self.expert_stats['small_expert_usage'][expert_idx] += count
|
| 146 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 147 |
def _update_expert_stats(self, layer_idx: int, topk_experts: torch.Tensor,
|
| 148 |
topk_probs: torch.Tensor, num_regular_experts: int,
|
| 149 |
num_small_experts: int, batch_size: int, seq_len: int):
|
|
|
|
| 77 |
)
|
| 78 |
|
| 79 |
# Update statistics
|
| 80 |
+
self.update_expert_stats(
|
| 81 |
layer_idx=layer_idx,
|
| 82 |
topk_experts=topk_experts,
|
| 83 |
topk_probs=topk_probs,
|
|
|
|
| 89 |
|
| 90 |
return expert_hook
|
| 91 |
|
| 92 |
+
def update_expert_stats(self, layer_idx: int, topk_experts: torch.Tensor,
|
| 93 |
topk_probs: torch.Tensor, num_regular_experts: int,
|
| 94 |
num_small_experts: int, batch_size: int, seq_len: int):
|
| 95 |
"""Update expert usage statistics."""
|
|
|
|
| 143 |
if expert_idx not in self.expert_stats['small_expert_usage']:
|
| 144 |
self.expert_stats['small_expert_usage'][expert_idx] = 0
|
| 145 |
self.expert_stats['small_expert_usage'][expert_idx] += count
|
| 146 |
+
|
| 147 |
+
def get_expert_stats(self) -> Dict[str, Any]:
    """Return expert usage statistics in a serializable format.

    Returns:
        dict with keys 'total_tokens', 'regular_expert_usage',
        'small_expert_usage' and 'layer_stats'. Each per-expert entry
        holds the raw activation count and its percentage of all expert
        activations (total_tokens * top_k). Percentages are 0.0 when no
        tokens have been processed yet (avoids ZeroDivisionError).
    """
    total_tokens = self.expert_stats['total_tokens']
    # NOTE(review): assumes the model config exposes `top_k`; falls back to 1.
    # Hoisted out of the loops — it is loop-invariant.
    top_k = getattr(self.model.config, 'top_k', 1)
    total_activations = total_tokens * top_k

    def _pct(count: int) -> float:
        # Share of all expert activations; 0.0 before any tokens were tracked,
        # so calling this accessor on a fresh tracker no longer raises.
        return count / total_activations * 100 if total_activations else 0.0

    stats = {
        'total_tokens': total_tokens,
        'regular_expert_usage': {},
        'small_expert_usage': {},
        'layer_stats': {}
    }

    # Convert regular expert usage
    for expert_idx, count in self.expert_stats['regular_expert_usage'].items():
        stats['regular_expert_usage'][expert_idx] = {
            'count': count,
            'percentage': _pct(count)
        }

    # Convert small expert usage if they exist
    if self.expert_stats['small_expert_usage']:
        for expert_idx, count in self.expert_stats['small_expert_usage'].items():
            stats['small_expert_usage'][expert_idx] = {
                'count': count,
                'percentage': _pct(count)
            }

    # Convert layer stats (values are passed through as stored by the tracker;
    # counts/load are presumably lists/tensors built by update_expert_stats —
    # TODO confirm they are JSON-serializable at this point).
    for layer_idx, layer_stat in self.expert_stats['layer_stats'].items():
        stats['layer_stats'][layer_idx] = {
            'total_tokens': layer_stat['total_tokens'],
            'regular_expert_counts': layer_stat['regular_expert_counts'],
            'regular_expert_load': layer_stat['regular_expert_load'],
            'small_expert_counts': layer_stat['small_expert_counts'],
            'small_expert_load': layer_stat['small_expert_load']
        }

    return stats
|
| 182 |
+
|
| 183 |
+
def print_expert_stats(self) -> None:
    """Dump collected expert-usage statistics to stdout.

    Prints an overall summary (regular and small experts) followed by a
    per-layer breakdown of activation counts and routing load. Emits a
    short notice and returns early when no tokens have been tracked yet.
    """
    tracked = self.expert_stats
    if not tracked['total_tokens']:
        print("No expert usage statistics collected.")
        return

    tokens_seen = tracked['total_tokens']
    # NOTE(review): assumes the model config exposes `top_k`; defaults to 1.
    k = getattr(self.model.config, 'top_k', 1)
    activations = tokens_seen * k
    rule = "=" * 80

    print("\n" + rule)
    print("EXPERT USAGE STATISTICS")
    print(rule)
    print(f"Total tokens processed: {tokens_seen:,}")
    print(f"Total expert activations (top-{k}): {activations:,}")
    print("\nOverall Expert Usage:")

    # Overall view: one line per expert, ordered by expert index.
    if tracked['regular_expert_usage']:
        print("\nRegular Experts:")
        for idx, hits in sorted(tracked['regular_expert_usage'].items()):
            share = hits / activations * 100
            print(f"  Expert {idx}: {hits:,} ({share:.2f}%)")

    # Small experts only appear when the model actually has them.
    if tracked['small_expert_usage']:
        print("\nSmall Experts:")
        for idx, hits in sorted(tracked['small_expert_usage'].items()):
            share = hits / activations * 100
            print(f"  Small Expert {idx}: {hits:,} ({share:.2f}%)")

    # Per-layer view: activation counts plus the stored load values.
    print("\nLayer-wise Statistics:")
    for layer_idx, per_layer in tracked['layer_stats'].items():
        layer_tokens = per_layer['total_tokens']
        print(f"\nLayer {layer_idx}:")
        print(f"  Tokens processed: {layer_tokens:,}")

        print("  Regular Experts:")
        pairs = zip(per_layer['regular_expert_counts'],
                    per_layer['regular_expert_load'])
        for idx, (hits, load) in enumerate(pairs):
            hit_share = hits / (layer_tokens * k) * 100
            load_share = load / layer_tokens * 100
            print(f"    Expert {idx}: Count={hits:,} ({hit_share:.2f}%), Load={load:.2f} ({load_share:.2f}%)")

        # A None counts entry marks a layer without small experts.
        if per_layer['small_expert_counts'] is not None:
            print("  Small Experts:")
            pairs = zip(per_layer['small_expert_counts'],
                        per_layer['small_expert_load'])
            for idx, (hits, load) in enumerate(pairs):
                hit_share = hits / (layer_tokens * k) * 100
                load_share = load / layer_tokens * 100
                print(f"    Small Expert {idx}: Count={hits:,} ({hit_share:.2f}%), Load={load:.2f} ({load_share:.2f}%)")

    print(rule + "\n")
|
| 242 |
def _update_expert_stats(self, layer_idx: int, topk_experts: torch.Tensor,
|
| 243 |
topk_probs: torch.Tensor, num_regular_experts: int,
|
| 244 |
num_small_experts: int, batch_size: int, seq_len: int):
|