Spaces:

ahadnagy
/

inference-performance-dashboard

Sleeping

App Files Files Community

Ákos Hadnagy committed on Sep 18

Commit

954d017

1 Parent(s): 54114a6

Add human-readable scenario names

Browse files

Files changed (2) hide show

app.py +43 -7
scenario_mappings.json +11 -0

app.py CHANGED Viewed

@@ -15,6 +15,7 @@ import polars as pl
 from datetime import datetime
 from typing import List, Dict, Any, Optional, Tuple
 import logging
 from benchmark_data_reader import BenchmarkDataReader
@@ -29,6 +30,7 @@ class BenchmarkDashboard:
         """Initialize the dashboard and load data."""
         self.reader = BenchmarkDataReader()
         self.df = None
         self.load_data()
     def load_data(self) -> None:
@@ -48,13 +50,39 @@ class BenchmarkDashboard:
             logger.error(f"Error loading data: {e}")
             self.df_pandas = pd.DataFrame()
     def get_filter_options(self) -> Tuple[List[str], List[str], List[str], List[str], str, str]:
         """Get unique values for filter dropdowns and date range."""
         if self.df_pandas.empty:
             return [], [], [], [], "", ""
         models = sorted(self.df_pandas['model_name'].dropna().unique().tolist())
-        scenarios = sorted(self.df_pandas['scenario_name'].dropna().unique().tolist())
         gpus = sorted(self.df_pandas['gpu_name'].dropna().unique().tolist())
         # Get benchmark runs grouped by date (or commit_id if available)
@@ -108,7 +136,9 @@ class BenchmarkDashboard:
         if selected_models:
             filtered_df = filtered_df[filtered_df['model_name'].isin(selected_models)]
         if selected_scenarios:
-            filtered_df = filtered_df[filtered_df['scenario_name'].isin(selected_scenarios)]
         if selected_gpus:
             filtered_df = filtered_df[filtered_df['gpu_name'].isin(selected_gpus)]
@@ -161,16 +191,20 @@ class BenchmarkDashboard:
                              xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
             return fig
         # Create bar chart comparing performance across models and scenarios
         fig = px.bar(
-            filtered_df,
-            x='scenario_name',
             y=metric,
             color='model_name',
             title=f'Performance Comparison: {metric.replace("_", " ").title()}',
             labels={
                 metric: metric.replace("_", " ").title(),
-                'scenario_name': 'Benchmark Scenario',
                 'model_name': 'Model'
             },
             hover_data=['gpu_name', 'timestamp']
@@ -209,15 +243,17 @@ class BenchmarkDashboard:
                 # Only show trends if we have multiple data points for this model-scenario combination
                 if len(scenario_data) > 1:
                     fig.add_trace(go.Scatter(
                         x=scenario_data['timestamp'],
                         y=scenario_data[metric],
                         mode='lines+markers',
-                        name=f'{model} - {scenario}',
                         line=dict(width=2),
                         marker=dict(size=6),
                         hovertemplate=f'<b>{model}</b><br>' +
-                                     f'Scenario: {scenario}<br>' +
                                      'Time: %{x}<br>' +
                                      f'{metric.replace("_", " ").title()}: %{{y}}<br>' +
                                      '<extra></extra>'

 from datetime import datetime
 from typing import List, Dict, Any, Optional, Tuple
 import logging
+import json
 from benchmark_data_reader import BenchmarkDataReader
         """Initialize the dashboard and load data."""
         self.reader = BenchmarkDataReader()
         self.df = None
+        self.scenario_mappings = self.load_scenario_mappings()
         self.load_data()
     def load_data(self) -> None:
             logger.error(f"Error loading data: {e}")
             self.df_pandas = pd.DataFrame()
+    def load_scenario_mappings(self) -> Dict[str, str]:
+        """Load the raw-to-readable scenario name mappings from JSON.
+
+        Reads ``scenario_mappings.json`` (resolved against the current
+        working directory) and returns its top-level object.
+
+        Returns:
+            The mapping parsed from the file, or an empty dict when the file
+            cannot be read, is not valid JSON, or does not hold a JSON object.
+        """
+        try:
+            # Explicit encoding so decoding does not depend on the platform's locale default.
+            with open('scenario_mappings.json', 'r', encoding='utf-8') as f:
+                mappings = json.load(f)
+        except (OSError, ValueError) as e:
+            # OSError: missing or unreadable file. ValueError: malformed JSON
+            # (json.JSONDecodeError) or undecodable bytes (UnicodeDecodeError).
+            logger.warning(f"Could not load scenario mappings: {e}")
+            return {}
+        if not isinstance(mappings, dict):
+            # The lookup helpers call .get() and .items() on this value, so a
+            # JSON list or scalar would only fail later, far from the cause.
+            logger.warning(
+                "Could not load scenario mappings: expected a JSON object, "
+                f"got {type(mappings).__name__}"
+            )
+            return {}
+        return mappings
+    def get_readable_scenario_name(self, scenario_name: str) -> str:
+        """Translate a raw scenario name into its display label.
+
+        Names without an entry in ``self.scenario_mappings`` are returned
+        unchanged.
+        """
+        known = self.scenario_mappings
+        if scenario_name in known:
+            return known[scenario_name]
+        return scenario_name
+    def get_raw_scenario_name(self, readable_name: str) -> str:
+        """Map a display label back to the raw scenario name behind it.
+
+        The first key in ``self.scenario_mappings`` whose value equals
+        ``readable_name`` wins. When no value matches, ``readable_name`` is
+        returned as-is on the assumption that it is already a raw name.
+        """
+        matching_raw_names = (
+            raw
+            for raw, label in self.scenario_mappings.items()
+            if label == readable_name
+        )
+        # next() with a default yields the first match or falls back to the input.
+        return next(matching_raw_names, readable_name)
     def get_filter_options(self) -> Tuple[List[str], List[str], List[str], List[str], str, str]:
         """Get unique values for filter dropdowns and date range."""
         if self.df_pandas.empty:
             return [], [], [], [], "", ""
         models = sorted(self.df_pandas['model_name'].dropna().unique().tolist())
+        # Get scenarios with human-readable names for display
+        raw_scenarios = sorted(self.df_pandas['scenario_name'].dropna().unique().tolist())
+        scenarios = [self.get_readable_scenario_name(scenario) for scenario in raw_scenarios]
         gpus = sorted(self.df_pandas['gpu_name'].dropna().unique().tolist())
         # Get benchmark runs grouped by date (or commit_id if available)
         if selected_models:
             filtered_df = filtered_df[filtered_df['model_name'].isin(selected_models)]
         if selected_scenarios:
+            # Convert human-readable scenario names back to raw names for filtering
+            raw_scenarios = [self.get_raw_scenario_name(scenario) for scenario in selected_scenarios]
+            filtered_df = filtered_df[filtered_df['scenario_name'].isin(raw_scenarios)]
         if selected_gpus:
             filtered_df = filtered_df[filtered_df['gpu_name'].isin(selected_gpus)]
                              xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
             return fig
+        # Add human-readable scenario names for display
+        plot_df = filtered_df.copy()
+        plot_df['scenario_display'] = plot_df['scenario_name'].apply(self.get_readable_scenario_name)
         # Create bar chart comparing performance across models and scenarios
         fig = px.bar(
+            plot_df,
+            x='scenario_display',
             y=metric,
             color='model_name',
             title=f'Performance Comparison: {metric.replace("_", " ").title()}',
             labels={
                 metric: metric.replace("_", " ").title(),
+                'scenario_display': 'Benchmark Scenario',
                 'model_name': 'Model'
             },
             hover_data=['gpu_name', 'timestamp']
                 # Only show trends if we have multiple data points for this model-scenario combination
                 if len(scenario_data) > 1:
+                    # Use human-readable scenario name for display
+                    readable_scenario = self.get_readable_scenario_name(scenario)
                     fig.add_trace(go.Scatter(
                         x=scenario_data['timestamp'],
                         y=scenario_data[metric],
                         mode='lines+markers',
+                        name=f'{model} - {readable_scenario}',
                         line=dict(width=2),
                         marker=dict(size=6),
                         hovertemplate=f'<b>{model}</b><br>' +
+                                     f'Scenario: {readable_scenario}<br>' +
                                      'Time: %{x}<br>' +
                                      f'{metric.replace("_", " ").title()}: %{{y}}<br>' +
                                      '<extra></extra>'

scenario_mappings.json ADDED Viewed

	@@ -0,0 +1,11 @@

+{
+  "eager_eager_attn": "Eager Execution + Eager Attention",
+  "eager_sdpa_default": "Eager Execution + SDPA Default",
+  "eager_sdpa_math": "Eager Execution + SDPA Math Backend",
+  "eager_sdpa_flash_attention": "Eager Execution + SDPA Flash Attention",
+  "eager_sdpa_efficient_attention": "Eager Execution + SDPA Efficient Attention",
+  "compiled_compile_max-autotune_eager_attn": "Compiled (Max-Autotune) + Eager Attention",
+  "compiled_compile_max-autotune_sdpa_default": "Compiled (Max-Autotune) + SDPA Default",
+  "compiled_compile_max-autotune_sdpa_math": "Compiled (Max-Autotune) + SDPA Math Backend",
+  "compiled_compile_max-autotune_sdpa_efficient_attention": "Compiled (Max-Autotune) + SDPA Efficient Attention"
+}