Ákos Hadnagy committed
Commit 954d017 · 1 Parent(s): 54114a6

Add human-readable scenario names
Files changed (2)
  1. app.py +43 -7
  2. scenario_mappings.json +11 -0
app.py CHANGED
@@ -15,6 +15,7 @@ import polars as pl
 from datetime import datetime
 from typing import List, Dict, Any, Optional, Tuple
 import logging
+import json
 
 from benchmark_data_reader import BenchmarkDataReader
 
@@ -29,6 +30,7 @@ class BenchmarkDashboard:
         """Initialize the dashboard and load data."""
         self.reader = BenchmarkDataReader()
         self.df = None
+        self.scenario_mappings = self.load_scenario_mappings()
         self.load_data()
 
     def load_data(self) -> None:
@@ -48,13 +50,39 @@ class BenchmarkDashboard:
             logger.error(f"Error loading data: {e}")
             self.df_pandas = pd.DataFrame()
 
+    def load_scenario_mappings(self) -> Dict[str, str]:
+        """Load scenario name mappings from JSON file."""
+        try:
+            with open('scenario_mappings.json', 'r') as f:
+                return json.load(f)
+        except Exception as e:
+            logger.warning(f"Could not load scenario mappings: {e}")
+            return {}
+
+    def get_readable_scenario_name(self, scenario_name: str) -> str:
+        """Get human-readable scenario name or return original if not mapped."""
+        return self.scenario_mappings.get(scenario_name, scenario_name)
+
+    def get_raw_scenario_name(self, readable_name: str) -> str:
+        """Convert human-readable scenario name back to raw scenario name."""
+        # Find the raw name that maps to this readable name
+        for raw_name, mapped_name in self.scenario_mappings.items():
+            if mapped_name == readable_name:
+                return raw_name
+        # If not found in mappings, assume it's already a raw name
+        return readable_name
+
     def get_filter_options(self) -> Tuple[List[str], List[str], List[str], List[str], str, str]:
         """Get unique values for filter dropdowns and date range."""
         if self.df_pandas.empty:
             return [], [], [], [], "", ""
 
         models = sorted(self.df_pandas['model_name'].dropna().unique().tolist())
-        scenarios = sorted(self.df_pandas['scenario_name'].dropna().unique().tolist())
+
+        # Get scenarios with human-readable names for display
+        raw_scenarios = sorted(self.df_pandas['scenario_name'].dropna().unique().tolist())
+        scenarios = [self.get_readable_scenario_name(scenario) for scenario in raw_scenarios]
+
         gpus = sorted(self.df_pandas['gpu_name'].dropna().unique().tolist())
 
         # Get benchmark runs grouped by date (or commit_id if available)
@@ -108,7 +136,9 @@ class BenchmarkDashboard:
         if selected_models:
             filtered_df = filtered_df[filtered_df['model_name'].isin(selected_models)]
         if selected_scenarios:
-            filtered_df = filtered_df[filtered_df['scenario_name'].isin(selected_scenarios)]
+            # Convert human-readable scenario names back to raw names for filtering
+            raw_scenarios = [self.get_raw_scenario_name(scenario) for scenario in selected_scenarios]
+            filtered_df = filtered_df[filtered_df['scenario_name'].isin(raw_scenarios)]
        if selected_gpus:
             filtered_df = filtered_df[filtered_df['gpu_name'].isin(selected_gpus)]
 
@@ -161,16 +191,20 @@ class BenchmarkDashboard:
                           xref="paper", yref="paper", x=0.5, y=0.5, showarrow=False)
             return fig
 
+        # Add human-readable scenario names for display
+        plot_df = filtered_df.copy()
+        plot_df['scenario_display'] = plot_df['scenario_name'].apply(self.get_readable_scenario_name)
+
         # Create bar chart comparing performance across models and scenarios
         fig = px.bar(
-            filtered_df,
-            x='scenario_name',
+            plot_df,
+            x='scenario_display',
             y=metric,
             color='model_name',
             title=f'Performance Comparison: {metric.replace("_", " ").title()}',
             labels={
                 metric: metric.replace("_", " ").title(),
-                'scenario_name': 'Benchmark Scenario',
+                'scenario_display': 'Benchmark Scenario',
                 'model_name': 'Model'
             },
             hover_data=['gpu_name', 'timestamp']
@@ -209,15 +243,17 @@ class BenchmarkDashboard:
 
             # Only show trends if we have multiple data points for this model-scenario combination
             if len(scenario_data) > 1:
+                # Use human-readable scenario name for display
+                readable_scenario = self.get_readable_scenario_name(scenario)
                 fig.add_trace(go.Scatter(
                     x=scenario_data['timestamp'],
                     y=scenario_data[metric],
                     mode='lines+markers',
-                    name=f'{model} - {scenario}',
+                    name=f'{model} - {readable_scenario}',
                     line=dict(width=2),
                     marker=dict(size=6),
                     hovertemplate=f'<b>{model}</b><br>' +
-                                  f'Scenario: {scenario}<br>' +
+                                  f'Scenario: {readable_scenario}<br>' +
                                   'Time: %{x}<br>' +
                                   f'{metric.replace("_", " ").title()}: %{{y}}<br>' +
                                   '<extra></extra>'
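For reference, the two new helpers form a simple bidirectional mapping: get_readable_scenario_name is a plain dict lookup that falls back to the raw name, and get_raw_scenario_name inverts the mapping by scanning its values. Below is a minimal standalone sketch of that round trip; the two-entry mapping is inlined here for illustration only, while the dashboard itself loads the full table from scenario_mappings.json.

    # Minimal sketch of the round trip implemented by the new helpers.
    # The inlined mapping stands in for scenario_mappings.json.
    scenario_mappings = {
        "eager_eager_attn": "Eager Execution + Eager Attention",
        "eager_sdpa_default": "Eager Execution + SDPA Default",
    }

    def get_readable_scenario_name(scenario_name: str) -> str:
        # Dict lookup; unmapped raw names pass through unchanged.
        return scenario_mappings.get(scenario_name, scenario_name)

    def get_raw_scenario_name(readable_name: str) -> str:
        # Linear scan over the mapping's values; assumes display names are unique.
        for raw_name, mapped_name in scenario_mappings.items():
            if mapped_name == readable_name:
                return raw_name
        return readable_name  # not a display name, so treat it as already raw

    assert get_readable_scenario_name("eager_eager_attn") == "Eager Execution + Eager Attention"
    assert get_raw_scenario_name("Eager Execution + Eager Attention") == "eager_eager_attn"
    assert get_raw_scenario_name("unknown_scenario") == "unknown_scenario"

Because the reverse lookup returns the first raw name whose display value matches, the scheme relies on the display names in scenario_mappings.json being unique.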
scenario_mappings.json ADDED
@@ -0,0 +1,11 @@
+{
+    "eager_eager_attn": "Eager Execution + Eager Attention",
+    "eager_sdpa_default": "Eager Execution + SDPA Default",
+    "eager_sdpa_math": "Eager Execution + SDPA Math Backend",
+    "eager_sdpa_flash_attention": "Eager Execution + SDPA Flash Attention",
+    "eager_sdpa_efficient_attention": "Eager Execution + SDPA Efficient Attention",
+    "compiled_compile_max-autotune_eager_attn": "Compiled (Max-Autotune) + Eager Attention",
+    "compiled_compile_max-autotune_sdpa_default": "Compiled (Max-Autotune) + SDPA Default",
+    "compiled_compile_max-autotune_sdpa_math": "Compiled (Max-Autotune) + SDPA Math Backend",
+    "compiled_compile_max-autotune_sdpa_efficient_attention": "Compiled (Max-Autotune) + SDPA Efficient Attention"
+}
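Since get_raw_scenario_name inverts this file by value, the display names must remain unique as new scenarios are added. A hypothetical startup check (not part of this commit) could enforce that invariant:

    import json
    from collections import Counter

    # Hypothetical sanity check (not in this commit): fail fast if two raw
    # scenario names map to the same display name, which would make the
    # reverse lookup in get_raw_scenario_name ambiguous.
    with open('scenario_mappings.json', 'r') as f:
        mappings = json.load(f)

    duplicates = [name for name, count in Counter(mappings.values()).items() if count > 1]
    if duplicates:
        raise ValueError(f"Duplicate display names in scenario_mappings.json: {duplicates}")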