openhands openhands committed on
Commit
71ba49b
·
1 Parent(s): a4b9436

Fix column name handling in visualizations for release_date and parameter_count

Browse files

- Handle different column name formats (e.g., 'Release_Date' vs 'release_date')
- Fix score column detection to handle 'Average Score' capitalization
- Fix model column detection to handle 'Language Model' capitalization
- Fix parameter count column detection for size chart

Co-authored-by: openhands <openhands@all-hands.dev>

Files changed (1) hide show
  1. visualizations.py +61 -17
visualizations.py CHANGED
@@ -13,12 +13,19 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
13
  Create a line chart showing model performance evolution over release dates.
14
 
15
  Args:
16
- df: DataFrame with columns including 'release_date', 'Language Model', 'average score', 'openness'
17
 
18
  Returns:
19
  Plotly figure showing score evolution over time
20
  """
21
- if df.empty or 'release_date' not in df.columns:
 
 
 
 
 
 
 
22
  fig = go.Figure()
23
  fig.add_annotation(
24
  text="No release date data available",
@@ -29,7 +36,7 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
29
  return fig
30
 
31
  # Filter out rows without release dates
32
- plot_df = df[df['release_date'].notna() & (df['release_date'] != '')].copy()
33
 
34
  if plot_df.empty:
35
  fig = go.Figure()
@@ -41,15 +48,19 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
41
  )
42
  return fig
43
 
44
- # Convert release_date to datetime
45
- plot_df['release_date'] = pd.to_datetime(plot_df['release_date'], errors='coerce')
46
  plot_df = plot_df.dropna(subset=['release_date'])
47
 
48
  # Sort by release date
49
  plot_df = plot_df.sort_values('release_date')
50
 
51
- # Get the score column
52
- score_col = 'average score' if 'average score' in plot_df.columns else None
 
 
 
 
53
  if score_col is None:
54
  for col in plot_df.columns:
55
  if 'score' in col.lower() and 'average' in col.lower():
@@ -67,7 +78,13 @@ def create_evolution_over_time_chart(df: pd.DataFrame) -> go.Figure:
67
  return fig
68
 
69
  # Get model name column
70
- model_col = 'Language Model' if 'Language Model' in plot_df.columns else 'Language model'
 
 
 
 
 
 
71
 
72
  # Map openness to colors
73
  color_map = {
@@ -177,13 +194,27 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
177
  Create a scatter plot showing accuracy vs parameter count for open-weights models.
178
 
179
  Args:
180
- df: DataFrame with columns including 'parameter_count_b', 'active_parameter_count_b',
 
181
  'average score', 'openness', 'Language Model'
182
 
183
  Returns:
184
  Plotly figure showing accuracy vs model size
185
  """
186
- if df.empty or 'parameter_count_b' not in df.columns:
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  fig = go.Figure()
188
  fig.add_annotation(
189
  text="No parameter count data available",
@@ -196,9 +227,12 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
196
  # Filter to only open-weights models with parameter data
197
  open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, []))
198
 
 
 
 
199
  plot_df = df[
200
- (df['parameter_count_b'].notna()) &
201
- (df['Openness'].isin(open_aliases) | df.get('openness', pd.Series()).isin(open_aliases))
202
  ].copy()
203
 
204
  if plot_df.empty:
@@ -211,8 +245,12 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
211
  )
212
  return fig
213
 
214
- # Get the score column
215
- score_col = 'average score' if 'average score' in plot_df.columns else None
 
 
 
 
216
  if score_col is None:
217
  for col in plot_df.columns:
218
  if 'score' in col.lower() and 'average' in col.lower():
@@ -230,15 +268,21 @@ def create_accuracy_by_size_chart(df: pd.DataFrame) -> go.Figure:
230
  return fig
231
 
232
  # Get model name column
233
- model_col = 'Language Model' if 'Language Model' in plot_df.columns else 'Language model'
 
 
 
 
 
 
234
 
235
  fig = go.Figure()
236
 
237
  # Determine if we should use active params (for MoE models) or total params
238
  # Use active params if available, otherwise total params
239
  for _, row in plot_df.iterrows():
240
- total_params = row['parameter_count_b']
241
- active_params = row.get('active_parameter_count_b')
242
  model_name = row.get(model_col, 'Unknown')
243
  score = row[score_col]
244
 
 
13
  Create a line chart showing model performance evolution over release dates.
14
 
15
  Args:
16
+ df: DataFrame with columns including 'release_date' or 'Release_Date', 'Language Model', 'average score', 'openness'
17
 
18
  Returns:
19
  Plotly figure showing score evolution over time
20
  """
21
+ # Handle different column name formats
22
+ release_date_col = None
23
+ for col in ['release_date', 'Release_Date', 'Release Date']:
24
+ if col in df.columns:
25
+ release_date_col = col
26
+ break
27
+
28
+ if df.empty or release_date_col is None:
29
  fig = go.Figure()
30
  fig.add_annotation(
31
  text="No release date data available",
 
36
  return fig
37
 
38
  # Filter out rows without release dates
39
+ plot_df = df[df[release_date_col].notna() & (df[release_date_col] != '')].copy()
40
 
41
  if plot_df.empty:
42
  fig = go.Figure()
 
48
  )
49
  return fig
50
 
51
+ # Convert release_date to datetime (normalize column name)
52
+ plot_df['release_date'] = pd.to_datetime(plot_df[release_date_col], errors='coerce')
53
  plot_df = plot_df.dropna(subset=['release_date'])
54
 
55
  # Sort by release date
56
  plot_df = plot_df.sort_values('release_date')
57
 
58
+ # Get the score column (handle different naming conventions)
59
+ score_col = None
60
+ for col in ['average score', 'Average Score', 'Average score']:
61
+ if col in plot_df.columns:
62
+ score_col = col
63
+ break
64
  if score_col is None:
65
  for col in plot_df.columns:
66
  if 'score' in col.lower() and 'average' in col.lower():
 
78
  return fig
79
 
80
  # Get model name column
81
+ model_col = None
82
+ for col in ['Language Model', 'Language model', 'llm_base']:
83
+ if col in plot_df.columns:
84
+ model_col = col
85
+ break
86
+ if model_col is None:
87
+ model_col = 'Language Model' # Default
88
 
89
  # Map openness to colors
90
  color_map = {
 
194
  Create a scatter plot showing accuracy vs parameter count for open-weights models.
195
 
196
  Args:
197
+ df: DataFrame with columns including 'parameter_count_b' or 'Parameter_Count_B',
198
+ 'active_parameter_count_b' or 'Active_Parameter_Count_B',
199
  'average score', 'openness', 'Language Model'
200
 
201
  Returns:
202
  Plotly figure showing accuracy vs model size
203
  """
204
+ # Handle different column name formats for parameter count
205
+ param_col = None
206
+ for col in ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B']:
207
+ if col in df.columns:
208
+ param_col = col
209
+ break
210
+
211
+ active_param_col = None
212
+ for col in ['active_parameter_count_b', 'Active_Parameter_Count_B', 'Active Parameter Count B']:
213
+ if col in df.columns:
214
+ active_param_col = col
215
+ break
216
+
217
+ if df.empty or param_col is None:
218
  fig = go.Figure()
219
  fig.add_annotation(
220
  text="No parameter count data available",
 
227
  # Filter to only open-weights models with parameter data
228
  open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, []))
229
 
230
+ # Get openness column
231
+ openness_col = 'Openness' if 'Openness' in df.columns else 'openness'
232
+
233
  plot_df = df[
234
+ (df[param_col].notna()) &
235
+ (df[openness_col].isin(open_aliases))
236
  ].copy()
237
 
238
  if plot_df.empty:
 
245
  )
246
  return fig
247
 
248
+ # Get the score column (handle different naming conventions)
249
+ score_col = None
250
+ for col in ['average score', 'Average Score', 'Average score']:
251
+ if col in plot_df.columns:
252
+ score_col = col
253
+ break
254
  if score_col is None:
255
  for col in plot_df.columns:
256
  if 'score' in col.lower() and 'average' in col.lower():
 
268
  return fig
269
 
270
  # Get model name column
271
+ model_col = None
272
+ for col in ['Language Model', 'Language model', 'llm_base']:
273
+ if col in plot_df.columns:
274
+ model_col = col
275
+ break
276
+ if model_col is None:
277
+ model_col = 'Language Model' # Default
278
 
279
  fig = go.Figure()
280
 
281
  # Determine if we should use active params (for MoE models) or total params
282
  # Use active params if available, otherwise total params
283
  for _, row in plot_df.iterrows():
284
+ total_params = row[param_col]
285
+ active_params = row.get(active_param_col) if active_param_col else None
286
  model_name = row.get(model_col, 'Unknown')
287
  score = row[score_col]
288