openhands openhands committed on
Commit
1f75b60
·
1 Parent(s): f1798d2

Refactor: Create generic create_scatter_chart() as single source of truth

Browse files

Major refactoring to eliminate code duplication across scatter plots:

- Add create_scatter_chart() in leaderboard_transformer.py (~270 lines)
- Handles all scatter plot types: cost, runtime, date, params
- Configurable x-axis type (log or date)
- Configurable Pareto frontier direction
- Consistent marker icons, hover text, and styling
- Auto-detects column names

- Add STANDARD_LAYOUT and STANDARD_FONT constants for shared styling

- Simplify visualizations.py from 536 lines to 159 lines
- create_evolution_over_time_chart() now uses generic function
- create_accuracy_by_size_chart() now uses generic function
- Only contains data filtering and column detection logic

Benefits:
- Single source of truth for all scatter plot styling
- Consistent fonts (Arial) across all charts
- Easier to maintain and extend
- ~375 lines of code removed

Co-authored-by: openhands <openhands@all-hands.dev>

Files changed (2) hide show
  1. leaderboard_transformer.py +303 -0
  2. visualizations.py +55 -468
leaderboard_transformer.py CHANGED
@@ -241,6 +241,309 @@ def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
241
  return get_company_from_model(model_name)
242
 
243
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
244
  INFORMAL_TO_FORMAL_NAME_MAP = {
245
  # Short Names
246
  "lit": "Literature Understanding",
 
241
  return get_company_from_model(model_name)
242
 
243
 
244
# Shared Plotly layout settings applied to every chart.
STANDARD_LAYOUT = {
    "template": "plotly_white",
    "height": 572,
    "font": {
        "family": FONT_FAMILY,
        "color": "#0D0D0F",  # neutral-950
    },
    "hoverlabel": {
        "bgcolor": "#222328",  # neutral-800
        "font_size": 12,
        "font_family": FONT_FAMILY_SHORT,
        "font_color": "#F7F8FB",  # neutral-50
    },
    "legend": {
        "bgcolor": '#F7F8FB',  # neutral-50
    },
    "margin": {"b": 80},  # Extra bottom margin for logo and URL
}

# Font applied to on-chart annotations.
STANDARD_FONT = {
    "size": 10,
    "color": '#0D0D0F',  # neutral-950
    "family": FONT_FAMILY_SHORT,
}
270
+
271
+
272
def _empty_scatter_figure(message: str, title: str) -> go.Figure:
    """Return a titled figure that shows only a centered text message."""
    fig = go.Figure()
    fig.add_annotation(
        text=message,
        xref="paper", yref="paper",
        x=0.5, y=0.5, showarrow=False,
        font=STANDARD_FONT,
    )
    fig.update_layout(**STANDARD_LAYOUT, title=title)
    return fig


def _load_logo_data_uri(logo_path, cache):
    """Return the file at ``logo_path`` as a base64 SVG data URI, or None.

    Results (including failures) are memoized in ``cache`` so each distinct
    icon file is read and encoded at most once per chart.
    """
    if logo_path not in cache:
        uri = None
        if os.path.exists(logo_path):
            try:
                with open(logo_path, 'rb') as f:
                    encoded_logo = base64.b64encode(f.read()).decode('utf-8')
                uri = f"data:image/svg+xml;base64,{encoded_logo}"
            except OSError:
                # Best effort: an unreadable icon just means no marker image.
                uri = None
        cache[logo_path] = uri
    return cache[logo_path]


def _short_model_name(raw_name, fallback: str) -> str:
    """Reduce a model identifier to the text after its last '/'.

    A list value is replaced by its first element (or ``fallback`` if empty).
    """
    if isinstance(raw_name, list):
        raw_name = raw_name[0] if raw_name else fallback
    return str(raw_name).split('/')[-1]


def create_scatter_chart(
    df: pd.DataFrame,
    x_col: str,
    y_col: str,
    title: str,
    x_label: str,
    y_label: str = "Average Score",
    mark_by: str = None,
    x_type: str = "log",  # "log" or "date"
    pareto_lower_is_better: bool = True,  # For x-axis: True means lower x is better
    model_col: str = None,
    openness_col: str = None,
) -> go.Figure:
    """
    Generic scatter chart with Pareto frontier, marker icons, and consistent styling.

    This is the single source of truth for all scatter plots in the application.

    Args:
        df: DataFrame with the data to plot (not modified; a copy is used)
        x_col: Column name for x-axis values
        y_col: Column name for y-axis values (typically score)
        title: Chart title
        x_label: X-axis label
        y_label: Y-axis label (default: "Average Score")
        mark_by: One of "Company", "Openness", or "Country" for marker icons;
            falls back to MARK_BY_DEFAULT when None
        x_type: "log" for logarithmic scale, "date" for datetime scale
        pareto_lower_is_better: If True, lower x values are better (cost, size);
            If False, higher x values are better (time evolution)
        model_col: Column name for model names (auto-detected if None)
        openness_col: Column name for openness values (auto-detected if None)

    Returns:
        Plotly figure with scatter plot, Pareto frontier, and branding. If the
        required columns are missing, or no row has a valid x and y value, a
        figure containing only an explanatory message is returned instead.
    """
    from constants import MARK_BY_DEFAULT

    if mark_by is None:
        mark_by = MARK_BY_DEFAULT

    # Auto-detect column names if not provided
    if model_col is None:
        model_col = next(
            (col for col in ('Language Model', 'Language model', 'llm_base')
             if col in df.columns),
            'Language Model',
        )
    if openness_col is None:
        openness_col = 'Openness' if 'Openness' in df.columns else 'openness'

    if x_col not in df.columns or y_col not in df.columns:
        return _empty_scatter_figure("Required data columns not available", title)

    # Coerce to plottable types; unparseable values become NaN/NaT and are dropped
    plot_df = df.copy()
    plot_df[y_col] = pd.to_numeric(plot_df[y_col], errors='coerce')
    if x_type == "date":
        plot_df[x_col] = pd.to_datetime(plot_df[x_col], errors='coerce')
    else:
        plot_df[x_col] = pd.to_numeric(plot_df[x_col], errors='coerce')
    plot_df = plot_df.dropna(subset=[x_col, y_col])

    if plot_df.empty:
        return _empty_scatter_figure("No valid data points available", title)

    fig = go.Figure()

    # Calculate axis ranges
    x_values = plot_df[x_col].tolist()
    y_values = plot_df[y_col].tolist()
    min_x, max_x = min(x_values), max(x_values)

    x_range = None      # padded [start, end] for date axes
    x_range_log = None  # [start, end] in log10 units for log axes
    if x_type == "log":
        x_range_log = [
            np.log10(min_x * 0.5) if min_x > 0 else -2,
            np.log10(max_x * 1.5) if max_x > 0 else 2,
        ]
    elif x_type == "date":
        x_padding = (max_x - min_x) * 0.1 if max_x != min_x else pd.Timedelta(days=15)
        x_range = [min_x - x_padding, max_x + x_padding]

    min_y, max_y = min(y_values), max(y_values)
    y_range = [min_y - 5 if min_y > 5 else 0, max_y + 5]
    y_span = y_range[1] - y_range[0]

    # Calculate Pareto frontier: walk points by ascending x and keep those that
    # match/raise (lower-is-better) or strictly raise (time evolution) the best
    # score seen so far. Sorting ties by descending y makes the result
    # deterministic when several points share the same x value.
    sorted_df = plot_df.sort_values(by=[x_col, y_col], ascending=[True, False])
    frontier_rows = []
    best_score = float('-inf')
    for _, row in sorted_df.iterrows():
        score = row[y_col]
        if pareto_lower_is_better:
            on_frontier = score >= best_score
        else:
            on_frontier = score > best_score
        if on_frontier:
            frontier_rows.append(row)
            best_score = score

    # Draw Pareto frontier line
    if frontier_rows:
        fig.add_trace(go.Scatter(
            x=[row[x_col] for row in frontier_rows],
            y=[row[y_col] for row in frontier_rows],
            mode='lines',
            name='Pareto Frontier',
            showlegend=False,
            line=dict(color='#FFE165', width=2, dash='dash'),
            hoverinfo='skip'
        ))

    # Prepare hover text for all points
    h_pad = " "
    hover_texts = []
    for _, row in plot_df.iterrows():
        model_name = _short_model_name(row.get(model_col, 'Unknown'), 'Unknown')
        # Show dates as YYYY-MM-DD rather than a full timestamp
        x_display = row[x_col].strftime('%Y-%m-%d') if x_type == "date" else row[x_col]

        hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
        hover_text += f"{h_pad}{x_label}: <b>{x_display}</b>{h_pad}<br>"
        hover_text += f"{h_pad}{y_label}: <b>{row[y_col]:.1f}</b>{h_pad}<br>"
        hover_texts.append(hover_text)

    # Add invisible scatter trace for hover detection
    fig.add_trace(go.Scatter(
        x=plot_df[x_col],
        y=plot_df[y_col],
        mode='markers',
        name='Models',
        showlegend=False,
        text=hover_texts,
        hoverinfo='text',
        marker=dict(color='rgba(0,0,0,0)', size=25, opacity=0)
    ))

    # Add marker icon images, positioned in axis-domain (0-1) coordinates
    layout_images = []
    logo_cache = {}
    for _, row in plot_df.iterrows():
        x_val = row[x_col]
        y_val = row[y_col]

        marker_info = get_marker_icon(
            row.get(model_col, ''), row.get(openness_col, ''), mark_by
        )
        logo_uri = _load_logo_data_uri(marker_info['path'], logo_cache)
        if logo_uri is None:
            continue

        if x_type == "log" and x_val > 0:
            log_span = x_range_log[1] - x_range_log[0]
            domain_x = (np.log10(x_val) - x_range_log[0]) / log_span if log_span > 0 else 0.5
        elif x_type == "date":
            # Measure against the padded axis range actually applied to the
            # x-axis, so icons sit on top of their hover points.
            axis_seconds = (x_range[1] - x_range[0]).total_seconds()
            domain_x = (
                (x_val - x_range[0]).total_seconds() / axis_seconds
                if axis_seconds > 0 else 0.5
            )
        else:
            # NOTE(review): non-positive x on a log axis (or an unknown x_type)
            # has no defined position; the icon is centered as before.
            domain_x = 0.5

        domain_y = (y_val - y_range[0]) / y_span if y_span > 0 else 0.5

        # Clamp to valid range
        domain_x = max(0, min(1, domain_x))
        domain_y = max(0, min(1, domain_y))

        layout_images.append(dict(
            source=logo_uri,
            xref="x domain",
            yref="y domain",
            x=domain_x,
            y=domain_y,
            sizex=0.04,
            sizey=0.06,
            xanchor="center",
            yanchor="middle",
            layer="above"
        ))

    # Add labels for frontier points only
    for row in frontier_rows:
        model_name = _short_model_name(row.get(model_col, ''), '')
        if len(model_name) > 25:
            model_name = model_name[:22] + '...'

        x_val = row[x_col]

        # For log scale, annotation x needs to be in log space
        if x_type == "log":
            ann_x = np.log10(x_val) if x_val > 0 else 0
        else:
            ann_x = x_val

        fig.add_annotation(
            x=ann_x,
            y=row[y_col],
            text=model_name,
            showarrow=False,
            yshift=20,
            font=STANDARD_FONT,
            xanchor='center',
            yanchor='bottom'
        )

    # Configure layout
    xaxis_config = dict(title=x_label)
    if x_type == "log":
        xaxis_config['type'] = 'log'
        xaxis_config['range'] = x_range_log
    elif x_type == "date":
        xaxis_config['range'] = x_range

    layout_config = dict(
        **STANDARD_LAYOUT,
        title=title,
        xaxis=xaxis_config,
        yaxis=dict(title=y_label, range=y_range),
    )

    if layout_images:
        layout_config['images'] = layout_images

    fig.update_layout(**layout_config)

    # Add branding
    add_branding_to_figure(fig)

    return fig
545
+
546
+
547
  INFORMAL_TO_FORMAL_NAME_MAP = {
548
  # Short Names
549
  "lit": "Literature Understanding",
visualizations.py CHANGED
@@ -1,73 +1,38 @@
1
  """
2
  Additional visualizations for the OpenHands Index leaderboard.
 
 
 
3
  """
4
  import pandas as pd
5
  import plotly.graph_objects as go
6
- import plotly.express as px
7
- from datetime import datetime
8
- import os
9
- import base64
10
  import aliases
11
- from constants import FONT_FAMILY, FONT_FAMILY_SHORT
12
 
13
- # Import shared utilities from leaderboard_transformer
14
- from leaderboard_transformer import (
15
- get_company_from_model,
16
- get_marker_icon,
17
- add_branding_to_figure,
18
- )
19
- from ui_components import get_svg_as_data_uri
20
- from constants import MARK_BY_DEFAULT
21
 
22
- # Standard layout configuration matching existing charts
23
- # Colors aligned with OpenHands brand
24
- STANDARD_LAYOUT = dict(
25
- template="plotly_white",
26
- height=572,
27
- font=dict(
28
- family=FONT_FAMILY,
29
- color="#0D0D0F", # neutral-950
30
- ),
31
- hoverlabel=dict(
32
- bgcolor="#222328", # neutral-800
33
- font_size=12,
34
- font_family=FONT_FAMILY_SHORT,
35
- font_color="#F7F8FB", # neutral-50
36
- ),
37
- legend=dict(
38
- bgcolor='#F7F8FB', # neutral-50
39
- ),
40
- margin=dict(b=80), # Extra margin for logo and URL
41
- )
42
 
43
- # Standard font for annotations - uses constants for consistency
44
- STANDARD_FONT = dict(
45
- size=10,
46
- color='#0D0D0F', # neutral-950
47
- family=FONT_FAMILY_SHORT
48
- )
49
 
50
 
51
  def create_evolution_over_time_chart(df: pd.DataFrame, mark_by: str = None) -> go.Figure:
52
  """
53
  Create a chart showing model performance evolution over release dates.
54
- Uses company logos as markers to match the existing chart styling.
55
 
56
  Args:
57
- df: DataFrame with columns including 'release_date' or 'Release_Date', 'Language Model', 'average score', 'openness'
58
- mark_by: One of "Company", "Openness", or "Country" - controls which icon to display
59
 
60
  Returns:
61
  Plotly figure showing score evolution over time
62
  """
63
- if mark_by is None:
64
- mark_by = MARK_BY_DEFAULT
65
- # Handle different column name formats
66
- release_date_col = None
67
- for col in ['release_date', 'Release_Date', 'Release Date']:
68
- if col in df.columns:
69
- release_date_col = col
70
- break
71
 
72
  if df.empty or release_date_col is None:
73
  fig = go.Figure()
@@ -77,38 +42,14 @@ def create_evolution_over_time_chart(df: pd.DataFrame, mark_by: str = None) -> g
77
  x=0.5, y=0.5, showarrow=False,
78
  font=STANDARD_FONT
79
  )
80
- fig.update_layout(**STANDARD_LAYOUT)
81
  return fig
82
 
83
- # Filter out rows without release dates
84
- plot_df = df[df[release_date_col].notna() & (df[release_date_col] != '')].copy()
85
-
86
- if plot_df.empty:
87
- fig = go.Figure()
88
- fig.add_annotation(
89
- text="No release date data available",
90
- xref="paper", yref="paper",
91
- x=0.5, y=0.5, showarrow=False,
92
- font=STANDARD_FONT
93
- )
94
- fig.update_layout(**STANDARD_LAYOUT)
95
- return fig
96
-
97
- # Convert release_date to datetime (normalize column name)
98
- plot_df['release_date'] = pd.to_datetime(plot_df[release_date_col], errors='coerce')
99
- plot_df = plot_df.dropna(subset=['release_date'])
100
-
101
- # Sort by release date
102
- plot_df = plot_df.sort_values('release_date')
103
-
104
- # Get the score column (handle different naming conventions)
105
- score_col = None
106
- for col in ['average score', 'Average Score', 'Average score']:
107
- if col in plot_df.columns:
108
- score_col = col
109
- break
110
  if score_col is None:
111
- for col in plot_df.columns:
 
112
  if 'score' in col.lower() and 'average' in col.lower():
113
  score_col = col
114
  break
@@ -121,202 +62,36 @@ def create_evolution_over_time_chart(df: pd.DataFrame, mark_by: str = None) -> g
121
  x=0.5, y=0.5, showarrow=False,
122
  font=STANDARD_FONT
123
  )
124
- fig.update_layout(**STANDARD_LAYOUT)
125
  return fig
126
 
127
- # Get model name column
128
- model_col = None
129
- for col in ['Language Model', 'Language model', 'llm_base']:
130
- if col in plot_df.columns:
131
- model_col = col
132
- break
133
- if model_col is None:
134
- model_col = 'Language Model' # Default
135
-
136
- fig = go.Figure()
137
-
138
- # Add Pareto frontier line (monotonically increasing best score over time)
139
- # Also track which rows are on the frontier for labeling
140
- frontier_rows = []
141
- if len(plot_df) > 1:
142
- # Compute Pareto frontier: only include points that set a new best score
143
- frontier_dates = []
144
- frontier_scores = []
145
- max_score_so_far = float('-inf')
146
-
147
- for _, row in plot_df.iterrows():
148
- current_score = row[score_col]
149
- current_date = row['release_date']
150
-
151
- if current_score > max_score_so_far:
152
- # This point is on the Pareto frontier
153
- frontier_dates.append(current_date)
154
- frontier_scores.append(current_score)
155
- frontier_rows.append(row)
156
- max_score_so_far = current_score
157
-
158
- if frontier_dates:
159
- fig.add_trace(go.Scatter(
160
- x=frontier_dates,
161
- y=frontier_scores,
162
- mode='lines',
163
- line=dict(color='#FFE165', width=2, dash='dash'), # primary yellow, dashed
164
- name='Pareto Frontier',
165
- hoverinfo='skip',
166
- showlegend=False
167
- ))
168
-
169
- # Calculate axis ranges
170
- min_date = plot_df['release_date'].min()
171
- max_date = plot_df['release_date'].max()
172
- min_score = plot_df[score_col].min()
173
- max_score = plot_df[score_col].max()
174
- y_min = min_score - 5 if min_score > 5 else 0
175
- y_max = max_score + 10 # Extra space for labels
176
-
177
- # Build hover text for each point
178
- hover_texts = []
179
- for _, row in plot_df.iterrows():
180
- model_name = row.get(model_col, 'Unknown')
181
- openness = row.get('Openness', row.get('openness', 'unknown'))
182
- h_pad = " "
183
- hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
184
- hover_text += f"{h_pad}Release: <b>{row['release_date'].strftime('%Y-%m-%d')}</b>{h_pad}<br>"
185
- hover_text += f"{h_pad}Average Score: <b>{row[score_col]:.1f}</b>{h_pad}<br>"
186
- hover_text += f"{h_pad}Openness: <b>{openness}</b>{h_pad}<br>"
187
- hover_texts.append(hover_text)
188
-
189
- plot_df['hover_text'] = hover_texts
190
-
191
- # Add invisible markers for hover functionality
192
- fig.add_trace(go.Scatter(
193
- x=plot_df['release_date'],
194
- y=plot_df[score_col],
195
- mode='markers',
196
- name='Models',
197
- showlegend=False,
198
- text=plot_df['hover_text'],
199
- hoverinfo='text',
200
- marker=dict(
201
- color='rgba(0,0,0,0)', # Invisible markers
202
- size=25, # Large enough for hover detection
203
- opacity=0
204
- )
205
- ))
206
-
207
- # Add marker icon images for each data point using data coordinates
208
- layout_images = []
209
- openness_col = 'Openness' if 'Openness' in plot_df.columns else 'openness'
210
-
211
- for _, row in plot_df.iterrows():
212
- model_name = row.get(model_col, '')
213
- openness = row.get(openness_col, '')
214
- marker_info = get_marker_icon(model_name, openness, mark_by)
215
- logo_path = marker_info['path']
216
-
217
- # Read the SVG file and encode as base64 data URI
218
- if os.path.exists(logo_path):
219
- try:
220
- with open(logo_path, 'rb') as f:
221
- encoded_logo = base64.b64encode(f.read()).decode('utf-8')
222
- logo_uri = f"data:image/svg+xml;base64,{encoded_logo}"
223
-
224
- x_val = row['release_date']
225
- y_val = row[score_col]
226
-
227
- # Use data coordinates for precise alignment
228
- layout_images.append(dict(
229
- source=logo_uri,
230
- xref="x",
231
- yref="y",
232
- x=x_val,
233
- y=y_val,
234
- sizex=15 * 24 * 60 * 60 * 1000, # ~15 days in milliseconds
235
- sizey=3, # score units
236
- xanchor="center",
237
- yanchor="middle",
238
- layer="above"
239
- ))
240
- except Exception:
241
- pass
242
-
243
- # Add model name labels only for frontier points
244
- for row in frontier_rows:
245
- model_name = row.get(model_col, '')
246
- x_val = row['release_date']
247
- y_val = row[score_col]
248
-
249
- # Clean model name for label
250
- if isinstance(model_name, list):
251
- model_name = model_name[0] if model_name else ''
252
- model_name = str(model_name).split('/')[-1]
253
- if len(model_name) > 25:
254
- model_name = model_name[:22] + '...'
255
-
256
- fig.add_annotation(
257
- x=x_val,
258
- y=y_val,
259
- xref="x",
260
- yref="y",
261
- text=model_name,
262
- showarrow=False,
263
- yshift=20,
264
- font=STANDARD_FONT,
265
- xanchor='center',
266
- yanchor='bottom'
267
- )
268
-
269
- # Build layout configuration
270
- layout_config = dict(
271
- **STANDARD_LAYOUT,
272
  title="Model Performance Evolution Over Time",
273
- xaxis=dict(
274
- title="Model Release Date",
275
- range=[min_date - pd.Timedelta(days=15), max_date + pd.Timedelta(days=15)]
276
- ),
277
- yaxis=dict(
278
- title="Average Score",
279
- range=[y_min, y_max]
280
- ),
281
  )
282
-
283
- # Add company logo images to the layout
284
- if layout_images:
285
- layout_config['images'] = layout_images
286
-
287
- fig.update_layout(**layout_config)
288
-
289
- # Add OpenHands branding
290
- add_branding_to_figure(fig)
291
-
292
- return fig
293
 
294
 
295
  def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.Figure:
296
  """
297
  Create a scatter plot showing accuracy vs parameter count for open-weights models.
298
- Uses company logos as markers to match the Cost/Performance chart styling.
299
- Includes a Pareto efficiency frontier line.
300
 
301
  Args:
302
- df: DataFrame with columns including 'parameter_count_b' or 'Parameter_Count_B',
303
- 'average score', 'openness', 'Language Model'
304
- mark_by: One of "Company", "Openness", or "Country" - controls which icon to display
305
 
306
  Returns:
307
- Plotly figure showing accuracy vs model size (total parameters)
308
  """
309
- import numpy as np
310
-
311
- if mark_by is None:
312
- mark_by = MARK_BY_DEFAULT
313
-
314
- # Handle different column name formats for parameter count
315
- param_col = None
316
- for col in ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B']:
317
- if col in df.columns:
318
- param_col = col
319
- break
320
 
321
  if df.empty or param_col is None:
322
  fig = go.Figure()
@@ -326,13 +101,13 @@ def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.F
326
  x=0.5, y=0.5, showarrow=False,
327
  font=STANDARD_FONT
328
  )
329
- fig.update_layout(**STANDARD_LAYOUT)
330
  return fig
331
 
332
- # Filter to only open-weights models with parameter data
333
- open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, []))
334
-
335
- # Get openness column
336
  openness_col = 'Openness' if 'Openness' in df.columns else 'openness'
337
 
338
  plot_df = df[
@@ -348,15 +123,11 @@ def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.F
348
  x=0.5, y=0.5, showarrow=False,
349
  font=STANDARD_FONT
350
  )
351
- fig.update_layout(**STANDARD_LAYOUT)
352
  return fig
353
 
354
- # Get the score column (handle different naming conventions)
355
- score_col = None
356
- for col in ['average score', 'Average Score', 'Average score']:
357
- if col in plot_df.columns:
358
- score_col = col
359
- break
360
  if score_col is None:
361
  for col in plot_df.columns:
362
  if 'score' in col.lower() and 'average' in col.lower():
@@ -371,202 +142,18 @@ def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.F
371
  x=0.5, y=0.5, showarrow=False,
372
  font=STANDARD_FONT
373
  )
374
- fig.update_layout(**STANDARD_LAYOUT)
375
  return fig
376
 
377
- # Get model name column
378
- model_col = None
379
- for col in ['Language Model', 'Language model', 'llm_base']:
380
- if col in plot_df.columns:
381
- model_col = col
382
- break
383
- if model_col is None:
384
- model_col = 'Language Model' # Default
385
-
386
- fig = go.Figure()
387
-
388
- # Prepare data for plotting
389
- data_points = []
390
- for _, row in plot_df.iterrows():
391
- total_params = row[param_col]
392
- model_name = row.get(model_col, 'Unknown')
393
- score = row[score_col]
394
- openness = row.get(openness_col, '')
395
-
396
- # Use total params for x-axis
397
- x_val = total_params
398
-
399
- # Create hover text matching existing chart style
400
- h_pad = " "
401
- hover_text = f"<br>{h_pad}<b>{model_name}</b>{h_pad}<br>"
402
- hover_text += f"{h_pad}Parameters: <b>{total_params:.0f}B</b>{h_pad}<br>"
403
- hover_text += f"{h_pad}Average Score: <b>{score:.1f}</b>{h_pad}<br>"
404
-
405
- data_points.append({
406
- 'x': x_val,
407
- 'y': score,
408
- 'model_name': model_name,
409
- 'hover_text': hover_text,
410
- 'total_params': total_params,
411
- 'openness': openness
412
- })
413
-
414
- x_values = [p['x'] for p in data_points]
415
- y_values = [p['y'] for p in data_points]
416
-
417
- # Calculate axis ranges for domain coordinate conversion
418
- min_x = min(x_values)
419
- max_x = max(x_values)
420
- x_min_log = np.log10(min_x * 0.5) if min_x > 0 else 0
421
- x_max_log = np.log10(max_x * 1.5) if max_x > 0 else 3
422
-
423
- min_score = min(y_values)
424
- max_score = max(y_values)
425
- y_min = min_score - 5 if min_score > 5 else 0
426
- y_max = max_score + 5
427
-
428
- # Calculate and draw Pareto Efficiency Frontier
429
- # For size vs accuracy, we want: smaller size (lower x) AND higher accuracy (higher y)
430
- # Sort by x ascending, then track maximum y seen
431
- sorted_data = sorted(data_points, key=lambda p: (p['x'], -p['y']))
432
- frontier_points = []
433
- frontier_rows = []
434
- max_score_so_far = float('-inf')
435
-
436
- for point in sorted_data:
437
- if point['y'] >= max_score_so_far:
438
- frontier_points.append({'x': point['x'], 'y': point['y']})
439
- frontier_rows.append(point)
440
- max_score_so_far = point['y']
441
-
442
- if frontier_points:
443
- frontier_df = pd.DataFrame(frontier_points)
444
- fig.add_trace(go.Scatter(
445
- x=frontier_df['x'],
446
- y=frontier_df['y'],
447
- mode='lines',
448
- name='Efficiency Frontier',
449
- showlegend=False,
450
- line=dict(color='#FFE165', width=2, dash='dash'), # primary yellow
451
- hoverinfo='skip'
452
- ))
453
-
454
- # Add invisible markers for hover functionality
455
- fig.add_trace(go.Scatter(
456
- x=x_values,
457
- y=y_values,
458
- mode='markers',
459
- name='Models',
460
- showlegend=False,
461
- text=[p['hover_text'] for p in data_points],
462
- hoverinfo='text',
463
- marker=dict(
464
- color='rgba(0,0,0,0)', # Invisible markers
465
- size=25, # Large enough for hover detection
466
- opacity=0
467
- )
468
- ))
469
-
470
- # Add marker icon images for each data point (uniform size like Cost/Performance chart)
471
- layout_images = []
472
-
473
- for point in data_points:
474
- x_val = point['x']
475
- y_val = point['y']
476
- model_name = point['model_name']
477
- openness = point['openness']
478
-
479
- marker_info = get_marker_icon(model_name, openness, mark_by)
480
- logo_path = marker_info['path']
481
-
482
- # Read the SVG file and encode as base64 data URI
483
- if os.path.exists(logo_path):
484
- try:
485
- with open(logo_path, 'rb') as f:
486
- encoded_logo = base64.b64encode(f.read()).decode('utf-8')
487
- logo_uri = f"data:image/svg+xml;base64,{encoded_logo}"
488
-
489
- # Convert to domain coordinates (0-1 range) for log scale x-axis
490
- if x_val > 0:
491
- log_x = np.log10(x_val)
492
- domain_x = (log_x - x_min_log) / (x_max_log - x_min_log)
493
- else:
494
- domain_x = 0
495
-
496
- domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
497
-
498
- # Clamp to valid range
499
- domain_x = max(0, min(1, domain_x))
500
- domain_y = max(0, min(1, domain_y))
501
-
502
- # Uniform logo size (same as Cost/Performance chart)
503
- layout_images.append(dict(
504
- source=logo_uri,
505
- xref="x domain",
506
- yref="y domain",
507
- x=domain_x,
508
- y=domain_y,
509
- sizex=0.04, # Size as fraction of plot width
510
- sizey=0.06, # Size as fraction of plot height
511
- xanchor="center",
512
- yanchor="middle",
513
- layer="above"
514
- ))
515
- except Exception:
516
- pass
517
-
518
- # Add model name labels for frontier points only (like Cost/Performance chart)
519
- for point in frontier_rows:
520
- x_val = point['x']
521
- y_val = point['y']
522
- model_name = point['model_name']
523
-
524
- # Clean model name for label
525
- if isinstance(model_name, list):
526
- model_name = model_name[0] if model_name else ''
527
- model_name = str(model_name).split('/')[-1]
528
- if len(model_name) > 25:
529
- model_name = model_name[:22] + '...'
530
-
531
- # Transform x to log10 for annotation positioning on log scale
532
- if x_val > 0:
533
- x_log = np.log10(x_val)
534
- else:
535
- x_log = x_min_log
536
-
537
- fig.add_annotation(
538
- x=x_log,
539
- y=y_val,
540
- text=model_name,
541
- showarrow=False,
542
- yshift=25,
543
- font=STANDARD_FONT,
544
- xanchor='center',
545
- yanchor='bottom'
546
- )
547
-
548
- # Build layout configuration
549
- layout_config = dict(
550
- **STANDARD_LAYOUT,
551
  title="Open Model Accuracy by Size",
552
- xaxis=dict(
553
- title="Parameters (Billions)",
554
- type="log",
555
- range=[x_min_log, x_max_log]
556
- ),
557
- yaxis=dict(
558
- title="Average Score",
559
- range=[y_min, y_max]
560
- ),
561
  )
562
-
563
- # Add company logo images to the layout
564
- if layout_images:
565
- layout_config['images'] = layout_images
566
-
567
- fig.update_layout(**layout_config)
568
-
569
- # Add OpenHands branding
570
- add_branding_to_figure(fig)
571
-
572
- return fig
 
1
  """
2
  Additional visualizations for the OpenHands Index leaderboard.
3
+
4
+ These functions use the generic create_scatter_chart() from leaderboard_transformer
5
+ as the single source of truth for scatter plot styling and behavior.
6
  """
7
  import pandas as pd
8
  import plotly.graph_objects as go
 
 
 
 
9
  import aliases
 
10
 
11
+ # Import the generic scatter chart function - single source of truth
12
+ from leaderboard_transformer import create_scatter_chart, STANDARD_LAYOUT, STANDARD_FONT
 
 
 
 
 
 
13
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
14
 
15
def _find_column(df: pd.DataFrame, candidates: list, default: str = None) -> str:
    """Return the first name in ``candidates`` that is a column of ``df``.

    Candidates are checked in the order given; ``default`` is returned when
    none of them is present.
    """
    available = df.columns
    return next((name for name in candidates if name in available), default)
21
 
22
 
23
  def create_evolution_over_time_chart(df: pd.DataFrame, mark_by: str = None) -> go.Figure:
24
  """
25
  Create a chart showing model performance evolution over release dates.
 
26
 
27
  Args:
28
+ df: DataFrame with release_date and score columns
29
+ mark_by: One of "Company", "Openness", or "Country" for marker icons
30
 
31
  Returns:
32
  Plotly figure showing score evolution over time
33
  """
34
+ # Find the release date column
35
+ release_date_col = _find_column(df, ['release_date', 'Release_Date', 'Release Date'])
 
 
 
 
 
 
36
 
37
  if df.empty or release_date_col is None:
38
  fig = go.Figure()
 
42
  x=0.5, y=0.5, showarrow=False,
43
  font=STANDARD_FONT
44
  )
45
+ fig.update_layout(**STANDARD_LAYOUT, title="Model Performance Evolution Over Time")
46
  return fig
47
 
48
+ # Find score column
49
+ score_col = _find_column(df, ['Average Score', 'average score', 'Average score'])
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  if score_col is None:
51
+ # Try to find any column with 'score' and 'average'
52
+ for col in df.columns:
53
  if 'score' in col.lower() and 'average' in col.lower():
54
  score_col = col
55
  break
 
62
  x=0.5, y=0.5, showarrow=False,
63
  font=STANDARD_FONT
64
  )
65
+ fig.update_layout(**STANDARD_LAYOUT, title="Model Performance Evolution Over Time")
66
  return fig
67
 
68
+ # Use the generic scatter chart
69
+ return create_scatter_chart(
70
+ df=df,
71
+ x_col=release_date_col,
72
+ y_col=score_col,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
73
  title="Model Performance Evolution Over Time",
74
+ x_label="Model Release Date",
75
+ y_label="Average Score",
76
+ mark_by=mark_by,
77
+ x_type="date",
78
+ pareto_lower_is_better=False, # Later dates with higher scores are better
 
 
 
79
  )
 
 
 
 
 
 
 
 
 
 
 
80
 
81
 
82
  def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.Figure:
83
  """
84
  Create a scatter plot showing accuracy vs parameter count for open-weights models.
 
 
85
 
86
  Args:
87
+ df: DataFrame with parameter_count and score columns
88
+ mark_by: One of "Company", "Openness", or "Country" for marker icons
 
89
 
90
  Returns:
91
+ Plotly figure showing accuracy vs model size
92
  """
93
+ # Find parameter count column
94
+ param_col = _find_column(df, ['parameter_count_b', 'Parameter_Count_B', 'Parameter Count B'])
 
 
 
 
 
 
 
 
 
95
 
96
  if df.empty or param_col is None:
97
  fig = go.Figure()
 
101
  x=0.5, y=0.5, showarrow=False,
102
  font=STANDARD_FONT
103
  )
104
+ fig.update_layout(**STANDARD_LAYOUT, title="Open Model Accuracy by Size")
105
  return fig
106
 
107
+ # Filter to only open-weights models
108
+ open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(
109
+ aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, [])
110
+ )
111
  openness_col = 'Openness' if 'Openness' in df.columns else 'openness'
112
 
113
  plot_df = df[
 
123
  x=0.5, y=0.5, showarrow=False,
124
  font=STANDARD_FONT
125
  )
126
+ fig.update_layout(**STANDARD_LAYOUT, title="Open Model Accuracy by Size")
127
  return fig
128
 
129
+ # Find score column
130
+ score_col = _find_column(plot_df, ['Average Score', 'average score', 'Average score'])
 
 
 
 
131
  if score_col is None:
132
  for col in plot_df.columns:
133
  if 'score' in col.lower() and 'average' in col.lower():
 
142
  x=0.5, y=0.5, showarrow=False,
143
  font=STANDARD_FONT
144
  )
145
+ fig.update_layout(**STANDARD_LAYOUT, title="Open Model Accuracy by Size")
146
  return fig
147
 
148
+ # Use the generic scatter chart
149
+ return create_scatter_chart(
150
+ df=plot_df,
151
+ x_col=param_col,
152
+ y_col=score_col,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
153
  title="Open Model Accuracy by Size",
154
+ x_label="Parameters (Billions)",
155
+ y_label="Average Score",
156
+ mark_by=mark_by,
157
+ x_type="log",
158
+ pareto_lower_is_better=True, # Smaller models with higher scores are better
 
 
 
 
159
  )