openhands openhands committed on
Commit
4ab5f97
·
1 Parent(s): 5998027

fix: Column naming and incomplete entries toggle

Browse files

Fixes:
1. Change 'Overall Score' to 'Average Score' in all places (including
view() method which was constructing 'Overall Score' directly)
2. Change 'Overall Cost' to 'Total Cost' - properly route to Total Cost
column for Overall view
3. Fix incomplete entries toggle:
- Check 'Categories Attempted' column (formatted as 'X/5')
- Filter on '5/5' for complete entries
- If no complete entries exist, show all entries with explanatory note
- Keep 'Categories Attempted' column visible in the table

Co-authored-by: openhands <openhands@all-hands.dev>

Files changed (2) hide show
  1. leaderboard_transformer.py +11 -4
  2. ui_components.py +57 -36
leaderboard_transformer.py CHANGED
@@ -244,7 +244,8 @@ class DataTransformer:
244
 
245
  # --- 1. Determine Primary and Group Metrics Based on the Tag ---
246
  if tag is None or tag == "Overall":
247
- primary_metric = "Overall"
 
248
  group_metrics = list(self.tag_map.keys())
249
  else:
250
  primary_metric = tag
@@ -264,7 +265,13 @@ class DataTransformer:
264
  new_cols = ["Openness"]
265
  ending_cols = ["Date", "Logs"]
266
 
267
- metrics_to_display = [primary_score_col, f"{primary_metric} Cost"]
 
 
 
 
 
 
268
  for item in group_metrics:
269
  metrics_to_display.append(f"{item} Score")
270
  metrics_to_display.append(f"{item} Cost")
@@ -280,7 +287,7 @@ class DataTransformer:
280
  cols = len(final_cols_ordered)
281
 
282
  # Calculate and add "Categories Attempted" column
283
- if primary_metric == "Overall":
284
  def calculate_attempted(row):
285
  main_categories = ['Bug Fixing', 'Frontend Development', 'App Creation', 'Test Generation', 'Information Gathering']
286
  count = 0
@@ -307,7 +314,7 @@ class DataTransformer:
307
  # --- 4. Generate the Scatter Plot for the Primary Metric ---
308
  plots: dict[str, go.Figure] = {}
309
  if use_plotly:
310
- primary_cost_col = f"{primary_metric} Cost"
311
  # Check if the primary score and cost columns exist in the FINAL view
312
  if primary_score_col in df_view.columns and primary_cost_col in df_view.columns:
313
  fig = _plot_scatter_plotly(
 
244
 
245
  # --- 1. Determine Primary and Group Metrics Based on the Tag ---
246
  if tag is None or tag == "Overall":
247
+ # Use "Average" for the primary metric display name
248
+ primary_metric = "Average"
249
  group_metrics = list(self.tag_map.keys())
250
  else:
251
  primary_metric = tag
 
265
  new_cols = ["Openness"]
266
  ending_cols = ["Date", "Logs"]
267
 
268
+ # For Overall view, use "Total Cost" instead of "Average Cost"
269
+ if tag is None or tag == "Overall":
270
+ primary_cost_col = "Total Cost"
271
+ else:
272
+ primary_cost_col = f"{primary_metric} Cost"
273
+
274
+ metrics_to_display = [primary_score_col, primary_cost_col]
275
  for item in group_metrics:
276
  metrics_to_display.append(f"{item} Score")
277
  metrics_to_display.append(f"{item} Cost")
 
287
  cols = len(final_cols_ordered)
288
 
289
  # Calculate and add "Categories Attempted" column
290
+ if tag is None or tag == "Overall":
291
  def calculate_attempted(row):
292
  main_categories = ['Bug Fixing', 'Frontend Development', 'App Creation', 'Test Generation', 'Information Gathering']
293
  count = 0
 
314
  # --- 4. Generate the Scatter Plot for the Primary Metric ---
315
  plots: dict[str, go.Figure] = {}
316
  if use_plotly:
317
+ # primary_cost_col is already set above (Total Cost for Overall, or {metric} Cost otherwise)
318
  # Check if the primary score and cost columns exist in the FINAL view
319
  if primary_score_col in df_view.columns and primary_cost_col in df_view.columns:
320
  fig = _plot_scatter_plotly(
ui_components.py CHANGED
@@ -407,7 +407,7 @@ def create_leaderboard_display(
407
  all_cols.insert(0, all_cols.pop(all_cols.index('Pareto')))
408
  df_display = df_display[all_cols]
409
 
410
- columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source', 'Categories Completed']
411
  df_display = df_display.drop(columns=columns_to_drop, errors='ignore')
412
 
413
  header_rename_map = {
@@ -420,18 +420,22 @@ def create_leaderboard_display(
420
 
421
  # Prepare both complete and all entries versions
422
  # Complete entries have all 5 categories submitted
423
- if 'Categories Completed' in df_view_full.columns:
424
- df_view_complete = df_view_full[df_view_full['Categories Completed'] == 5].copy()
 
425
  else:
426
  df_view_complete = df_view_full.copy()
427
 
428
  df_display_complete = prepare_df_for_display(df_view_complete)
429
  df_display_all = prepare_df_for_display(df_view_full)
430
 
 
 
 
431
  scatter_plot = plots_dict.get('scatter_plot', go.Figure())
432
 
433
- # Now get headers from the renamed dataframe
434
- df_headers = df_display_complete.columns.tolist()
435
  df_datatypes = []
436
  for col in df_headers:
437
  if col == "Logs" or "Cost" in col or "Score" in col:
@@ -469,37 +473,54 @@ def create_leaderboard_display(
469
  num_total = len(df_display_all)
470
  num_incomplete = num_total - num_complete
471
 
472
- show_incomplete_checkbox = gr.Checkbox(
473
- label=f"Show incomplete entries ({num_incomplete} entries with fewer than 5 categories)",
474
- value=False,
475
- elem_id="show-incomplete-toggle"
476
- )
477
-
478
- # Start with complete entries only (default)
479
- dataframe_component = gr.DataFrame(
480
- headers=df_headers,
481
- value=df_display_complete,
482
- datatype=df_datatypes,
483
- interactive=False,
484
- wrap=True,
485
- column_widths=final_column_widths,
486
- elem_classes=["wrap-header-df"],
487
- show_search="search",
488
- elem_id="main-leaderboard"
489
- )
490
-
491
- # Update function for the toggle
492
- def update_table(show_incomplete):
493
- if show_incomplete:
494
- return df_display_all
495
- else:
496
- return df_display_complete
497
-
498
- show_incomplete_checkbox.change(
499
- fn=update_table,
500
- inputs=[show_incomplete_checkbox],
501
- outputs=[dataframe_component]
502
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
503
 
504
  legend_markdown = create_legend_markdown(category_name)
505
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")
 
407
  all_cols.insert(0, all_cols.pop(all_cols.index('Pareto')))
408
  df_display = df_display[all_cols]
409
 
410
+ columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source']
411
  df_display = df_display.drop(columns=columns_to_drop, errors='ignore')
412
 
413
  header_rename_map = {
 
420
 
421
  # Prepare both complete and all entries versions
422
  # Complete entries have all 5 categories submitted
423
+ # The 'Categories Attempted' column is formatted as "X/5"
424
+ if 'Categories Attempted' in df_view_full.columns:
425
+ df_view_complete = df_view_full[df_view_full['Categories Attempted'] == '5/5'].copy()
426
  else:
427
  df_view_complete = df_view_full.copy()
428
 
429
  df_display_complete = prepare_df_for_display(df_view_complete)
430
  df_display_all = prepare_df_for_display(df_view_full)
431
 
432
+ # If no complete entries exist, show all entries by default
433
+ has_complete_entries = len(df_display_complete) > 0
434
+
435
  scatter_plot = plots_dict.get('scatter_plot', go.Figure())
436
 
437
+ # Now get headers from the renamed dataframe (use all entries to ensure headers are present)
438
+ df_headers = df_display_all.columns.tolist()
439
  df_datatypes = []
440
  for col in df_headers:
441
  if col == "Logs" or "Cost" in col or "Score" in col:
 
473
  num_total = len(df_display_all)
474
  num_incomplete = num_total - num_complete
475
 
476
+ # If there are complete entries, show toggle. If not, show all entries.
477
+ if has_complete_entries:
478
+ show_incomplete_checkbox = gr.Checkbox(
479
+ label=f"Show incomplete entries ({num_incomplete} entries with fewer than 5 categories)",
480
+ value=False,
481
+ elem_id="show-incomplete-toggle"
482
+ )
483
+
484
+ # Start with complete entries only (default)
485
+ dataframe_component = gr.DataFrame(
486
+ headers=df_headers,
487
+ value=df_display_complete,
488
+ datatype=df_datatypes,
489
+ interactive=False,
490
+ wrap=True,
491
+ column_widths=final_column_widths,
492
+ elem_classes=["wrap-header-df"],
493
+ show_search="search",
494
+ elem_id="main-leaderboard"
495
+ )
496
+
497
+ # Update function for the toggle
498
+ def update_table(show_incomplete):
499
+ if show_incomplete:
500
+ return df_display_all
501
+ else:
502
+ return df_display_complete
503
+
504
+ show_incomplete_checkbox.change(
505
+ fn=update_table,
506
+ inputs=[show_incomplete_checkbox],
507
+ outputs=[dataframe_component]
508
+ )
509
+ else:
510
+ # No complete entries - show all entries and a note
511
+ gr.Markdown(f"*No entries with all 5 categories completed yet. Showing all {num_total} entries.*")
512
+
513
+ dataframe_component = gr.DataFrame(
514
+ headers=df_headers,
515
+ value=df_display_all,
516
+ datatype=df_datatypes,
517
+ interactive=False,
518
+ wrap=True,
519
+ column_widths=final_column_widths,
520
+ elem_classes=["wrap-header-df"],
521
+ show_search="search",
522
+ elem_id="main-leaderboard"
523
+ )
524
 
525
  legend_markdown = create_legend_markdown(category_name)
526
  gr.HTML(value=legend_markdown, elem_id="legend-markdown")