Spaces:
Running
Running
openhands
openhands
committed on
Commit
·
4ab5f97
1
Parent(s):
5998027
fix: Column naming and incomplete entries toggle
Browse files
Fixes:
1. Change 'Overall Score' to 'Average Score' in all places (including
view() method which was constructing 'Overall Score' directly)
2. Change 'Overall Cost' to 'Total Cost' - properly route to Total Cost
column for Overall view
3. Fix incomplete entries toggle:
- Check 'Categories Attempted' column (formatted as 'X/5')
- Filter on '5/5' for complete entries
- If no complete entries exist, show all entries with explanatory note
- Keep 'Categories Attempted' column visible in the table
Co-authored-by: openhands <openhands@all-hands.dev>
- leaderboard_transformer.py +11 -4
- ui_components.py +57 -36
leaderboard_transformer.py
CHANGED
|
@@ -244,7 +244,8 @@ class DataTransformer:
|
|
| 244 |
|
| 245 |
# --- 1. Determine Primary and Group Metrics Based on the Tag ---
|
| 246 |
if tag is None or tag == "Overall":
|
| 247 |
-
|
|
|
|
| 248 |
group_metrics = list(self.tag_map.keys())
|
| 249 |
else:
|
| 250 |
primary_metric = tag
|
|
@@ -264,7 +265,13 @@ class DataTransformer:
|
|
| 264 |
new_cols = ["Openness"]
|
| 265 |
ending_cols = ["Date", "Logs"]
|
| 266 |
|
| 267 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 268 |
for item in group_metrics:
|
| 269 |
metrics_to_display.append(f"{item} Score")
|
| 270 |
metrics_to_display.append(f"{item} Cost")
|
|
@@ -280,7 +287,7 @@ class DataTransformer:
|
|
| 280 |
cols = len(final_cols_ordered)
|
| 281 |
|
| 282 |
# Calculated and add "Categories Attempted" column
|
| 283 |
-
if
|
| 284 |
def calculate_attempted(row):
|
| 285 |
main_categories = ['Bug Fixing', 'Frontend Development', 'App Creation', 'Test Generation', 'Information Gathering']
|
| 286 |
count = 0
|
|
@@ -307,7 +314,7 @@ class DataTransformer:
|
|
| 307 |
# --- 4. Generate the Scatter Plot for the Primary Metric ---
|
| 308 |
plots: dict[str, go.Figure] = {}
|
| 309 |
if use_plotly:
|
| 310 |
-
primary_cost_col
|
| 311 |
# Check if the primary score and cost columns exist in the FINAL view
|
| 312 |
if primary_score_col in df_view.columns and primary_cost_col in df_view.columns:
|
| 313 |
fig = _plot_scatter_plotly(
|
|
|
|
| 244 |
|
| 245 |
# --- 1. Determine Primary and Group Metrics Based on the Tag ---
|
| 246 |
if tag is None or tag == "Overall":
|
| 247 |
+
# Use "Average" for the primary metric display name
|
| 248 |
+
primary_metric = "Average"
|
| 249 |
group_metrics = list(self.tag_map.keys())
|
| 250 |
else:
|
| 251 |
primary_metric = tag
|
|
|
|
| 265 |
new_cols = ["Openness"]
|
| 266 |
ending_cols = ["Date", "Logs"]
|
| 267 |
|
| 268 |
+
# For Overall view, use "Total Cost" instead of "Average Cost"
|
| 269 |
+
if tag is None or tag == "Overall":
|
| 270 |
+
primary_cost_col = "Total Cost"
|
| 271 |
+
else:
|
| 272 |
+
primary_cost_col = f"{primary_metric} Cost"
|
| 273 |
+
|
| 274 |
+
metrics_to_display = [primary_score_col, primary_cost_col]
|
| 275 |
for item in group_metrics:
|
| 276 |
metrics_to_display.append(f"{item} Score")
|
| 277 |
metrics_to_display.append(f"{item} Cost")
|
|
|
|
| 287 |
cols = len(final_cols_ordered)
|
| 288 |
|
| 289 |
# Calculated and add "Categories Attempted" column
|
| 290 |
+
if tag is None or tag == "Overall":
|
| 291 |
def calculate_attempted(row):
|
| 292 |
main_categories = ['Bug Fixing', 'Frontend Development', 'App Creation', 'Test Generation', 'Information Gathering']
|
| 293 |
count = 0
|
|
|
|
| 314 |
# --- 4. Generate the Scatter Plot for the Primary Metric ---
|
| 315 |
plots: dict[str, go.Figure] = {}
|
| 316 |
if use_plotly:
|
| 317 |
+
# primary_cost_col is already set above (Total Cost for Overall, or {metric} Cost otherwise)
|
| 318 |
# Check if the primary score and cost columns exist in the FINAL view
|
| 319 |
if primary_score_col in df_view.columns and primary_cost_col in df_view.columns:
|
| 320 |
fig = _plot_scatter_plotly(
|
ui_components.py
CHANGED
|
@@ -407,7 +407,7 @@ def create_leaderboard_display(
|
|
| 407 |
all_cols.insert(0, all_cols.pop(all_cols.index('Pareto')))
|
| 408 |
df_display = df_display[all_cols]
|
| 409 |
|
| 410 |
-
columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source'
|
| 411 |
df_display = df_display.drop(columns=columns_to_drop, errors='ignore')
|
| 412 |
|
| 413 |
header_rename_map = {
|
|
@@ -420,18 +420,22 @@ def create_leaderboard_display(
|
|
| 420 |
|
| 421 |
# Prepare both complete and all entries versions
|
| 422 |
# Complete entries have all 5 categories submitted
|
| 423 |
-
|
| 424 |
-
|
|
|
|
| 425 |
else:
|
| 426 |
df_view_complete = df_view_full.copy()
|
| 427 |
|
| 428 |
df_display_complete = prepare_df_for_display(df_view_complete)
|
| 429 |
df_display_all = prepare_df_for_display(df_view_full)
|
| 430 |
|
|
|
|
|
|
|
|
|
|
| 431 |
scatter_plot = plots_dict.get('scatter_plot', go.Figure())
|
| 432 |
|
| 433 |
-
# Now get headers from the renamed dataframe
|
| 434 |
-
df_headers =
|
| 435 |
df_datatypes = []
|
| 436 |
for col in df_headers:
|
| 437 |
if col == "Logs" or "Cost" in col or "Score" in col:
|
|
@@ -469,37 +473,54 @@ def create_leaderboard_display(
|
|
| 469 |
num_total = len(df_display_all)
|
| 470 |
num_incomplete = num_total - num_complete
|
| 471 |
|
| 472 |
-
|
| 473 |
-
|
| 474 |
-
|
| 475 |
-
|
| 476 |
-
|
| 477 |
-
|
| 478 |
-
|
| 479 |
-
|
| 480 |
-
|
| 481 |
-
|
| 482 |
-
|
| 483 |
-
|
| 484 |
-
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
-
|
| 491 |
-
|
| 492 |
-
|
| 493 |
-
|
| 494 |
-
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 503 |
|
| 504 |
legend_markdown = create_legend_markdown(category_name)
|
| 505 |
gr.HTML(value=legend_markdown, elem_id="legend-markdown")
|
|
|
|
| 407 |
all_cols.insert(0, all_cols.pop(all_cols.index('Pareto')))
|
| 408 |
df_display = df_display[all_cols]
|
| 409 |
|
| 410 |
+
columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source']
|
| 411 |
df_display = df_display.drop(columns=columns_to_drop, errors='ignore')
|
| 412 |
|
| 413 |
header_rename_map = {
|
|
|
|
| 420 |
|
| 421 |
# Prepare both complete and all entries versions
|
| 422 |
# Complete entries have all 5 categories submitted
|
| 423 |
+
# The 'Categories Attempted' column is formatted as "X/5"
|
| 424 |
+
if 'Categories Attempted' in df_view_full.columns:
|
| 425 |
+
df_view_complete = df_view_full[df_view_full['Categories Attempted'] == '5/5'].copy()
|
| 426 |
else:
|
| 427 |
df_view_complete = df_view_full.copy()
|
| 428 |
|
| 429 |
df_display_complete = prepare_df_for_display(df_view_complete)
|
| 430 |
df_display_all = prepare_df_for_display(df_view_full)
|
| 431 |
|
| 432 |
+
# If no complete entries exist, show all entries by default
|
| 433 |
+
has_complete_entries = len(df_display_complete) > 0
|
| 434 |
+
|
| 435 |
scatter_plot = plots_dict.get('scatter_plot', go.Figure())
|
| 436 |
|
| 437 |
+
# Now get headers from the renamed dataframe (use all entries to ensure headers are present)
|
| 438 |
+
df_headers = df_display_all.columns.tolist()
|
| 439 |
df_datatypes = []
|
| 440 |
for col in df_headers:
|
| 441 |
if col == "Logs" or "Cost" in col or "Score" in col:
|
|
|
|
| 473 |
num_total = len(df_display_all)
|
| 474 |
num_incomplete = num_total - num_complete
|
| 475 |
|
| 476 |
+
# If there are complete entries, show toggle. If not, show all entries.
|
| 477 |
+
if has_complete_entries:
|
| 478 |
+
show_incomplete_checkbox = gr.Checkbox(
|
| 479 |
+
label=f"Show incomplete entries ({num_incomplete} entries with fewer than 5 categories)",
|
| 480 |
+
value=False,
|
| 481 |
+
elem_id="show-incomplete-toggle"
|
| 482 |
+
)
|
| 483 |
+
|
| 484 |
+
# Start with complete entries only (default)
|
| 485 |
+
dataframe_component = gr.DataFrame(
|
| 486 |
+
headers=df_headers,
|
| 487 |
+
value=df_display_complete,
|
| 488 |
+
datatype=df_datatypes,
|
| 489 |
+
interactive=False,
|
| 490 |
+
wrap=True,
|
| 491 |
+
column_widths=final_column_widths,
|
| 492 |
+
elem_classes=["wrap-header-df"],
|
| 493 |
+
show_search="search",
|
| 494 |
+
elem_id="main-leaderboard"
|
| 495 |
+
)
|
| 496 |
+
|
| 497 |
+
# Update function for the toggle
|
| 498 |
+
def update_table(show_incomplete):
|
| 499 |
+
if show_incomplete:
|
| 500 |
+
return df_display_all
|
| 501 |
+
else:
|
| 502 |
+
return df_display_complete
|
| 503 |
+
|
| 504 |
+
show_incomplete_checkbox.change(
|
| 505 |
+
fn=update_table,
|
| 506 |
+
inputs=[show_incomplete_checkbox],
|
| 507 |
+
outputs=[dataframe_component]
|
| 508 |
+
)
|
| 509 |
+
else:
|
| 510 |
+
# No complete entries - show all entries and a note
|
| 511 |
+
gr.Markdown(f"*No entries with all 5 categories completed yet. Showing all {num_total} entries.*")
|
| 512 |
+
|
| 513 |
+
dataframe_component = gr.DataFrame(
|
| 514 |
+
headers=df_headers,
|
| 515 |
+
value=df_display_all,
|
| 516 |
+
datatype=df_datatypes,
|
| 517 |
+
interactive=False,
|
| 518 |
+
wrap=True,
|
| 519 |
+
column_widths=final_column_widths,
|
| 520 |
+
elem_classes=["wrap-header-df"],
|
| 521 |
+
show_search="search",
|
| 522 |
+
elem_id="main-leaderboard"
|
| 523 |
+
)
|
| 524 |
|
| 525 |
legend_markdown = create_legend_markdown(category_name)
|
| 526 |
gr.HTML(value=legend_markdown, elem_id="legend-markdown")
|