Spaces:

OpenHands
/

openhands-index

Running

openhands openhands committed on Jan 16

Commit

4ab5f97

1 Parent(s): 5998027

fix: Column naming and incomplete entries toggle

Fixes:
1. Change 'Overall Score' to 'Average Score' in all places (including the
view() method, which was constructing 'Overall Score' directly)
2. Change 'Overall Cost' to 'Total Cost' - the Overall view now uses the
'Total Cost' column as its primary cost column
3. Fix incomplete entries toggle:
- Check 'Categories Attempted' column (formatted as 'X/5')
- Filter on '5/5' for complete entries
- If no complete entries exist, show all entries with explanatory note
- Keep 'Categories Attempted' column visible in the table

Co-authored-by: openhands <openhands@all-hands.dev>

Files changed (2) hide show

leaderboard_transformer.py +11 -4
ui_components.py +57 -36

leaderboard_transformer.py CHANGED Viewed

@@ -244,7 +244,8 @@ class DataTransformer:
         # --- 1. Determine Primary and Group Metrics Based on the Tag ---
         if tag is None or tag == "Overall":
-            primary_metric = "Overall"
             group_metrics = list(self.tag_map.keys())
         else:
             primary_metric = tag
@@ -264,7 +265,13 @@ class DataTransformer:
         new_cols = ["Openness"]
         ending_cols = ["Date", "Logs"]
-        metrics_to_display = [primary_score_col, f"{primary_metric} Cost"]
         for item in group_metrics:
             metrics_to_display.append(f"{item} Score")
             metrics_to_display.append(f"{item} Cost")
@@ -280,7 +287,7 @@ class DataTransformer:
         cols = len(final_cols_ordered)
         # Calculated and add "Categories Attempted" column
-        if primary_metric == "Overall":
             def calculate_attempted(row):
                 main_categories = ['Bug Fixing', 'Frontend Development', 'App Creation', 'Test Generation', 'Information Gathering']
                 count = 0
@@ -307,7 +314,7 @@ class DataTransformer:
         # --- 4. Generate the Scatter Plot for the Primary Metric ---
         plots: dict[str, go.Figure] = {}
         if use_plotly:
-            primary_cost_col = f"{primary_metric} Cost"
             # Check if the primary score and cost columns exist in the FINAL view
             if primary_score_col in df_view.columns and primary_cost_col in df_view.columns:
                 fig = _plot_scatter_plotly(

         # --- 1. Determine Primary and Group Metrics Based on the Tag ---
         if tag is None or tag == "Overall":
+            # Use "Average" for the primary metric display name
+            primary_metric = "Average"
             group_metrics = list(self.tag_map.keys())
         else:
             primary_metric = tag
         new_cols = ["Openness"]
         ending_cols = ["Date", "Logs"]
+        # For Overall view, use "Total Cost" instead of "Average Cost"
+        if tag is None or tag == "Overall":
+            primary_cost_col = "Total Cost"
+        else:
+            primary_cost_col = f"{primary_metric} Cost"
+        metrics_to_display = [primary_score_col, primary_cost_col]
         for item in group_metrics:
             metrics_to_display.append(f"{item} Score")
             metrics_to_display.append(f"{item} Cost")
         cols = len(final_cols_ordered)
         # Calculated and add "Categories Attempted" column
+        if tag is None or tag == "Overall":
             def calculate_attempted(row):
                 main_categories = ['Bug Fixing', 'Frontend Development', 'App Creation', 'Test Generation', 'Information Gathering']
                 count = 0
         # --- 4. Generate the Scatter Plot for the Primary Metric ---
         plots: dict[str, go.Figure] = {}
         if use_plotly:
+            # primary_cost_col is already set above (Total Cost for Overall, or {metric} Cost otherwise)
             # Check if the primary score and cost columns exist in the FINAL view
             if primary_score_col in df_view.columns and primary_cost_col in df_view.columns:
                 fig = _plot_scatter_plotly(

ui_components.py CHANGED Viewed

@@ -407,7 +407,7 @@ def create_leaderboard_display(
         all_cols.insert(0, all_cols.pop(all_cols.index('Pareto')))
         df_display = df_display[all_cols]
-        columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source', 'Categories Completed']
         df_display = df_display.drop(columns=columns_to_drop, errors='ignore')
         header_rename_map = {
@@ -420,18 +420,22 @@ def create_leaderboard_display(
     # Prepare both complete and all entries versions
     # Complete entries have all 5 categories submitted
-    if 'Categories Completed' in df_view_full.columns:
-        df_view_complete = df_view_full[df_view_full['Categories Completed'] == 5].copy()
     else:
         df_view_complete = df_view_full.copy()
     df_display_complete = prepare_df_for_display(df_view_complete)
     df_display_all = prepare_df_for_display(df_view_full)
     scatter_plot = plots_dict.get('scatter_plot', go.Figure())
-    # Now get headers from the renamed dataframe
-    df_headers = df_display_complete.columns.tolist()
     df_datatypes = []
     for col in df_headers:
         if col == "Logs" or "Cost" in col or "Score" in col:
@@ -469,37 +473,54 @@ def create_leaderboard_display(
         num_total = len(df_display_all)
         num_incomplete = num_total - num_complete
-        show_incomplete_checkbox = gr.Checkbox(
-            label=f"Show incomplete entries ({num_incomplete} entries with fewer than 5 categories)",
-            value=False,
-            elem_id="show-incomplete-toggle"
-        )
-        # Start with complete entries only (default)
-        dataframe_component = gr.DataFrame(
-            headers=df_headers,
-            value=df_display_complete,
-            datatype=df_datatypes,
-            interactive=False,
-            wrap=True,
-            column_widths=final_column_widths,
-            elem_classes=["wrap-header-df"],
-            show_search="search",
-            elem_id="main-leaderboard"
-        )
-        # Update function for the toggle
-        def update_table(show_incomplete):
-            if show_incomplete:
-                return df_display_all
-            else:
-                return df_display_complete
-        show_incomplete_checkbox.change(
-            fn=update_table,
-            inputs=[show_incomplete_checkbox],
-            outputs=[dataframe_component]
-        )
         legend_markdown = create_legend_markdown(category_name)
         gr.HTML(value=legend_markdown, elem_id="legend-markdown")

         all_cols.insert(0, all_cols.pop(all_cols.index('Pareto')))
         df_display = df_display[all_cols]
+        columns_to_drop = ['id', 'Openness', 'Agent Tooling', 'Source']
         df_display = df_display.drop(columns=columns_to_drop, errors='ignore')
         header_rename_map = {
     # Prepare both complete and all entries versions
     # Complete entries have all 5 categories submitted
+    # The 'Categories Attempted' column is formatted as "X/5"
+    if 'Categories Attempted' in df_view_full.columns:
+        df_view_complete = df_view_full[df_view_full['Categories Attempted'] == '5/5'].copy()
     else:
         df_view_complete = df_view_full.copy()
     df_display_complete = prepare_df_for_display(df_view_complete)
     df_display_all = prepare_df_for_display(df_view_full)
+    # If no complete entries exist, show all entries by default
+    has_complete_entries = len(df_display_complete) > 0
     scatter_plot = plots_dict.get('scatter_plot', go.Figure())
+    # Now get headers from the renamed dataframe (use all entries to ensure headers are present)
+    df_headers = df_display_all.columns.tolist()
     df_datatypes = []
     for col in df_headers:
         if col == "Logs" or "Cost" in col or "Score" in col:
         num_total = len(df_display_all)
         num_incomplete = num_total - num_complete
+        # If there are complete entries, show toggle. If not, show all entries.
+        if has_complete_entries:
+            show_incomplete_checkbox = gr.Checkbox(
+                label=f"Show incomplete entries ({num_incomplete} entries with fewer than 5 categories)",
+                value=False,
+                elem_id="show-incomplete-toggle"
+            )
+            # Start with complete entries only (default)
+            dataframe_component = gr.DataFrame(
+                headers=df_headers,
+                value=df_display_complete,
+                datatype=df_datatypes,
+                interactive=False,
+                wrap=True,
+                column_widths=final_column_widths,
+                elem_classes=["wrap-header-df"],
+                show_search="search",
+                elem_id="main-leaderboard"
+            )
+            # Update function for the toggle
+            def update_table(show_incomplete):
+                if show_incomplete:
+                    return df_display_all
+                else:
+                    return df_display_complete
+            show_incomplete_checkbox.change(
+                fn=update_table,
+                inputs=[show_incomplete_checkbox],
+                outputs=[dataframe_component]
+            )
+        else:
+            # No complete entries - show all entries and a note
+            gr.Markdown(f"*No entries with all 5 categories completed yet. Showing all {num_total} entries.*")
+            dataframe_component = gr.DataFrame(
+                headers=df_headers,
+                value=df_display_all,
+                datatype=df_datatypes,
+                interactive=False,
+                wrap=True,
+                column_widths=final_column_widths,
+                elem_classes=["wrap-header-df"],
+                show_search="search",
+                elem_id="main-leaderboard"
+            )
         legend_markdown = create_legend_markdown(category_name)
         gr.HTML(value=legend_markdown, elem_id="legend-markdown")