Fix: scatter plot zoom not working and the 'Show all labels' toggle having no effect

#29
leaderboard_transformer.py CHANGED
@@ -971,7 +971,7 @@ def _plot_scatter_plotly(
971
  name: Optional[str] = None,
972
  plot_type: str = 'cost', # 'cost' or 'runtime'
973
  mark_by: Optional[str] = None, # 'Company', 'Openness', or 'Country'
974
- show_all_labels: bool = False # Show labels for all points vs only Pareto frontier
975
  ) -> go.Figure:
976
  from constants import MARK_BY_DEFAULT
977
  if mark_by is None:
@@ -1268,107 +1268,93 @@ def _plot_scatter_plotly(
1268
  domain_x = max(0, min(1, domain_x))
1269
  domain_y = max(0, min(1, domain_y))
1270
 
1271
- # Convert to data coordinates
1272
- # For log scale x: use log10(x) to match the axis type
1273
- x_log = np.log10(x_val) if x_val > 0 else x_min_log
1274
-
1275
  if harness_uri is not None:
1276
- # Composite: stack model on top, harness on bottom
1277
- # Use data coordinates (x, y) so logos zoom/pan together with labels
1278
- y_offset = 0.8 # Offset above the data point (in score units)
 
 
1279
  layout_images.append(dict(
1280
  source=model_logo_uri,
1281
- xref="x", yref="y",
1282
- x=x_log, y=y_val + y_offset,
1283
- sizex=STACKED_SIZE_X * (x_max_log - x_min_log),
1284
- sizey=STACKED_SIZE_Y * (y_max - y_min),
1285
  xanchor="center", yanchor="middle",
1286
  layer="above",
1287
  ))
1288
  layout_images.append(dict(
1289
  source=harness_uri,
1290
- xref="x", yref="y",
1291
- x=x_log, y=y_val - y_offset,
1292
- sizex=STACKED_SIZE_X * (x_max_log - x_min_log),
1293
- sizey=STACKED_SIZE_Y * (y_max - y_min),
1294
  xanchor="center", yanchor="middle",
1295
  layer="above",
1296
  ))
1297
  else:
1298
- # Single marker - use data coordinates so logo zooms/pans with labels
 
 
 
1299
  layout_images.append(dict(
1300
  source=model_logo_uri,
1301
- xref="x", yref="y",
1302
- x=x_log, y=y_val,
1303
- sizex=SINGLE_SIZE_X * (x_max_log - x_min_log),
1304
- sizey=SINGLE_SIZE_Y * (y_max - y_min),
1305
  xanchor="center", yanchor="middle",
1306
  layer="above",
1307
  ))
1308
 
1309
- # --- Section 7: Add Model Name Labels ---
1310
- # Show labels for all points if show_all_labels is True, otherwise just Pareto frontier
1311
- if show_all_labels:
1312
- # Label all data points
1313
- labels_data = []
1314
- for _, row in data_plot.iterrows():
1315
- x_val = row[x_col_to_use]
1316
- y_val = row[y_col_to_use]
1317
-
1318
- model_name = row.get('Language Model', '')
1319
- if isinstance(model_name, list):
1320
- model_name = model_name[0] if model_name else ''
1321
- model_name = str(model_name).split('/')[-1]
1322
- if len(model_name) > 25:
1323
- model_name = model_name[:22] + '...'
1324
-
1325
- labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})
1326
- elif frontier_rows:
1327
- # Label only Pareto frontier points
1328
- labels_data = []
1329
 
1330
  for row in frontier_rows:
1331
  x_val = row[x_col_to_use]
1332
  y_val = row[y_col_to_use]
1333
 
 
1334
  model_name = row.get('Language Model', '')
1335
  if isinstance(model_name, list):
1336
  model_name = model_name[0] if model_name else ''
 
1337
  model_name = str(model_name).split('/')[-1]
 
1338
  if len(model_name) > 25:
1339
  model_name = model_name[:22] + '...'
1340
 
1341
- labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})
1342
- else:
1343
- labels_data = []
1344
-
1345
- # Add annotations for each label
1346
- # For log scale x-axis, annotations need log10(x) coordinates (Plotly issue #2580)
1347
- for item in labels_data:
1348
- x_val = item['x']
1349
- y_val = item['y']
1350
- label = item['label']
1351
 
1352
- # Transform x to log10 for annotation positioning on log scale
1353
- if x_val > 0:
1354
- x_log = np.log10(x_val)
1355
- else:
1356
- x_log = x_min_log
1357
-
1358
- fig.add_annotation(
1359
- x=x_log,
1360
- y=y_val,
1361
- text=label,
1362
- showarrow=False,
1363
- yshift=25, # Move label higher above the icon
1364
- font=dict(
1365
- size=10,
1366
- color='#0D0D0F', # neutral-950
1367
- family=FONT_FAMILY_SHORT
1368
- ),
1369
- xanchor='center',
1370
- yanchor='bottom'
1371
- )
 
 
 
 
 
 
 
1372
 
1373
  # --- Section 8: Configure Layout ---
1374
  # Use the same axis ranges as calculated for domain coordinates
@@ -1487,38 +1473,47 @@ def format_score_column(df: pd.DataFrame, score_col_name: str) -> pd.DataFrame:
1487
  return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
1488
 
1489
 
 
 
 
 
 
 
 
 
 
1490
  def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
1491
  """
1492
  Applies custom formatting to a runtime column based on its corresponding score column.
1493
  - If runtime is not null, formats as time with 's' suffix.
1494
  - If runtime is null but score is not, it becomes "Missing".
1495
  - If both runtime and score are null, it becomes "Not Submitted".
 
1496
  Args:
1497
  df: The DataFrame to modify.
1498
  runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").
1499
  Returns:
1500
  The DataFrame with the formatted runtime column.
1501
  """
1502
- # Find the corresponding score column by replacing "Runtime" with "Score"
1503
  score_col_name = runtime_col_name.replace("Runtime", "Score")
1504
 
1505
- # Ensure the score column actually exists to avoid errors
1506
  if score_col_name not in df.columns:
1507
- return df # Return the DataFrame unmodified if there's no matching score
1508
 
1509
  def apply_formatting_logic(row):
1510
  runtime_value = row[runtime_col_name]
1511
  score_value = row[score_col_name]
1512
  status_color = "#ec4899"
 
 
1513
 
1514
  if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
1515
- return f"{runtime_value:.0f}s"
1516
  elif pd.notna(score_value):
1517
- return f'<span style="color: {status_color};">Missing</span>' # Score exists, but runtime is missing
1518
  else:
1519
- return f'<span style="color: {status_color};">Not Submitted</span>' # Neither score nor runtime exists
1520
 
1521
- # Apply the logic to the specified runtime column and update the DataFrame
1522
  df[runtime_col_name] = df.apply(apply_formatting_logic, axis=1)
1523
 
1524
  return df
 
971
  name: Optional[str] = None,
972
  plot_type: str = 'cost', # 'cost' or 'runtime'
973
  mark_by: Optional[str] = None, # 'Company', 'Openness', or 'Country'
974
+ show_all_labels: bool = False
975
  ) -> go.Figure:
976
  from constants import MARK_BY_DEFAULT
977
  if mark_by is None:
 
1268
  domain_x = max(0, min(1, domain_x))
1269
  domain_y = max(0, min(1, domain_y))
1270
 
 
 
 
 
1271
  if harness_uri is not None:
1272
+ # Composite: stack model on top, harness on bottom, clamping
1273
+ # each half to the plot area so markers near the edges don't
1274
+ # drift off-canvas.
1275
+ model_y = min(1, domain_y + STACKED_Y_OFFSET)
1276
+ harness_y = max(0, domain_y - STACKED_Y_OFFSET)
1277
  layout_images.append(dict(
1278
  source=model_logo_uri,
1279
+ xref="x domain", yref="y domain",
1280
+ x=domain_x, y=model_y,
1281
+ sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
 
1282
  xanchor="center", yanchor="middle",
1283
  layer="above",
1284
  ))
1285
  layout_images.append(dict(
1286
  source=harness_uri,
1287
+ xref="x domain", yref="y domain",
1288
+ x=domain_x, y=harness_y,
1289
+ sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
 
1290
  xanchor="center", yanchor="middle",
1291
  layer="above",
1292
  ))
1293
  else:
1294
+ # Single marker (canonical OpenHands pages, or Alternative Agents
1295
+ # rows with an unknown harness name — the latter shouldn't happen
1296
+ # in practice since HARNESS_LOGO_PATHS covers every agent_name the
1297
+ # push-to-index script emits).
1298
  layout_images.append(dict(
1299
  source=model_logo_uri,
1300
+ xref="x domain", yref="y domain",
1301
+ x=domain_x, y=domain_y,
1302
+ sizex=SINGLE_SIZE_X, sizey=SINGLE_SIZE_Y,
 
1303
  xanchor="center", yanchor="middle",
1304
  layer="above",
1305
  ))
1306
 
1307
+ # --- Section 7: Add Model Name Labels to Frontier Points ---
1308
+ if frontier_rows:
1309
+ frontier_labels_data = []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1310
 
1311
  for row in frontier_rows:
1312
  x_val = row[x_col_to_use]
1313
  y_val = row[y_col_to_use]
1314
 
1315
+ # Get the model name for the label
1316
  model_name = row.get('Language Model', '')
1317
  if isinstance(model_name, list):
1318
  model_name = model_name[0] if model_name else ''
1319
+ # Clean the model name (remove path prefixes)
1320
  model_name = str(model_name).split('/')[-1]
1321
+ # Truncate long names
1322
  if len(model_name) > 25:
1323
  model_name = model_name[:22] + '...'
1324
 
1325
+ frontier_labels_data.append({
1326
+ 'x': x_val,
1327
+ 'y': y_val,
1328
+ 'label': model_name
1329
+ })
 
 
 
 
 
1330
 
1331
+ # Add annotations for each frontier label
1332
+ # For log scale x-axis, annotations need log10(x) coordinates (Plotly issue #2580)
1333
+ for item in frontier_labels_data:
1334
+ x_val = item['x']
1335
+ y_val = item['y']
1336
+ label = item['label']
1337
+
1338
+ # Transform x to log10 for annotation positioning on log scale
1339
+ if x_val > 0:
1340
+ x_log = np.log10(x_val)
1341
+ else:
1342
+ x_log = x_min_log
1343
+
1344
+ fig.add_annotation(
1345
+ x=x_log,
1346
+ y=y_val,
1347
+ text=label,
1348
+ showarrow=False,
1349
+ yshift=25, # Move label higher above the icon
1350
+ font=dict(
1351
+ size=10,
1352
+ color='#0D0D0F', # neutral-950
1353
+ family=FONT_FAMILY_SHORT
1354
+ ),
1355
+ xanchor='center',
1356
+ yanchor='bottom'
1357
+ )
1358
 
1359
  # --- Section 8: Configure Layout ---
1360
  # Use the same axis ranges as calculated for domain coordinates
 
1473
  return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
1474
 
1475
 
1476
+ def _hidden_runtime_sort_key(runtime_value: float | int | None, score_value: float | int | None) -> str:
1477
+ """Build a hidden prefix so Gradio's string-based runtime sorting behaves numerically."""
1478
+ if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
1479
+ return f"{float(runtime_value):020.6f}"
1480
+ if pd.notna(score_value):
1481
+ return "99999999999999999998"
1482
+ return "99999999999999999999"
1483
+
1484
+
1485
  def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
1486
  """
1487
  Applies custom formatting to a runtime column based on its corresponding score column.
1488
  - If runtime is not null, formats as time with 's' suffix.
1489
  - If runtime is null but score is not, it becomes "Missing".
1490
  - If both runtime and score are null, it becomes "Not Submitted".
1491
+ - Adds a hidden, zero-padded numeric prefix so Gradio sorts the column numerically.
1492
  Args:
1493
  df: The DataFrame to modify.
1494
  runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").
1495
  Returns:
1496
  The DataFrame with the formatted runtime column.
1497
  """
 
1498
  score_col_name = runtime_col_name.replace("Runtime", "Score")
1499
 
 
1500
  if score_col_name not in df.columns:
1501
+ return df
1502
 
1503
  def apply_formatting_logic(row):
1504
  runtime_value = row[runtime_col_name]
1505
  score_value = row[score_col_name]
1506
  status_color = "#ec4899"
1507
+ sort_key = _hidden_runtime_sort_key(runtime_value, score_value)
1508
+ hidden_sort_prefix = f'<span style="display:none">{sort_key}</span>'
1509
 
1510
  if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
1511
+ return f"{hidden_sort_prefix}{runtime_value:.0f}s"
1512
  elif pd.notna(score_value):
1513
+ return f'{hidden_sort_prefix}<span style="color: {status_color};">Missing</span>'
1514
  else:
1515
+ return f'{hidden_sort_prefix}<span style="color: {status_color};">Not Submitted</span>'
1516
 
 
1517
  df[runtime_col_name] = df.apply(apply_formatting_logic, axis=1)
1518
 
1519
  return df
simple_data_loader.py CHANGED
@@ -245,6 +245,7 @@ class SimpleLeaderboardViewer:
245
  'acp-claude': 'Claude Code',
246
  'acp-codex': 'Codex',
247
  'acp-gemini': 'Gemini CLI',
 
248
  }
249
  alt_dir = self.config_path / "alternative_agents"
250
  if alt_dir.exists():
@@ -252,8 +253,6 @@ class SimpleLeaderboardViewer:
252
  if not type_dir.is_dir():
253
  continue
254
  default_name = agent_type_default_name.get(type_dir.name)
255
- if default_name is None:
256
- continue # skip unlisted agent types (e.g. openhands_subagents)
257
  for agent_dir in type_dir.iterdir():
258
  if not agent_dir.is_dir():
259
  continue
 
245
  'acp-claude': 'Claude Code',
246
  'acp-codex': 'Codex',
247
  'acp-gemini': 'Gemini CLI',
248
+ 'openhands_subagents': 'OpenHands Sub-agents',
249
  }
250
  alt_dir = self.config_path / "alternative_agents"
251
  if alt_dir.exists():
 
253
  if not type_dir.is_dir():
254
  continue
255
  default_name = agent_type_default_name.get(type_dir.name)
 
 
256
  for agent_dir in type_dir.iterdir():
257
  if not agent_dir.is_dir():
258
  continue
ui_components.py CHANGED
@@ -954,7 +954,7 @@ def create_leaderboard_display(
954
  if not new_df.empty:
955
  new_transformer = DataTransformer(new_df, new_tag_map)
956
  new_df_view_full, _ = new_transformer.view(tag=category_name, use_plotly=True)
957
-
958
  # Prepare both complete and all entries versions
959
  if 'Categories Attempted' in new_df_view_full.columns:
960
  new_df_view_complete = new_df_view_full[new_df_view_full['Categories Attempted'] == '5/5'].copy()
@@ -1014,22 +1014,16 @@ def create_leaderboard_display(
1014
 
1015
  # Connect the timer to the refresh function
1016
  if show_incomplete_checkbox is not None:
 
1017
  if show_open_only_checkbox is not None:
1018
- refresh_timer.tick(
1019
- fn=check_and_refresh_data,
1020
- inputs=[show_incomplete_checkbox, show_open_only_checkbox, mark_by_dropdown, show_all_labels_checkbox],
1021
- outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
1022
- )
1023
- else:
1024
- # No open/closed split in this dataset — gr.State can't fill the gap in a
1025
- # timer tick (no session context), so use a wrapper that fixes show_open_only=False.
1026
- def _timer_refresh_no_open(show_incomplete, mark_by, show_all_labels):
1027
- return check_and_refresh_data(show_incomplete, False, mark_by, show_all_labels)
1028
- refresh_timer.tick(
1029
- fn=_timer_refresh_no_open,
1030
- inputs=[show_incomplete_checkbox, mark_by_dropdown, show_all_labels_checkbox],
1031
- outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
1032
- )
1033
  else:
1034
  # If no incomplete checkbox, always show all data (but still filter by open if needed)
1035
  def check_and_refresh_all(show_open_only=False, mark_by=MARK_BY_DEFAULT, show_all_labels=False):
 
954
  if not new_df.empty:
955
  new_transformer = DataTransformer(new_df, new_tag_map)
956
  new_df_view_full, _ = new_transformer.view(tag=category_name, use_plotly=True)
957
+
958
  # Prepare both complete and all entries versions
959
  if 'Categories Attempted' in new_df_view_full.columns:
960
  new_df_view_complete = new_df_view_full[new_df_view_full['Categories Attempted'] == '5/5'].copy()
 
1014
 
1015
  # Connect the timer to the refresh function
1016
  if show_incomplete_checkbox is not None:
1017
+ timer_inputs = [show_incomplete_checkbox]
1018
  if show_open_only_checkbox is not None:
1019
+ timer_inputs.append(show_open_only_checkbox)
1020
+ timer_inputs.append(mark_by_dropdown) # Always include mark_by
1021
+ timer_inputs.append(show_all_labels_checkbox)
1022
+ refresh_timer.tick(
1023
+ fn=check_and_refresh_data,
1024
+ inputs=timer_inputs,
1025
+ outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
1026
+ )
 
 
 
 
 
 
 
1027
  else:
1028
  # If no incomplete checkbox, always show all data (but still filter by open if needed)
1029
  def check_and_refresh_all(show_open_only=False, mark_by=MARK_BY_DEFAULT, show_all_labels=False):