Spaces:
Running
Running
Debug Agent committed on
Commit ·
f00a617
1
Parent(s): d7034b8
Show harness in scatter-plot hover on Alternative Agents page
Browse files
The Alternative Agents scatter plot labels each point with the Language
Model's company logo but nothing else, so two points for the same LLM
run under different harnesses (e.g. claude-sonnet-4-5 under Claude Code
vs OpenHands Sub-agents) are visually indistinguishable — the hover
tooltip only showed "{lm_name} (SDK {version})".
Add a "Harness: {agent}" line to the tooltip when the row has an Agent
value. The canonical OpenHands pages drop the Agent column in
DataTransformer.view() (has_mixed_agents check), so the new line only
shows up on the Alternative Agents page where it carries signal — no
visual change on Home or the per-category subpages.
- leaderboard_transformer.py +17 -4
leaderboard_transformer.py
CHANGED
|
@@ -1033,13 +1033,18 @@ def _plot_scatter_plotly(
|
|
| 1033 |
"""
|
| 1034 |
Builds the complete HTML string for the plot's hover tooltip.
|
| 1035 |
Format: {lm_name} (SDK {version})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1036 |
Average Score: {score}
|
| 1037 |
Average Cost/Runtime: {value}
|
| 1038 |
Openness: {openness}
|
| 1039 |
"""
|
| 1040 |
h_pad = " "
|
| 1041 |
parts = ["<br>"]
|
| 1042 |
-
|
| 1043 |
# Get and clean the language model name
|
| 1044 |
llm_base_value = row.get('Language Model', '')
|
| 1045 |
llm_base_value = clean_llm_base_list(llm_base_value)
|
|
@@ -1047,13 +1052,21 @@ def _plot_scatter_plotly(
|
|
| 1047 |
lm_name = llm_base_value[0]
|
| 1048 |
else:
|
| 1049 |
lm_name = str(llm_base_value) if llm_base_value else 'Unknown'
|
| 1050 |
-
|
| 1051 |
# Get SDK version
|
| 1052 |
sdk_version = row.get('SDK Version', row.get(agent_col, 'Unknown'))
|
| 1053 |
-
|
| 1054 |
# Title line: {lm_name} (SDK {version})
|
| 1055 |
parts.append(f"{h_pad}<b>{lm_name}</b> (SDK {sdk_version}){h_pad}<br>")
|
| 1056 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1057 |
# Average Score
|
| 1058 |
parts.append(f"{h_pad}Average Score: <b>{row[y_col]:.3f}</b>{h_pad}<br>")
|
| 1059 |
|
|
|
|
| 1033 |
"""
|
| 1034 |
Builds the complete HTML string for the plot's hover tooltip.
|
| 1035 |
Format: {lm_name} (SDK {version})
|
| 1036 |
+
Harness: {agent} (only when the row carries an Agent —
|
| 1037 |
+
Alternative Agents page only; the
|
| 1038 |
+
canonical OpenHands pages drop the
|
| 1039 |
+
Agent column in view() so this line
|
| 1040 |
+
is skipped there)
|
| 1041 |
Average Score: {score}
|
| 1042 |
Average Cost/Runtime: {value}
|
| 1043 |
Openness: {openness}
|
| 1044 |
"""
|
| 1045 |
h_pad = " "
|
| 1046 |
parts = ["<br>"]
|
| 1047 |
+
|
| 1048 |
# Get and clean the language model name
|
| 1049 |
llm_base_value = row.get('Language Model', '')
|
| 1050 |
llm_base_value = clean_llm_base_list(llm_base_value)
|
|
|
|
| 1052 |
lm_name = llm_base_value[0]
|
| 1053 |
else:
|
| 1054 |
lm_name = str(llm_base_value) if llm_base_value else 'Unknown'
|
| 1055 |
+
|
| 1056 |
# Get SDK version
|
| 1057 |
sdk_version = row.get('SDK Version', row.get(agent_col, 'Unknown'))
|
| 1058 |
+
|
| 1059 |
# Title line: {lm_name} (SDK {version})
|
| 1060 |
parts.append(f"{h_pad}<b>{lm_name}</b> (SDK {sdk_version}){h_pad}<br>")
|
| 1061 |
+
|
| 1062 |
+
# Harness line — only on pages where the Agent column is present
|
| 1063 |
+
# (Alternative Agents). Without this, two rows for the same LM run
|
| 1064 |
+
# under different harnesses (e.g. Claude Code vs OpenHands Sub-agents
|
| 1065 |
+
# on claude-sonnet-4-5) are indistinguishable on hover.
|
| 1066 |
+
agent_value = row.get('Agent')
|
| 1067 |
+
if agent_value is not None and pd.notna(agent_value) and str(agent_value).strip():
|
| 1068 |
+
parts.append(f"{h_pad}Harness: <b>{agent_value}</b>{h_pad}<br>")
|
| 1069 |
+
|
| 1070 |
# Average Score
|
| 1071 |
parts.append(f"{h_pad}Average Score: <b>{row[y_col]:.3f}</b>{h_pad}<br>")
|
| 1072 |
|