Debug Agent committed on
Commit
f00a617
·
1 Parent(s): d7034b8

Show harness in scatter-plot hover on Alternative Agents page

Browse files

The Alternative Agents scatter plot labels each point with the Language
Model's company logo but nothing else, so two points for the same LLM
run under different harnesses (e.g. claude-sonnet-4-5 under Claude Code
vs OpenHands Sub-agents) are visually indistinguishable — the hover
tooltip shows only "{lm_name} (SDK {version})".

Add a "Harness: {agent}" line to the tooltip when the row has an Agent
value. The canonical OpenHands pages drop the Agent column in
DataTransformer.view() (has_mixed_agents check), so the new line only
shows up on the Alternative Agents page where it carries signal — no
visual change on Home or the per-category subpages.

Files changed (1) hide show
  1. leaderboard_transformer.py +17 -4
leaderboard_transformer.py CHANGED
@@ -1033,13 +1033,18 @@ def _plot_scatter_plotly(
1033
  """
1034
  Builds the complete HTML string for the plot's hover tooltip.
1035
  Format: {lm_name} (SDK {version})
 
 
 
 
 
1036
  Average Score: {score}
1037
  Average Cost/Runtime: {value}
1038
  Openness: {openness}
1039
  """
1040
  h_pad = " "
1041
  parts = ["<br>"]
1042
-
1043
  # Get and clean the language model name
1044
  llm_base_value = row.get('Language Model', '')
1045
  llm_base_value = clean_llm_base_list(llm_base_value)
@@ -1047,13 +1052,21 @@ def _plot_scatter_plotly(
1047
  lm_name = llm_base_value[0]
1048
  else:
1049
  lm_name = str(llm_base_value) if llm_base_value else 'Unknown'
1050
-
1051
  # Get SDK version
1052
  sdk_version = row.get('SDK Version', row.get(agent_col, 'Unknown'))
1053
-
1054
  # Title line: {lm_name} (SDK {version})
1055
  parts.append(f"{h_pad}<b>{lm_name}</b> (SDK {sdk_version}){h_pad}<br>")
1056
-
 
 
 
 
 
 
 
 
1057
  # Average Score
1058
  parts.append(f"{h_pad}Average Score: <b>{row[y_col]:.3f}</b>{h_pad}<br>")
1059
 
 
1033
  """
1034
  Builds the complete HTML string for the plot's hover tooltip.
1035
  Format: {lm_name} (SDK {version})
1036
+ Harness: {agent} (only when the row carries an Agent —
1037
+ Alternative Agents page only; the
1038
+ canonical OpenHands pages drop the
1039
+ Agent column in view() so this line
1040
+ is skipped there)
1041
  Average Score: {score}
1042
  Average Cost/Runtime: {value}
1043
  Openness: {openness}
1044
  """
1045
  h_pad = " "
1046
  parts = ["<br>"]
1047
+
1048
  # Get and clean the language model name
1049
  llm_base_value = row.get('Language Model', '')
1050
  llm_base_value = clean_llm_base_list(llm_base_value)
 
1052
  lm_name = llm_base_value[0]
1053
  else:
1054
  lm_name = str(llm_base_value) if llm_base_value else 'Unknown'
1055
+
1056
  # Get SDK version
1057
  sdk_version = row.get('SDK Version', row.get(agent_col, 'Unknown'))
1058
+
1059
  # Title line: {lm_name} (SDK {version})
1060
  parts.append(f"{h_pad}<b>{lm_name}</b> (SDK {sdk_version}){h_pad}<br>")
1061
+
1062
+ # Harness line — only on pages where the Agent column is present
1063
+ # (Alternative Agents). Without this, two rows for the same LM run
1064
+ # under different harnesses (e.g. Claude Code vs OpenHands Sub-agents
1065
+ # on claude-sonnet-4-5) are indistinguishable on hover.
1066
+ agent_value = row.get('Agent')
1067
+ if agent_value is not None and pd.notna(agent_value) and str(agent_value).strip():
1068
+ parts.append(f"{h_pad}Harness: <b>{agent_value}</b>{h_pad}<br>")
1069
+
1070
  # Average Score
1071
  parts.append(f"{h_pad}Average Score: <b>{row[y_col]:.3f}</b>{h_pad}<br>")
1072