Debug Agent committed on
Commit
fb16d57
·
1 Parent(s): 6860223

Stack model + harness logos on Alternative Agents scatter

Browse files

On the Alternative Agents page the same LLM can show up under multiple
harnesses (e.g. claude-sonnet-4-5 under Claude Code vs OpenHands
Sub-agents). Before this change both points drew the exact same Anthropic
company logo as their marker, so they were visually indistinguishable —
the only way to tell them apart was the hover tooltip's "Harness:" line
from the earlier fix.

Now when a row carries an "Agent" column value (which is only the case
on the Alternative Agents page — DataTransformer.view() drops the Agent
column on the canonical OpenHands pages via the has_mixed_agents check),
the scatter plot draws a composite marker: the model provider logo on
top and the harness logo on the bottom, stacked symmetrically around the
point's true coordinate. Canonical pages keep the single-marker layout
with zero visual change.

- New HARNESS_LOGO_PATHS map + get_harness_icon() helper next to the
existing get_marker_icon(). Kept in sync with AGENT_NAME_BY_TYPE from
OpenHands/evaluation push_to_index_from_archive.py: Claude Code, Codex,
Gemini CLI, OpenHands, OpenHands Sub-agents. Unknown agent_name values
fall back to None so the caller skips the harness layer.
- _plot_scatter_plotly: detect has_harness_column (any non-empty Agent
value in the plotted dataframe), and in the per-point loop either draw
a single marker at (x, y) like before or stack two markers at
(x, y ± STACKED_Y_OFFSET) slightly smaller than the single-marker size
so the composite fits in roughly the same vertical footprint.
- Cache base64-encoded logos across rows so a Claude-heavy page reads and
encodes each SVG once instead of once per point.

Smoke test: Alternative Agents view (7 rows) now produces 14 marker
images in layout.images (2 per point, matching x coords, y separation
exactly 2 * STACKED_Y_OFFSET). Canonical view (22 rows) produces 22
marker images as before.

Files changed (1) hide show
  1. leaderboard_transformer.py +142 -44
leaderboard_transformer.py CHANGED
@@ -228,17 +228,17 @@ def get_country_from_model(model_name: str) -> dict:
228
  def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
229
  """
230
  Gets the appropriate icon based on the mark_by selection.
231
-
232
  Args:
233
  model_name: The model name
234
  openness: The openness value (open/closed)
235
  mark_by: One of "Company", "Openness", or "Country"
236
-
237
  Returns:
238
  dict with 'path' and 'name' keys
239
  """
240
  from constants import MARK_BY_COMPANY, MARK_BY_OPENNESS, MARK_BY_COUNTRY
241
-
242
  if mark_by == MARK_BY_OPENNESS:
243
  return get_openness_icon(openness)
244
  elif mark_by == MARK_BY_COUNTRY:
@@ -247,6 +247,39 @@ def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
247
  return get_company_from_model(model_name)
248
 
249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
250
  # Standard layout configuration for all charts
251
  STANDARD_LAYOUT = dict(
252
  template="plotly_white",
@@ -1139,51 +1172,116 @@ def _plot_scatter_plotly(
1139
  y_min = min_score - 5 if min_score > 5 else 0
1140
  y_max = max_score + 5
1141
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1142
  for _, row in data_plot.iterrows():
1143
  model_name = row.get('Language Model', '')
1144
  openness = row.get('Openness', '')
1145
  marker_info = get_marker_icon(model_name, openness, mark_by)
1146
- logo_path = marker_info['path']
1147
-
1148
- # Read the SVG file and encode as base64 data URI
1149
- if os.path.exists(logo_path):
1150
- try:
1151
- with open(logo_path, 'rb') as f:
1152
- encoded_logo = base64.b64encode(f.read()).decode('utf-8')
1153
- logo_uri = f"data:image/svg+xml;base64,{encoded_logo}"
1154
-
1155
- x_val = row[x_col_to_use]
1156
- y_val = row[y_col_to_use]
1157
-
1158
- # Convert to domain coordinates (0-1 range)
1159
- # For log scale x: domain_x = (log10(x) - x_min_log) / (x_max_log - x_min_log)
1160
- if x_val > 0:
1161
- log_x = np.log10(x_val)
1162
- domain_x = (log_x - x_min_log) / (x_max_log - x_min_log)
1163
- else:
1164
- domain_x = 0
1165
-
1166
- # For linear y: domain_y = (y - y_min) / (y_max - y_min)
1167
- domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
1168
-
1169
- # Clamp to valid range
1170
- domain_x = max(0, min(1, domain_x))
1171
- domain_y = max(0, min(1, domain_y))
1172
-
1173
- layout_images.append(dict(
1174
- source=logo_uri,
1175
- xref="x domain", # Use domain coordinates for log scale compatibility
1176
- yref="y domain",
1177
- x=domain_x,
1178
- y=domain_y,
1179
- sizex=0.04, # Size as fraction of plot width
1180
- sizey=0.06, # Size as fraction of plot height
1181
- xanchor="center",
1182
- yanchor="middle",
1183
- layer="above"
1184
- ))
1185
- except Exception as e:
1186
- logger.warning(f"Could not load logo {logo_path}: {e}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1187
 
1188
  # --- Section 7: Add Model Name Labels to Frontier Points ---
1189
  if frontier_rows:
 
228
  def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
229
  """
230
  Gets the appropriate icon based on the mark_by selection.
231
+
232
  Args:
233
  model_name: The model name
234
  openness: The openness value (open/closed)
235
  mark_by: One of "Company", "Openness", or "Country"
236
+
237
  Returns:
238
  dict with 'path' and 'name' keys
239
  """
240
  from constants import MARK_BY_COMPANY, MARK_BY_OPENNESS, MARK_BY_COUNTRY
241
+
242
  if mark_by == MARK_BY_OPENNESS:
243
  return get_openness_icon(openness)
244
  elif mark_by == MARK_BY_COUNTRY:
 
247
  return get_company_from_model(model_name)
248
 
249
 
250
# Map the agent_name stored in the index repo's metadata.json to a harness
# logo file. Kept in sync with AGENT_NAME_BY_TYPE in OpenHands/evaluation
# push_to_index_from_archive.py — if a new ACP harness lands there, add the
# corresponding display name and a matching asset here. Unknown agent_name
# values fall through to None so the scatter plot just draws the model logo
# alone (no blank placeholder).
HARNESS_LOGO_PATHS: dict[str, str] = {
    "Claude Code": "assets/harness-claude-code.svg",
    "Codex": "assets/harness-codex-cli.svg",
    "Gemini CLI": "assets/harness-gemini-cli.svg",
    "OpenHands": "assets/harness-openhands.svg",
    "OpenHands Sub-agents": "assets/harness-openhands.svg",
}


def get_harness_icon(agent_name: Optional[str]) -> Optional[dict]:
    """Return {'path', 'name'} for the harness logo, or None if unknown.

    Consumed by the Alternative Agents scatter plot to draw a composite
    marker (model provider on top, harness on bottom). A None return tells
    the caller to skip the harness layer and draw the model logo alone.

    Args:
        agent_name: Harness display name as stored in the "Agent" column.
            Surrounding whitespace is ignored. Non-string values are
            coerced with str(), so a pandas NaN becomes "nan", which is not
            a registered harness and therefore yields None.

    Returns:
        A dict with the logo 'path' and the normalised (stripped) 'name',
        or None when the name is missing, blank, or not listed in
        HARNESS_LOGO_PATHS.
    """
    if agent_name is None:
        return None

    # Normalise once so the lookup key and the reported name always agree;
    # previously a padded value matched the map but was returned unstripped.
    harness_name = str(agent_name).strip()
    if not harness_name:
        return None

    path = HARNESS_LOGO_PATHS.get(harness_name)
    if path is None:
        return None
    return {"path": path, "name": harness_name}
281
+
282
+
283
  # Standard layout configuration for all charts
284
  STANDARD_LAYOUT = dict(
285
  template="plotly_white",
 
1172
  y_min = min_score - 5 if min_score > 5 else 0
1173
  y_max = max_score + 5
1174
 
1175
+ # Cache base64-encoded logos across rows — every Claude model on the
1176
+ # Alternative Agents page points at the same assets/harness-claude-code.svg,
1177
+ # so encoding once per path is ~N× cheaper than once per point.
1178
# Maps a logo path to its data URI, or to None when the file could not be
# loaded, so each distinct path touches the filesystem at most once.
_logo_cache: dict[str, Optional[str]] = {}


def _encode_logo(path: str) -> Optional[str]:
    """Return a base64 ``data:`` URI for the image at *path*, or None.

    Results are memoised in ``_logo_cache``. Failures are memoised as well:
    a missing or unreadable logo is otherwise re-probed (and, for read
    errors, re-logged) once per plotted row.

    Args:
        path: Filesystem path of the logo. Files ending in ".svg"
            (case-insensitive) are labelled image/svg+xml; anything else is
            labelled image/png.

    Returns:
        The data URI string, or None if the file does not exist or cannot
        be read.
    """
    if path in _logo_cache:
        return _logo_cache[path]

    uri: Optional[str] = None
    try:
        with open(path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")
    except FileNotFoundError:
        # A missing asset is an expected condition: skip it quietly.
        pass
    except OSError as e:
        logger.warning(f"Could not load logo {path}: {e}")
    else:
        mime = "svg+xml" if path.lower().endswith(".svg") else "png"
        uri = f"data:image/{mime};base64,{encoded}"

    _logo_cache[path] = uri
    return uri
1194
+
1195
+ # Composite markers: on the Alternative Agents page the dataframe carries
1196
+ # an "Agent" column (Claude Code / Codex / Gemini CLI / OpenHands Sub-agents),
1197
+ # so a point for claude-sonnet-4-5 under Claude Code and under OpenHands
1198
+ # Sub-agents would otherwise share the exact same Anthropic logo marker
1199
+ # and be visually indistinguishable. When Agent is present, we stack
1200
+ # two logos at each point: model provider on top, harness on the bottom.
1201
+ # Canonical OpenHands pages drop the Agent column in view() (via the
1202
+ # has_mixed_agents check), so they fall through to the single-logo path
1203
+ # and render exactly as before.
1204
+ has_harness_column = (
1205
+ "Agent" in data_plot.columns
1206
+ and data_plot["Agent"].dropna().astype(str).str.strip().ne("").any()
1207
+ )
1208
+
1209
+ # Marker sizes. The composite variant fits two logos inside roughly the
1210
+ # same vertical footprint as a single marker, so each half is slightly
1211
+ # smaller and the two halves are offset symmetrically around the point's
1212
+ # true y-coordinate.
1213
+ SINGLE_SIZE_X, SINGLE_SIZE_Y = 0.04, 0.06
1214
+ STACKED_SIZE_X, STACKED_SIZE_Y = 0.035, 0.048
1215
+ STACKED_Y_OFFSET = 0.028 # half-separation between model (top) and harness (bottom)
1216
+
1217
  for _, row in data_plot.iterrows():
1218
  model_name = row.get('Language Model', '')
1219
  openness = row.get('Openness', '')
1220
  marker_info = get_marker_icon(model_name, openness, mark_by)
1221
+ model_logo_uri = _encode_logo(marker_info['path'])
1222
+ if model_logo_uri is None:
1223
+ continue
1224
+
1225
+ # Harness (only meaningful when the dataframe carries an Agent column).
1226
+ harness_uri = None
1227
+ if has_harness_column:
1228
+ harness_info = get_harness_icon(row.get("Agent"))
1229
+ if harness_info is not None:
1230
+ harness_uri = _encode_logo(harness_info["path"])
1231
+
1232
+ x_val = row[x_col_to_use]
1233
+ y_val = row[y_col_to_use]
1234
+
1235
+ # Convert to domain coordinates (0-1 range)
1236
+ # For log scale x: domain_x = (log10(x) - x_min_log) / (x_max_log - x_min_log)
1237
+ if x_val > 0:
1238
+ log_x = np.log10(x_val)
1239
+ domain_x = (log_x - x_min_log) / (x_max_log - x_min_log)
1240
+ else:
1241
+ domain_x = 0
1242
+
1243
+ # For linear y: domain_y = (y - y_min) / (y_max - y_min)
1244
+ domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
1245
+
1246
+ # Clamp to valid range
1247
+ domain_x = max(0, min(1, domain_x))
1248
+ domain_y = max(0, min(1, domain_y))
1249
+
1250
+ if harness_uri is not None:
1251
+ # Composite: stack model on top, harness on bottom, clamping
1252
+ # each half to the plot area so markers near the edges don't
1253
+ # drift off-canvas.
1254
+ model_y = min(1, domain_y + STACKED_Y_OFFSET)
1255
+ harness_y = max(0, domain_y - STACKED_Y_OFFSET)
1256
+ layout_images.append(dict(
1257
+ source=model_logo_uri,
1258
+ xref="x domain", yref="y domain",
1259
+ x=domain_x, y=model_y,
1260
+ sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
1261
+ xanchor="center", yanchor="middle",
1262
+ layer="above",
1263
+ ))
1264
+ layout_images.append(dict(
1265
+ source=harness_uri,
1266
+ xref="x domain", yref="y domain",
1267
+ x=domain_x, y=harness_y,
1268
+ sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
1269
+ xanchor="center", yanchor="middle",
1270
+ layer="above",
1271
+ ))
1272
+ else:
1273
+ # Single marker (canonical OpenHands pages, or Alternative Agents
1274
+ # rows with an unknown harness name — the latter shouldn't happen
1275
+ # in practice since HARNESS_LOGO_PATHS covers every agent_name the
1276
+ # push-to-index script emits).
1277
+ layout_images.append(dict(
1278
+ source=model_logo_uri,
1279
+ xref="x domain", yref="y domain",
1280
+ x=domain_x, y=domain_y,
1281
+ sizex=SINGLE_SIZE_X, sizey=SINGLE_SIZE_Y,
1282
+ xanchor="center", yanchor="middle",
1283
+ layer="above",
1284
+ ))
1285
 
1286
  # --- Section 7: Add Model Name Labels to Frontier Points ---
1287
  if frontier_rows: