Spaces:
Running
Stack model + harness logos on Alternative Agents scatter
Browse files
On the Alternative Agents page the same LLM can show up under multiple
harnesses (e.g. claude-sonnet-4-5 under Claude Code vs OpenHands
Sub-agents). Before this change both points drew the exact same Anthropic
company logo as their marker, so they were visually indistinguishable —
the only way to tell them apart was the hover tooltip's "Harness:" line
from the earlier fix.
Now when a row carries an "Agent" column value (which is only the case
on the Alternative Agents page — DataTransformer.view() drops the Agent
column on the canonical OpenHands pages via the has_mixed_agents check),
the scatter plot draws a composite marker: the model provider logo on
top and the harness logo on the bottom, stacked symmetrically around the
point's true coordinate. Canonical pages keep the single-marker layout
with zero visual change.
- New HARNESS_LOGO_PATHS map + get_harness_icon() helper next to the
existing get_marker_icon(). Kept in sync with AGENT_NAME_BY_TYPE from
OpenHands/evaluation push_to_index_from_archive.py: Claude Code, Codex,
Gemini CLI, OpenHands, OpenHands Sub-agents. Unknown agent_name values
fall back to None so the caller skips the harness layer.
- _plot_scatter_plotly: detect has_harness_column (any non-empty Agent
value in the plotted dataframe), and in the per-point loop either draw
a single marker at (x, y) like before or stack two markers at
(x, y ± STACKED_Y_OFFSET) slightly smaller than the single-marker size
so the composite fits in roughly the same vertical footprint.
- Cache base64-encoded logos across rows so a Claude-heavy page decodes
each SVG once instead of once per point.
Smoke test: Alternative Agents view (7 rows) now produces 14 marker
images in layout.images (2 per point, matching x coords, y separation
exactly 2 * STACKED_Y_OFFSET). Canonical view (22 rows) produces 22
marker images as before.
- leaderboard_transformer.py +142 -44
|
@@ -228,17 +228,17 @@ def get_country_from_model(model_name: str) -> dict:
|
|
| 228 |
def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
|
| 229 |
"""
|
| 230 |
Gets the appropriate icon based on the mark_by selection.
|
| 231 |
-
|
| 232 |
Args:
|
| 233 |
model_name: The model name
|
| 234 |
openness: The openness value (open/closed)
|
| 235 |
mark_by: One of "Company", "Openness", or "Country"
|
| 236 |
-
|
| 237 |
Returns:
|
| 238 |
dict with 'path' and 'name' keys
|
| 239 |
"""
|
| 240 |
from constants import MARK_BY_COMPANY, MARK_BY_OPENNESS, MARK_BY_COUNTRY
|
| 241 |
-
|
| 242 |
if mark_by == MARK_BY_OPENNESS:
|
| 243 |
return get_openness_icon(openness)
|
| 244 |
elif mark_by == MARK_BY_COUNTRY:
|
|
@@ -247,6 +247,39 @@ def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
|
|
| 247 |
return get_company_from_model(model_name)
|
| 248 |
|
| 249 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
# Standard layout configuration for all charts
|
| 251 |
STANDARD_LAYOUT = dict(
|
| 252 |
template="plotly_white",
|
|
@@ -1139,51 +1172,116 @@ def _plot_scatter_plotly(
|
|
| 1139 |
y_min = min_score - 5 if min_score > 5 else 0
|
| 1140 |
y_max = max_score + 5
|
| 1141 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1142 |
for _, row in data_plot.iterrows():
|
| 1143 |
model_name = row.get('Language Model', '')
|
| 1144 |
openness = row.get('Openness', '')
|
| 1145 |
marker_info = get_marker_icon(model_name, openness, mark_by)
|
| 1146 |
-
|
| 1147 |
-
|
| 1148 |
-
|
| 1149 |
-
|
| 1150 |
-
|
| 1151 |
-
|
| 1152 |
-
|
| 1153 |
-
|
| 1154 |
-
|
| 1155 |
-
|
| 1156 |
-
|
| 1157 |
-
|
| 1158 |
-
|
| 1159 |
-
|
| 1160 |
-
|
| 1161 |
-
|
| 1162 |
-
|
| 1163 |
-
|
| 1164 |
-
|
| 1165 |
-
|
| 1166 |
-
|
| 1167 |
-
|
| 1168 |
-
|
| 1169 |
-
|
| 1170 |
-
|
| 1171 |
-
|
| 1172 |
-
|
| 1173 |
-
|
| 1174 |
-
|
| 1175 |
-
|
| 1176 |
-
|
| 1177 |
-
|
| 1178 |
-
|
| 1179 |
-
|
| 1180 |
-
|
| 1181 |
-
|
| 1182 |
-
|
| 1183 |
-
|
| 1184 |
-
|
| 1185 |
-
|
| 1186 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1187 |
|
| 1188 |
# --- Section 7: Add Model Name Labels to Frontier Points ---
|
| 1189 |
if frontier_rows:
|
|
|
|
| 228 |
def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
|
| 229 |
"""
|
| 230 |
Gets the appropriate icon based on the mark_by selection.
|
| 231 |
+
|
| 232 |
Args:
|
| 233 |
model_name: The model name
|
| 234 |
openness: The openness value (open/closed)
|
| 235 |
mark_by: One of "Company", "Openness", or "Country"
|
| 236 |
+
|
| 237 |
Returns:
|
| 238 |
dict with 'path' and 'name' keys
|
| 239 |
"""
|
| 240 |
from constants import MARK_BY_COMPANY, MARK_BY_OPENNESS, MARK_BY_COUNTRY
|
| 241 |
+
|
| 242 |
if mark_by == MARK_BY_OPENNESS:
|
| 243 |
return get_openness_icon(openness)
|
| 244 |
elif mark_by == MARK_BY_COUNTRY:
|
|
|
|
| 247 |
return get_company_from_model(model_name)
|
| 248 |
|
| 249 |
|
| 250 |
+
# Map the agent_name stored in the index repo's metadata.json to a harness
# logo file. Kept in sync with AGENT_NAME_BY_TYPE in OpenHands/evaluation
# push_to_index_from_archive.py — if a new ACP harness lands there, add the
# corresponding display name and a matching asset here. Unknown agent_name
# values fall through to None so the scatter plot just draws the model logo
# alone (no blank placeholder).
HARNESS_LOGO_PATHS: dict[str, str] = {
    "Claude Code": "assets/harness-claude-code.svg",
    "Codex": "assets/harness-codex-cli.svg",
    "Gemini CLI": "assets/harness-gemini-cli.svg",
    "OpenHands": "assets/harness-openhands.svg",
    # Both OpenHands variants point at the same asset.
    "OpenHands Sub-agents": "assets/harness-openhands.svg",
}


def get_harness_icon(agent_name: Optional[str]) -> Optional[dict]:
    """Return {'path', 'name'} for the harness logo, or None if unknown.

    Consumed by the Alternative Agents scatter plot to draw a composite
    marker (model provider on top, harness on bottom). Empty/missing
    agent_name yields None so the caller can skip the harness layer and
    fall back to a plain model logo.

    Args:
        agent_name: Harness display name as found in the "Agent" column.
            Surrounding whitespace is ignored; non-string values are
            stringified before the lookup.

    Returns:
        A dict with the logo 'path' and the normalized (stripped) 'name',
        or None when the name is falsy or has no entry in
        HARNESS_LOGO_PATHS.
    """
    if not agent_name:
        return None
    # Normalize once so the lookup key and the reported name always agree
    # (previously the raw, possibly padded/non-str value was returned).
    normalized = str(agent_name).strip()
    path = HARNESS_LOGO_PATHS.get(normalized)
    if path is None:
        return None
    return {"path": path, "name": normalized}
|
| 281 |
+
|
| 282 |
+
|
| 283 |
# Standard layout configuration for all charts
|
| 284 |
STANDARD_LAYOUT = dict(
|
| 285 |
template="plotly_white",
|
|
|
|
| 1172 |
y_min = min_score - 5 if min_score > 5 else 0
|
| 1173 |
y_max = max_score + 5
|
| 1174 |
|
| 1175 |
+
# Cache base64-encoded logos across rows — many points share the same asset
# path, so each file is read and encoded at most once. Failed loads are
# cached as None too, so a missing or unreadable logo is not re-opened
# (and not re-warned about) for every point that references it.
_logo_cache: dict[str, Optional[str]] = {}


def _encode_logo(path: str) -> Optional[str]:
    """Return a base64 ``data:`` URI for the image at *path*, or None.

    Args:
        path: Filesystem path of the logo. Paths ending in ".svg"
            (case-insensitive) are labelled image/svg+xml; everything
            else is labelled image/png.

    Returns:
        The data URI string, or None when the file does not exist or
        cannot be read. Results (including None) are memoized in
        _logo_cache keyed by *path*.
    """
    try:
        return _logo_cache[path]
    except KeyError:
        pass

    uri: Optional[str] = None
    try:
        with open(path, "rb") as f:
            encoded = base64.b64encode(f.read()).decode("utf-8")
    except (FileNotFoundError, NotADirectoryError):
        # A missing asset is an expected condition: skip it quietly.
        pass
    except OSError as e:
        logger.warning(f"Could not load logo {path}: {e}")
    else:
        mime = "svg+xml" if path.lower().endswith(".svg") else "png"
        uri = f"data:image/{mime};base64,{encoded}"

    _logo_cache[path] = uri
    return uri
|
| 1194 |
+
|
| 1195 |
+
# Composite markers: on the Alternative Agents page the dataframe carries
|
| 1196 |
+
# an "Agent" column (Claude Code / Codex / Gemini CLI / OpenHands Sub-agents),
|
| 1197 |
+
# so a point for claude-sonnet-4-5 under Claude Code and under OpenHands
|
| 1198 |
+
# Sub-agents would otherwise share the exact same Anthropic logo marker
|
| 1199 |
+
# and be visually indistinguishable. When Agent is present, we stack
|
| 1200 |
+
# two logos at each point: model provider on top, harness on the bottom.
|
| 1201 |
+
# Canonical OpenHands pages drop the Agent column in view() (via the
|
| 1202 |
+
# has_mixed_agents check), so they fall through to the single-logo path
|
| 1203 |
+
# and render exactly as before.
|
| 1204 |
+
has_harness_column = (
|
| 1205 |
+
"Agent" in data_plot.columns
|
| 1206 |
+
and data_plot["Agent"].dropna().astype(str).str.strip().ne("").any()
|
| 1207 |
+
)
|
| 1208 |
+
|
| 1209 |
+
# Marker sizes. The composite variant fits two logos inside roughly the
|
| 1210 |
+
# same vertical footprint as a single marker, so each half is slightly
|
| 1211 |
+
# smaller and the two halves are offset symmetrically around the point's
|
| 1212 |
+
# true y-coordinate.
|
| 1213 |
+
SINGLE_SIZE_X, SINGLE_SIZE_Y = 0.04, 0.06
|
| 1214 |
+
STACKED_SIZE_X, STACKED_SIZE_Y = 0.035, 0.048
|
| 1215 |
+
STACKED_Y_OFFSET = 0.028 # half-separation between model (top) and harness (bottom)
|
| 1216 |
+
|
| 1217 |
for _, row in data_plot.iterrows():
|
| 1218 |
model_name = row.get('Language Model', '')
|
| 1219 |
openness = row.get('Openness', '')
|
| 1220 |
marker_info = get_marker_icon(model_name, openness, mark_by)
|
| 1221 |
+
model_logo_uri = _encode_logo(marker_info['path'])
|
| 1222 |
+
if model_logo_uri is None:
|
| 1223 |
+
continue
|
| 1224 |
+
|
| 1225 |
+
# Harness (only meaningful when the dataframe carries an Agent column).
|
| 1226 |
+
harness_uri = None
|
| 1227 |
+
if has_harness_column:
|
| 1228 |
+
harness_info = get_harness_icon(row.get("Agent"))
|
| 1229 |
+
if harness_info is not None:
|
| 1230 |
+
harness_uri = _encode_logo(harness_info["path"])
|
| 1231 |
+
|
| 1232 |
+
x_val = row[x_col_to_use]
|
| 1233 |
+
y_val = row[y_col_to_use]
|
| 1234 |
+
|
| 1235 |
+
# Convert to domain coordinates (0-1 range)
|
| 1236 |
+
# For log scale x: domain_x = (log10(x) - x_min_log) / (x_max_log - x_min_log)
|
| 1237 |
+
if x_val > 0:
|
| 1238 |
+
log_x = np.log10(x_val)
|
| 1239 |
+
domain_x = (log_x - x_min_log) / (x_max_log - x_min_log)
|
| 1240 |
+
else:
|
| 1241 |
+
domain_x = 0
|
| 1242 |
+
|
| 1243 |
+
# For linear y: domain_y = (y - y_min) / (y_max - y_min)
|
| 1244 |
+
domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
|
| 1245 |
+
|
| 1246 |
+
# Clamp to valid range
|
| 1247 |
+
domain_x = max(0, min(1, domain_x))
|
| 1248 |
+
domain_y = max(0, min(1, domain_y))
|
| 1249 |
+
|
| 1250 |
+
if harness_uri is not None:
|
| 1251 |
+
# Composite: stack model on top, harness on bottom, clamping
|
| 1252 |
+
# each half to the plot area so markers near the edges don't
|
| 1253 |
+
# drift off-canvas.
|
| 1254 |
+
model_y = min(1, domain_y + STACKED_Y_OFFSET)
|
| 1255 |
+
harness_y = max(0, domain_y - STACKED_Y_OFFSET)
|
| 1256 |
+
layout_images.append(dict(
|
| 1257 |
+
source=model_logo_uri,
|
| 1258 |
+
xref="x domain", yref="y domain",
|
| 1259 |
+
x=domain_x, y=model_y,
|
| 1260 |
+
sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
|
| 1261 |
+
xanchor="center", yanchor="middle",
|
| 1262 |
+
layer="above",
|
| 1263 |
+
))
|
| 1264 |
+
layout_images.append(dict(
|
| 1265 |
+
source=harness_uri,
|
| 1266 |
+
xref="x domain", yref="y domain",
|
| 1267 |
+
x=domain_x, y=harness_y,
|
| 1268 |
+
sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
|
| 1269 |
+
xanchor="center", yanchor="middle",
|
| 1270 |
+
layer="above",
|
| 1271 |
+
))
|
| 1272 |
+
else:
|
| 1273 |
+
# Single marker (canonical OpenHands pages, or Alternative Agents
|
| 1274 |
+
# rows with an unknown harness name — the latter shouldn't happen
|
| 1275 |
+
# in practice since HARNESS_LOGO_PATHS covers every agent_name the
|
| 1276 |
+
# push-to-index script emits).
|
| 1277 |
+
layout_images.append(dict(
|
| 1278 |
+
source=model_logo_uri,
|
| 1279 |
+
xref="x domain", yref="y domain",
|
| 1280 |
+
x=domain_x, y=domain_y,
|
| 1281 |
+
sizex=SINGLE_SIZE_X, sizey=SINGLE_SIZE_Y,
|
| 1282 |
+
xanchor="center", yanchor="middle",
|
| 1283 |
+
layer="above",
|
| 1284 |
+
))
|
| 1285 |
|
| 1286 |
# --- Section 7: Add Model Name Labels to Frontier Points ---
|
| 1287 |
if frontier_rows:
|