Spaces:

OpenHands
/

openhands-index

Running

Debug Agent committed on Apr 8

Commit

fb16d57

1 Parent(s): 6860223

Stack model + harness logos on Alternative Agents scatter

On the Alternative Agents page the same LLM can show up under multiple
harnesses (e.g. claude-sonnet-4-5 under Claude Code vs OpenHands
Sub-agents). Before this change both points drew the exact same Anthropic
company logo as their marker, so they were visually indistinguishable —
the only way to tell them apart was the hover tooltip's "Harness:" line
from the earlier fix.

Now when a row carries an "Agent" column value (which is only the case
on the Alternative Agents page — DataTransformer.view() drops the Agent
column on the canonical OpenHands pages via the has_mixed_agents check),
the scatter plot draws a composite marker: the model provider logo on
top and the harness logo on the bottom, stacked symmetrically around the
point's true coordinate. Canonical pages keep the single-marker layout
with zero visual change.

- New HARNESS_LOGO_PATHS map + get_harness_icon() helper next to the
existing get_marker_icon(). Kept in sync with AGENT_NAME_BY_TYPE from
OpenHands/evaluation push_to_index_from_archive.py: Claude Code, Codex,
Gemini CLI, OpenHands, OpenHands Sub-agents. Unknown agent_name values
fall back to None so the caller skips the harness layer.
- _plot_scatter_plotly: detect has_harness_column (any non-empty Agent
value in the plotted dataframe), and in the per-point loop either draw
a single marker at (x, y) like before or stack two markers at
(x, y ± STACKED_Y_OFFSET) slightly smaller than the single-marker size
so the composite fits in roughly the same vertical footprint.
- Cache base64-encoded logos across rows so a Claude-heavy page reads and
encodes each logo file once instead of once per point.

Smoke test: Alternative Agents view (7 rows) now produces 14 marker
images in layout.images (2 per point, matching x coords, y separation
exactly 2 * STACKED_Y_OFFSET). Canonical view (22 rows) produces 22
marker images as before.

Files changed (1) hide show

leaderboard_transformer.py +142 -44

leaderboard_transformer.py CHANGED Viewed

@@ -228,17 +228,17 @@ def get_country_from_model(model_name: str) -> dict:
 def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
     """
     Gets the appropriate icon based on the mark_by selection.
     Args:
         model_name: The model name
         openness: The openness value (open/closed)
         mark_by: One of "Company", "Openness", or "Country"
     Returns:
         dict with 'path' and 'name' keys
     """
     from constants import MARK_BY_COMPANY, MARK_BY_OPENNESS, MARK_BY_COUNTRY
     if mark_by == MARK_BY_OPENNESS:
         return get_openness_icon(openness)
     elif mark_by == MARK_BY_COUNTRY:
@@ -247,6 +247,39 @@ def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
         return get_company_from_model(model_name)
 # Standard layout configuration for all charts
 STANDARD_LAYOUT = dict(
     template="plotly_white",
@@ -1139,51 +1172,116 @@ def _plot_scatter_plotly(
     y_min = min_score - 5 if min_score > 5 else 0
     y_max = max_score + 5
     for _, row in data_plot.iterrows():
         model_name = row.get('Language Model', '')
         openness = row.get('Openness', '')
         marker_info = get_marker_icon(model_name, openness, mark_by)
-        logo_path = marker_info['path']
-        # Read the SVG file and encode as base64 data URI
-        if os.path.exists(logo_path):
-            try:
-                with open(logo_path, 'rb') as f:
-                    encoded_logo = base64.b64encode(f.read()).decode('utf-8')
-                    logo_uri = f"data:image/svg+xml;base64,{encoded_logo}"
-                    x_val = row[x_col_to_use]
-                    y_val = row[y_col_to_use]
-                    # Convert to domain coordinates (0-1 range)
-                    # For log scale x: domain_x = (log10(x) - x_min_log) / (x_max_log - x_min_log)
-                    if x_val > 0:
-                        log_x = np.log10(x_val)
-                        domain_x = (log_x - x_min_log) / (x_max_log - x_min_log)
-                    else:
-                        domain_x = 0
-                    # For linear y: domain_y = (y - y_min) / (y_max - y_min)
-                    domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
-                    # Clamp to valid range
-                    domain_x = max(0, min(1, domain_x))
-                    domain_y = max(0, min(1, domain_y))
-                    layout_images.append(dict(
-                        source=logo_uri,
-                        xref="x domain",  # Use domain coordinates for log scale compatibility
-                        yref="y domain",
-                        x=domain_x,
-                        y=domain_y,
-                        sizex=0.04,  # Size as fraction of plot width
-                        sizey=0.06,  # Size as fraction of plot height
-                        xanchor="center",
-                        yanchor="middle",
-                        layer="above"
-                    ))
-            except Exception as e:
-                logger.warning(f"Could not load logo {logo_path}: {e}")
     # --- Section 7: Add Model Name Labels to Frontier Points ---
     if frontier_rows:

 def get_marker_icon(model_name: str, openness: str, mark_by: str) -> dict:
     """
     Gets the appropriate icon based on the mark_by selection.
     Args:
         model_name: The model name
         openness: The openness value (open/closed)
         mark_by: One of "Company", "Openness", or "Country"
     Returns:
         dict with 'path' and 'name' keys
     """
     from constants import MARK_BY_COMPANY, MARK_BY_OPENNESS, MARK_BY_COUNTRY
     if mark_by == MARK_BY_OPENNESS:
         return get_openness_icon(openness)
     elif mark_by == MARK_BY_COUNTRY:
         return get_company_from_model(model_name)
+# Map the agent_name stored in the index repo's metadata.json to a harness
+# logo file. Kept in sync with AGENT_NAME_BY_TYPE in OpenHands/evaluation
+# push_to_index_from_archive.py — if a new ACP harness lands there, add the
+# corresponding display name and a matching asset here. Unknown agent_name
+# values fall through to None so the scatter plot just draws the model logo
+# alone (no blank placeholder).
+HARNESS_LOGO_PATHS: dict[str, str] = {
+    "Claude Code":          "assets/harness-claude-code.svg",
+    "Codex":                "assets/harness-codex-cli.svg",
+    "Gemini CLI":           "assets/harness-gemini-cli.svg",
+    "OpenHands":            "assets/harness-openhands.svg",
+    "OpenHands Sub-agents": "assets/harness-openhands.svg",
+}
+def get_harness_icon(agent_name: Optional[str]) -> Optional[dict]:
+    """Return {'path', 'name'} for the harness logo, or None if unknown.
+    Consumed by the Alternative Agents scatter plot to draw a composite
+    marker (model provider on top, harness on bottom). Empty/missing
+    agent_name yields None so the caller can skip the harness layer and
+    fall back to a plain model logo — which is what canonical OpenHands
+    pages hit, since their DataTransformer.view() drops the Agent column
+    entirely when there's only one agent.
+    """
+    if not agent_name:
+        return None
+    path = HARNESS_LOGO_PATHS.get(str(agent_name).strip())
+    if path is None:
+        return None
+    return {"path": path, "name": agent_name}
 # Standard layout configuration for all charts
 STANDARD_LAYOUT = dict(
     template="plotly_white",
     y_min = min_score - 5 if min_score > 5 else 0
     y_max = max_score + 5
+    # Cache base64-encoded logos across rows — every Claude Code row on the
+    # Alternative Agents page points at the same assets/harness-claude-code.svg,
+    # so reading and encoding once per path is ~N× cheaper than once per point.
+    _logo_cache: dict[str, str] = {}
+    def _encode_logo(path: str) -> Optional[str]:
+        if path in _logo_cache:
+            return _logo_cache[path]
+        if not os.path.exists(path):
+            return None
+        try:
+            with open(path, "rb") as f:
+                encoded = base64.b64encode(f.read()).decode("utf-8")
+        except Exception as e:
+            logger.warning(f"Could not load logo {path}: {e}")
+            return None
+        mime = "svg+xml" if path.lower().endswith(".svg") else "png"
+        uri = f"data:image/{mime};base64,{encoded}"
+        _logo_cache[path] = uri
+        return uri
+    # Composite markers: on the Alternative Agents page the dataframe carries
+    # an "Agent" column (Claude Code / Codex / Gemini CLI / OpenHands Sub-agents),
+    # so a point for claude-sonnet-4-5 under Claude Code and under OpenHands
+    # Sub-agents would otherwise share the exact same Anthropic logo marker
+    # and be visually indistinguishable. When Agent is present, we stack
+    # two logos at each point: model provider on top, harness on the bottom.
+    # Canonical OpenHands pages drop the Agent column in view() (via the
+    # has_mixed_agents check), so they fall through to the single-logo path
+    # and render exactly as before.
+    has_harness_column = (
+        "Agent" in data_plot.columns
+        and data_plot["Agent"].dropna().astype(str).str.strip().ne("").any()
+    )
+    # Marker sizes. The composite variant fits two logos inside roughly the
+    # same vertical footprint as a single marker, so each half is slightly
+    # smaller and the two halves are offset symmetrically around the point's
+    # true y-coordinate.
+    SINGLE_SIZE_X, SINGLE_SIZE_Y = 0.04, 0.06
+    STACKED_SIZE_X, STACKED_SIZE_Y = 0.035, 0.048
+    STACKED_Y_OFFSET = 0.028  # half-separation between model (top) and harness (bottom)
     for _, row in data_plot.iterrows():
         model_name = row.get('Language Model', '')
         openness = row.get('Openness', '')
         marker_info = get_marker_icon(model_name, openness, mark_by)
+        model_logo_uri = _encode_logo(marker_info['path'])
+        if model_logo_uri is None:
+            continue
+        # Harness (only meaningful when the dataframe carries an Agent column).
+        harness_uri = None
+        if has_harness_column:
+            harness_info = get_harness_icon(row.get("Agent"))
+            if harness_info is not None:
+                harness_uri = _encode_logo(harness_info["path"])
+        x_val = row[x_col_to_use]
+        y_val = row[y_col_to_use]
+        # Convert to domain coordinates (0-1 range)
+        # For log scale x: domain_x = (log10(x) - x_min_log) / (x_max_log - x_min_log)
+        if x_val > 0:
+            log_x = np.log10(x_val)
+            domain_x = (log_x - x_min_log) / (x_max_log - x_min_log)
+        else:
+            domain_x = 0
+        # For linear y: domain_y = (y - y_min) / (y_max - y_min)
+        domain_y = (y_val - y_min) / (y_max - y_min) if (y_max - y_min) > 0 else 0.5
+        # Clamp to valid range
+        domain_x = max(0, min(1, domain_x))
+        domain_y = max(0, min(1, domain_y))
+        if harness_uri is not None:
+            # Composite: stack model on top, harness on bottom, clamping
+            # each half to the plot area so markers near the edges don't
+            # drift off-canvas.
+            model_y = min(1, domain_y + STACKED_Y_OFFSET)
+            harness_y = max(0, domain_y - STACKED_Y_OFFSET)
+            layout_images.append(dict(
+                source=model_logo_uri,
+                xref="x domain", yref="y domain",
+                x=domain_x, y=model_y,
+                sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
+                xanchor="center", yanchor="middle",
+                layer="above",
+            ))
+            layout_images.append(dict(
+                source=harness_uri,
+                xref="x domain", yref="y domain",
+                x=domain_x, y=harness_y,
+                sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
+                xanchor="center", yanchor="middle",
+                layer="above",
+            ))
+        else:
+            # Single marker (canonical OpenHands pages, or Alternative Agents
+            # rows whose harness name is empty/unknown or whose harness logo
+            # file could not be loaded — an unknown name shouldn't happen in
+            # practice since HARNESS_LOGO_PATHS covers every agent_name the
+            # push-to-index script emits).
+            layout_images.append(dict(
+                source=model_logo_uri,
+                xref="x domain", yref="y domain",
+                x=domain_x, y=domain_y,
+                sizex=SINGLE_SIZE_X, sizey=SINGLE_SIZE_Y,
+                xanchor="center", yanchor="middle",
+                layer="above",
+            ))
     # --- Section 7: Add Model Name Labels to Frontier Points ---
     if frontier_rows: