Spaces:
Running
Running
[AUTO] Fix scatter plot zoom and show_all_labels applied to main
#34
by juan-all-hands - opened
- app.py +1 -76
- content.py +5 -8
- leaderboard_transformer.py +100 -103
- simple_data_loader.py +1 -2
- ui_components.py +11 -25
- visualizations.py +1 -11
app.py
CHANGED
|
@@ -2,7 +2,6 @@
|
|
| 2 |
import logging
|
| 3 |
import sys
|
| 4 |
import os
|
| 5 |
-
import json
|
| 6 |
|
| 7 |
from constants import FONT_FAMILY_SHORT
|
| 8 |
|
|
@@ -385,7 +384,6 @@ logger.info("All routes configured")
|
|
| 385 |
|
| 386 |
# Mount the REST API on /api
|
| 387 |
from fastapi import FastAPI, Request
|
| 388 |
-
from fastapi.middleware.cors import CORSMiddleware
|
| 389 |
from fastapi.responses import RedirectResponse
|
| 390 |
from starlette.middleware.base import BaseHTTPMiddleware
|
| 391 |
from api import api_app
|
|
@@ -406,71 +404,6 @@ class RootRedirectMiddleware(BaseHTTPMiddleware):
|
|
| 406 |
return await call_next(request)
|
| 407 |
|
| 408 |
|
| 409 |
-
class StringifiedGradioJSONMiddleware:
|
| 410 |
-
"""Normalize JSON bodies double-encoded by the custom-domain proxy.
|
| 411 |
-
|
| 412 |
-
Requests sent through index.openhands.dev can arrive at Gradio as a JSON
|
| 413 |
-
string containing the real request object, which makes FastAPI validation
|
| 414 |
-
reject interactive callbacks with 422. Direct HF Space traffic already sends
|
| 415 |
-
proper JSON objects, so this only rewrites bodies that decode to strings.
|
| 416 |
-
"""
|
| 417 |
-
|
| 418 |
-
def __init__(self, app):
|
| 419 |
-
self.app = app
|
| 420 |
-
|
| 421 |
-
async def __call__(self, scope, receive, send):
|
| 422 |
-
if (
|
| 423 |
-
scope["type"] == "http"
|
| 424 |
-
and scope.get("method") == "POST"
|
| 425 |
-
and scope.get("path", "").startswith("/gradio_api/")
|
| 426 |
-
):
|
| 427 |
-
headers = {
|
| 428 |
-
key.decode("latin-1").lower(): value.decode("latin-1")
|
| 429 |
-
for key, value in scope.get("headers", [])
|
| 430 |
-
}
|
| 431 |
-
content_type = headers.get("content-type", "")
|
| 432 |
-
if "application/json" not in content_type:
|
| 433 |
-
return await self.app(scope, receive, send)
|
| 434 |
-
|
| 435 |
-
body_parts = []
|
| 436 |
-
while True:
|
| 437 |
-
message = await receive()
|
| 438 |
-
if message["type"] != "http.request":
|
| 439 |
-
break
|
| 440 |
-
body_parts.append(message.get("body", b""))
|
| 441 |
-
if not message.get("more_body", False):
|
| 442 |
-
break
|
| 443 |
-
|
| 444 |
-
body = b"".join(body_parts)
|
| 445 |
-
replacement_body = body
|
| 446 |
-
try:
|
| 447 |
-
decoded = json.loads(body)
|
| 448 |
-
except json.JSONDecodeError:
|
| 449 |
-
decoded = None
|
| 450 |
-
|
| 451 |
-
if isinstance(decoded, str):
|
| 452 |
-
stripped = decoded.strip()
|
| 453 |
-
if stripped.startswith(("{", "[")):
|
| 454 |
-
replacement_body = stripped.encode("utf-8")
|
| 455 |
-
|
| 456 |
-
sent = False
|
| 457 |
-
|
| 458 |
-
async def replay_receive():
|
| 459 |
-
nonlocal sent
|
| 460 |
-
if sent:
|
| 461 |
-
return {"type": "http.request", "body": b"", "more_body": False}
|
| 462 |
-
sent = True
|
| 463 |
-
return {
|
| 464 |
-
"type": "http.request",
|
| 465 |
-
"body": replacement_body,
|
| 466 |
-
"more_body": False,
|
| 467 |
-
}
|
| 468 |
-
|
| 469 |
-
return await self.app(scope, replay_receive, send)
|
| 470 |
-
|
| 471 |
-
return await self.app(scope, receive, send)
|
| 472 |
-
|
| 473 |
-
|
| 474 |
# Create a parent FastAPI app with redirect_slashes=False to prevent
|
| 475 |
# automatic trailing slash redirects that cause issues with Gradio
|
| 476 |
root_app = FastAPI(redirect_slashes=False)
|
|
@@ -482,15 +415,6 @@ root_app.mount("/api", api_app)
|
|
| 482 |
|
| 483 |
# Mount Gradio app at root path
|
| 484 |
app = gr.mount_gradio_app(root_app, demo, path="/")
|
| 485 |
-
app = CORSMiddleware(
|
| 486 |
-
app,
|
| 487 |
-
allow_origins=["https://index.openhands.dev"],
|
| 488 |
-
allow_credentials=True,
|
| 489 |
-
allow_methods=["*"],
|
| 490 |
-
allow_headers=["*"],
|
| 491 |
-
expose_headers=["*"],
|
| 492 |
-
)
|
| 493 |
-
app = StringifiedGradioJSONMiddleware(app)
|
| 494 |
logger.info("REST API mounted at /api, Gradio app mounted at /")
|
| 495 |
|
| 496 |
|
|
@@ -503,3 +427,4 @@ if __name__ == "__main__":
|
|
| 503 |
logger.info(f"Launching app on {host}:{port}")
|
| 504 |
uvicorn.run(app, host=host, port=port)
|
| 505 |
logger.info("App launched successfully")
|
|
|
|
|
|
| 2 |
import logging
|
| 3 |
import sys
|
| 4 |
import os
|
|
|
|
| 5 |
|
| 6 |
from constants import FONT_FAMILY_SHORT
|
| 7 |
|
|
|
|
| 384 |
|
| 385 |
# Mount the REST API on /api
|
| 386 |
from fastapi import FastAPI, Request
|
|
|
|
| 387 |
from fastapi.responses import RedirectResponse
|
| 388 |
from starlette.middleware.base import BaseHTTPMiddleware
|
| 389 |
from api import api_app
|
|
|
|
| 404 |
return await call_next(request)
|
| 405 |
|
| 406 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 407 |
# Create a parent FastAPI app with redirect_slashes=False to prevent
|
| 408 |
# automatic trailing slash redirects that cause issues with Gradio
|
| 409 |
root_app = FastAPI(redirect_slashes=False)
|
|
|
|
| 415 |
|
| 416 |
# Mount Gradio app at root path
|
| 417 |
app = gr.mount_gradio_app(root_app, demo, path="/")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 418 |
logger.info("REST API mounted at /api, Gradio app mounted at /")
|
| 419 |
|
| 420 |
|
|
|
|
| 427 |
logger.info(f"Launching app on {host}:{port}")
|
| 428 |
uvicorn.run(app, host=host, port=port)
|
| 429 |
logger.info("App launched successfully")
|
| 430 |
+
|
content.py
CHANGED
|
@@ -547,20 +547,19 @@ span.wrap[tabindex="0"][role="button"][data-editable="false"] {
|
|
| 547 |
width: 100% !important;
|
| 548 |
align-items: center;
|
| 549 |
}
|
| 550 |
-
.nav-holder nav a[href*="alternative-agents"] {
|
| 551 |
-
grid-row: 1 !important;
|
| 552 |
-
grid-column: 7 !important;
|
| 553 |
-
white-space: nowrap !important;
|
| 554 |
-
}
|
| 555 |
.nav-holder nav a[href*="about"] {
|
| 556 |
grid-row: 1 !important;
|
| 557 |
-
grid-column:
|
| 558 |
}
|
| 559 |
.nav-holder nav a[href*="submit"] {
|
| 560 |
grid-row: 1 !important;
|
| 561 |
grid-column: 8 !important;
|
| 562 |
white-space: nowrap !important;
|
| 563 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
| 564 |
|
| 565 |
/* Divider line between header and category nav */
|
| 566 |
.nav-holder nav::after {
|
|
@@ -599,7 +598,6 @@ span.wrap[tabindex="0"][role="button"][data-editable="false"] {
|
|
| 599 |
.nav-holder nav a[href*="discovery"] { grid-column: 4 !important; }
|
| 600 |
|
| 601 |
/* Navigation hover styles */
|
| 602 |
-
.nav-holder nav a[href*="alternative-agents"]:hover,
|
| 603 |
.nav-holder nav a[href*="about"]:hover,
|
| 604 |
.nav-holder nav a[href*="submit"]:hover,
|
| 605 |
.nav-holder nav a[href*="literature-understanding"]:hover,
|
|
@@ -609,7 +607,6 @@ span.wrap[tabindex="0"][role="button"][data-editable="false"] {
|
|
| 609 |
background-color: #FDF9F4;
|
| 610 |
}
|
| 611 |
|
| 612 |
-
.dark .nav-holder nav a[href*="alternative-agents"]:hover,
|
| 613 |
.dark .nav-holder nav a[href*="about"]:hover,
|
| 614 |
.dark .nav-holder nav a[href*="submit"]:hover,
|
| 615 |
.dark .nav-holder nav a[href*="literature-understanding"]:hover,
|
|
|
|
| 547 |
width: 100% !important;
|
| 548 |
align-items: center;
|
| 549 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 550 |
.nav-holder nav a[href*="about"] {
|
| 551 |
grid-row: 1 !important;
|
| 552 |
+
grid-column: 7 !important;
|
| 553 |
}
|
| 554 |
.nav-holder nav a[href*="submit"] {
|
| 555 |
grid-row: 1 !important;
|
| 556 |
grid-column: 8 !important;
|
| 557 |
white-space: nowrap !important;
|
| 558 |
}
|
| 559 |
+
/* Hide the Alternative Agents page from the top-level nav for now. */
|
| 560 |
+
.nav-holder nav a[href*="alternative-agents"] {
|
| 561 |
+
display: none !important;
|
| 562 |
+
}
|
| 563 |
|
| 564 |
/* Divider line between header and category nav */
|
| 565 |
.nav-holder nav::after {
|
|
|
|
| 598 |
.nav-holder nav a[href*="discovery"] { grid-column: 4 !important; }
|
| 599 |
|
| 600 |
/* Navigation hover styles */
|
|
|
|
| 601 |
.nav-holder nav a[href*="about"]:hover,
|
| 602 |
.nav-holder nav a[href*="submit"]:hover,
|
| 603 |
.nav-holder nav a[href*="literature-understanding"]:hover,
|
|
|
|
| 607 |
background-color: #FDF9F4;
|
| 608 |
}
|
| 609 |
|
|
|
|
| 610 |
.dark .nav-holder nav a[href*="about"]:hover,
|
| 611 |
.dark .nav-holder nav a[href*="submit"]:hover,
|
| 612 |
.dark .nav-holder nav a[href*="literature-understanding"]:hover,
|
leaderboard_transformer.py
CHANGED
|
@@ -12,38 +12,6 @@ from constants import FONT_FAMILY, FONT_FAMILY_SHORT
|
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
| 15 |
-
_DATA_URI_CACHE: dict[str, str] = {}
|
| 16 |
-
|
| 17 |
-
|
| 18 |
-
def get_asset_data_uri(path: str) -> Optional[str]:
|
| 19 |
-
"""Return a cached data URI for a local image asset."""
|
| 20 |
-
if path in _DATA_URI_CACHE:
|
| 21 |
-
return _DATA_URI_CACHE[path]
|
| 22 |
-
|
| 23 |
-
if not os.path.exists(path):
|
| 24 |
-
_DATA_URI_CACHE[path] = ""
|
| 25 |
-
return None
|
| 26 |
-
|
| 27 |
-
try:
|
| 28 |
-
with open(path, "rb") as f:
|
| 29 |
-
encoded = base64.b64encode(f.read()).decode("utf-8")
|
| 30 |
-
except Exception as e:
|
| 31 |
-
logger.warning(f"Could not load image asset {path}: {e}")
|
| 32 |
-
_DATA_URI_CACHE[path] = ""
|
| 33 |
-
return None
|
| 34 |
-
|
| 35 |
-
ext = os.path.splitext(path)[1].lower()
|
| 36 |
-
if ext == ".svg":
|
| 37 |
-
mime = "image/svg+xml"
|
| 38 |
-
elif ext == ".png":
|
| 39 |
-
mime = "image/png"
|
| 40 |
-
else:
|
| 41 |
-
mime = "application/octet-stream"
|
| 42 |
-
|
| 43 |
-
uri = f"data:{mime};base64,{encoded}"
|
| 44 |
-
_DATA_URI_CACHE[path] = uri
|
| 45 |
-
return uri
|
| 46 |
-
|
| 47 |
# Company logo mapping for graphs - maps model name patterns to company logo files
|
| 48 |
COMPANY_LOGO_MAP = {
|
| 49 |
"anthropic": {"path": "assets/logo-anthropic.svg", "name": "Anthropic"},
|
|
@@ -141,34 +109,42 @@ def get_openhands_logo_images():
|
|
| 141 |
images = []
|
| 142 |
|
| 143 |
# Light mode logo (visible in light mode, hidden in dark mode)
|
| 144 |
-
|
| 145 |
-
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
|
| 150 |
-
|
| 151 |
-
|
| 152 |
-
|
| 153 |
-
|
| 154 |
-
|
| 155 |
-
|
| 156 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 157 |
|
| 158 |
# Dark mode logo (hidden in light mode, visible in dark mode)
|
| 159 |
-
|
| 160 |
-
|
| 161 |
-
|
| 162 |
-
|
| 163 |
-
|
| 164 |
-
|
| 165 |
-
|
| 166 |
-
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
|
| 170 |
-
|
| 171 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 172 |
|
| 173 |
return images
|
| 174 |
|
|
@@ -535,50 +511,54 @@ def create_scatter_chart(
|
|
| 535 |
marker_info = get_marker_icon(model_name, openness, mark_by)
|
| 536 |
logo_path = marker_info['path']
|
| 537 |
|
| 538 |
-
|
| 539 |
-
|
| 540 |
-
|
| 541 |
-
|
| 542 |
-
|
| 543 |
-
|
| 544 |
-
|
| 545 |
-
|
| 546 |
-
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
|
| 550 |
-
|
| 551 |
-
|
| 552 |
-
|
| 553 |
-
|
| 554 |
-
|
| 555 |
-
|
| 556 |
-
|
| 557 |
-
|
| 558 |
-
|
| 559 |
-
|
| 560 |
-
|
| 561 |
-
|
| 562 |
-
|
| 563 |
-
|
| 564 |
-
|
| 565 |
-
|
| 566 |
-
|
| 567 |
-
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
|
| 571 |
-
|
| 572 |
-
|
| 573 |
-
|
| 574 |
-
|
| 575 |
-
|
| 576 |
-
|
| 577 |
-
|
| 578 |
-
|
| 579 |
-
|
| 580 |
-
|
| 581 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 582 |
|
| 583 |
# Add labels for frontier points only
|
| 584 |
for row in frontier_rows:
|
|
@@ -1213,8 +1193,25 @@ def _plot_scatter_plotly(
|
|
| 1213 |
y_min = min_score - 5 if min_score > 5 else 0
|
| 1214 |
y_max = max_score + 5
|
| 1215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1216 |
def _encode_logo(path: str) -> Optional[str]:
|
| 1217 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1218 |
|
| 1219 |
# Composite markers: on the Alternative Agents page the dataframe carries
|
| 1220 |
# an "Agent" column (Claude Code / Codex / Gemini CLI / OpenHands Sub-agents),
|
|
|
|
| 12 |
|
| 13 |
logger = logging.getLogger(__name__)
|
| 14 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 15 |
# Company logo mapping for graphs - maps model name patterns to company logo files
|
| 16 |
COMPANY_LOGO_MAP = {
|
| 17 |
"anthropic": {"path": "assets/logo-anthropic.svg", "name": "Anthropic"},
|
|
|
|
| 109 |
images = []
|
| 110 |
|
| 111 |
# Light mode logo (visible in light mode, hidden in dark mode)
|
| 112 |
+
if os.path.exists(OPENHANDS_LOGO_PATH_LIGHT):
|
| 113 |
+
try:
|
| 114 |
+
with open(OPENHANDS_LOGO_PATH_LIGHT, "rb") as f:
|
| 115 |
+
logo_data = base64.b64encode(f.read()).decode('utf-8')
|
| 116 |
+
images.append(dict(
|
| 117 |
+
source=f"data:image/png;openhands=lightlogo;base64,{logo_data}",
|
| 118 |
+
xref="paper",
|
| 119 |
+
yref="paper",
|
| 120 |
+
x=0,
|
| 121 |
+
y=-0.15,
|
| 122 |
+
sizex=0.15,
|
| 123 |
+
sizey=0.15,
|
| 124 |
+
xanchor="left",
|
| 125 |
+
yanchor="bottom",
|
| 126 |
+
))
|
| 127 |
+
except Exception:
|
| 128 |
+
pass
|
| 129 |
|
| 130 |
# Dark mode logo (hidden in light mode, visible in dark mode)
|
| 131 |
+
if os.path.exists(OPENHANDS_LOGO_PATH_DARK):
|
| 132 |
+
try:
|
| 133 |
+
with open(OPENHANDS_LOGO_PATH_DARK, "rb") as f:
|
| 134 |
+
logo_data = base64.b64encode(f.read()).decode('utf-8')
|
| 135 |
+
images.append(dict(
|
| 136 |
+
source=f"data:image/png;openhands=darklogo;base64,{logo_data}",
|
| 137 |
+
xref="paper",
|
| 138 |
+
yref="paper",
|
| 139 |
+
x=0,
|
| 140 |
+
y=-0.15,
|
| 141 |
+
sizex=0.15,
|
| 142 |
+
sizey=0.15,
|
| 143 |
+
xanchor="left",
|
| 144 |
+
yanchor="bottom",
|
| 145 |
+
))
|
| 146 |
+
except Exception:
|
| 147 |
+
pass
|
| 148 |
|
| 149 |
return images
|
| 150 |
|
|
|
|
| 511 |
marker_info = get_marker_icon(model_name, openness, mark_by)
|
| 512 |
logo_path = marker_info['path']
|
| 513 |
|
| 514 |
+
if os.path.exists(logo_path):
|
| 515 |
+
try:
|
| 516 |
+
with open(logo_path, 'rb') as f:
|
| 517 |
+
encoded_logo = base64.b64encode(f.read()).decode('utf-8')
|
| 518 |
+
logo_uri = f"data:image/svg+xml;base64,{encoded_logo}"
|
| 519 |
+
|
| 520 |
+
if x_type == "date":
|
| 521 |
+
# For date axes, use data coordinates directly
|
| 522 |
+
layout_images.append(dict(
|
| 523 |
+
source=logo_uri,
|
| 524 |
+
xref="x",
|
| 525 |
+
yref="y",
|
| 526 |
+
x=x_val,
|
| 527 |
+
y=y_val,
|
| 528 |
+
sizex=15 * 24 * 60 * 60 * 1000, # ~15 days in milliseconds
|
| 529 |
+
sizey=3, # score units
|
| 530 |
+
xanchor="center",
|
| 531 |
+
yanchor="middle",
|
| 532 |
+
layer="above"
|
| 533 |
+
))
|
| 534 |
+
else:
|
| 535 |
+
# For log axes, use domain coordinates (0-1 range)
|
| 536 |
+
if x_type == "log" and x_val > 0:
|
| 537 |
+
log_x = np.log10(x_val)
|
| 538 |
+
domain_x = (log_x - x_range_log[0]) / (x_range_log[1] - x_range_log[0])
|
| 539 |
+
else:
|
| 540 |
+
domain_x = 0.5
|
| 541 |
+
|
| 542 |
+
domain_y = (y_val - y_range[0]) / (y_range[1] - y_range[0]) if (y_range[1] - y_range[0]) > 0 else 0.5
|
| 543 |
+
|
| 544 |
+
# Clamp to valid range
|
| 545 |
+
domain_x = max(0, min(1, domain_x))
|
| 546 |
+
domain_y = max(0, min(1, domain_y))
|
| 547 |
+
|
| 548 |
+
layout_images.append(dict(
|
| 549 |
+
source=logo_uri,
|
| 550 |
+
xref="x domain",
|
| 551 |
+
yref="y domain",
|
| 552 |
+
x=domain_x,
|
| 553 |
+
y=domain_y,
|
| 554 |
+
sizex=0.04,
|
| 555 |
+
sizey=0.06,
|
| 556 |
+
xanchor="center",
|
| 557 |
+
yanchor="middle",
|
| 558 |
+
layer="above"
|
| 559 |
+
))
|
| 560 |
+
except Exception:
|
| 561 |
+
pass
|
| 562 |
|
| 563 |
# Add labels for frontier points only
|
| 564 |
for row in frontier_rows:
|
|
|
|
| 1193 |
y_min = min_score - 5 if min_score > 5 else 0
|
| 1194 |
y_max = max_score + 5
|
| 1195 |
|
| 1196 |
+
# Cache base64-encoded logos across rows — every Claude model on the
|
| 1197 |
+
# Alternative Agents page points at the same assets/harness-claude-code.svg,
|
| 1198 |
+
# so decoding once per path is ~N× cheaper than once per point.
|
| 1199 |
+
_logo_cache: dict[str, str] = {}
|
| 1200 |
def _encode_logo(path: str) -> Optional[str]:
|
| 1201 |
+
if path in _logo_cache:
|
| 1202 |
+
return _logo_cache[path]
|
| 1203 |
+
if not os.path.exists(path):
|
| 1204 |
+
return None
|
| 1205 |
+
try:
|
| 1206 |
+
with open(path, "rb") as f:
|
| 1207 |
+
encoded = base64.b64encode(f.read()).decode("utf-8")
|
| 1208 |
+
except Exception as e:
|
| 1209 |
+
logger.warning(f"Could not load logo {path}: {e}")
|
| 1210 |
+
return None
|
| 1211 |
+
mime = "svg+xml" if path.lower().endswith(".svg") else "png"
|
| 1212 |
+
uri = f"data:image/{mime};base64,{encoded}"
|
| 1213 |
+
_logo_cache[path] = uri
|
| 1214 |
+
return uri
|
| 1215 |
|
| 1216 |
# Composite markers: on the Alternative Agents page the dataframe carries
|
| 1217 |
# an "Agent" column (Claude Code / Codex / Gemini CLI / OpenHands Sub-agents),
|
simple_data_loader.py
CHANGED
|
@@ -245,6 +245,7 @@ class SimpleLeaderboardViewer:
|
|
| 245 |
'acp-claude': 'Claude Code',
|
| 246 |
'acp-codex': 'Codex',
|
| 247 |
'acp-gemini': 'Gemini CLI',
|
|
|
|
| 248 |
}
|
| 249 |
alt_dir = self.config_path / "alternative_agents"
|
| 250 |
if alt_dir.exists():
|
|
@@ -252,8 +253,6 @@ class SimpleLeaderboardViewer:
|
|
| 252 |
if not type_dir.is_dir():
|
| 253 |
continue
|
| 254 |
default_name = agent_type_default_name.get(type_dir.name)
|
| 255 |
-
if default_name is None:
|
| 256 |
-
continue # skip unlisted agent types (e.g. openhands_subagents)
|
| 257 |
for agent_dir in type_dir.iterdir():
|
| 258 |
if not agent_dir.is_dir():
|
| 259 |
continue
|
|
|
|
| 245 |
'acp-claude': 'Claude Code',
|
| 246 |
'acp-codex': 'Codex',
|
| 247 |
'acp-gemini': 'Gemini CLI',
|
| 248 |
+
'openhands_subagents': 'OpenHands Sub-agents',
|
| 249 |
}
|
| 250 |
alt_dir = self.config_path / "alternative_agents"
|
| 251 |
if alt_dir.exists():
|
|
|
|
| 253 |
if not type_dir.is_dir():
|
| 254 |
continue
|
| 255 |
default_name = agent_type_default_name.get(type_dir.name)
|
|
|
|
|
|
|
| 256 |
for agent_dir in type_dir.iterdir():
|
| 257 |
if not agent_dir.is_dir():
|
| 258 |
continue
|
ui_components.py
CHANGED
|
@@ -43,8 +43,6 @@ from content import (
|
|
| 43 |
api = HfApi()
|
| 44 |
os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
| 45 |
|
| 46 |
-
_SVG_DATA_URI_CACHE: dict[str, str] = {}
|
| 47 |
-
|
| 48 |
|
| 49 |
def get_company_logo_html(model_name: str) -> str:
|
| 50 |
"""
|
|
@@ -83,18 +81,12 @@ OPENNESS_SVG_MAP = {
|
|
| 83 |
|
| 84 |
def get_svg_as_data_uri(path: str) -> str:
|
| 85 |
"""Reads an SVG file and returns it as a base64-encoded data URI."""
|
| 86 |
-
if path in _SVG_DATA_URI_CACHE:
|
| 87 |
-
return _SVG_DATA_URI_CACHE[path]
|
| 88 |
-
|
| 89 |
try:
|
| 90 |
with open(path, "rb") as svg_file:
|
| 91 |
encoded_svg = base64.b64encode(svg_file.read()).decode("utf-8")
|
| 92 |
-
|
| 93 |
-
_SVG_DATA_URI_CACHE[path] = uri
|
| 94 |
-
return uri
|
| 95 |
except FileNotFoundError:
|
| 96 |
print(f"Warning: SVG file not found at {path}")
|
| 97 |
-
_SVG_DATA_URI_CACHE[path] = ""
|
| 98 |
return ""
|
| 99 |
|
| 100 |
|
|
@@ -962,7 +954,7 @@ def create_leaderboard_display(
|
|
| 962 |
if not new_df.empty:
|
| 963 |
new_transformer = DataTransformer(new_df, new_tag_map)
|
| 964 |
new_df_view_full, _ = new_transformer.view(tag=category_name, use_plotly=True)
|
| 965 |
-
|
| 966 |
# Prepare both complete and all entries versions
|
| 967 |
if 'Categories Attempted' in new_df_view_full.columns:
|
| 968 |
new_df_view_complete = new_df_view_full[new_df_view_full['Categories Attempted'] == '5/5'].copy()
|
|
@@ -1022,22 +1014,16 @@ def create_leaderboard_display(
|
|
| 1022 |
|
| 1023 |
# Connect the timer to the refresh function
|
| 1024 |
if show_incomplete_checkbox is not None:
|
|
|
|
| 1025 |
if show_open_only_checkbox is not None:
|
| 1026 |
-
|
| 1027 |
-
|
| 1028 |
-
|
| 1029 |
-
|
| 1030 |
-
|
| 1031 |
-
|
| 1032 |
-
|
| 1033 |
-
|
| 1034 |
-
def _timer_refresh_no_open(show_incomplete, mark_by, show_all_labels):
|
| 1035 |
-
return check_and_refresh_data(show_incomplete, False, mark_by, show_all_labels)
|
| 1036 |
-
refresh_timer.tick(
|
| 1037 |
-
fn=_timer_refresh_no_open,
|
| 1038 |
-
inputs=[show_incomplete_checkbox, mark_by_dropdown, show_all_labels_checkbox],
|
| 1039 |
-
outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
|
| 1040 |
-
)
|
| 1041 |
else:
|
| 1042 |
# If no incomplete checkbox, always show all data (but still filter by open if needed)
|
| 1043 |
def check_and_refresh_all(show_open_only=False, mark_by=MARK_BY_DEFAULT, show_all_labels=False):
|
|
|
|
| 43 |
api = HfApi()
|
| 44 |
os.makedirs(EXTRACTED_DATA_DIR, exist_ok=True)
|
| 45 |
|
|
|
|
|
|
|
| 46 |
|
| 47 |
def get_company_logo_html(model_name: str) -> str:
|
| 48 |
"""
|
|
|
|
| 81 |
|
| 82 |
def get_svg_as_data_uri(path: str) -> str:
|
| 83 |
"""Reads an SVG file and returns it as a base64-encoded data URI."""
|
|
|
|
|
|
|
|
|
|
| 84 |
try:
|
| 85 |
with open(path, "rb") as svg_file:
|
| 86 |
encoded_svg = base64.b64encode(svg_file.read()).decode("utf-8")
|
| 87 |
+
return f"data:image/svg+xml;base64,{encoded_svg}"
|
|
|
|
|
|
|
| 88 |
except FileNotFoundError:
|
| 89 |
print(f"Warning: SVG file not found at {path}")
|
|
|
|
| 90 |
return ""
|
| 91 |
|
| 92 |
|
|
|
|
| 954 |
if not new_df.empty:
|
| 955 |
new_transformer = DataTransformer(new_df, new_tag_map)
|
| 956 |
new_df_view_full, _ = new_transformer.view(tag=category_name, use_plotly=True)
|
| 957 |
+
|
| 958 |
# Prepare both complete and all entries versions
|
| 959 |
if 'Categories Attempted' in new_df_view_full.columns:
|
| 960 |
new_df_view_complete = new_df_view_full[new_df_view_full['Categories Attempted'] == '5/5'].copy()
|
|
|
|
| 1014 |
|
| 1015 |
# Connect the timer to the refresh function
|
| 1016 |
if show_incomplete_checkbox is not None:
|
| 1017 |
+
timer_inputs = [show_incomplete_checkbox]
|
| 1018 |
if show_open_only_checkbox is not None:
|
| 1019 |
+
timer_inputs.append(show_open_only_checkbox)
|
| 1020 |
+
timer_inputs.append(mark_by_dropdown) # Always include mark_by
|
| 1021 |
+
timer_inputs.append(show_all_labels_checkbox)
|
| 1022 |
+
refresh_timer.tick(
|
| 1023 |
+
fn=check_and_refresh_data,
|
| 1024 |
+
inputs=timer_inputs,
|
| 1025 |
+
outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
|
| 1026 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1027 |
else:
|
| 1028 |
# If no incomplete checkbox, always show all data (but still filter by open if needed)
|
| 1029 |
def check_and_refresh_all(show_open_only=False, mark_by=MARK_BY_DEFAULT, show_all_labels=False):
|
visualizations.py
CHANGED
|
@@ -108,17 +108,7 @@ def create_accuracy_by_size_chart(df: pd.DataFrame, mark_by: str = None) -> go.F
|
|
| 108 |
open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(
|
| 109 |
aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, [])
|
| 110 |
)
|
| 111 |
-
openness_col =
|
| 112 |
-
if openness_col is None:
|
| 113 |
-
fig = go.Figure()
|
| 114 |
-
fig.add_annotation(
|
| 115 |
-
text="No openness data available",
|
| 116 |
-
xref="paper", yref="paper",
|
| 117 |
-
x=0.5, y=0.5, showarrow=False,
|
| 118 |
-
font=STANDARD_FONT
|
| 119 |
-
)
|
| 120 |
-
fig.update_layout(**STANDARD_LAYOUT, title="Open Model Accuracy by Size")
|
| 121 |
-
return fig
|
| 122 |
|
| 123 |
plot_df = df[
|
| 124 |
(df[param_col].notna()) &
|
|
|
|
| 108 |
open_aliases = [aliases.CANONICAL_OPENNESS_OPEN] + list(
|
| 109 |
aliases.OPENNESS_ALIASES.get(aliases.CANONICAL_OPENNESS_OPEN, [])
|
| 110 |
)
|
| 111 |
+
openness_col = 'Openness' if 'Openness' in df.columns else 'openness'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 112 |
|
| 113 |
plot_df = df[
|
| 114 |
(df[param_col].notna()) &
|