Spaces:
Running
Running
Fix: Scatter plot zoom and 'Show all labels' not working
#29
by juan-all-hands - opened
- leaderboard_transformer.py +73 -78
- simple_data_loader.py +1 -2
- ui_components.py +10 -16
leaderboard_transformer.py
CHANGED
|
@@ -971,7 +971,7 @@ def _plot_scatter_plotly(
|
|
| 971 |
name: Optional[str] = None,
|
| 972 |
plot_type: str = 'cost', # 'cost' or 'runtime'
|
| 973 |
mark_by: Optional[str] = None, # 'Company', 'Openness', or 'Country'
|
| 974 |
-
show_all_labels: bool = False
|
| 975 |
) -> go.Figure:
|
| 976 |
from constants import MARK_BY_DEFAULT
|
| 977 |
if mark_by is None:
|
|
@@ -1268,107 +1268,93 @@ def _plot_scatter_plotly(
|
|
| 1268 |
domain_x = max(0, min(1, domain_x))
|
| 1269 |
domain_y = max(0, min(1, domain_y))
|
| 1270 |
|
| 1271 |
-
# Convert to data coordinates
|
| 1272 |
-
# For log scale x: use log10(x) to match the axis type
|
| 1273 |
-
x_log = np.log10(x_val) if x_val > 0 else x_min_log
|
| 1274 |
-
|
| 1275 |
if harness_uri is not None:
|
| 1276 |
-
# Composite: stack model on top, harness on bottom
|
| 1277 |
-
#
|
| 1278 |
-
|
|
|
|
|
|
|
| 1279 |
layout_images.append(dict(
|
| 1280 |
source=model_logo_uri,
|
| 1281 |
-
xref="x", yref="y",
|
| 1282 |
-
x=
|
| 1283 |
-
sizex=STACKED_SIZE_X
|
| 1284 |
-
sizey=STACKED_SIZE_Y * (y_max - y_min),
|
| 1285 |
xanchor="center", yanchor="middle",
|
| 1286 |
layer="above",
|
| 1287 |
))
|
| 1288 |
layout_images.append(dict(
|
| 1289 |
source=harness_uri,
|
| 1290 |
-
xref="x", yref="y",
|
| 1291 |
-
x=
|
| 1292 |
-
sizex=STACKED_SIZE_X
|
| 1293 |
-
sizey=STACKED_SIZE_Y * (y_max - y_min),
|
| 1294 |
xanchor="center", yanchor="middle",
|
| 1295 |
layer="above",
|
| 1296 |
))
|
| 1297 |
else:
|
| 1298 |
-
# Single marker
|
|
|
|
|
|
|
|
|
|
| 1299 |
layout_images.append(dict(
|
| 1300 |
source=model_logo_uri,
|
| 1301 |
-
xref="x", yref="y",
|
| 1302 |
-
x=
|
| 1303 |
-
sizex=SINGLE_SIZE_X
|
| 1304 |
-
sizey=SINGLE_SIZE_Y * (y_max - y_min),
|
| 1305 |
xanchor="center", yanchor="middle",
|
| 1306 |
layer="above",
|
| 1307 |
))
|
| 1308 |
|
| 1309 |
-
# --- Section 7: Add Model Name Labels ---
|
| 1310 |
-
|
| 1311 |
-
|
| 1312 |
-
# Label all data points
|
| 1313 |
-
labels_data = []
|
| 1314 |
-
for _, row in data_plot.iterrows():
|
| 1315 |
-
x_val = row[x_col_to_use]
|
| 1316 |
-
y_val = row[y_col_to_use]
|
| 1317 |
-
|
| 1318 |
-
model_name = row.get('Language Model', '')
|
| 1319 |
-
if isinstance(model_name, list):
|
| 1320 |
-
model_name = model_name[0] if model_name else ''
|
| 1321 |
-
model_name = str(model_name).split('/')[-1]
|
| 1322 |
-
if len(model_name) > 25:
|
| 1323 |
-
model_name = model_name[:22] + '...'
|
| 1324 |
-
|
| 1325 |
-
labels_data.append({'x': x_val, 'y': y_val, 'label': model_name})
|
| 1326 |
-
elif frontier_rows:
|
| 1327 |
-
# Label only Pareto frontier points
|
| 1328 |
-
labels_data = []
|
| 1329 |
|
| 1330 |
for row in frontier_rows:
|
| 1331 |
x_val = row[x_col_to_use]
|
| 1332 |
y_val = row[y_col_to_use]
|
| 1333 |
|
|
|
|
| 1334 |
model_name = row.get('Language Model', '')
|
| 1335 |
if isinstance(model_name, list):
|
| 1336 |
model_name = model_name[0] if model_name else ''
|
|
|
|
| 1337 |
model_name = str(model_name).split('/')[-1]
|
|
|
|
| 1338 |
if len(model_name) > 25:
|
| 1339 |
model_name = model_name[:22] + '...'
|
| 1340 |
|
| 1341 |
-
|
| 1342 |
-
|
| 1343 |
-
|
| 1344 |
-
|
| 1345 |
-
|
| 1346 |
-
# For log scale x-axis, annotations need log10(x) coordinates (Plotly issue #2580)
|
| 1347 |
-
for item in labels_data:
|
| 1348 |
-
x_val = item['x']
|
| 1349 |
-
y_val = item['y']
|
| 1350 |
-
label = item['label']
|
| 1351 |
|
| 1352 |
-
#
|
| 1353 |
-
|
| 1354 |
-
|
| 1355 |
-
|
| 1356 |
-
|
| 1357 |
-
|
| 1358 |
-
|
| 1359 |
-
x
|
| 1360 |
-
|
| 1361 |
-
|
| 1362 |
-
|
| 1363 |
-
|
| 1364 |
-
|
| 1365 |
-
|
| 1366 |
-
|
| 1367 |
-
|
| 1368 |
-
|
| 1369 |
-
|
| 1370 |
-
|
| 1371 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1372 |
|
| 1373 |
# --- Section 8: Configure Layout ---
|
| 1374 |
# Use the same axis ranges as calculated for domain coordinates
|
|
@@ -1487,38 +1473,47 @@ def format_score_column(df: pd.DataFrame, score_col_name: str) -> pd.DataFrame:
|
|
| 1487 |
return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
|
| 1488 |
|
| 1489 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1490 |
def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
|
| 1491 |
"""
|
| 1492 |
Applies custom formatting to a runtime column based on its corresponding score column.
|
| 1493 |
- If runtime is not null, formats as time with 's' suffix.
|
| 1494 |
- If runtime is null but score is not, it becomes "Missing".
|
| 1495 |
- If both runtime and score are null, it becomes "Not Submitted".
|
|
|
|
| 1496 |
Args:
|
| 1497 |
df: The DataFrame to modify.
|
| 1498 |
runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").
|
| 1499 |
Returns:
|
| 1500 |
The DataFrame with the formatted runtime column.
|
| 1501 |
"""
|
| 1502 |
-
# Find the corresponding score column by replacing "Runtime" with "Score"
|
| 1503 |
score_col_name = runtime_col_name.replace("Runtime", "Score")
|
| 1504 |
|
| 1505 |
-
# Ensure the score column actually exists to avoid errors
|
| 1506 |
if score_col_name not in df.columns:
|
| 1507 |
-
return df
|
| 1508 |
|
| 1509 |
def apply_formatting_logic(row):
|
| 1510 |
runtime_value = row[runtime_col_name]
|
| 1511 |
score_value = row[score_col_name]
|
| 1512 |
status_color = "#ec4899"
|
|
|
|
|
|
|
| 1513 |
|
| 1514 |
if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
|
| 1515 |
-
return f"{runtime_value:.0f}s"
|
| 1516 |
elif pd.notna(score_value):
|
| 1517 |
-
return f'<span style="color: {status_color};">Missing</span>'
|
| 1518 |
else:
|
| 1519 |
-
return f'<span style="color: {status_color};">Not Submitted</span>'
|
| 1520 |
|
| 1521 |
-
# Apply the logic to the specified runtime column and update the DataFrame
|
| 1522 |
df[runtime_col_name] = df.apply(apply_formatting_logic, axis=1)
|
| 1523 |
|
| 1524 |
return df
|
|
|
|
| 971 |
name: Optional[str] = None,
|
| 972 |
plot_type: str = 'cost', # 'cost' or 'runtime'
|
| 973 |
mark_by: Optional[str] = None, # 'Company', 'Openness', or 'Country'
|
| 974 |
+
show_all_labels: bool = False
|
| 975 |
) -> go.Figure:
|
| 976 |
from constants import MARK_BY_DEFAULT
|
| 977 |
if mark_by is None:
|
|
|
|
| 1268 |
domain_x = max(0, min(1, domain_x))
|
| 1269 |
domain_y = max(0, min(1, domain_y))
|
| 1270 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1271 |
if harness_uri is not None:
|
| 1272 |
+
# Composite: stack model on top, harness on bottom, clamping
|
| 1273 |
+
# each half to the plot area so markers near the edges don't
|
| 1274 |
+
# drift off-canvas.
|
| 1275 |
+
model_y = min(1, domain_y + STACKED_Y_OFFSET)
|
| 1276 |
+
harness_y = max(0, domain_y - STACKED_Y_OFFSET)
|
| 1277 |
layout_images.append(dict(
|
| 1278 |
source=model_logo_uri,
|
| 1279 |
+
xref="x domain", yref="y domain",
|
| 1280 |
+
x=domain_x, y=model_y,
|
| 1281 |
+
sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
|
|
|
|
| 1282 |
xanchor="center", yanchor="middle",
|
| 1283 |
layer="above",
|
| 1284 |
))
|
| 1285 |
layout_images.append(dict(
|
| 1286 |
source=harness_uri,
|
| 1287 |
+
xref="x domain", yref="y domain",
|
| 1288 |
+
x=domain_x, y=harness_y,
|
| 1289 |
+
sizex=STACKED_SIZE_X, sizey=STACKED_SIZE_Y,
|
|
|
|
| 1290 |
xanchor="center", yanchor="middle",
|
| 1291 |
layer="above",
|
| 1292 |
))
|
| 1293 |
else:
|
| 1294 |
+
# Single marker (canonical OpenHands pages, or Alternative Agents
|
| 1295 |
+
# rows with an unknown harness name — the latter shouldn't happen
|
| 1296 |
+
# in practice since HARNESS_LOGO_PATHS covers every agent_name the
|
| 1297 |
+
# push-to-index script emits).
|
| 1298 |
layout_images.append(dict(
|
| 1299 |
source=model_logo_uri,
|
| 1300 |
+
xref="x domain", yref="y domain",
|
| 1301 |
+
x=domain_x, y=domain_y,
|
| 1302 |
+
sizex=SINGLE_SIZE_X, sizey=SINGLE_SIZE_Y,
|
|
|
|
| 1303 |
xanchor="center", yanchor="middle",
|
| 1304 |
layer="above",
|
| 1305 |
))
|
| 1306 |
|
| 1307 |
+
# --- Section 7: Add Model Name Labels to Frontier Points ---
|
| 1308 |
+
if frontier_rows:
|
| 1309 |
+
frontier_labels_data = []
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1310 |
|
| 1311 |
for row in frontier_rows:
|
| 1312 |
x_val = row[x_col_to_use]
|
| 1313 |
y_val = row[y_col_to_use]
|
| 1314 |
|
| 1315 |
+
# Get the model name for the label
|
| 1316 |
model_name = row.get('Language Model', '')
|
| 1317 |
if isinstance(model_name, list):
|
| 1318 |
model_name = model_name[0] if model_name else ''
|
| 1319 |
+
# Clean the model name (remove path prefixes)
|
| 1320 |
model_name = str(model_name).split('/')[-1]
|
| 1321 |
+
# Truncate long names
|
| 1322 |
if len(model_name) > 25:
|
| 1323 |
model_name = model_name[:22] + '...'
|
| 1324 |
|
| 1325 |
+
frontier_labels_data.append({
|
| 1326 |
+
'x': x_val,
|
| 1327 |
+
'y': y_val,
|
| 1328 |
+
'label': model_name
|
| 1329 |
+
})
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1330 |
|
| 1331 |
+
# Add annotations for each frontier label
|
| 1332 |
+
# For log scale x-axis, annotations need log10(x) coordinates (Plotly issue #2580)
|
| 1333 |
+
for item in frontier_labels_data:
|
| 1334 |
+
x_val = item['x']
|
| 1335 |
+
y_val = item['y']
|
| 1336 |
+
label = item['label']
|
| 1337 |
+
|
| 1338 |
+
# Transform x to log10 for annotation positioning on log scale
|
| 1339 |
+
if x_val > 0:
|
| 1340 |
+
x_log = np.log10(x_val)
|
| 1341 |
+
else:
|
| 1342 |
+
x_log = x_min_log
|
| 1343 |
+
|
| 1344 |
+
fig.add_annotation(
|
| 1345 |
+
x=x_log,
|
| 1346 |
+
y=y_val,
|
| 1347 |
+
text=label,
|
| 1348 |
+
showarrow=False,
|
| 1349 |
+
yshift=25, # Move label higher above the icon
|
| 1350 |
+
font=dict(
|
| 1351 |
+
size=10,
|
| 1352 |
+
color='#0D0D0F', # neutral-950
|
| 1353 |
+
family=FONT_FAMILY_SHORT
|
| 1354 |
+
),
|
| 1355 |
+
xanchor='center',
|
| 1356 |
+
yanchor='bottom'
|
| 1357 |
+
)
|
| 1358 |
|
| 1359 |
# --- Section 8: Configure Layout ---
|
| 1360 |
# Use the same axis ranges as calculated for domain coordinates
|
|
|
|
| 1473 |
return df.assign(**{score_col_name: df[score_col_name].apply(apply_formatting)})
|
| 1474 |
|
| 1475 |
|
| 1476 |
+
def _hidden_runtime_sort_key(runtime_value: float | int | None, score_value: float | int | None) -> str:
|
| 1477 |
+
"""Build a hidden prefix so Gradio's string-based runtime sorting behaves numerically."""
|
| 1478 |
+
if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
|
| 1479 |
+
return f"{float(runtime_value):020.6f}"
|
| 1480 |
+
if pd.notna(score_value):
|
| 1481 |
+
return "99999999999999999998"
|
| 1482 |
+
return "99999999999999999999"
|
| 1483 |
+
|
| 1484 |
+
|
| 1485 |
def format_runtime_column(df: pd.DataFrame, runtime_col_name: str) -> pd.DataFrame:
|
| 1486 |
"""
|
| 1487 |
Applies custom formatting to a runtime column based on its corresponding score column.
|
| 1488 |
- If runtime is not null, formats as time with 's' suffix.
|
| 1489 |
- If runtime is null but score is not, it becomes "Missing".
|
| 1490 |
- If both runtime and score are null, it becomes "Not Submitted".
|
| 1491 |
+
- Adds a hidden, zero-padded numeric prefix so Gradio sorts the column numerically.
|
| 1492 |
Args:
|
| 1493 |
df: The DataFrame to modify.
|
| 1494 |
runtime_col_name: The name of the runtime column to format (e.g., "Average Runtime").
|
| 1495 |
Returns:
|
| 1496 |
The DataFrame with the formatted runtime column.
|
| 1497 |
"""
|
|
|
|
| 1498 |
score_col_name = runtime_col_name.replace("Runtime", "Score")
|
| 1499 |
|
|
|
|
| 1500 |
if score_col_name not in df.columns:
|
| 1501 |
+
return df
|
| 1502 |
|
| 1503 |
def apply_formatting_logic(row):
|
| 1504 |
runtime_value = row[runtime_col_name]
|
| 1505 |
score_value = row[score_col_name]
|
| 1506 |
status_color = "#ec4899"
|
| 1507 |
+
sort_key = _hidden_runtime_sort_key(runtime_value, score_value)
|
| 1508 |
+
hidden_sort_prefix = f'<span style="display:none">{sort_key}</span>'
|
| 1509 |
|
| 1510 |
if pd.notna(runtime_value) and isinstance(runtime_value, (int, float)):
|
| 1511 |
+
return f"{hidden_sort_prefix}{runtime_value:.0f}s"
|
| 1512 |
elif pd.notna(score_value):
|
| 1513 |
+
return f'{hidden_sort_prefix}<span style="color: {status_color};">Missing</span>'
|
| 1514 |
else:
|
| 1515 |
+
return f'{hidden_sort_prefix}<span style="color: {status_color};">Not Submitted</span>'
|
| 1516 |
|
|
|
|
| 1517 |
df[runtime_col_name] = df.apply(apply_formatting_logic, axis=1)
|
| 1518 |
|
| 1519 |
return df
|
simple_data_loader.py
CHANGED
|
@@ -245,6 +245,7 @@ class SimpleLeaderboardViewer:
|
|
| 245 |
'acp-claude': 'Claude Code',
|
| 246 |
'acp-codex': 'Codex',
|
| 247 |
'acp-gemini': 'Gemini CLI',
|
|
|
|
| 248 |
}
|
| 249 |
alt_dir = self.config_path / "alternative_agents"
|
| 250 |
if alt_dir.exists():
|
|
@@ -252,8 +253,6 @@ class SimpleLeaderboardViewer:
|
|
| 252 |
if not type_dir.is_dir():
|
| 253 |
continue
|
| 254 |
default_name = agent_type_default_name.get(type_dir.name)
|
| 255 |
-
if default_name is None:
|
| 256 |
-
continue # skip unlisted agent types (e.g. openhands_subagents)
|
| 257 |
for agent_dir in type_dir.iterdir():
|
| 258 |
if not agent_dir.is_dir():
|
| 259 |
continue
|
|
|
|
| 245 |
'acp-claude': 'Claude Code',
|
| 246 |
'acp-codex': 'Codex',
|
| 247 |
'acp-gemini': 'Gemini CLI',
|
| 248 |
+
'openhands_subagents': 'OpenHands Sub-agents',
|
| 249 |
}
|
| 250 |
alt_dir = self.config_path / "alternative_agents"
|
| 251 |
if alt_dir.exists():
|
|
|
|
| 253 |
if not type_dir.is_dir():
|
| 254 |
continue
|
| 255 |
default_name = agent_type_default_name.get(type_dir.name)
|
|
|
|
|
|
|
| 256 |
for agent_dir in type_dir.iterdir():
|
| 257 |
if not agent_dir.is_dir():
|
| 258 |
continue
|
ui_components.py
CHANGED
|
@@ -954,7 +954,7 @@ def create_leaderboard_display(
|
|
| 954 |
if not new_df.empty:
|
| 955 |
new_transformer = DataTransformer(new_df, new_tag_map)
|
| 956 |
new_df_view_full, _ = new_transformer.view(tag=category_name, use_plotly=True)
|
| 957 |
-
|
| 958 |
# Prepare both complete and all entries versions
|
| 959 |
if 'Categories Attempted' in new_df_view_full.columns:
|
| 960 |
new_df_view_complete = new_df_view_full[new_df_view_full['Categories Attempted'] == '5/5'].copy()
|
|
@@ -1014,22 +1014,16 @@ def create_leaderboard_display(
|
|
| 1014 |
|
| 1015 |
# Connect the timer to the refresh function
|
| 1016 |
if show_incomplete_checkbox is not None:
|
|
|
|
| 1017 |
if show_open_only_checkbox is not None:
|
| 1018 |
-
|
| 1019 |
-
|
| 1020 |
-
|
| 1021 |
-
|
| 1022 |
-
|
| 1023 |
-
|
| 1024 |
-
|
| 1025 |
-
|
| 1026 |
-
def _timer_refresh_no_open(show_incomplete, mark_by, show_all_labels):
|
| 1027 |
-
return check_and_refresh_data(show_incomplete, False, mark_by, show_all_labels)
|
| 1028 |
-
refresh_timer.tick(
|
| 1029 |
-
fn=_timer_refresh_no_open,
|
| 1030 |
-
inputs=[show_incomplete_checkbox, mark_by_dropdown, show_all_labels_checkbox],
|
| 1031 |
-
outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
|
| 1032 |
-
)
|
| 1033 |
else:
|
| 1034 |
# If no incomplete checkbox, always show all data (but still filter by open if needed)
|
| 1035 |
def check_and_refresh_all(show_open_only=False, mark_by=MARK_BY_DEFAULT, show_all_labels=False):
|
|
|
|
| 954 |
if not new_df.empty:
|
| 955 |
new_transformer = DataTransformer(new_df, new_tag_map)
|
| 956 |
new_df_view_full, _ = new_transformer.view(tag=category_name, use_plotly=True)
|
| 957 |
+
|
| 958 |
# Prepare both complete and all entries versions
|
| 959 |
if 'Categories Attempted' in new_df_view_full.columns:
|
| 960 |
new_df_view_complete = new_df_view_full[new_df_view_full['Categories Attempted'] == '5/5'].copy()
|
|
|
|
| 1014 |
|
| 1015 |
# Connect the timer to the refresh function
|
| 1016 |
if show_incomplete_checkbox is not None:
|
| 1017 |
+
timer_inputs = [show_incomplete_checkbox]
|
| 1018 |
if show_open_only_checkbox is not None:
|
| 1019 |
+
timer_inputs.append(show_open_only_checkbox)
|
| 1020 |
+
timer_inputs.append(mark_by_dropdown) # Always include mark_by
|
| 1021 |
+
timer_inputs.append(show_all_labels_checkbox)
|
| 1022 |
+
refresh_timer.tick(
|
| 1023 |
+
fn=check_and_refresh_data,
|
| 1024 |
+
inputs=timer_inputs,
|
| 1025 |
+
outputs=[dataframe_component, cost_plot_component, runtime_plot_component]
|
| 1026 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1027 |
else:
|
| 1028 |
# If no incomplete checkbox, always show all data (but still filter by open if needed)
|
| 1029 |
def check_and_refresh_all(show_open_only=False, mark_by=MARK_BY_DEFAULT, show_all_labels=False):
|