leaderboard / src /display.py
Dylan123's picture
Upload folder using huggingface_hub
294196e verified
"""Load ``results.jsonl`` rows and flatten them into the leaderboard DataFrame."""
from __future__ import annotations
import json
import math
from pathlib import Path
from typing import Any
import pandas as pd
from src.about import (
ALL_METRIC_COLUMNS,
GROUPS,
META_COLUMNS,
METRIC_KEY,
metric_column,
)
def _as_link(label: str, url: str | None) -> str:
label = label or ""
if not url:
return label
return f'<a href="{url}" target="_blank">{label}</a>'
def _safe_float(value: Any) -> float | None:
if value is None:
return None
try:
f = float(value)
except (TypeError, ValueError):
return None
if not math.isfinite(f):
return None
return f
def _row_to_flat(entry: dict[str, Any]) -> dict[str, Any]:
    """Flatten one parsed results row into a single-level dict.

    The result holds the four metadata cells ("Method", "Model",
    "Submitted By", "Submitted At") followed by one cell per entry of
    ``GROUPS``, keyed by ``metric_column(group)`` and holding the
    ``METRIC_KEY`` value of that group passed through ``_safe_float``.
    Missing or falsy metadata values become ``""``.
    """
    model_name = entry.get("model", "") or ""
    # Only build a model link when there is a model name to show; otherwise
    # the cell stays an empty string even if a model_link is present.
    if model_name:
        model_cell = _as_link(model_name, entry.get("model_link"))
    else:
        model_cell = ""

    flat: dict[str, Any] = {
        "Method": _as_link(entry.get("method", ""), entry.get("method_link")),
        "Model": model_cell,
        "Submitted By": entry.get("submitted_by", "") or "",
        "Submitted At": entry.get("submitted_at", "") or "",
    }

    # A missing or null "metrics" mapping (or group within it) is treated
    # as empty, so the corresponding cells end up as None.
    per_group = entry.get("metrics", {}) or {}
    for group in GROUPS:
        scores = per_group.get(group, {}) or {}
        flat[metric_column(group)] = _safe_float(scores.get(METRIC_KEY))
    return flat
def load_results(results_path: Path) -> pd.DataFrame:
    """Read a JSON-lines results file into the leaderboard DataFrame.

    Args:
        results_path: Path to the JSONL file, one JSON object per line.

    Returns:
        A DataFrame whose columns are ``META_COLUMNS`` followed by
        ``ALL_METRIC_COLUMNS``. It is empty (but still has those columns)
        when the file does not exist or contains no usable rows. Metric
        columns are coerced to numeric, with unparseable values becoming NaN.

    Blank lines, lines that are not valid JSON, and lines whose JSON value
    is not an object are skipped.
    """
    columns = META_COLUMNS + ALL_METRIC_COLUMNS
    if not results_path.exists():
        return pd.DataFrame(columns=columns)

    rows: list[dict[str, Any]] = []
    with results_path.open("r", encoding="utf-8") as fh:
        for raw in fh:
            raw = raw.strip()
            if not raw:
                continue
            try:
                entry = json.loads(raw)
            except json.JSONDecodeError:
                # Best effort: one corrupt line must not hide the others.
                continue
            if not isinstance(entry, dict):
                # Valid JSON that is not an object (list, number, string,
                # null) has no ``.get`` and cannot be flattened into a row.
                continue
            rows.append(_row_to_flat(entry))

    if not rows:
        return pd.DataFrame(columns=columns)

    df = pd.DataFrame(rows, columns=columns)
    # Columns containing None would otherwise stay ``object`` dtype; force
    # a numeric dtype on every metric column.
    for col in ALL_METRIC_COLUMNS:
        df[col] = pd.to_numeric(df[col], errors="coerce")
    return df
def _load_unique_field(results_path: Path, field: str) -> list[str]:
"""Return the unique non-empty string values of ``field`` across all rows, sorted."""
if not results_path.exists():
return []
seen: set[str] = set()
with results_path.open("r", encoding="utf-8") as fh:
for raw in fh:
raw = raw.strip()
if not raw:
continue
try:
entry = json.loads(raw)
except json.JSONDecodeError:
continue
value = (entry.get(field) or "").strip()
if value:
seen.add(value)
return sorted(seen)
def load_method_names(results_path: Path) -> list[str]:
    """Return the distinct raw ``method`` values in ``results.jsonl``, sorted.

    These are the plain strings stored in the file, not the HTML-wrapped
    ``Method`` cells built by :func:`load_results`, so they can serve as
    clean labels for the method-filter dropdown in ``app.py``.
    """
    field_name = "method"
    return _load_unique_field(results_path, field_name)
def load_model_names(results_path: Path) -> list[str]:
    """Return the distinct raw ``model`` ids in ``results.jsonl``, sorted.

    Rows whose ``model`` field is empty or missing (e.g. ensembles without
    a single backing model) contribute nothing, so no blank choice appears
    in the returned list.
    """
    field_name = "model"
    return _load_unique_field(results_path, field_name)