| """Build per-pillar DataFrames for the VANTAGE-Bench leaderboard. |
| |
| Primary API |
| ----------- |
| build_all_tables(filtered_models, global_ranks) -> dict[str, pd.DataFrame] |
| Returns one DataFrame per pillar key in config.PILLARS. |
| Keys: 'overall', 'spatial', 'st', 'temporal', 'semantic'. |
| Name column: HTML cell (model name, organization, badges); the name is |
| bold when the model is rank #1 in that pillar. |
| Score columns: floats (NaN when missing) on a pandas Styler that displays |
| '—' for missing values, 2 decimal places otherwise, and bolds best-in-col. |
| |
| make_radar_svg(model) -> str |
| 4-axis radar chart SVG (220 × 160 px) for the model detail side panel. |
| Axes: Semantic · Spatial · Sp-Temp · Temporal (clockwise from top). |
| |
| Gradio compatibility API (app.py) |
| ---------------------------------- |
| render_tab(data, rank_map, tab, ...) -> (pd.DataFrame, status_str) |
| headers_for_tab(tab, selected_tasks=None) -> list[str] |
| datatypes_for_tab(tab, selected_tasks=None) -> list[str] |
| These accept the legacy tab keys ('spatio_temporal', etc.) used by app.py. |
| """ |
|
|
| from __future__ import annotations |
|
|
| import math |
| import pandas as pd |
|
|
| from .config import PILLARS, TASKS, TASK_METRIC_LABELS |
| from .data import LeaderboardData, ModelRecord |
| from .filters import apply_filters, is_filter_active |
|
|
# Placeholder rendered in a score cell when a model has no value for it.
_MISSING = "—"
|
|
| |
# Short task key (the keys used by PILLARS / TASKS) -> name of the score field
# looked up in ModelRecord.scores.
_TASK_JSON_FIELD: dict[str, str] = {
    "loc": "2d_localization",
    "ground": "2d_referring_expressions",
    "pointing": "2d_spatial_pointing",
    "sot": "single_object_tracking",
    "temploc": "temporal_localization",
    "dvc": "dense_video_captioning",
    "ev": "event_verification",
    "vqa": "video_qa",
}
|
|
| |
| |
| |
# (display label, score field) for every task column of the Overall tab.
# The order matters: build_overall_html_table lays its group headers over
# these columns as 3 Spatial, 1 Spatio-Temp, 2 Temporal, 2 Semantic.
_OVERALL_SCORE_COLS: list[tuple[str, str]] = [
    # Spatial
    ("Obj Loc", "2d_localization"),
    ("Ref Exp", "2d_referring_expressions"),
    ("Pointing", "2d_spatial_pointing"),
    # Spatio-temporal
    ("SOT", "single_object_tracking"),
    # Temporal
    ("Temp Loc", "temporal_localization"),
    ("DVC", "dense_video_captioning"),
    # Semantic
    ("Event Ver", "event_verification"),
    ("VQA", "video_qa"),
]
|
|
| |
| |
# (column span, group label) pairs describing grouped headers for the Overall
# columns. The four labelled spans (3, 1, 2, 2) line up with the task columns
# in _OVERALL_SCORE_COLS.
# NOTE(review): the leading (2, "") entry presumably covers the ungrouped
# "#" and "Name" columns; nothing visible in this file reads this constant,
# so confirm against its consumer.
OVERALL_PILLAR_GROUPS: list[tuple[int, str]] = [
    (2, ""),
    (3, "Spatial"),
    (1, "Spatio-Temp"),
    (2, "Temporal"),
    (2, "Semantic"),
]
|
|
| |
| |
# Pillar key -> (display label, score field) of that pillar's aggregate
# column. _score_cols_for_pillar puts this column ahead of the per-task ones.
_PILLAR_AGGREGATE_COL: dict[str, tuple[str, str]] = {
    "spatial": ("Spatial", "spatial"),
    "st": ("Sp-Temp", "spatio_temporal"),
    "temporal": ("Temporal", "temporal"),
    "semantic": ("Semantic", "semantic"),
}
|
|
| |
# Legacy Gradio tab key -> pillar key. Callers look keys up with
# .get(tab, tab), so a key that is not listed here passes through unchanged.
_OLD_TAB_TO_PILLAR: dict[str, str] = {
    "overall": "overall",
    "spatial": "spatial",
    "spatio_temporal": "st",
    "temporal": "temporal",
    "semantic": "semantic",
}
|
|
|
|
| |
|
|
|
|
def _sort_by_rank(
    models: list[ModelRecord], rank_map: dict[str, int]
) -> list[ModelRecord]:
    """Return a new list of *models* ordered by rank, then by name.

    Models whose id is absent from *rank_map* sort after every ranked model
    (they are given an infinite rank) and are ordered among themselves by name.
    """
    unranked = float("inf")

    def _order(model: ModelRecord) -> tuple[float, str]:
        return rank_map.get(model.id, unranked), model.name

    return sorted(models, key=_order)
|
|
|
|
def _column_maxes(
    models: list[ModelRecord], json_fields: list[str]
) -> dict[str, float]:
    """Return the maximum score per field across the given model set.

    Parameters
    ----------
    models:
        Records whose ``scores`` mapping is inspected.
    json_fields:
        Score field names to compute a maximum for.

    Returns
    -------
    dict[str, float]
        ``{field: max_score}``. A field is omitted when no model has a
        usable value for it.

    A score that is absent, ``None`` or NaN counts as missing, matching how
    the table builders in this module treat ``None``. Without this, a single
    explicit ``None`` next to real values would make ``max()`` raise, and a
    NaN would make the result depend on model order.
    """
    maxes: dict[str, float] = {}
    for field in json_fields:
        present = [
            value
            for value in (m.scores.get(field) for m in models)
            if value is not None and not math.isnan(value)
        ]
        if present:
            maxes[field] = max(present)
    return maxes
|
|
|
|
def _model_html(m: ModelRecord, rank_one_id: str | None) -> str:
    """Build the HTML cell for the Name column.

    Layout, all wrapped in one ``<span class="mc">``:
      * name row: the model name (bold when ``m.id == rank_one_id``, linked
        when ``m.model_url`` is set) followed by a "verified" tick if
        ``m.verified`` is truthy;
      * the organization line;
      * badges: result type (ensemble vs. single), open vs. proprietary,
        and "new" when ``m.is_new`` is truthy.

    Parameters
    ----------
    m:
        Record to render.
    rank_one_id:
        Id of the model whose name is shown in bold, or ``None`` for no
        bolding.

    Returns
    -------
    str
        HTML fragment. The name, organization and URL come from record data
        and are HTML-escaped so they cannot break out of the markup.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    safe_name = escape(str(m.name))
    name_part = f"<b>{safe_name}</b>" if m.id == rank_one_id else safe_name
    if m.model_url:
        # NOTE(review): only markup characters are escaped here; the URL
        # scheme is not validated (a "javascript:" URL would still be
        # emitted). Confirm that URLs are vetted upstream.
        safe_url = escape(str(m.model_url))
        name_core = (
            f'<a href="{safe_url}" target="_blank" class="mc-link">{name_part}</a>'
        )
    else:
        name_core = name_part
    verified_badge = '<span class="b b-verified">✓</span>' if m.verified else ""
    name_html = f'<span class="mc-name-row">{name_core}{verified_badge}</span>'

    badges: list[str] = []
    if m.result_type == "ensemble":
        badges.append('<span class="b b-ensemble">system / pipeline</span>')
    else:
        badges.append('<span class="b b-single">single</span>')
    if m.type == "open":
        badges.append('<span class="b b-open">open</span>')
    else:
        badges.append('<span class="b b-prop">prop.</span>')
    if m.is_new:
        badges.append('<span class="b b-new">new</span>')
    badge_html = "".join(badges)

    return (
        f'<span data-n="{safe_name}" class="mc">'
        f'{name_html}'
        f'<span class="mc-org">{escape(str(m.organization))}</span>'
        f'<span class="mc-badges">{badge_html}</span>'
        f'</span>'
    )
|
|
|
|
def _fmt_score_val(v: float) -> str:
    """Render one score cell: the missing-value placeholder for NaN/NA,
    otherwise the number with exactly two decimal places."""
    return _MISSING if pd.isna(v) else f"{v:.2f}"
|
|
|
|
def _score_cols_for_pillar(
    pillar: str,
    selected_tasks: list[str] | None = None,
) -> list[tuple[str, str]]:
    """Return the ``(display_label, json_field)`` score columns of a pillar.

    For ``"overall"`` this is a copy of ``_OVERALL_SCORE_COLS`` and
    *selected_tasks* is ignored. For any other pillar the result is the
    pillar's aggregate column (when ``_PILLAR_AGGREGATE_COL`` has one)
    followed by one column per task in ``PILLARS[pillar]``.

    selected_tasks: JSON field names to keep (Gradio column-toggle); ``None``
    keeps every task. The aggregate column is never filtered out.
    """
    if pillar == "overall":
        return list(_OVERALL_SCORE_COLS)

    task_keys = PILLARS[pillar]
    if selected_tasks is not None:
        task_keys = [
            key for key in task_keys if _TASK_JSON_FIELD[key] in selected_tasks
        ]

    def _task_column(task_key: str) -> tuple[str, str]:
        # The label is the task title, suffixed with its metric when one is known.
        field = _TASK_JSON_FIELD[task_key]
        metric = TASK_METRIC_LABELS.get(field, "")
        title = f"{TASKS[task_key]} ({metric})" if metric else TASKS[task_key]
        return title, field

    aggregate = _PILLAR_AGGREGATE_COL.get(pillar)
    leading = [] if aggregate is None else [aggregate]
    return leading + [_task_column(key) for key in task_keys]
|
|
|
|
def _assemble_df(
    sorted_models: list[ModelRecord],
    score_cols: list[tuple[str, str]],
    rank_one_id: str | None,
) -> "pd.io.formats.style.Styler":
    """Build the styled table for one tab and return it as a pandas Styler.

    Columns are ``#`` (1-based position as a string), ``Name`` (HTML from
    ``_model_html``) and one float column per entry of *score_cols*, holding
    NaN where the model has no score so the column stays numeric.

    When there is at least one score column the Styler also:
      - bolds the cell(s) equal to the column maximum, and
      - displays NaN as the missing placeholder and numbers with 2 decimals.
    """
    score_labels = [label for label, _ in score_cols]

    def _record(position: int, model: ModelRecord) -> dict:
        cells: dict = {
            "#": str(position),
            "Name": _model_html(model, rank_one_id),
        }
        for label, json_field in score_cols:
            raw = model.scores.get(json_field)
            cells[label] = float("nan") if raw is None else float(raw)
        return cells

    records = [
        _record(position, model)
        for position, model in enumerate(sorted_models, 1)
    ]
    frame = pd.DataFrame(records, columns=["#", "Name", *score_labels])

    if not score_labels:
        # Nothing to highlight or format.
        return frame.style

    def _highlight_best(column: pd.Series) -> list[str]:
        best = column.max(skipna=True)
        return [
            "font-weight: bold" if (not pd.isna(cell) and cell == best) else ""
            for cell in column
        ]

    highlighted = frame.style.apply(_highlight_best, subset=score_labels, axis=0)
    return highlighted.format(_fmt_score_val, subset=score_labels, na_rep=_MISSING)
|
|
|
|
| |
|
|
|
|
def build_overall_html_table(
    filtered_models: list[ModelRecord],
    rank_map: dict[str, int],
) -> str:
    """Hand-rendered HTML <table> for the Overall tab.

    Used instead of gr.Dataframe because Gradio's DataFrame component
    does not cleanly support multi-level (grouped) column headers.
    Renders a two-row header:
        # · Name · Overall span both rows (rowspan=2)
        Spatial (×3) | Spatio-Temp (×1) | Temporal (×2) | Semantic (×2)
        Obj Loc | Ref Exp | Pointing | SOT | Temp Loc | DVC | Event Ver | VQA

    The Overall column sits between Name and the pillar groups as a
    standalone (non-grouped) column showing each model's stored overall
    score from ModelRecord.scores["overall"].

    Parameters
    ----------
    filtered_models:
        Models to show. Column maxima are computed over this set.
    rank_map:
        ``{model_id: rank}`` used for row order and to pick the rank-1 model
        whose name is bolded.

    Returns
    -------
    str
        The complete ``<table class="lb-overall-table">`` markup. When no
        model is given, the body holds a single "no models match" row.

    Cells holding the column maximum get the extra ``lb-max`` class; a
    missing score is shown as the missing placeholder. The model id written
    to ``data-id`` is HTML-escaped because it comes from record data.
    """
    # Function-scope import keeps this block self-contained.
    from html import escape

    sorted_models = _sort_by_rank(filtered_models, rank_map)
    rank_one_id = next(
        (m.id for m in sorted_models if rank_map.get(m.id) == 1), None
    )

    score_cols = _OVERALL_SCORE_COLS
    json_fields = [f for _, f in score_cols] + ["overall"]
    col_max = _column_maxes(filtered_models, json_fields)

    def _score_cell(value, best, base_cls: str) -> str:
        """Render one score <td>; *best* is the column maximum (or None)."""
        if value is None:
            return f'<td class="{base_cls}">{_MISSING}</td>'
        is_max = best is not None and value == best
        cls = f"{base_cls} lb-max" if is_max else base_cls
        return f'<td class="{cls}">{value:.2f}</td>'

    parts: list[str] = ['<table class="lb-overall-table">']

    # <colgroup>: one <col> per column so CSS can style/size whole columns.
    # The single Spatio-Temp column gets an extra class.
    _ST_FIELD = "single_object_tracking"
    parts.append('<colgroup>')
    parts.append('<col class="col-rank">')
    parts.append('<col class="col-name">')
    parts.append('<col class="col-overall">')
    for _, field in score_cols:
        cls = "col-score col-score-st" if field == _ST_FIELD else "col-score"
        parts.append(f'<col class="{cls}">')
    parts.append('</colgroup>')

    # Header row 1: corner cells spanning both rows, then the pillar groups.
    parts.append('<thead>')
    parts.append('<tr class="lb-group-row">')
    parts.append('<th class="lb-corner" rowspan="2">#</th>')
    parts.append('<th class="lb-corner" rowspan="2">Name</th>')
    parts.append('<th class="lb-corner lb-corner-num" rowspan="2">Overall</th>')
    for span, label in [(3, "Spatial"), (1, "Spatio-Temp"),
                        (2, "Temporal"), (2, "Semantic")]:
        parts.append(f'<th class="lb-group" colspan="{span}">{label}</th>')
    parts.append('</tr>')

    # Header row 2: one cell per task column.
    parts.append('<tr class="lb-task-row">')
    for label, _ in score_cols:
        parts.append(f'<th>{label}</th>')
    parts.append('</tr>')
    parts.append('</thead>')

    # Body: "#", "Name", "Overall" plus the task columns.
    total_cols = 3 + len(score_cols)
    parts.append('<tbody>')
    if not sorted_models:
        parts.append(
            f'<tr><td colspan="{total_cols}" class="lb-empty-row">'
            f'No models match — adjust the filters.</td></tr>'
        )
    for i, m in enumerate(sorted_models, 1):
        parts.append(f'<tr data-id="{escape(str(m.id))}">')
        parts.append(f'<td class="lb-rank">{i}</td>')
        parts.append(f'<td class="lb-name">{_model_html(m, rank_one_id)}</td>')
        parts.append(
            _score_cell(
                m.scores.get("overall"),
                col_max.get("overall"),
                "lb-score lb-overall",
            )
        )
        for _, f in score_cols:
            parts.append(_score_cell(m.scores.get(f), col_max.get(f), "lb-score"))
        parts.append('</tr>')
    parts.append('</tbody>')
    parts.append('</table>')
    return "".join(parts)
|
|
|
|
def build_all_tables(
    filtered_models: list[ModelRecord],
    global_ranks: dict[str, dict[str, int]],
) -> dict[str, pd.DataFrame]:
    """Build one table per pillar key in config.PILLARS.

    Parameters
    ----------
    filtered_models:
        Model list already narrowed by search / params / type filters.
    global_ranks:
        ``{pillar_key: {model_id: rank}}``. It must have an entry for every
        pillar key, otherwise the lookup raises ``KeyError``.

    Returns
    -------
    dict
        ``{pillar_key: table}`` in PILLARS iteration order. Each value is
        whatever ``_assemble_df`` returns, which is a pandas Styler wrapping
        the DataFrame (the annotation still says ``pd.DataFrame``).

    Column rules
    ------------
    overall : #, Name, then the task columns of ``_OVERALL_SCORE_COLS``
    pillar  : #, Name, the pillar aggregate column (if one is configured),
              then that pillar's task columns
    Name    : HTML cell from ``_model_html``; the name is bold for the model
              ranked 1 in this pillar
    Scores  : floats, NaN when absent; styling and formatting are applied by
              ``_assemble_df``
    """

    def _table_for(pillar: str):
        ranks = global_ranks[pillar]
        ordered = _sort_by_rank(filtered_models, ranks)
        columns = _score_cols_for_pillar(pillar)
        leader_id = next(
            (model.id for model in ordered if ranks.get(model.id) == 1), None
        )
        return _assemble_df(ordered, columns, leader_id)

    return {pillar: _table_for(pillar) for pillar in PILLARS}
|
|
|
|
| |
|
|
|
|
def make_radar_svg(m: ModelRecord) -> str:
    """4-axis radar chart SVG for the model detail side panel.

    Axes (clockwise from top): Semantic · Spatial · Sp-Temp · Temporal.
    Canvas: 220 × 160 px. Scores are read from ``m.scores`` and assumed to be
    in [0, 100]; values outside that range are clamped.

    A score that is absent, ``None`` or NaN is drawn at the centre (0).
    Without the NaN check, ``max(0.0, min(1.0, nan))`` evaluates to 1.0 and
    a NaN score would be drawn as a full-scale axis.

    Returns
    -------
    str
        A standalone ``<svg>`` element: four grid rings, four axis lines,
        the score polygon and the four axis labels.
    """
    W, H = 220, 160
    cx, cy = W / 2, 82.0
    chart_r = 50.0  # radius of the outermost grid ring (score == 100)
    label_r = 67.0  # radius at which axis labels are centred

    axes = [
        ("Semantic", "semantic"),
        ("Spatial", "spatial"),
        ("Sp-Temp", "spatio_temporal"),
        ("Temporal", "temporal"),
    ]

    def _unit(raw) -> float:
        """Scale a 0-100 score to [0, 1]; missing or NaN becomes 0."""
        if raw is None:
            return 0.0
        scaled = float(raw) / 100.0
        if math.isnan(scaled):
            return 0.0
        return max(0.0, min(1.0, scaled))

    vals = [_unit(m.scores.get(field)) for _, field in axes]

    # SVG's y axis points down, so starting at -pi/2 (straight up) and
    # increasing the angle walks the axes clockwise.
    step = 2 * math.pi / len(axes)
    off = -math.pi / 2
    angles = [off + i * step for i in range(len(axes))]

    parts: list[str] = [
        f'<svg xmlns="http://www.w3.org/2000/svg" '
        f'width="{W}" height="{H}" viewBox="0 0 {W} {H}">'
    ]

    # Concentric grid rings at 25 / 50 / 75 / 100 % of the chart radius.
    for g in range(1, 5):
        rg = chart_r * g / 4
        parts.append(
            f'<circle cx="{cx}" cy="{cy}" r="{rg:.1f}" '
            f'fill="none" stroke="#e5e7eb" stroke-width="0.5"/>'
        )

    # Axis spokes from the centre to the outer ring.
    for a in angles:
        x2 = cx + chart_r * math.cos(a)
        y2 = cy + chart_r * math.sin(a)
        parts.append(
            f'<line x1="{cx}" y1="{cy}" '
            f'x2="{x2:.1f}" y2="{y2:.1f}" '
            f'stroke="#e5e7eb" stroke-width="0.5"/>'
        )

    # Score polygon: one vertex per axis at distance chart_r * value.
    poly_pts = " ".join(
        f"{cx + chart_r * v * math.cos(a):.1f},"
        f"{cy + chart_r * v * math.sin(a):.1f}"
        for v, a in zip(vals, angles)
    )
    parts.append(
        f'<polygon points="{poly_pts}" '
        f'fill="rgba(37,99,235,0.12)" stroke="#2563eb" '
        f'stroke-width="1.5" stroke-linejoin="round"/>'
    )

    # Axis labels, centred just outside the outer ring.
    for (lbl, _), a in zip(axes, angles):
        lx = cx + label_r * math.cos(a)
        ly = cy + label_r * math.sin(a)
        parts.append(
            f'<text x="{lx:.1f}" y="{ly:.1f}" '
            f'text-anchor="middle" dominant-baseline="middle" '
            f'font-size="9" fill="#6b7280" font-family="sans-serif">'
            f'{lbl}</text>'
        )

    parts.append("</svg>")
    return "".join(parts)
|
|
|
|
| |
|
|
|
|
def headers_for_tab(tab: str, selected_tasks: list[str] | None = None) -> list[str]:
    """Return the column headers of a tab for a Gradio Dataframe widget.

    *tab* may be a legacy key ('overall', 'spatial', 'spatio_temporal',
    'temporal', 'semantic'); a key with no legacy mapping is used as the
    pillar key directly. The result is always "#", "Name", then the display
    label of each score column for that pillar.
    """
    pillar_key = _OLD_TAB_TO_PILLAR.get(tab, tab)
    headers = ["#", "Name"]
    headers.extend(
        label for label, _ in _score_cols_for_pillar(pillar_key, selected_tasks)
    )
    return headers
|
|
|
|
def datatypes_for_tab(tab: str, selected_tasks: list[str] | None = None) -> list[str]:
    """Return the Gradio datatype of each column, parallel to headers_for_tab.

    "#" is "str", "Name" is "html", and every other (score) column is
    "number".
    """
    fixed_types = {"#": "str", "Name": "html"}
    return [
        fixed_types.get(header, "number")
        for header in headers_for_tab(tab, selected_tasks)
    ]
|
|
|
|
def render_tab(
    data: LeaderboardData,
    rank_map: dict[str, int],
    tab: str,
    search: str | None,
    params_bucket: str | None,
    model_type: str | None,
    selected_tasks: list[str] | None = None,
) -> tuple[pd.DataFrame, str]:
    """Build (table, status_str) for one Gradio tab.

    Parameters
    ----------
    data:
        Leaderboard data; ``data.models`` is the unfiltered model list.
    rank_map:
        ``{model_id: rank}`` for this tab, used for row order and to find the
        rank-1 model.
    tab:
        Tab key; legacy keys such as 'spatio_temporal' are mapped to pillar
        keys, any other key is used as the pillar key directly.
    search, params_bucket, model_type:
        Filter values, applied here through ``apply_filters``.
    selected_tasks:
        JSON field names to show (Gradio column-toggle); None = all.

    Returns
    -------
    tuple
        ``(table, status)``. ``table`` is the value returned by
        ``_assemble_df`` (a pandas Styler); ``status`` is the "Showing X of
        Y" line, or an empty string when no filter is active.
    """
    filtered = apply_filters(data.models, search, params_bucket, model_type)
    sorted_models = _sort_by_rank(filtered, rank_map)

    pillar = _OLD_TAB_TO_PILLAR.get(tab, tab)
    score_cols = _score_cols_for_pillar(pillar, selected_tasks)
    rank_one_id = next(
        (m.id for m in sorted_models if rank_map.get(m.id) == 1), None
    )

    # _assemble_df takes exactly (sorted_models, score_cols, rank_one_id) and
    # works out the column maxima itself. Passing rank_map and a maxes dict
    # as extra positional arguments raised TypeError on every call.
    df = _assemble_df(sorted_models, score_cols, rank_one_id)
    status = _status_line(
        total=len(data.models),
        shown=len(filtered),
        search=search,
        params_bucket=params_bucket,
        model_type=model_type,
    )
    return df, status
|
|
|
|
def _status_line(
    *,
    total: int,
    shown: int,
    search: str | None,
    params_bucket: str | None,
    model_type: str | None,
) -> str:
    """Return the "Showing X of Y models" line shown under the filters.

    The result is empty when no filter is active. When a filter is active
    and nothing matches, the line says so explicitly.
    """
    if not is_filter_active(search, params_bucket, model_type):
        return ""
    return (
        f"Showing 0 of {total} models — no matches"
        if shown == 0
        else f"Showing {shown} of {total} models"
    )
|
|