"""Build per-pillar DataFrames for the VANTAGE-Bench leaderboard. Primary API ----------- build_all_tables(filtered_models, global_ranks) -> dict[str, pd.DataFrame] Returns one DataFrame per pillar key in config.PILLARS. Keys: 'overall', 'spatial', 'st', 'temporal', 'semantic'. Model column: plain '{name} · {org}' string — bold if rank #1 in that pillar. No badge HTML; badge rendering belongs in CSS. Score columns: '—' for missing, '42.31' or '**42.31**' for best-in-col. make_radar_svg(model) -> str 4-axis radar chart SVG (220 × 160 px) for the model detail side panel. Axes: Semantic · Spatial · Sp-Temp · Temporal (clockwise from top). Gradio compatibility API (app.py) ---------------------------------- render_tab(data, rank_map, tab, ...) -> (pd.DataFrame, status_str) headers_for_tab(tab, selected_tasks=None) -> list[str] datatypes_for_tab(tab, selected_tasks=None) -> list[str] These accept the legacy tab keys ('spatio_temporal', etc.) used by app.py. """ from __future__ import annotations import math import pandas as pd from .config import PILLARS, TASKS, TASK_METRIC_LABELS from .data import LeaderboardData, ModelRecord from .filters import apply_filters, is_filter_active _MISSING = "—" # Short task key (config.TASKS) → JSON score field in ModelRecord.scores. _TASK_JSON_FIELD: dict[str, str] = { "loc": "2d_localization", "ground": "2d_referring_expressions", "pointing": "2d_spatial_pointing", "sot": "single_object_tracking", "temploc": "temporal_localization", "dvc": "dense_video_captioning", "ev": "event_verification", "vqa": "video_qa", } # Overall tab: ordered (display_label, json_field) pairs. # Task columns ordered to align with pillar super-header groups injected by JS # (see _RESIZE_JS in app.py): Spatial (3) · Spatio-Temp (1) · Temporal (2) · Semantic (2). 
_OVERALL_SCORE_COLS: list[tuple[str, str]] = [
    # Spatial
    ("Obj Loc", "2d_localization"),
    ("Ref Exp", "2d_referring_expressions"),
    ("Pointing", "2d_spatial_pointing"),
    # Spatio-Temporal
    ("SOT", "single_object_tracking"),
    # Temporal
    ("Temp Loc", "temporal_localization"),
    ("DVC", "dense_video_captioning"),
    # Semantic
    ("Event Ver", "event_verification"),
    ("VQA", "video_qa"),
]

# Pillar group spec for the Overall-tab super-header row.
# Each entry: (colspan, label). The leading (2, "") entry is a single blank
# cell spanning the # and Name columns; the remaining colspans (3, 1, 2, 2)
# match the number of task columns per pillar in _OVERALL_SCORE_COLS.
OVERALL_PILLAR_GROUPS: list[tuple[int, str]] = [
    (2, ""),  # # + Name
    (3, "Spatial"),
    (1, "Spatio-Temp"),
    (2, "Temporal"),
    (2, "Semantic"),
]

# Pillar aggregate score column shown immediately after Name on each pillar tab.
# Maps pillar key → (display_label, json_field). Reuses the same pre-computed
# pillar field from the model record — no recomputation.
_PILLAR_AGGREGATE_COL: dict[str, tuple[str, str]] = {
    "spatial": ("Spatial", "spatial"),
    "st": ("Sp-Temp", "spatio_temporal"),
    "temporal": ("Temporal", "temporal"),
    "semantic": ("Semantic", "semantic"),
}

# Legacy Gradio tab key → short pillar key used in config.PILLARS.
_OLD_TAB_TO_PILLAR: dict[str, str] = {
    "overall": "overall",
    "spatial": "spatial",
    "spatio_temporal": "st",
    "temporal": "temporal",
    "semantic": "semantic",
}


# -- Shared primitives -----------------------------------------------------

def _sort_by_rank(
    models: list[ModelRecord], rank_map: dict[str, int]
) -> list[ModelRecord]:
    """Return a new list of *models* ordered by rank, ties broken by name.

    Models whose id is absent from *rank_map* sort after every ranked model.
    """
    sentinel = float("inf")
    return sorted(models, key=lambda m: (rank_map.get(m.id, sentinel), m.name))


def _column_maxes(
    models: list[ModelRecord], json_fields: list[str]
) -> dict[str, float]:
    """Maximum score per field across the given model set.

    A field is omitted from the result when no model has a value for it.
    A score stored as ``None`` is treated as missing, the same way the table
    builders in this module treat ``scores.get(field) is None``; without
    that guard a single ``None`` would make ``max()`` raise.
    """
    maxes: dict[str, float] = {}
    for f in json_fields:
        vals = [m.scores[f] for m in models if m.scores.get(f) is not None]
        if vals:
            maxes[f] = max(vals)
    return maxes


def _model_html(m: ModelRecord, rank_one_id: str | None) -> str:
    """HTML cell for the Model column: name + inline badges, org sub-line, type badges.

    Parameters
    ----------
    m:
        Record to render; reads ``id``, ``name``, ``model_url``, ``verified``,
        ``result_type``, ``type``, ``is_new`` and ``organization``.
    rank_one_id:
        Id of the rank-1 model for the current table, or ``None``. The
        matching model takes the emphasised-name branch.
    """
    # NOTE(review): every string literal in this function is tag-free in the
    # text under review — the markup appears to have been stripped. The
    # literals are reproduced as-is; restore the tags from version control.
    # NOTE(review): m.name, m.organization and m.model_url are interpolated
    # without escaping — confirm they can never carry untrusted text, or
    # escape them (html.escape) when the markup is restored.
    name_part = f"{m.name}" if m.id == rank_one_id else m.name

    # The name is wrapped (as a link, presumably) only when a URL is present.
    if m.model_url:
        name_core = f'{name_part}'
    else:
        name_core = name_part

    verified_badge = '' if m.verified else ""
    name_html = f'{name_core}{verified_badge}'

    # Exactly one result-type badge and one licence badge, plus an optional
    # "new" badge.
    badges: list[str] = []
    if m.result_type == "ensemble":
        badges.append('system / pipeline')
    else:
        badges.append('single')
    if m.type == "open":
        badges.append('open')
    else:
        badges.append('prop.')
    if m.is_new:
        badges.append('new')
    badge_html = "".join(badges)

    return (
        f''
        f'{name_html}'
        f'{m.organization}'
        f'{badge_html}'
        f''
    )


def _fmt_score_val(v: float) -> str:
    """Format a numeric score cell for Styler display: NaN → '—', else 2 d.p."""
    if pd.isna(v):
        return _MISSING
    return f"{v:.2f}"


def _score_cols_for_pillar(
    pillar: str,
    selected_tasks: list[str] | None = None,
) -> list[tuple[str, str]]:
    """Return [(display_label, json_field)] for one pillar.

    For ``"overall"`` this is a copy of ``_OVERALL_SCORE_COLS``. For any other
    pillar the list starts with that pillar's aggregate column (when one is
    registered in ``_PILLAR_AGGREGATE_COL``) followed by one column per task
    in ``PILLARS[pillar]``.

    selected_tasks: JSON field names to include (Gradio column-toggle).
        None means show all. Only applies to non-overall pillars, and never
        removes the aggregate column.

    Raises KeyError when *pillar* is not "overall" and not a key of PILLARS.
    """
    if pillar == "overall":
        return list(_OVERALL_SCORE_COLS)

    task_keys = PILLARS[pillar]
    if selected_tasks is not None:
        task_keys = [tk for tk in task_keys if _TASK_JSON_FIELD[tk] in selected_tasks]

    cols: list[tuple[str, str]] = []

    # Lead each pillar tab with its pre-computed pillar aggregate score.
    agg = _PILLAR_AGGREGATE_COL.get(pillar)
    if agg is not None:
        cols.append(agg)

    for tk in task_keys:
        json_field = _TASK_JSON_FIELD[tk]
        metric = TASK_METRIC_LABELS.get(json_field, "")
        # Append the metric in parentheses only when one is configured.
        label = f"{TASKS[tk]} ({metric})" if metric else TASKS[tk]
        cols.append((label, json_field))
    return cols


def _assemble_df(
    sorted_models: list[ModelRecord],
    score_cols: list[tuple[str, str]],
    rank_one_id: str | None,
) -> "pd.io.formats.style.Styler":
    """Return a Pandas Styler with:

    - a "#" column holding the 1-based row position as a string
    - a "Name" column holding the ``_model_html`` cell
    - score columns stored as float (NaN for missing) → enables numeric sort
    - bold font-weight applied to the column-maximum cell in each score column
    - NaN formatted as '—', floats formatted to 2 decimal places

    *sorted_models* must already be in display order; *score_cols* is a list
    of (display_label, json_field) pairs.
    """
    rows = []
    for i, m in enumerate(sorted_models, 1):
        row: dict = {
            "#": str(i),
            "Name": _model_html(m, rank_one_id),
        }
        for label, json_field in score_cols:
            v = m.scores.get(json_field)
            row[label] = float(v) if v is not None else float("nan")
        rows.append(row)

    score_labels = [lbl for lbl, _ in score_cols]
    columns = ["#", "Name"] + score_labels
    # Passing columns explicitly keeps the header row even when rows is empty.
    df = pd.DataFrame(rows, columns=columns)

    def _bold_max(col: pd.Series) -> list[str]:
        # One CSS string per cell; every cell tied for the maximum is bolded.
        col_max = col.max(skipna=True)
        return [
            "font-weight: bold" if (not pd.isna(v) and v == col_max) else ""
            for v in col
        ]

    if score_labels:
        styler = (
            df.style
            .apply(_bold_max, subset=score_labels, axis=0)
            .format(_fmt_score_val, subset=score_labels, na_rep=_MISSING)
        )
    else:
        styler = df.style
    return styler


# -- Primary API -----------------------------------------------------------

def build_overall_html_table(
    filtered_models: list[ModelRecord],
    rank_map: dict[str, int],
) -> str:
    """Hand-rendered HTML for the Overall tab.

    Used instead of gr.Dataframe because Gradio's DataFrame component does not
    cleanly support multi-level (grouped) column headers. Renders a two-row
    header:

        # · Name · Overall            span both rows (rowspan=2)
        Spatial (×3) | Spatio-Temp (×1) | Temporal (×2) | Semantic (×2)
        Obj Loc | Ref Exp | Pointing | SOT | Temp Loc | DVC | Event Ver | VQA

    The Overall column sits between Name and the pillar groups as a standalone
    (non-grouped) column showing each model's stored overall score from
    ModelRecord.scores["overall"].

    Preserves: striping, hover, bold column-max, model badges, rank order,
    scroll behavior.

    Rows follow ``_sort_by_rank``; column maxima are computed over
    *filtered_models*, so "best" means best among the rows shown.
    """
    # NOTE(review): the string literals below are tag-free in the text under
    # review — the table markup appears to have been stripped, which leaves
    # `cls`, `span` and `total_cols` computed but unused. The visible cell
    # text ({label}, {i}, {ov:.2f}, …) is kept on the append call it belongs
    # to; restore the tags from version control before shipping.
    sorted_models = _sort_by_rank(filtered_models, rank_map)
    rank_one_id = next(
        (m.id for m in sorted_models if rank_map.get(m.id) == 1), None
    )
    score_cols = _OVERALL_SCORE_COLS
    json_fields = [f for _, f in score_cols] + ["overall"]
    col_max = _column_maxes(filtered_models, json_fields)
    overall_max = col_max.get("overall")

    # Render the table directly (no .table-wrap wrapper, no resize handle):
    # the gr.HTML element itself acts as the rectangular container, with
    # styling on .lb-table-overall in css.py. This avoids Gradio's
    # rounded/clipped DataFrame-shell visuals.
    parts: list[str] = ['']

    # Column widths are set per column rather than with nth-child selectors,
    # which would collide across the two header rows. The Spatio-Temporal
    # column gets a wider class because its lone task (SOT) sits under the
    # long "Spatio-Temp" pillar super-header — we want that header to fit on
    # one line.
    _ST_FIELD = "single_object_tracking"
    parts.append('')
    parts.append('')
    parts.append('')
    parts.append('')
    for _, field in score_cols:
        cls = "col-score col-score-st" if field == _ST_FIELD else "col-score"
        parts.append(f'')
    parts.append('')

    # Header: two rows. # / Name / Overall span both via rowspan=2.
    parts.append('')
    parts.append('')
    parts.append('#')
    parts.append('Name')
    parts.append('Overall')
    for span, label in [(3, "Spatial"), (1, "Spatio-Temp"), (2, "Temporal"), (2, "Semantic")]:
        parts.append(f'{label}')
    parts.append('')
    parts.append('')
    for label, _ in score_cols:
        parts.append(f'{label}')
    parts.append('')
    parts.append('')

    # Body
    # Total column count: # + Name + Overall + 8 task scores = 11.
    total_cols = 3 + len(score_cols)
    parts.append('')
    if not sorted_models:
        # Empty state: a single message row.
        parts.append(
            f''
            f'No models match — adjust the filters.'
        )
    for i, m in enumerate(sorted_models, 1):
        parts.append(f'')
        parts.append(f'{i}')
        parts.append(f'{_model_html(m, rank_one_id)}')

        # Overall cell — bold if it's the column max.
        ov = m.scores.get("overall")
        if ov is None:
            parts.append(f'{_MISSING}')
        else:
            is_max = overall_max is not None and ov == overall_max
            cls = "lb-score lb-overall lb-max" if is_max else "lb-score lb-overall"
            parts.append(f'{ov:.2f}')

        # Task cells, same best-in-column rule per field.
        for _, f in score_cols:
            v = m.scores.get(f)
            if v is None:
                parts.append(f'{_MISSING}')
            else:
                m_val = col_max.get(f)
                is_max = m_val is not None and v == m_val
                cls = "lb-score lb-max" if is_max else "lb-score"
                parts.append(f'{v:.2f}')
        parts.append('')
    parts.append('')
    parts.append('')
    return "".join(parts)


def build_all_tables(
    filtered_models: list[ModelRecord],
    global_ranks: dict[str, dict[str, int]],
) -> dict[str, pd.DataFrame]:
    """Return {pillar_key: table} for every pillar in config.PILLARS.

    Each value is the pandas Styler built by ``_assemble_df`` (the return
    annotation says DataFrame; the underlying frame is ``styler.data``).

    Parameters
    ----------
    filtered_models:
        Model list already narrowed by search / params / type filters.
    global_ranks:
        Mapping from util.ranking.GLOBAL_RANKS —
        {'overall': {model_id: rank}, 'spatial': ..., 'st': ..., ...}.
        Must contain an entry for every key of PILLARS (KeyError otherwise).

    Column rules
    ------------
    Overall : #, Name, then the task columns of _OVERALL_SCORE_COLS
    Pillar  : #, Name, pillar aggregate, [task columns for that pillar only]
    Name    : cell from _model_html; rank-1 model in this pillar is emphasised
    Scores  : float, NaN if absent; displayed as '42.31' / '—', with the
              best value in each column bolded by the Styler
    """
    result: dict[str, pd.DataFrame] = {}
    for pillar in PILLARS:
        rank_map = global_ranks[pillar]
        sorted_models = _sort_by_rank(filtered_models, rank_map)
        score_cols = _score_cols_for_pillar(pillar)
        rank_one_id = next(
            (m.id for m in sorted_models if rank_map.get(m.id) == 1), None
        )
        result[pillar] = _assemble_df(sorted_models, score_cols, rank_one_id)
    return result


# -- Radar SVG ------------------------------------------------------------

def make_radar_svg(m: ModelRecord) -> str:
    """4-axis radar chart SVG for the model detail side panel.

    Axes (clockwise from top): Semantic · Spatial · Sp-Temp · Temporal.
    Canvas: 220 × 160 px. Scores assumed in [0, 100]; each is divided by 100
    and clamped to [0, 1], and a missing score is plotted as 0.
    """
    # NOTE(review): the string literals below are tag-free in the text under
    # review — the SVG markup appears to have been stripped, which leaves
    # W, H, rg, x2/y2, poly_pts and lx/ly computed but unused. Literals are
    # reproduced as-is; restore the elements from version control.
    W, H = 220, 160
    cx, cy = W / 2, 82.0   # slightly below centre for top-label room
    chart_r = 50.0         # polygon radius
    label_r = 67.0         # label ring radius

    labels = ["Semantic", "Spatial", "Sp-Temp", "Temporal"]
    fields = ["semantic", "spatial", "spatio_temporal", "temporal"]

    # Normalised axis values in [0, 1].
    vals: list[float] = []
    for f in fields:
        raw = m.scores.get(f)
        v = float(raw) / 100.0 if raw is not None else 0.0
        vals.append(max(0.0, min(1.0, v)))

    N = 4
    step = 2 * math.pi / N
    off = -math.pi / 2  # index 0 points straight up

    parts: list[str] = [
        f''
    ]

    # Grid rings (4 concentric)
    for g in range(1, 5):
        rg = chart_r * g / 4
        parts.append(
            f''
        )

    # Axis lines
    for i in range(N):
        a = off + i * step
        x2 = cx + chart_r * math.cos(a)
        y2 = cy + chart_r * math.sin(a)
        parts.append(
            f''
        )

    # Data polygon
    poly_pts = " ".join(
        f"{cx + chart_r * v * math.cos(off + i * step):.1f},"
        f"{cy + chart_r * v * math.sin(off + i * step):.1f}"
        for i, v in enumerate(vals)
    )
    parts.append(
        f''
    )

    # Labels
    for i, lbl in enumerate(labels):
        a = off + i * step
        lx = cx + label_r * math.cos(a)
        ly = cy + label_r * math.sin(a)
        parts.append(
            f''
            f'{lbl}'
        )

    parts.append("")
    return "".join(parts)


# -- Gradio compatibility API ----------------------------------------------

def headers_for_tab(tab: str, selected_tasks: list[str] | None = None) -> list[str]:
    """Column header strings for a Gradio Dataframe widget.

    tab accepts legacy keys: 'overall', 'spatial', 'spatio_temporal',
    'temporal', 'semantic'. A key not found in the legacy map is passed
    through unchanged as the pillar key.
    """
    pillar = _OLD_TAB_TO_PILLAR.get(tab, tab)
    score_cols = _score_cols_for_pillar(pillar, selected_tasks)
    return ["#", "Name"] + [label for label, _ in score_cols]


def datatypes_for_tab(tab: str, selected_tasks: list[str] | None = None) -> list[str]:
    """Gradio datatype list parallel to headers_for_tab output.

    "#" → "str", "Name" → "html", every score column → "number".
    """
    dtypes = []
    for h in headers_for_tab(tab, selected_tasks):
        if h == "#":
            dtypes.append("str")
        elif h == "Name":
            dtypes.append("html")
        else:
            dtypes.append("number")
    return dtypes


def render_tab(
    data: LeaderboardData,
    rank_map: dict[str, int],
    tab: str,
    search: str | None,
    params_bucket: str | None,
    model_type: str | None,
    selected_tasks: list[str] | None = None,
) -> tuple[pd.DataFrame, str]:
    """Build (df, status_str) for one Gradio tab.

    Applies search / params / model-type filters internally.
    selected_tasks: JSON field names to show (Gradio column-toggle); None = all.

    The first element is the pandas Styler returned by ``_assemble_df``;
    the second is the status line ("" when no filter is active).
    """
    filtered = apply_filters(data.models, search, params_bucket, model_type)
    sorted_models = _sort_by_rank(filtered, rank_map)

    pillar = _OLD_TAB_TO_PILLAR.get(tab, tab)
    score_cols = _score_cols_for_pillar(pillar, selected_tasks)
    rank_one_id = next(
        (m.id for m in sorted_models if rank_map.get(m.id) == 1), None
    )

    # _assemble_df takes exactly (sorted_models, score_cols, rank_one_id) and
    # works out the per-column maxima itself for bolding, so neither rank_map
    # nor a precomputed maxes dict is passed.
    df = _assemble_df(sorted_models, score_cols, rank_one_id)

    status = _status_line(
        total=len(data.models),
        shown=len(filtered),
        search=search,
        params_bucket=params_bucket,
        model_type=model_type,
    )
    return df, status


def _status_line(
    *,
    total: int,
    shown: int,
    search: str | None,
    params_bucket: str | None,
    model_type: str | None,
) -> str:
    """Return the "Showing X of Y" line, or "" when no filter is active."""
    if not is_filter_active(search, params_bucket, model_type):
        return ""
    if shown == 0:
        return f"Showing 0 of {total} models — no matches"
    return f"Showing {shown} of {total} models"