File size: 18,496 Bytes
8b5161a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
"""Build per-pillar DataFrames for the VANTAGE-Bench leaderboard.

Primary API
-----------
build_all_tables(filtered_models, global_ranks) -> dict[str, Styler]
    Returns one pandas Styler per pillar key in config.PILLARS.
    Keys: 'overall', 'spatial', 'st', 'temporal', 'semantic'.
    Name column: HTML cell (model name, organization, badges); the name is
    bold when the model is rank #1 in that pillar.
    Score columns: floats (NaN when missing), displayed as '—' for missing or
    '42.31' otherwise; the best-in-column cell is styled bold.

make_radar_svg(model) -> str
    4-axis radar chart SVG (220 × 160 px) for the model detail side panel.
    Axes: Semantic · Spatial · Sp-Temp · Temporal (clockwise from top).

Gradio compatibility API (app.py)
----------------------------------
render_tab(data, rank_map, tab, ...) -> (pd.DataFrame, status_str)
headers_for_tab(tab, selected_tasks=None) -> list[str]
datatypes_for_tab(tab, selected_tasks=None) -> list[str]
These accept the legacy tab keys ('spatio_temporal', etc.) used by app.py.
"""

from __future__ import annotations

import html
import math

import pandas as pd

from .config import PILLARS, TASKS, TASK_METRIC_LABELS
from .data import LeaderboardData, ModelRecord
from .filters import apply_filters, is_filter_active

# Placeholder text rendered in place of a missing score.
_MISSING = "—"

# Short task key (config.TASKS) → JSON score field in ModelRecord.scores.
_TASK_JSON_FIELD: dict[str, str] = {
    "loc":      "2d_localization",
    "ground":   "2d_referring_expressions",
    "pointing": "2d_spatial_pointing",
    "sot":      "single_object_tracking",
    "temploc":  "temporal_localization",
    "dvc":      "dense_video_captioning",
    "ev":       "event_verification",
    "vqa":      "video_qa",
}

# Overall tab: ordered (display_label, json_field) pairs.
# Task columns ordered to align with pillar super-header groups injected by JS
# (see _RESIZE_JS in app.py): Spatial (3) · Spatio-Temp (1) · Temporal (2) · Semantic (2).
_OVERALL_SCORE_COLS: list[tuple[str, str]] = [
    # Spatial
    ("Obj Loc",   "2d_localization"),
    ("Ref Exp",   "2d_referring_expressions"),
    ("Pointing",  "2d_spatial_pointing"),
    # Spatio-Temporal
    ("SOT",       "single_object_tracking"),
    # Temporal
    ("Temp Loc",  "temporal_localization"),
    ("DVC",       "dense_video_captioning"),
    # Semantic
    ("Event Ver", "event_verification"),
    ("VQA",       "video_qa"),
]

# Pillar group spec for the Overall-tab super-header row.
# Each entry: (colspan, label). The leading empty-label entry spans the
# # and Name columns; the remaining colspans sum to len(_OVERALL_SCORE_COLS).
OVERALL_PILLAR_GROUPS: list[tuple[int, str]] = [
    (2, ""),              # # + Name
    (3, "Spatial"),
    (1, "Spatio-Temp"),
    (2, "Temporal"),
    (2, "Semantic"),
]

# Pillar aggregate score column shown immediately after Name on each pillar tab.
# Maps short pillar key → (display_label, json_field in ModelRecord.scores).
# Reuses the same pre-computed pillar field from the model record — no recomputation.
_PILLAR_AGGREGATE_COL: dict[str, tuple[str, str]] = {
    "spatial":  ("Spatial",  "spatial"),
    "st":       ("Sp-Temp",  "spatio_temporal"),
    "temporal": ("Temporal", "temporal"),
    "semantic": ("Semantic", "semantic"),
}

# Legacy Gradio tab key → short pillar key used in config.PILLARS.
# Only 'spatio_temporal' actually differs ('st'); the rest map to themselves.
_OLD_TAB_TO_PILLAR: dict[str, str] = {
    "overall":         "overall",
    "spatial":         "spatial",
    "spatio_temporal": "st",
    "temporal":        "temporal",
    "semantic":        "semantic",
}


# -- Shared primitives -----------------------------------------------------


def _sort_by_rank(
    models: list[ModelRecord], rank_map: dict[str, int]
) -> list[ModelRecord]:
    """Return a new list of models ordered by rank, then by name.

    Models whose id is absent from ``rank_map`` are given an infinite rank,
    so they sort after every ranked model. Ties are broken by ``m.name``.
    The input list is not modified.
    """
    unranked = float("inf")

    def _order_key(model: ModelRecord) -> tuple[float, str]:
        return rank_map.get(model.id, unranked), model.name

    return sorted(models, key=_order_key)


def _column_maxes(
    models: list[ModelRecord], json_fields: list[str]
) -> dict[str, float]:
    """Maximum score per field across the given model set.

    A field is omitted from the result when no model has a usable score for
    it. A score stored as ``None`` is treated the same as an absent key, which
    matches how the table renderers in this module read scores via
    ``m.scores.get(field)`` and treat ``None`` as missing. Previously a
    ``None`` value reached ``max()`` and raised ``TypeError``.
    """
    maxes: dict[str, float] = {}
    for field in json_fields:
        present = [
            score
            for score in (m.scores.get(field) for m in models)
            if score is not None
        ]
        if present:
            maxes[field] = max(present)
    return maxes


def _model_html(m: ModelRecord, rank_one_id: str | None) -> str:
    """HTML cell for the Model column: name + inline badges, org sub-line, type badges.

    Layout of the returned markup:
      * outer ``<span class="mc" data-n="<name>">``
      * name row: the model name (bold when ``m.id == rank_one_id``, wrapped
        in a link when ``m.model_url`` is truthy) followed by a "verified"
        tick when ``m.verified`` is truthy
      * organization sub-line
      * badge row: ensemble/single, open/proprietary, and optionally "new"

    The name, organization and URL come from model records and are inserted
    into HTML text and attribute positions, so they are HTML-escaped here.
    Without escaping, a name containing ``&``, ``<`` or ``"`` would break the
    cell markup or inject arbitrary HTML.
    """
    safe_name = html.escape(str(m.name))
    safe_org = html.escape(str(m.organization))

    name_part = f"<b>{safe_name}</b>" if m.id == rank_one_id else safe_name
    if m.model_url:
        safe_url = html.escape(str(m.model_url))
        name_core = (
            f'<a href="{safe_url}" target="_blank" class="mc-link">{name_part}</a>'
        )
    else:
        name_core = name_part
    verified_badge = '<span class="b b-verified">✓</span>' if m.verified else ""
    name_html = f'<span class="mc-name-row">{name_core}{verified_badge}</span>'

    # Badge order is fixed: result type, licence type, then the optional "new".
    badges: list[str] = [
        '<span class="b b-ensemble">system / pipeline</span>'
        if m.result_type == "ensemble"
        else '<span class="b b-single">single</span>',
        '<span class="b b-open">open</span>'
        if m.type == "open"
        else '<span class="b b-prop">prop.</span>',
    ]
    if m.is_new:
        badges.append('<span class="b b-new">new</span>')
    badge_html = "".join(badges)

    return (
        f'<span data-n="{safe_name}" class="mc">'
        f'{name_html}'
        f'<span class="mc-org">{safe_org}</span>'
        f'<span class="mc-badges">{badge_html}</span>'
        f'</span>'
    )


def _fmt_score_val(v: float) -> str:
    """Render one score cell as text.

    Missing values (anything ``pd.isna`` reports as NA) become the module's
    missing-value placeholder; every other value is formatted with two
    decimal places.
    """
    return _MISSING if pd.isna(v) else f"{v:.2f}"


def _score_cols_for_pillar(
    pillar: str,
    selected_tasks: list[str] | None = None,
) -> list[tuple[str, str]]:
    """Return the ordered ``(display_label, json_field)`` columns for a pillar.

    For ``'overall'`` this is a copy of ``_OVERALL_SCORE_COLS`` and
    ``selected_tasks`` is ignored. For any other pillar the list starts with
    the pillar's aggregate column (when one is registered in
    ``_PILLAR_AGGREGATE_COL``), followed by one column per task in
    ``PILLARS[pillar]``. A task label gets a ``" (metric)"`` suffix when
    ``TASK_METRIC_LABELS`` has a non-empty entry for its JSON field.

    selected_tasks: JSON field names to keep (Gradio column-toggle); ``None``
    keeps every task. The aggregate column is never filtered out.

    Raises ``KeyError`` for a pillar key that is not in ``PILLARS``.
    """
    if pillar == "overall":
        return list(_OVERALL_SCORE_COLS)

    wanted_keys = [
        key
        for key in PILLARS[pillar]
        if selected_tasks is None or _TASK_JSON_FIELD[key] in selected_tasks
    ]

    columns: list[tuple[str, str]] = []

    # The pre-computed pillar aggregate always leads the pillar tab.
    aggregate = _PILLAR_AGGREGATE_COL.get(pillar)
    if aggregate is not None:
        columns.append(aggregate)

    for key in wanted_keys:
        field = _TASK_JSON_FIELD[key]
        metric = TASK_METRIC_LABELS.get(field, "")
        title = TASKS[key]
        columns.append((f"{title} ({metric})" if metric else title, field))
    return columns


def _assemble_df(
    sorted_models: list[ModelRecord],
    score_cols: list[tuple[str, str]],
    rank_one_id: str | None,
) -> "pd.io.formats.style.Styler":
    """Build the styled table for one tab.

    The underlying frame has a ``#`` column (1-based row position as a
    string), a ``Name`` column (HTML from ``_model_html``) and one float
    column per entry in ``score_cols``. Missing scores are stored as NaN so
    the columns stay numeric.

    When there is at least one score column, the returned Styler:
    - applies ``font-weight: bold`` to every cell equal to its column maximum
    - displays floats with two decimals and NaN as the missing placeholder

    With no score columns the frame's plain ``.style`` is returned.
    """
    score_labels = [label for label, _ in score_cols]

    records: list[dict] = []
    for position, model in enumerate(sorted_models, start=1):
        record: dict = {
            "#": str(position),
            "Name": _model_html(model, rank_one_id),
        }
        for label, json_field in score_cols:
            raw = model.scores.get(json_field)
            record[label] = float("nan") if raw is None else float(raw)
        records.append(record)

    frame = pd.DataFrame(records, columns=["#", "Name", *score_labels])

    if not score_labels:
        return frame.style

    def _highlight_best(column: pd.Series) -> list[str]:
        # NaN never compares equal, but guard explicitly so an all-NaN
        # column (whose max is NaN) yields no highlighted cells.
        best = column.max(skipna=True)
        return [
            "font-weight: bold" if (not pd.isna(value) and value == best) else ""
            for value in column
        ]

    highlighted = frame.style.apply(_highlight_best, subset=score_labels, axis=0)
    return highlighted.format(_fmt_score_val, subset=score_labels, na_rep=_MISSING)


# -- Primary API -----------------------------------------------------------


def build_overall_html_table(
    filtered_models: list[ModelRecord],
    rank_map: dict[str, int],
) -> str:
    """Hand-rendered HTML <table> for the Overall tab.

    Used instead of gr.Dataframe because Gradio's DataFrame component
    does not cleanly support multi-level (grouped) column headers.
    Renders a two-row header:
        # · Name · Overall span both rows (rowspan=2)
        Spatial (×3) | Spatio-Temp (×1) | Temporal (×2) | Semantic (×2)
        Obj Loc | Ref Exp | Pointing | SOT | Temp Loc | DVC | Event Ver | VQA

    The Overall column sits between Name and the pillar groups as a
    standalone (non-grouped) column showing each model's stored overall
    score from ModelRecord.scores["overall"].

    Rows follow ``rank_map`` order (see ``_sort_by_rank``); the displayed
    rank is the 1-based position within the filtered list. The best value
    in each score column gets the ``lb-max`` class, and a missing score is
    rendered as the missing placeholder. When no models are given, a single
    full-width "no models match" row is emitted.

    ``m.id`` is HTML-escaped before being written into the ``data-id``
    attribute so an id containing quotes or ``&`` cannot break the markup.

    Visual behaviour (striping, hover, bold, scrolling) is expected to come
    from the stylesheet, not from this function.
    """
    sorted_models = _sort_by_rank(filtered_models, rank_map)
    rank_one_id = next(
        (m.id for m in sorted_models if rank_map.get(m.id) == 1), None
    )

    score_cols = _OVERALL_SCORE_COLS
    json_fields = [f for _, f in score_cols] + ["overall"]
    col_max = _column_maxes(filtered_models, json_fields)

    def _score_td(value, best, base_cls: str) -> str:
        """One <td> for a score; adds ``lb-max`` when value equals the column best."""
        if value is None:
            return f'<td class="{base_cls}">{_MISSING}</td>'
        is_max = best is not None and value == best
        cls = f"{base_cls} lb-max" if is_max else base_cls
        return f'<td class="{cls}">{value:.2f}</td>'

    # Render the table directly (no .table-wrap wrapper, no resize handle):
    # the gr.HTML element itself acts as the rectangular container, with
    # styling defined in css.py. This avoids Gradio's rounded/clipped
    # DataFrame-shell visuals.
    # NOTE(review): this comment used to name the selector `.lb-table-overall`
    # while the class emitted below is `lb-overall-table` — confirm css.py
    # targets the emitted class.
    parts: list[str] = ['<table class="lb-overall-table">']

    # Column-width control via <colgroup> (avoids nth-child collisions across
    # the two header rows). The Spatio-Temporal column gets a wider class
    # because its lone task (SOT) sits under the long "Spatio-Temp" pillar
    # super-header — we want that header to fit on one line.
    st_field = "single_object_tracking"
    parts.append('<colgroup>')
    parts.append('<col class="col-rank">')
    parts.append('<col class="col-name">')
    parts.append('<col class="col-overall">')
    for _, field in score_cols:
        cls = "col-score col-score-st" if field == st_field else "col-score"
        parts.append(f'<col class="{cls}">')
    parts.append('</colgroup>')

    # Header: two rows. # / Name / Overall span both via rowspan=2.
    parts.append('<thead>')
    parts.append('<tr class="lb-group-row">')
    parts.append('<th class="lb-corner" rowspan="2">#</th>')
    parts.append('<th class="lb-corner" rowspan="2">Name</th>')
    parts.append('<th class="lb-corner lb-corner-num" rowspan="2">Overall</th>')
    # Group colspans must add up to len(score_cols) and follow its ordering.
    for span, label in [(3, "Spatial"), (1, "Spatio-Temp"),
                        (2, "Temporal"), (2, "Semantic")]:
        parts.append(f'<th class="lb-group" colspan="{span}">{label}</th>')
    parts.append('</tr>')

    parts.append('<tr class="lb-task-row">')
    for label, _ in score_cols:
        parts.append(f'<th>{label}</th>')
    parts.append('</tr>')
    parts.append('</thead>')

    # Body
    # Total column count: # + Name + Overall + one per task score.
    total_cols = 3 + len(score_cols)
    parts.append('<tbody>')
    if not sorted_models:
        parts.append(
            f'<tr><td colspan="{total_cols}" class="lb-empty-row">'
            f'No models match — adjust the filters.</td></tr>'
        )
    for i, m in enumerate(sorted_models, 1):
        row_id = html.escape(str(m.id), quote=True)
        parts.append(f'<tr data-id="{row_id}">')
        parts.append(f'<td class="lb-rank">{i}</td>')
        parts.append(f'<td class="lb-name">{_model_html(m, rank_one_id)}</td>')

        # Overall cell first, then one cell per task column.
        parts.append(
            _score_td(m.scores.get("overall"), col_max.get("overall"),
                      "lb-score lb-overall")
        )
        for _, f in score_cols:
            parts.append(_score_td(m.scores.get(f), col_max.get(f), "lb-score"))
        parts.append('</tr>')
    parts.append('</tbody>')
    parts.append('</table>')
    return "".join(parts)


def build_all_tables(
    filtered_models: list[ModelRecord],
    global_ranks: dict[str, dict[str, int]],
) -> dict[str, "pd.io.formats.style.Styler"]:
    """Return {pillar_key: Styler} for every pillar in config.PILLARS.

    Each value is the pandas Styler produced by ``_assemble_df`` (a styled
    wrapper around the table's DataFrame), not a bare DataFrame.

    Parameters
    ----------
    filtered_models:
        Model list already narrowed by search / params / type filters.
    global_ranks:
        Per-pillar rank maps, ``{pillar_key: {model_id: rank}}``. Must contain
        an entry for every key in ``PILLARS``; a missing key raises
        ``KeyError``. (Originally documented as coming from
        util.ranking.GLOBAL_RANKS.)

    Column rules
    ------------
    Overall  : #, Name, then the task columns listed in _OVERALL_SCORE_COLS
    Pillar   : #, Name, pillar aggregate, then that pillar's task columns
    Name     : HTML cell from _model_html; name is bold for the model whose
               rank is 1 in this pillar (if it is in the filtered list)
    Scores   : floats, NaN when absent; displayed with two decimals, '—' for
               missing, best-in-column styled bold
    """
    result: dict[str, "pd.io.formats.style.Styler"] = {}

    for pillar in PILLARS:
        rank_map = global_ranks[pillar]
        sorted_models = _sort_by_rank(filtered_models, rank_map)
        score_cols = _score_cols_for_pillar(pillar)
        # The rank-1 model may have been filtered out, in which case no row
        # gets the bold treatment.
        rank_one_id = next(
            (m.id for m in sorted_models if rank_map.get(m.id) == 1), None
        )
        result[pillar] = _assemble_df(sorted_models, score_cols, rank_one_id)

    return result


# -- Radar SVG ------------------------------------------------------------


def make_radar_svg(m: ModelRecord) -> str:
    """Render a 4-axis radar chart for one model as an SVG string.

    Axes run clockwise from the top: Semantic · Spatial · Sp-Temp · Temporal.
    The canvas is 220 × 160 px. Each pillar score is divided by 100 and
    clamped to [0, 1]; a missing score is drawn at the centre (0).
    """
    width, height = 220, 160
    center_x, center_y = width / 2, 82.0   # centre sits low to leave room for the top label
    polygon_radius = 50.0                  # outer ring / full-score distance
    label_radius = 67.0                    # where axis captions are placed

    axes = (
        ("Semantic", "semantic"),
        ("Spatial", "spatial"),
        ("Sp-Temp", "spatio_temporal"),
        ("Temporal", "temporal"),
    )
    axis_count = 4
    # Axis 0 points straight up; subsequent axes advance by a quarter turn.
    angles = [
        -math.pi / 2 + idx * (2 * math.pi / axis_count)
        for idx in range(axis_count)
    ]

    fractions: list[float] = []
    for _, field in axes:
        raw = m.scores.get(field)
        fraction = 0.0 if raw is None else float(raw) / 100.0
        fractions.append(max(0.0, min(1.0, fraction)))

    svg: list[str] = [
        f'<svg xmlns="http://www.w3.org/2000/svg" '
        f'width="{width}" height="{height}" viewBox="0 0 {width} {height}">'
    ]

    # Four concentric grid rings at 25 % steps.
    svg.extend(
        f'<circle cx="{center_x}" cy="{center_y}" r="{polygon_radius * ring / 4:.1f}" '
        f'fill="none" stroke="#e5e7eb" stroke-width="0.5"/>'
        for ring in range(1, 5)
    )

    # One spoke per axis, from the centre to the outer ring.
    for angle in angles:
        end_x = center_x + polygon_radius * math.cos(angle)
        end_y = center_y + polygon_radius * math.sin(angle)
        svg.append(
            f'<line x1="{center_x}" y1="{center_y}" '
            f'x2="{end_x:.1f}" y2="{end_y:.1f}" '
            f'stroke="#e5e7eb" stroke-width="0.5"/>'
        )

    # Data polygon: one vertex per axis, scaled by the clamped score.
    vertices = []
    for angle, fraction in zip(angles, fractions):
        vx = center_x + polygon_radius * fraction * math.cos(angle)
        vy = center_y + polygon_radius * fraction * math.sin(angle)
        vertices.append(f"{vx:.1f},{vy:.1f}")
    svg.append(
        f'<polygon points="{" ".join(vertices)}" '
        f'fill="rgba(37,99,235,0.12)" stroke="#2563eb" '
        f'stroke-width="1.5" stroke-linejoin="round"/>'
    )

    # Axis captions on the label ring.
    for angle, (caption, _) in zip(angles, axes):
        text_x = center_x + label_radius * math.cos(angle)
        text_y = center_y + label_radius * math.sin(angle)
        svg.append(
            f'<text x="{text_x:.1f}" y="{text_y:.1f}" '
            f'text-anchor="middle" dominant-baseline="middle" '
            f'font-size="9" fill="#6b7280" font-family="sans-serif">'
            f'{caption}</text>'
        )

    svg.append("</svg>")
    return "".join(svg)


# -- Gradio compatibility API ----------------------------------------------


def headers_for_tab(tab: str, selected_tasks: list[str] | None = None) -> list[str]:
    """Return the column headers for one Gradio Dataframe tab.

    ``tab`` may be a legacy key ('overall', 'spatial', 'spatio_temporal',
    'temporal', 'semantic'); keys not found in the legacy map are passed
    through unchanged as pillar keys. The result always starts with
    ``"#"`` and ``"Name"``, followed by the score-column labels.
    """
    pillar_key = _OLD_TAB_TO_PILLAR.get(tab, tab)
    headers = ["#", "Name"]
    headers.extend(
        label for label, _ in _score_cols_for_pillar(pillar_key, selected_tasks)
    )
    return headers


def datatypes_for_tab(tab: str, selected_tasks: list[str] | None = None) -> list[str]:
    """Return Gradio datatypes, one per header from ``headers_for_tab``.

    ``"#"`` maps to ``"str"``, ``"Name"`` to ``"html"``, and every other
    (score) column to ``"number"``.
    """
    fixed_types = {"#": "str", "Name": "html"}
    return [
        fixed_types.get(header, "number")
        for header in headers_for_tab(tab, selected_tasks)
    ]


def render_tab(
    data: LeaderboardData,
    rank_map: dict[str, int],
    tab: str,
    search: str | None,
    params_bucket: str | None,
    model_type: str | None,
    selected_tasks: list[str] | None = None,
) -> tuple["pd.io.formats.style.Styler", str]:
    """Build (table, status_str) for one Gradio tab.

    Applies search / params / model-type filters internally, orders the
    surviving models by ``rank_map`` and assembles the table with
    ``_assemble_df``. The first element is therefore a pandas Styler (a
    styled DataFrame); the second is the filter status line, which is empty
    when no filter is active.

    tab: legacy tab key ('spatio_temporal', etc.) or a pillar key.
    selected_tasks: JSON field names to show (Gradio column-toggle); None = all.
    """
    filtered = apply_filters(data.models, search, params_bucket, model_type)
    sorted_models = _sort_by_rank(filtered, rank_map)

    pillar = _OLD_TAB_TO_PILLAR.get(tab, tab)
    score_cols = _score_cols_for_pillar(pillar, selected_tasks)
    rank_one_id = next(
        (m.id for m in sorted_models if rank_map.get(m.id) == 1), None
    )

    # _assemble_df takes exactly (sorted_models, score_cols, rank_one_id) and
    # works out column maxima itself. The previous call also passed rank_map
    # and a precomputed maxes dict, which raised TypeError on every call.
    table = _assemble_df(sorted_models, score_cols, rank_one_id)
    status = _status_line(
        total=len(data.models),
        shown=len(filtered),
        search=search,
        params_bucket=params_bucket,
        model_type=model_type,
    )
    return table, status


def _status_line(
    *,
    total: int,
    shown: int,
    search: str | None,
    params_bucket: str | None,
    model_type: str | None,
) -> str:
    """Return the "Showing X of Y models" line shown under the filters.

    Returns an empty string when ``is_filter_active`` reports that no filter
    is active. With an active filter and zero matches, the line carries an
    explicit "no matches" suffix.
    """
    filters_on = is_filter_active(search, params_bucket, model_type)
    if filters_on and shown == 0:
        return f"Showing 0 of {total} models — no matches"
    if filters_on:
        return f"Showing {shown} of {total} models"
    return ""