File size: 22,338 Bytes
60ce794
f468dc6
29ccfa6
 
 
f468dc6
 
29ccfa6
f468dc6
54fee25
f468dc6
29ccfa6
4e7cba4
29ccfa6
 
f468dc6
29ccfa6
f468dc6
 
 
29ccfa6
f468dc6
 
 
29ccfa6
f468dc6
 
 
 
 
29ccfa6
f468dc6
 
29ccfa6
f468dc6
29ccfa6
 
 
 
 
f468dc6
 
29ccfa6
 
 
f468dc6
 
 
 
 
 
 
29ccfa6
f468dc6
 
 
29ccfa6
f468dc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
df31915
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
792fc9a
 
 
 
 
171923d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6347cd
 
 
 
 
51e3d67
 
 
 
 
 
 
 
f6347cd
29ccfa6
 
 
f468dc6
 
 
 
 
 
 
 
 
 
 
 
29ccfa6
f468dc6
29ccfa6
f468dc6
29ccfa6
f468dc6
29ccfa6
f468dc6
 
29ccfa6
 
 
f468dc6
 
3368001
b26494f
 
 
3368001
b26494f
 
3368001
f6347cd
 
 
 
 
 
 
7b93b09
4e7cba4
f6347cd
4e7cba4
ee33931
29ccfa6
 
f468dc6
 
 
29ccfa6
f468dc6
 
d56a145
f468dc6
29ccfa6
f468dc6
 
5a72fd0
 
f468dc6
5a72fd0
f468dc6
 
29ccfa6
ad051e1
 
 
1bd245c
ad051e1
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f7e970e
 
 
ad051e1
f7e970e
 
 
ad051e1
f7e970e
 
 
ad051e1
f7e970e
 
 
ad051e1
 
 
29ccfa6
f468dc6
792fc9a
f468dc6
 
 
0e5f92a
f468dc6
 
 
0e5f92a
 
 
f468dc6
 
 
 
df31915
 
51e3d67
df31915
 
51e3d67
 
 
df31915
f468dc6
df31915
 
 
 
 
 
 
 
51e3d67
 
df31915
 
 
 
51e3d67
 
df31915
 
 
 
51e3d67
792fc9a
 
 
51e3d67
568654a
51e3d67
 
 
 
df31915
 
 
51e3d67
f468dc6
29ccfa6
75867f4
d0ca1f1
f468dc6
 
29ccfa6
 
 
4200c24
 
 
29ccfa6
4200c24
 
 
 
 
 
 
 
 
 
 
 
 
 
 
29ccfa6
4200c24
5044c19
 
 
 
 
 
4200c24
5044c19
 
 
 
 
 
4200c24
5044c19
4200c24
5044c19
 
 
 
 
 
 
4200c24
5044c19
 
 
 
4200c24
5044c19
 
 
 
 
 
 
4200c24
 
 
f468dc6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4200c24
 
171923d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
f6347cd
 
81a9387
f6347cd
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
171923d
29ccfa6
f468dc6
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
<!doctype html>
<html lang="en">
<head>
  <meta charset="utf-8" />
  <meta name="viewport" content="width=device-width, initial-scale=1" />
  <title>AVIP – Audio–Vision Interaction Probe (Benchmark)</title>
  <meta name="description" content="AVIP is a lightweight benchmark to check whether multimodal models truly use audio in videos." />

  <!-- Social cards (add your own image at assets/cover.jpg) -->
  <meta property="og:title" content="AVIP Benchmark" />
  <meta property="og:description" content="Do multimodal models actually use audio in videos?" />
  <meta property="og:type" content="website" />
  <!-- Fixed: was og:video with content="thumbnail" (not a URL). A
       summary_large_image Twitter card needs an og:image to render.
       NOTE(review): most scrapers require an absolute URL here — TODO
       confirm the deployed base URL and make this absolute. -->
  <meta property="og:image" content="assets/cover.jpg" />
  <meta name="twitter:card" content="summary_large_image" />

  <link rel="icon" href="assets/favicon.png" />
  <style>
    /* =====================
       THEME & BASICS
       ===================== */
    /* Design tokens: dark palette is the default; light palette overrides below. */
    :root{
      --bg:#0b0b0c; --surface:#111216; --text:#e9e9ee; --muted:#9aa0a6; --accent:#6ee7ff; --ring:rgba(110,231,255,.35);
      --card: color-mix(in oklab, var(--surface), transparent 8%);
      --border: 1px solid rgba(255,255,255,.08);
    }
    /* Light-scheme overrides of the same custom properties. */
    @media (prefers-color-scheme: light){
      :root{ --bg:#fafafa; --surface:#ffffff; --text:#101114; --muted:#5f6368; --accent:#0078ff; --ring:rgba(0,120,255,.2);
             --card: color-mix(in oklab, var(--surface), transparent 4%);
             --border: 1px solid rgba(0,0,0,.08);
      }
    }
    *,*::before,*::after{ box-sizing:border-box }
    html,body{ height:100% }
    body{
      margin:0; font:16px/1.6 system-ui,-apple-system,Segoe UI,Roboto,Arial,sans-serif; color:var(--text);
      background:
        radial-gradient(1200px 800px at 10% -5%, rgba(110,231,255,.08), transparent 40%),
        radial-gradient(900px 700px at 110% 10%, rgba(110,231,255,.06), transparent 40%),
        var(--bg);
    }
    /* Sticky translucent top bar; below 720px the nav collapses into a
       JS-toggled dropdown (see .site-nav.open). */
    .site-header{ position:sticky; top:0; z-index:20; display:flex; align-items:center; gap:1rem; justify-content:space-between;
      padding:.8rem 1rem; border-bottom:var(--border);
      background:color-mix(in oklab, var(--surface), transparent 35%);
      backdrop-filter:saturate(1.2) blur(8px);
    }
    .logo{ font-weight:800; letter-spacing:.2px; text-decoration:none; color:var(--text); display:flex; align-items:center; gap:.55rem }
    .logo .dot{ width:.7rem; height:.7rem; border-radius:999px; background:var(--accent); box-shadow:0 0 16px var(--ring) }
    .logo span{ color:var(--accent) }
    .site-nav{ display:flex; gap:.75rem }
    .site-nav a{ color:var(--text); text-decoration:none; padding:.4rem .6rem; border-radius:.5rem }
    .site-nav a:hover{ outline:2px solid var(--ring); outline-offset:2px }
    .nav-toggle{ display:none; background:transparent; border:1px solid rgba(255,255,255,.2); color:var(--text); border-radius:.5rem; padding:.4rem .6rem }
    @media (max-width: 720px){
      .nav-toggle{ display:inline-block }
      .site-nav{ position:absolute; right:1rem; top:3.4rem; flex-direction:column; padding:.6rem; background:var(--surface); border:var(--border); border-radius:.6rem; display:none }
      .site-nav.open{ display:flex }
    }

    /* Hero banner and call-to-action buttons. */
    .hero{ text-align:center; padding:4.5rem 1rem 2rem; max-width:1060px; margin:0 auto }
    .hero h1{ font-size:clamp(2rem,3.6vw,3rem); margin:0 0 .5rem }
    .hero p{ margin:0 0 1.5rem; color:var(--muted) }
    .btn{ display:inline-block; padding:.72rem 1rem; border-radius:.6rem; background:var(--accent); color:#06141b; text-decoration:none; font-weight:700 }
    .cover{ margin:1.2rem auto 0; max-width:980px; aspect-ratio: 16/9; border-radius:.8rem; overflow:hidden; border:var(--border); background:var(--card) }
    .cover img{ width:100%; height:100%; object-fit:cover; display:block }

    /* Generic page-section container, centered at 1060px. */
    .section{ padding:2.6rem 1rem; max-width:1060px; margin:0 auto }
    .section h2{ font-size:clamp(1.4rem,2.2vw,1.8rem); margin:0 0 .4rem }
    .muted{ color:var(--muted) }

    /* Feature cards on the About section. */
    .features{ display:grid; gap:.8rem; grid-template-columns:repeat(auto-fill,minmax(230px,1fr)); margin:1rem 0 0 }
    .card{ background:var(--card); border:var(--border); border-radius:.8rem; padding:1rem }

    /* Leaderboard */
    .table-wrap{ overflow-x:auto; background:var(--card); border-radius:.8rem; border:var(--border) }
    table{ width:100%; border-collapse:collapse }
    th,td{ padding:.7rem .8rem; border-bottom:1px solid rgba(255,255,255,.06); text-align:left }
    th{ font-weight:700 }
    .pill{ display:inline-block; padding:.2rem .5rem; border-radius:.5rem; background:rgba(110,231,255,.18); color:var(--text); font-size:.8rem }

    /* Media showcase */
    .media-grid{ display:grid; grid-template-columns:2fr 1fr; gap:1rem }
    @media (max-width: 920px){ .media-grid{ grid-template-columns:1fr } }
    figure{ margin:0 }
    .media-card{ background:var(--card); border:var(--border); border-radius:.8rem; padding:.8rem }
    video{ width:100%; max-height:520px; background:#000; border-radius:.6rem }
    .toolbar{ display:flex; gap:.5rem; flex-wrap:wrap; margin:.6rem 0 }
    .toolbar button{ background:transparent; color:var(--text); border:1px solid rgba(255,255,255,.25); padding:.38rem .6rem; border-radius:.5rem; cursor:pointer }
    .toolbar button[aria-pressed="true"]{ outline:2px solid var(--ring) }

    /* Footer */
    .site-footer{ padding:2rem 1rem 4rem; text-align:center; color:var(--muted) }

    /* A11y: reduce motion */
    @media (prefers-reduced-motion: reduce){ *{ transition:none!important; animation:none!important } }

    /* --- Results grids (for figures) --- */
    .results-group{ margin-top:.8rem }
    .results-group h3{ margin:.2rem 0 .4rem; font-size:1rem; color:var(--muted) }
    .grid-2{
      display:grid; grid-template-columns:repeat(2,1fr); gap:1rem;
    }
    .grid-3{
      display:grid; grid-template-columns:repeat(3,1fr); gap:1rem;
    }
    /* Both figure grids collapse to a single column on narrow screens. */
    @media (max-width: 900px){
      .grid-2, .grid-3{ grid-template-columns:1fr }
    }
    .figure-card img{
      width:100%; height:auto; border-radius:.6rem; display:block;
      box-shadow:0 2px 12px rgba(0,0,0,.08);
    }
    .figure-card figcaption{
      font-size:.9rem; color:var(--muted); margin-top:.35rem;
    }

    /* make a card span both columns of .media-grid */
    .full-span { grid-column: 1 / -1; }
    @media (max-width: 920px){ .full-span { grid-column: auto; } }

    /* ===== Image modal (click-to-zoom) ===== */
    .img-modal {
      display:none; position:fixed; inset:0; z-index:1000;
      background:rgba(0,0,0,.75);
      align-items:center; justify-content:center;
      padding:2rem;
    }
    .img-modal.open { display:flex; }
    .img-modal img {
      max-width:92vw; max-height:92vh;
      border-radius:.6rem; box-shadow:0 6px 28px rgba(0,0,0,.4);
    }
    .img-modal .close {
      position:absolute; top:12px; right:16px;
      font-size:1.6rem; color:#fff; background:transparent; border:0; cursor:pointer;
    }
    .figure-card img { cursor: zoom-in; }
    body.modal-open { overflow:hidden; } /* prevent background scroll */

    /* Hero button row; .btn-disabled is the pre-PDF placeholder state. */
    .btn-row{ display:flex; gap:.6rem; justify-content:center; flex-wrap:wrap; margin:.6rem 0 .2rem; }
    .btn.btn-disabled{ opacity:.6; cursor:not-allowed; pointer-events:none; }

    /* push the hero video down so the button never overlaps */
    .hero > video{ margin-top:.8rem; display:block; max-width:980px; width:100%; border-radius:.8rem; border:var(--border); background:var(--card); }

    /* Accent-tinted "what to look for" callouts under result figures. */
    .hint{
      margin-top:.35rem; font-size:.88rem; color:var(--muted);
      border-left:3px solid color-mix(in oklab, var(--accent), transparent 65%);
      padding:.2rem .6rem; line-height:1.4;
    }



  </style>
</head>
<body>
  <!--
    ASSETS CHECKLIST (drop into your Space):
    - assets/cover.jpg           (hero title image)
    - assets/sample_AV.mp4       (example clip – audio+video)
    - assets/sample_A.mp4        (audio-only version)
    - assets/sample_V.mp4        (video-only version)
    - assets/sample_poster.jpg   (poster frame for the video)
    - assets/heatmap.png         (overall heatmap graphic)
    - assets/confusion.png       (confusion matrix or similar)
    - assets/favicon.png         (16–64px)
  -->

  <header class="site-header">
    <a href="#home" class="logo" aria-label="AVIP Benchmark home"><span class="dot" aria-hidden="true"></span> AVIP<span>Benchmark</span></a>
    <!-- Fixed: this button was completely empty — no visible glyph and no
         accessible name for screen readers. Also given an explicit type. -->
    <button class="nav-toggle" type="button" aria-expanded="false" aria-controls="site-nav" aria-label="Toggle navigation">☰</button>
    <nav id="site-nav" class="site-nav" aria-label="Main navigation">
      <a href="#home">Home</a>
      <a href="#about">About</a>
      <a href="#leaderboard">Leaderboard</a>
      <a href="#media">Examples</a>
      <a href="#contact">Contact</a>
    </nav>
  </header>

  <main id="home">
    <section class="hero" aria-labelledby="tagline">
      <h1 id="tagline">Do you hear it? Meet AVIP-Bench</h1>
      <p style="font-style:italic;">
        A controlled benchmark for evaluating intuitive physics from video &amp; sound.
      </p>
      <p>
      Objects crash, bounce, and shatter - our benchmark of audiovisual object drops
      probes whether models benefit from adding <strong>sound</strong> when reasoning about physics.
      </p>
      <!-- Button row: examples + PDF. The paper link starts as an inert
           placeholder (no href, so it is not a fake "#" link); the script at
           the bottom of the page assigns the real href once the PDF exists. -->
      <div class="btn-row">
        <a class="btn" href="#media">See example Videos and Results</a>
        <a id="paperLink" class="btn btn-disabled" aria-disabled="true">📄 PDF coming soon</a>
      </div>

      <!-- Hero video: muted autoplay loop (allowed by autoplay policies).
           Fixed: removed the invalid empty poster="" attribute. -->
      <video autoplay muted loop playsinline>
        <source src="thumbnail.mp4" type="video/mp4" />
        <!-- NOTE(review): thumbnail.webm may not exist yet — upload it or drop
             this line; browsers prefer the mp4 source above anyway. -->
        <source src="thumbnail.webm" type="video/webm" />
      </video>
    </section>

    <!-- About: one-paragraph overview, four feature cards, and a collapsible
         (native <details>) short description of the evaluation method. -->
    <section id="about" class="section">
      <h2>What is AVIP?</h2>
      <p class="muted">A tiny, controlled benchmark with triplet videos per clip: <span class="pill">A</span> audio-only, <span class="pill">V</span> video-only, and <span class="pill">AV</span> audio+video. Tasks: <em>object</em>, <em>material</em>, <em>outcome</em>. We check top‑1 predictions vs. ground truth and look for cross‑modal gains.</p>
      <ul class="features">
        <li class="card">📦 <strong>Minimal, reproducible clips</strong><br/>Short single‑impact scenes recorded in a controlled setup.</li>
        <li class="card">🔊 <strong>Modality toggles</strong><br/>Each clip exists as A, V, and AV to test true audio usage.</li>
        <li class="card">📈 <strong>Metrics</strong><br/>Top‑1 accuracy per task and an <em>AV − max(A,V)</em> cross‑modal gain.</li>
        <li class="card">🧪 <strong>Probe‑style prompts</strong><br/>Strict label sets &amp; JSON outputs to avoid prompt drift.</li>
      </ul>
      <details class="card" style="margin-top:1rem">
        <summary><strong>Method (short)</strong></summary>
        <ol> 
          <li>For each clip, run models on A, V, and AV variants with the same instruction-style prompt.</li>
          <li>Decode model outputs into <code>{object, material, outcome}</code> and compare against labels.</li>
          <li>Compute per-task Top-1 and Top-5 accuracy and cross-modal gain per clip and in aggregate; additionally report calibration/confidence metrics (ECE, Brier, margin, entropy, Top-1 probability) and probing-based audio reliance via fixed cue selection and A/V/AV consistency; all metrics computed on the paired clip set (A&cap;V&cap;AV) with 95% confidence intervals.</li>
        </ol>
      </details>
    </section>
    
    <section id="leaderboard" class="section" aria-labelledby="lb-title">
      <h2 id="lb-title">Leaderboard</h2>
      <p class="muted">Per‑Modality (A / V / AV)</p>
        <div class="table-wrap">
          <table aria-describedby="lb-title">
            <thead>
              <tr>
                <th scope="col">Model</th>
                <th scope="col">Modality</th>
                <th scope="col">N</th>
                <th scope="col">Top‑1 Acc (%)</th>
                <th scope="col">Updated</th>
              </tr>
            </thead>
            <tbody id="leaderboard-body"><!-- JS renders here --></tbody>
          </table>
        </div>
      
      <!-- Leaderboard data (inline JSON, consumed by the renderer script at
           the bottom of the page via the element id "leaderboard-data") -->
      <script id="leaderboard-data" type="application/json">{
        "rows": [
        {"model":"Gemini‑2.5 Flash (no think)", "modality":"A", "Top1AccuracyinPercent":20.0, "N":993},
        {"model":"Gemini‑2.5 Flash (no think)", "modality":"AV", "Top1AccuracyinPercent":53.4, "N":993},
        {"model":"Gemini‑2.5 Flash (no think)", "modality":"V", "Top1AccuracyinPercent":48.2, "N":993},
        
        {"model":"Gemini‑2.5 Flash (think)", "modality":"A", "Top1AccuracyinPercent":24.1, "N":990},
        {"model":"Gemini‑2.5 Flash (think)", "modality":"AV", "Top1AccuracyinPercent":58.5, "N":993},
        {"model":"Gemini‑2.5 Flash (think)", "modality":"V", "Top1AccuracyinPercent":50.9, "N":993},
        
        {"model":"Gemini‑2.5 Pro (think)", "modality":"A", "Top1AccuracyinPercent":17.3, "N":819},
        {"model":"Gemini‑2.5 Pro (think)", "modality":"AV", "Top1AccuracyinPercent":61.8, "N":807},
        {"model":"Gemini‑2.5 Pro (think)", "modality":"V", "Top1AccuracyinPercent":56.3, "N":807},
        
        {"model":"Qwen2.5‑Omni 7B (local)", "modality":"A", "Top1AccuracyinPercent":10.9, "N":993},
        {"model":"Qwen2.5‑Omni 7B (local)", "modality":"AV", "Top1AccuracyinPercent":38.7, "N":993},
        {"model":"Qwen2.5‑Omni 7B (local)", "modality":"V", "Top1AccuracyinPercent":38.5, "N":993}
        ]
      }</script>
    </section>

    <section id="media" class="section" aria-labelledby="ex-title">
      <h2 id="ex-title">Example clips and Plots</h2>
      <div class="media-grid">
        <figure class="media-card">
          <video id="sampleVideo" controls preload="metadata" playsinline poster="assets/sample_poster.jpg">
            <source src="paperbox_high_1.MP4" type="video/mp4" />
            Your browser doesn’t support HTML5 video.
          </video>
          <!-- A/V/AV variant switcher; the toggle script swaps the <source>
               above and announces the change via the aria-live span. -->
          <div class="toolbar" role="toolbar" aria-label="Version selector">
            <button type="button" class="ver" data-src="paperbox_high_1_A.mp4" aria-pressed="false" aria-label="Audio only (A)">A</button>
            <button type="button" class="ver" data-src="paperbox_high_1_V.mp4" aria-pressed="false" aria-label="Video only (V)">V</button>
            <button type="button" class="ver" data-src="paperbox_high_1.MP4" aria-pressed="true" aria-label="Audio + Video (AV)">AV</button>
            <span class="muted" id="verStatus" aria-live="polite" style="margin-left:.4rem">Now showing: AV</span>
          </div>
          <figcaption class="muted">Task labels (demo): <strong>object</strong>=<code>paperbox</code>, <strong>material</strong>=<code>cardboard</code>, <strong>outcome</strong>=<code>bounce</code></figcaption>
        </figure>

        <!-- Group 0: Cross-Modal Gain heatmap -->
        <div class="results-group">
          <h3>Cross-Modal Gain (CMG)</h3>
            <figure class="figure-card">
              <img src="xmod_cis.png" alt="Cross-Modal Gain heatmap" loading="lazy">
              <!-- Fixed: caption contained a leaked LaTeX escape ("95\%") -->
              <figcaption>CMG in percentage points per engine; horizontal bars are 95% paired-bootstrap CIs on the paired clip set.</figcaption>
              <div class="hint">Look for positive values: these mean AV was better than either audio or video alone. Gains usually appear for outcome prediction, but rarely for object or material recognition.</div>
            </figure>
        </div>

        <!-- Group 1: Average modality attribution (Audio vs Video) -->
        <div class="results-group">
          <h3>Average modality attribution (AV)</h3>
          <div class="grid-2">
            <figure class="figure-card">
              <img src="Heatmap_Audio.png" alt="Average audio weight across models" loading="lazy">
              <figcaption>Audio weight by model.</figcaption>
              <div class="hint">What to look for: Red = model relies more on audio, Blue = model relies less. 
                Engines that “listen” more may gain on outcome prediction, but not always.</div>
            </figure>
            <figure class="figure-card">
              <img src="Heatmap_Video.png" alt="Average video weight across models" loading="lazy">
              <figcaption>Video weight by model.</figcaption>
              <div class="hint">What to look for: Red = model relies more on video, Blue = model relies less. 
                Engines that “look” more often ignore sound, which can explain weak cross-modal gains.</div>
            </figure>
          </div>
        </div>

        <!-- Group 2: Top-1 Accuracy: single combined figure -->
        <div class="media-card full-span">
          <div class="results-group">
            <h3>Top-1 accuracy by task</h3>
            <figure class="figure-card">
              <img src="accuracy_micro_macro_cis.png" alt="Top-1 accuracy per model across object, material, and outcome for A, V, AV" loading="lazy">
              <figcaption>Top-1 accuracy with 95% CIs (A, V, AV) across tasks and models.</figcaption>
              <div class="hint">What to look for: V is usually highest; AV improves over A and sometimes nudges past V on outcome. 
                Big gaps A→AV mean sound is helpful; AV≈V means little extra benefit.</div>
            </figure>
          </div>
        </div>
      </div><!-- /.media-grid — fixed: this closing tag was missing, leaving the grid div unclosed -->
    </section>

    <!-- Contact: maintainer email plus the Hugging Face project link. -->
    <section id="contact" class="section">
      <h2>Contact</h2>
      <p>Questions? <a href="mailto:bramo.g@protonmail.com">bramo.g@protonmail.com</a></p>
      <p class="muted"><a href="https://huggingface.co/Grets/AVIP">huggingface.co/Grets/AVIP</a></p>
    </section>
  </main>

<!-- Page footer; the #year span is filled in by the base script below. -->
<footer class="site-footer">
  <small>&copy; <span id="year"></span> Grets. Rendered by Hugging Face Spaces.</small>
</footer>

<!-- 1) Base scripts (nav toggle & footer year) -->
<script>
  // Collapse/expand the mobile navigation, keeping aria-expanded in sync
  // with the .open class on the nav element.
  const toggleBtn = document.querySelector('.nav-toggle');
  const siteNav = document.getElementById('site-nav');
  if (toggleBtn && siteNav) {
    toggleBtn.addEventListener('click', function () {
      const isOpen = siteNav.classList.toggle('open');
      toggleBtn.setAttribute('aria-expanded', String(isOpen));
    });
  }
  // Stamp the current year into the footer copyright line.
  const yearEl = document.getElementById('year');
  if (yearEl) yearEl.textContent = new Date().getFullYear();
</script>

<!-- 2) Leaderboard renderer (consumes only the inline JSON rows above) -->
<script>
  (function renderLeaderboard(){
    const el = document.getElementById('leaderboard-body');
    const dataEl = document.getElementById('leaderboard-data');
    if (!el || !dataEl) return;

    // Parse the inline JSON payload; on malformed JSON the table stays empty.
    let rows = [];
    try {
      const parsed = JSON.parse(dataEl.textContent.trim());
      rows = Array.isArray(parsed.rows) ? parsed.rows : [];
    } catch (_) {}

    // Sort: model name (A–Z), then modality in the fixed order AV, V, A.
    const order = ['AV', 'V', 'A'];
    rows.sort((a,b) =>
      String(a.model).localeCompare(String(b.model)) ||
      order.indexOf(a.modality) - order.indexOf(b.modality)
    );

    const today = new Date().toISOString().slice(0,10);
    // One-decimal percentage; em dash for missing/empty values.
    const fmtPct = v => (v==null || v==='') ? '—' : (Number(v).toFixed(1) + '%');

    // Columns: Model | Modality | N | Top-1 | Updated
    // (the rendered table has no Top-5 column, matching the <thead> above)
    // NOTE(review): values are interpolated without HTML-escaping — acceptable
    // for this trusted inline JSON, not for user-supplied data.
    el.innerHTML = rows.map(r => `
      <tr>
        <td>${r.model}</td>
        <td>${r.modality}</td>
        <td>${r.N ?? '—'}</td>
        <td>${fmtPct(r.Top1AccuracyinPercent)}</td>
        <td>${r.updated ?? today}</td>
      </tr>
    `).join('');
  })();
</script>

<!-- 3) Example video toggles -->
<script>
  (function(){
    // Switch the demo clip between its A / V / AV variants.
    const player = document.getElementById('sampleVideo');
    const buttons = document.querySelectorAll('.ver');
    const statusEl = document.getElementById('verStatus');
    for (const button of buttons) {
      button.addEventListener('click', function(){
        // Reflect the pressed state on the whole group first.
        for (const other of buttons) other.setAttribute('aria-pressed','false');
        button.setAttribute('aria-pressed','true');
        const nextSrc = button.getAttribute('data-src');
        const label = button.textContent.trim();
        if (!nextSrc || !player) return;
        // Remember whether playback was active so the new variant resumes.
        const resume = !player.paused && !player.ended;
        player.pause();
        player.querySelector('source').src = nextSrc;
        player.load();
        if (resume) player.play().catch(function(){});
        if (statusEl) statusEl.textContent = `Now showing: ${label}`;
      });
    }
  })();
</script>

<!-- Image modal (click-to-zoom). Fixed: the overlay had no dialog semantics —
     added role="dialog", aria-modal and an accessible name; the close button
     also gets an explicit type. The zoom script toggles .open/aria-hidden. -->
<div class="img-modal" id="imgModal" role="dialog" aria-modal="true" aria-label="Enlarged figure" aria-hidden="true">
  <button class="close" type="button" aria-label="Close">×</button>
  <img id="imgModalImg" alt="">
</div>

<script>
(function(){
  // Click-to-zoom for result figures: clicking any .figure-card image opens
  // the #imgModal overlay with that image (or its data-full variant).
  const overlay = document.getElementById('imgModal');
  const zoomedImg = document.getElementById('imgModalImg');
  if (!overlay || !zoomedImg) return;

  function openModal(sourceImg){
    const fullSrc = sourceImg.getAttribute('data-full');
    zoomedImg.src = fullSrc || sourceImg.src;
    zoomedImg.alt = sourceImg.alt || '';
    overlay.classList.add('open');
    document.body.classList.add('modal-open');
    overlay.setAttribute('aria-hidden','false');
  }

  function closeModal(){
    overlay.classList.remove('open');
    document.body.classList.remove('modal-open');
    overlay.setAttribute('aria-hidden','true');
    zoomedImg.src = '';
  }

  // Event delegation: one document-level listener covers every figure image.
  document.addEventListener('click', function(e){
    const hit = e.target.closest('.figure-card img');
    if (hit) openModal(hit);
  });

  // Clicking the backdrop or the × button dismisses the modal.
  overlay.addEventListener('click', function(e){
    if (e.target === overlay || e.target.classList.contains('close')) closeModal();
  });

  // Escape also dismisses it while open.
  document.addEventListener('keydown', function(e){
    if (e.key === 'Escape' && overlay.classList.contains('open')) closeModal();
  });
})();
</script>

<script>
  // Location the paper PDF will be uploaded to.
  const PDF_PATH = "AVIP_gbramow_lbreitkopf_iberger.pdf";

  // Probe for the PDF with a HEAD request; if (and only if) it is reachable,
  // turn the disabled hero placeholder into a working "View PDF" link.
  function enablePdfButton(){
    fetch(PDF_PATH, { method:"HEAD", cache:"no-store" })
      .then(function(res){
        if(!res.ok) return; // keep disabled state
        const link = document.getElementById("paperLink");
        if(!link) return;
        link.href = PDF_PATH;
        link.target = "_blank";
        link.rel = "noopener";
        link.textContent = "📄 View PDF";
        link.classList.remove("btn-disabled");
        link.setAttribute("aria-disabled","false");
      })
      .catch(function(){ /* keep disabled */ });
  }
  document.addEventListener("DOMContentLoaded", enablePdfButton);
</script>


</body>
</html>