File size: 118,665 Bytes
8564264
 
 
 
 
 
 
 
2668ac0
8564264
 
 
2668ac0
8564264
2668ac0
8564264
 
 
2668ac0
 
8564264
2668ac0
95330bf
 
 
 
28913ff
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95330bf
2668ac0
 
 
 
 
8564264
2668ac0
 
a0a058e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2668ac0
 
8564264
2668ac0
 
 
 
8564264
 
 
2668ac0
 
 
 
8564264
2668ac0
 
 
8564264
2668ac0
 
 
 
8564264
f447f8e
2668ac0
 
f447f8e
 
 
 
8564264
2668ac0
 
 
 
8564264
 
 
2668ac0
8564264
2668ac0
8564264
3084d27
8564264
2668ac0
8564264
 
2668ac0
 
 
 
 
 
 
 
8564264
95330bf
 
 
 
 
 
 
 
28913ff
 
 
 
 
 
95330bf
 
 
 
 
 
 
 
 
5e76be6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2668ac0
28913ff
2668ac0
8564264
 
 
 
fcaba40
 
 
 
2668ac0
28913ff
 
 
 
6f249a4
28913ff
 
f6e61a1
28913ff
 
 
2668ac0
 
 
 
 
8564264
a0a058e
 
 
 
 
 
 
 
 
 
 
 
8564264
 
 
2668ac0
6f249a4
 
 
 
2668ac0
 
8564264
 
 
 
 
 
2668ac0
bdf5f19
2668ac0
 
 
 
 
5e76be6
 
 
 
 
 
 
 
 
95330bf
 
 
28913ff
00e5e3d
 
 
28913ff
 
 
 
95330bf
00e5e3d
95330bf
28913ff
00e5e3d
 
 
 
 
 
28913ff
 
 
 
95330bf
 
 
 
 
 
 
 
 
 
 
 
 
8564264
 
2668ac0
 
40456dc
1747071
e39968b
a0a058e
 
 
 
 
c2a3de8
0f3810d
a0a058e
 
c2a3de8
8564264
2668ac0
8564264
2668ac0
 
 
 
 
 
 
a0a058e
 
 
 
 
 
 
 
 
 
 
40456dc
a0a058e
 
 
 
 
 
 
 
 
 
c4c45a6
a0a058e
c4c45a6
 
 
 
 
 
 
 
 
 
 
 
a0a058e
2668ac0
 
 
6f249a4
 
 
 
2668ac0
 
 
 
6f249a4
 
 
 
 
 
 
 
 
 
 
2668ac0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8564264
2668ac0
8564264
2668ac0
a0a058e
2668ac0
 
28913ff
2668ac0
 
a0a058e
2668ac0
 
 
 
 
 
8564264
2668ac0
 
 
 
 
 
 
 
28913ff
 
2668ac0
 
 
28913ff
2668ac0
28913ff
 
2668ac0
 
28913ff
 
2668ac0
 
 
 
 
 
 
 
0f3810d
 
2668ac0
a0a058e
2668ac0
 
 
 
 
 
 
 
 
 
 
 
85e93ad
2668ac0
 
 
 
 
 
 
 
6f249a4
2668ac0
 
 
 
a0a058e
2668ac0
 
 
 
 
 
8564264
2668ac0
8564264
2668ac0
 
 
6f249a4
 
 
 
2668ac0
 
6f249a4
 
8564264
6f249a4
 
2668ac0
 
 
6f249a4
2668ac0
 
 
8564264
2668ac0
40456dc
2668ac0
8564264
5e76be6
 
 
a0a058e
5e76be6
 
 
 
 
 
 
 
 
a0a058e
5e76be6
 
 
 
 
 
 
 
 
 
a0a058e
5e76be6
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
95330bf
 
a0a058e
95330bf
 
 
 
 
 
28913ff
95330bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0a058e
95330bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28913ff
95330bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0a058e
95330bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28913ff
 
95330bf
 
 
 
 
 
 
 
 
 
 
28913ff
00e5e3d
28913ff
95330bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
28913ff
 
 
 
 
 
 
95330bf
28913ff
95330bf
28913ff
 
 
 
 
 
 
 
95330bf
 
 
 
 
 
 
 
28913ff
 
95330bf
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a0a058e
 
 
 
 
 
 
2668ac0
 
5e76be6
95330bf
 
 
 
2668ac0
 
8564264
2668ac0
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
658
659
660
661
662
663
664
665
666
667
668
669
670
671
672
673
674
675
676
677
678
679
680
681
682
683
684
685
686
687
688
689
690
691
692
693
694
695
696
697
698
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722
723
724
725
726
727
728
729
730
731
732
733
734
735
736
737
738
739
740
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8"/>
  <meta name="viewport" content="width=device-width, initial-scale=1.0"/>
  <title>Text Quality Rating Benchmark</title>
  <link rel="preconnect" href="https://fonts.googleapis.com"/>
  <link href="https://fonts.googleapis.com/css2?family=JetBrains+Mono:wght@400;600;700&family=Syne:wght@700;800&display=swap" rel="stylesheet"/>
  <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.1/chart.umd.min.js"></script>
  <style>
    *, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }
    body {
      background: #0d1117; color: #e2e8f0;
      font-family: 'JetBrains Mono', monospace;
      padding: 36px 28px 80px; min-height: 100vh;
    }
    h1 {
      font-family: 'Syne', sans-serif;
      font-size: clamp(18px, 3vw, 28px); font-weight: 800;
      letter-spacing: -0.02em; color: #f1f5f9; margin-bottom: 6px;
    }
    .subtitle { color: #64748b; font-size: 12px; margin-bottom: 12px; }
    .meta-subtitle {
      color: #64748b; font-size: 12px; margin-bottom: 20px;
      line-height: 1.8;
    }
    .methodology-box {
      background: #111827; border: 1px solid #1e2a3a;
      border-radius: 8px; padding: 18px 22px;
      margin-bottom: 24px; max-width: 900px;
    }
    .methodology-box h3 {
      font-family: 'Syne', sans-serif; font-size: 14px;
      color: #e2e8f0; margin-bottom: 8px; font-weight: 700;
    }
    .methodology-box p, .methodology-box li {
      font-size: 11.5px; color: #94a3b8; line-height: 1.6;
    }
    .methodology-box ul {
      margin-top: 8px; padding-left: 20px;
    }
    .methodology-box li { margin-bottom: 4px; }
    .highlight { color: #7dd3fc; font-weight: 600; }
    .sep { color: #334155; margin: 0 8px; }
    .scoring-note {
      display: inline-flex; gap: 16px; flex-wrap: wrap;
      background: #131820; border: 1px solid #1e2a3a;
      border-radius: 8px; padding: 8px 14px;
      font-size: 11px; color: #94a3b8; margin-bottom: 28px;
    }
    .scoring-note span { display: flex; align-items: center; gap: 5px; }
    .dot { width: 9px; height: 9px; border-radius: 50%; flex-shrink: 0; }
    .dataset-toggle {
      display: inline-flex; margin-bottom: 20px;
      border: 1px solid #1e2a3a; border-radius: 8px; overflow: hidden;
    }
    .ds-btn {
      padding: 8px 20px; font: inherit; font-size: 12px; font-weight: 700;
      cursor: pointer; border: none; background: #131820; color: #475569;
      transition: all .15s; letter-spacing: 0.03em;
    }
    .ds-btn:hover { color: #94a3b8; }
    .ds-btn.active { background: #1e3a5f; color: #7dd3fc; }
    .ds-btn:disabled { opacity: 0.3; cursor: not-allowed; }
    .ds-badge {
      display: inline-block; font-size: 9px; font-weight: 700;
      padding: 1px 5px; border-radius: 4px; margin-left: 6px;
      background: #0f2840; color: #38bdf8; vertical-align: middle;
      letter-spacing: 0.05em;
    }
    .filter-label { font-size: 10px; text-transform: uppercase; letter-spacing: 0.1em; color: #475569; margin-bottom: 10px; }
    #chips { display: flex; flex-wrap: wrap; gap: 6px; margin-bottom: 24px; }
    .chip {
      padding: 4px 11px; border-radius: 20px; font-size: 11px;
      font-family: 'JetBrains Mono', monospace; cursor: pointer;
      border: 1px solid #2d3748; background: #161b26; color: #94a3b8;
      transition: all .15s; user-select: none;
    }
    .chip:hover { border-color: #7dd3fc; color: #e2e8f0; }
    .chip.active { background: #1e3a5f; border-color: #38bdf8; color: #7dd3fc; }
    .metric-toggle {
      display: flex; width: fit-content;
      border: 1px solid #1e2a3a; border-radius: 6px;
      overflow: hidden; margin-bottom: 16px;
    }
    .mt-btn {
      padding: 6px 14px; font: inherit; font-size: 11px; cursor: pointer;
      border: none; background: #131820; color: #64748b; transition: all .15s;
    }
    .mt-btn.active { background: #1e3a5f; color: #7dd3fc; }
    .table-wrap {
      overflow-x: auto; border-radius: 10px;
      border: 1px solid #1e2a3a; margin-bottom: 52px;
    }
    table { border-collapse: collapse; width: auto; min-width: 100%; font-size: 12px; }
    thead tr { background: #111827; border-bottom: 2px solid #1e2a3a; }
    th { padding: 11px 6px; white-space: nowrap; }
    th.rank-col  { width: 44px; padding-left: 14px; }
    th.model-col { text-align: left; width: 220px; min-width: 180px; padding-left: 14px; }
    th.avg-col   { width: 110px; }
    th.lang-col  { width: 90px; }
    .sort-btn {
      background: none; border: none; color: #64748b; cursor: pointer;
      font: inherit; font-size: 10px; font-weight: 700;
      text-transform: uppercase; letter-spacing: 0.07em;
      display: inline-flex; align-items: center; gap: 3px; padding: 0; white-space: nowrap;
    }
    .sort-btn:hover { color: #7dd3fc; }
    .sort-btn.active { color: #e2e8f0; }
    tbody tr { border-bottom: 1px solid #0f1520; transition: filter .1s; }
    tbody tr:hover { filter: brightness(1.15); }
    td { padding: 8px 6px; white-space: nowrap; }
    td.rank  { padding-left: 14px; color: #475569; font-weight: 700; font-size: 13px; }
    td.model { padding-left: 14px; color: #cbd5e1; font-weight: 600; width: 220px; max-width: 220px; overflow: hidden; text-overflow: ellipsis; }
    td.score {
      text-align: center; font-weight: 700; font-size: 11.5px;
      border-right: 1px solid rgba(255,255,255,0.04);
    }
    td.empty { text-align: center; color: #2d3748; background: #111520; border-right: 1px solid rgba(255,255,255,0.04); }
    .section-title {
      font-family: 'Syne', sans-serif; font-size: 18px; font-weight: 700;
      color: #f1f5f9; margin-bottom: 4px;
    }
    .chart-wrap {
      background: #111827; border: 1px solid #1e2a3a;
      border-radius: 10px; padding: 24px 20px;
    }
    .analysis-card {
      background: #111827; border: 1px solid #1e2a3a;
      border-radius: 10px; padding: 22px 20px;
    }
    .analysis-card h3 {
      font-family: 'Syne', sans-serif; font-size: 14px; font-weight: 700;
      color: #f1f5f9; margin-bottom: 4px;
    }
    .chart-scroll-wrap {
      max-height: 380px;
      overflow-y: auto;
      overflow-x: hidden;
      padding-right: 8px;
    }
    .analysis-card .card-sub {
      font-size: 11px; color: #475569; margin-bottom: 16px; line-height: 1.5;
    }
    .model-select {
      background: #1a2236; border: 1px solid #2d3748; border-radius: 6px;
      color: #cbd5e1; font: inherit; font-size: 11px;
      padding: 5px 10px; margin-bottom: 14px; cursor: pointer; width: 100%;
    }
    .model-select:focus { outline: none; border-color: #38bdf8; }
    .dist-wrap {
      overflow-x: auto; border-radius: 10px;
      border: 1px solid #1e2a3a; margin-bottom: 16px;
    }
    .dist-wrap table { border-collapse: collapse; width: auto; min-width: 100%; font-size: 12px; }
    .dist-wrap thead tr { background: #111827; border-bottom: 2px solid #1e2a3a; }
    .dist-wrap th {
      padding: 10px 10px; white-space: nowrap; font-size: 10px;
      text-transform: uppercase; letter-spacing: 0.07em; color: #475569; font-weight: 700;
    }
    .dist-wrap th.lang-h  { text-align: left; width: 140px; padding-left: 14px; color: #64748b; }
    .dist-wrap th.score-h { width: 70px; text-align: center; }
    .dist-wrap th.total-h { width: 80px; text-align: center; color: #94a3b8; }
    .dist-wrap td { padding: 8px 10px; border-bottom: 1px solid #0f1520; white-space: nowrap; }
    .dist-wrap td.lang-d  { padding-left: 14px; color: #cbd5e1; font-weight: 600; font-size: 12px; }
    .dist-wrap td.count-d { text-align: center; font-size: 12px; }
    .dist-wrap td.total-d { text-align: center; font-weight: 700; font-size: 12px; color: #94a3b8; }
    .dist-bar {
      display: inline-block; height: 6px; border-radius: 3px;
      background: #2563eb; vertical-align: middle; margin-left: 4px; opacity: 0.7;
    }
    .footer { margin-top: 20px; font-size: 11px; color: #2d3748; text-align: right; }
    ::-webkit-scrollbar { width: 6px; height: 6px; background: #0d1117; }
    ::-webkit-scrollbar-thumb { background: #2d3748; border-radius: 3px; }
  </style>
</head>
<body>
  <h1>Text Quality Rating Benchmark</h1>
  <p class="meta-subtitle">
    LLM accuracy at rating text quality on a 1–6 scale across multiple languages
    <span class="sep">·</span> Documents sourced from FineWeb dataset
  </p>

  <div class="methodology-box">
    <h3>Methodology</h3>
    <p>The core objective of this benchmark is to evaluate how effectively Large Language Models can assess text quality, simulating the process of filtering data for LLM pre-training. The dataset curation followed a strict pipeline:</p>
    <ul>
      <li><span class="highlight">Initial Scoring:</span> Multilingual texts sampled from the FineWeb dataset were evaluated by <strong>DeepSeek V3.2</strong>, which assigned them a quality and substantiveness rating on a scale from 1 (lowest quality) to 6 (highest quality).</li>
      <li><span class="highlight">Verification:</span> These initial scores were subsequently verified by an independent judge, <strong>Gemini 3 Flash</strong>.</li>
      <li><span class="highlight">Filtering:</span> To ensure the highest ground-truth reliability, only the documents that received the absolute highest approval rating during the Gemini verification phase were included in this benchmark.</li>
      <li><span class="highlight">Version:</span> 1.0</li>
    </ul>
  </div>

  <div class="scoring-note">
    <span><span class="dot" style="background:#22c55e"></span>Exact match = 1.0 pt</span>
    <span><span class="dot" style="background:#eab308"></span>Off by ±1 = 0.5 pt</span>
    <span><span class="dot" style="background:#ef4444"></span>Off by ≥2 = 0.0 pt</span>
  </div>

  <div id="dataset-toggle-wrap">
    <div class="filter-label" style="margin-bottom:8px">Dataset</div>
    <div class="dataset-toggle" style="margin-bottom:20px">
      <button class="ds-btn active" id="ds-btn-1" onclick="setDataset(1)">
        FineWeb <span class="ds-badge">WEB</span>
      </button>
      <button class="ds-btn" id="ds-btn-2" onclick="setDataset(2)">
        FinePDF <span class="ds-badge">PDF</span>
      </button>
    </div>
  </div>

  <div class="filter-label">Filter by language</div>
  <div id="chips"></div>

  <div class="metric-toggle">
    <button class="mt-btn active" id="btn-wp"     onclick="setMetric('wp')">Weighted Score</button>
    <button class="mt-btn"        id="btn-ex"     onclick="setMetric('ex')">Exact Accuracy</button>
    <button class="mt-btn"        id="btn-parsed" onclick="setMetric('parsed')">Parse Rate</button>
    <button class="mt-btn"        id="btn-mae"    onclick="setMetric('mae')">MAE</button>
  </div>

  <div class="table-wrap">
    <table id="lb-table">
      <thead id="lb-head"></thead>
      <tbody id="lb-body"></tbody>
    </table>
  </div>

  <p class="section-title" style="margin-top:52px">Global Model Comparison</p>
  <p class="subtitle" style="margin-bottom:20px">Weighted Score vs Exact Accuracy — all languages combined, sorted by Weighted Score</p>
  <div class="chart-wrap">
    <canvas id="globalChart"></canvas>
  </div>

  <p class="section-title" style="margin-top:52px">Dataset Distribution</p>
  <p class="subtitle" style="margin-bottom:20px">Number of unique texts per rating score (1–6) for each language — sourced from original files</p>
  <div class="dist-wrap">
    <table id="dist-table">
      <thead id="dist-head"></thead>
      <tbody id="dist-body"></tbody>
    </table>
  </div>

  <p class="section-title" style="margin-bottom:4px;margin-top:52px">Model Error Analysis</p>
  <p class="subtitle" style="margin-bottom:20px">Bias, critical misclassifications and confusion patterns</p>

  <!-- Bias lollipop — full width -->
  <div class="analysis-card" style="margin-bottom:24px">
    <h3>Prediction Bias</h3>
    <p class="card-sub">Average error (predicted − ground truth). Negative = underestimation, positive = overestimation.</p>
    <div class="chart-scroll-wrap">
      <div id="biasChartContainer" style="position:relative">
        <canvas id="biasChart"></canvas>
      </div>
    </div>
  </div>

  <!-- Critical confusion — full width, below bias -->
  <div class="analysis-card" style="margin-bottom:52px">
    <h3>Critical Confusion Rate</h3>
    <p class="card-sub">
      % of low-quality texts (rating 1–2) predicted as high-quality (5–6) and vice versa.
      These are the most dangerous misclassifications.
    </p>
    <div class="chart-scroll-wrap">
      <div id="criticalChartContainer" style="position:relative">
        <canvas id="criticalChart"></canvas>
      </div>
    </div>
  </div>

  <!-- Full confusion heatmap with model dropdown -->
  <div class="analysis-card" style="margin-bottom:52px">
    <h3>Confusion Matrix</h3>
    <p class="card-sub">Row = ground truth rating, column = predicted rating. Values show % of predictions within each true class.</p>
    <select class="model-select" id="confModelSelect" onchange="renderConfusion()"></select>
    <div id="confusionWrap" style="overflow-x:auto">
      <canvas id="confusionChart"></canvas>
    </div>
  </div>

  <div class="footer" id="footer"></div>

<script>
(function() {
  const ALL_ROWS    = [{"model": "Qwen/Qwen3.5-397B-A17B-FP8", "avg_exact": 0.60605, "avg_wp": 0.768996, "avg_bias": 0.2132, "avg_parsed": 1.0, "avg_mae": 0.4719, "total": 21952, "lang_exact": {"ab": 0.508333, "ar": 0.6725, "az": 0.695, "be": 0.73, "bg": 0.712707, "bo": 0.7575, "ca": 0.657382, "cn": 0.546667, "cs": 0.7525, "cy": 0.471667, "da": 0.585, "de": 0.56, "el": 0.6325, "en": 0.648333, "es": 0.786787, "et": 0.6775, "eu": 0.420436, "fa": 0.395, "fi": 0.7575, "fr": 0.643979, "gl": 0.5, "he": 0.511667, "hi": 0.568015, "hu": 0.6525, "hv": 0.715, "ir": 0.591667, "is": 0.675, "it": 0.76, "ka": 0.556604, "kz": 0.43, "la": 0.692008, "li": 0.625, "lv": 0.625, "mk": 0.5125, "mt": 0.655, "nl": 0.655, "no": 0.5625, "pl": 0.508772, "pt": 0.665, "ro": 0.61, "ru": 0.581667, "sk": 0.665, "sl": 0.7625, "sq": 0.7325, "sr": 0.615, "sv": 0.6125, "tr": 0.62, "uk": 0.489547}, "lang_wp": {"ab": 0.7075, "ar": 0.80375, "az": 0.805, "be": 0.83125, "bg": 0.839779, "bo": 0.87375, "ca": 0.786908, "cn": 0.721667, "cs": 0.8625, "cy": 0.664167, "da": 0.7825, "de": 0.765, "el": 0.805, "en": 0.806667, "es": 0.887387, "et": 0.82875, "eu": 0.623116, "fa": 0.606667, "fi": 0.8675, "fr": 0.793194, "gl": 0.6825, "he": 0.721667, "hi": 0.751838, "hu": 0.81, "hv": 0.855, "ir": 0.750833, "is": 0.805, "it": 0.87375, "ka": 0.687107, "kz": 0.644167, "la": 0.824561, "li": 0.80625, "lv": 0.7875, "mk": 0.69375, "mt": 0.76125, "nl": 0.82625, "no": 0.715, "pl": 0.70614, "pt": 0.815, "ro": 0.765, "ru": 0.7625, "sk": 0.8275, "sl": 0.87625, "sq": 0.85375, "sr": 0.7975, "sv": 0.79375, "tr": 0.7975, "uk": 0.702091}, "lang_parsed": {"ab": 1.0, "ar": 1.0, "az": 1.0, "be": 1.0, "bg": 1.0, "bo": 1.0, "ca": 1.0, "cn": 1.0, "cs": 1.0, "cy": 1.0, "da": 1.0, "de": 1.0, "el": 1.0, "en": 1.0, "es": 1.0, "et": 1.0, "eu": 1.0, "fa": 1.0, "fi": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "hu": 1.0, "hv": 1.0, "ir": 1.0, "is": 1.0, "it": 1.0, "ka": 1.0, "kz": 1.0, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 1.0, "mt": 1.0, 
"nl": 1.0, "no": 1.0, "pl": 1.0, "pt": 1.0, "ro": 1.0, "ru": 1.0, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 1.0, "tr": 1.0, "uk": 1.0}, "lang_mae": {"ab": 0.5933, "ar": 0.4025, "az": 0.405, "be": 0.36, "bg": 0.3232, "bo": 0.255, "ca": 0.429, "cn": 0.5617, "cs": 0.2925, "cy": 0.6883, "da": 0.435, "de": 0.47, "el": 0.3925, "en": 0.3917, "es": 0.2252, "et": 0.3425, "eu": 0.804, "fa": 0.8133, "fi": 0.2675, "fr": 0.4162, "gl": 0.66, "he": 0.5583, "hi": 0.4982, "hu": 0.3825, "hv": 0.29, "ir": 0.5017, "is": 0.405, "it": 0.2575, "ka": 0.6352, "kz": 0.7283, "la": 0.3548, "li": 0.3875, "lv": 0.43, "mk": 0.6325, "mt": 0.5075, "nl": 0.3475, "no": 0.6175, "pl": 0.6, "pt": 0.3775, "ro": 0.5025, "ru": 0.4817, "sk": 0.3475, "sl": 0.25, "sq": 0.2925, "sr": 0.4075, "sv": 0.4125, "tr": 0.41, "uk": 0.601}, "confusion": {"1": {"1": 0.7238, "2": 0.2468, "4": 0.0134, "3": 0.0139, "6": 0.0006, "5": 0.0015}, "2": {"4": 0.162, "2": 0.5413, "3": 0.1193, "5": 0.014, "1": 0.1614, "6": 0.0021}, "3": {"4": 0.5814, "5": 0.1859, "2": 0.0798, "3": 0.1411, "6": 0.0099, "1": 0.002}, "4": {"4": 0.3681, "5": 0.5431, "6": 0.0494, "3": 0.02, "2": 0.0187, "1": 0.0006}, "5": {"5": 0.6818, "6": 0.2325, "4": 0.0831, "2": 0.0023, "3": 0.0002}, "6": {"6": 0.7118, "5": 0.2707, "4": 0.0156, "2": 0.0015, "1": 0.0002, "3": 0.0002}}}, {"model": "speakleash/Bielik-11B-v3.0-Instruct", "avg_exact": 0.445539, "avg_wp": 0.650048, "avg_bias": -0.0673, "avg_parsed": 0.981352, "avg_mae": 0.7343, "total": 21933, "lang_exact": {"ab": 0.360601, "ar": 0.094148, "az": 0.5325, "be": 0.4675, "bg": 0.574586, "bo": 0.565, "ca": 0.367688, "cn": 0.416388, "cs": 0.5575, "cy": 0.335, "da": 0.4975, "de": 0.371667, "el": 0.505, "en": 0.493333, "es": 0.696697, "et": 0.5525, "eu": 0.304858, "fa": 0.43, "fi": 0.5475, "fr": 0.60733, "gl": 0.356667, "he": 0.335017, "hi": 0.379374, "hu": 0.5425, "hv": 0.575, "ir": 0.373333, "is": 0.5, "it": 0.64, "ka": 0.310127, "kz": 0.266667, "la": 0.393762, "li": 0.47, "lv": 0.4225, "mk": 0.3725, 
"mt": 0.4125, "nl": 0.5375, "no": 0.485, "pl": 0.349123, "pt": 0.53, "ro": 0.4575, "ru": 0.423333, "sk": 0.505, "sl": 0.515, "sq": 0.5775, "sr": 0.4325, "sv": 0.5175, "tr": 0.5125, "uk": 0.449477}, "lang_wp": {"ab": 0.570952, "ar": 0.232824, "az": 0.72625, "be": 0.685, "bg": 0.75, "bo": 0.72625, "ca": 0.58078, "cn": 0.646321, "cs": 0.725, "cy": 0.544167, "da": 0.715, "de": 0.630833, "el": 0.71375, "en": 0.716667, "es": 0.837838, "et": 0.76625, "eu": 0.5, "fa": 0.658333, "fi": 0.695, "fr": 0.786649, "gl": 0.583333, "he": 0.560606, "hi": 0.563536, "hu": 0.7325, "hv": 0.7225, "ir": 0.575, "is": 0.715, "it": 0.80375, "ka": 0.503165, "kz": 0.520833, "la": 0.554581, "li": 0.70125, "lv": 0.65125, "mk": 0.5775, "mt": 0.63875, "nl": 0.76, "no": 0.65875, "pl": 0.558772, "pt": 0.7275, "ro": 0.67375, "ru": 0.639167, "sk": 0.69125, "sl": 0.73125, "sq": 0.7675, "sr": 0.6725, "sv": 0.7375, "tr": 0.745, "uk": 0.650697}, "lang_parsed": {"ab": 0.926544, "ar": 0.913486, "az": 1.0, "be": 0.98, "bg": 0.972376, "bo": 0.935, "ca": 1.0, "cn": 0.996656, "cs": 0.9225, "cy": 1.0, "da": 0.9825, "de": 0.996667, "el": 1.0, "en": 0.995, "es": 1.0, "et": 0.995, "eu": 0.99665, "fa": 1.0, "fi": 0.955, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 0.972376, "hu": 0.9675, "hv": 0.905, "ir": 1.0, "is": 0.9775, "it": 0.9975, "ka": 1.0, "kz": 1.0, "la": 0.998051, "li": 1.0, "lv": 0.9925, "mk": 0.9975, "mt": 1.0, "nl": 1.0, "no": 1.0, "pl": 0.875439, "pt": 1.0, "ro": 1.0, "ru": 0.991667, "sk": 0.9575, "sl": 0.955, "sq": 0.97, "sr": 0.9825, "sv": 1.0, "tr": 0.995, "uk": 0.987805}, "lang_mae": {"ab": 0.8216, "ar": 2.0585, "az": 0.5775, "be": 0.6454, "bg": 0.4943, "bo": 0.5909, "ca": 0.8719, "cn": 0.7215, "cs": 0.4444, "cy": 0.9967, "da": 0.5496, "de": 0.7358, "el": 0.59, "en": 0.5678, "es": 0.3273, "et": 0.4648, "eu": 1.1714, "fa": 0.705, "fi": 0.7853, "fr": 0.4346, "gl": 0.8667, "he": 0.9192, "hi": 0.9034, "hu": 0.4884, "hv": 0.5083, "ir": 0.9583, "is": 0.5473, "it": 0.3885, "ka": 1.1361, "kz": 1.0033, "la": 
1.2793, "li": 0.605, "lv": 0.7657, "mk": 0.8972, "mt": 0.7475, "nl": 0.48, "no": 0.7225, "pl": 0.7395, "pt": 0.5475, "ro": 0.7025, "ru": 0.7277, "sk": 0.6057, "sl": 0.4764, "sq": 0.4175, "sr": 0.6845, "sv": 0.525, "tr": 0.5176, "uk": 0.6896}, "confusion": {"1": {"1": 0.8352, "4": 0.0327, "2": 0.0513, "3": 0.0734, "5": 0.0074}, "2": {"4": 0.1633, "1": 0.3314, "3": 0.2951, "2": 0.1803, "5": 0.0298}, "3": {"2": 0.0419, "3": 0.1863, "4": 0.4225, "1": 0.0572, "5": 0.2914, "6": 0.0007}, "4": {"4": 0.3742, "3": 0.0815, "5": 0.4861, "1": 0.0373, "2": 0.0202, "6": 0.0006}, "5": {"4": 0.0846, "5": 0.8737, "3": 0.02, "1": 0.0204, "6": 0.0002, "2": 0.0011}, "6": {"3": 0.0125, "5": 0.8709, "4": 0.0618, "1": 0.0444, "2": 0.0028, "6": 0.0077}}}, {"model": "utter-project/EuroLLM-22B-Instruct-2512", "avg_exact": 0.371688, "avg_wp": 0.583166, "avg_bias": 0.5601, "avg_parsed": 0.999761, "avg_mae": 0.9896, "total": 20910, "lang_exact": {"ab": 0.3125, "ar": 0.437247, "az": 0.359494, "be": 0.378866, "bg": 0.553459, "bo": 0.50137, "ca": 0.432203, "cn": 0.307823, "cs": 0.479339, "cy": 0.212224, "da": 0.482412, "de": 0.30303, "el": 0.41, "en": 0.285714, "es": 0.522796, "et": 0.432292, "eu": 0.181049, "fa": 0.281145, "fi": 0.449568, "fr": 0.417553, "gl": 0.268007, "he": 0.290146, "hi": 0.287594, "hu": 0.402597, "hv": 0.546479, "ir": 0.286195, "is": 0.393768, "it": 0.444444, "ka": 0.382166, "kz": 0.172174, "la": 0.305857, "li": 0.380711, "lv": 0.483544, "mk": 0.313283, "mt": 0.387179, "nl": 0.56, "no": 0.476965, "pl": 0.340149, "pt": 0.40458, "ro": 0.42487, "ru": 0.3, "sk": 0.43734, "sl": 0.443864, "sq": 0.661184, "sr": 0.485934, "sv": 0.358056, "tr": 0.492386, "uk": 0.283582}, "lang_wp": {"ab": 0.536607, "ar": 0.560729, "az": 0.597468, "be": 0.622423, "bg": 0.731132, "bo": 0.709589, "ca": 0.653955, "cn": 0.529762, "cs": 0.692837, "cy": 0.412564, "da": 0.709799, "de": 0.55303, "el": 0.64875, "en": 0.490756, "es": 0.705167, "et": 0.622396, "eu": 0.36379, "fa": 0.520202, "fi": 0.635447, "fr": 
0.648936, "gl": 0.525126, "he": 0.515511, "hi": 0.473684, "hu": 0.622078, "hv": 0.732394, "ir": 0.491582, "is": 0.586402, "it": 0.683463, "ka": 0.506369, "kz": 0.34, "la": 0.5282, "li": 0.602792, "lv": 0.650633, "mk": 0.505013, "mt": 0.592308, "nl": 0.75625, "no": 0.696477, "pl": 0.582714, "pt": 0.629771, "ro": 0.639896, "ru": 0.530172, "sk": 0.647059, "sl": 0.660574, "sq": 0.789474, "sr": 0.704604, "sv": 0.607417, "tr": 0.714467, "uk": 0.51306}, "lang_parsed": {"ab": 0.998214, "ar": 1.0, "az": 1.0, "be": 1.0, "bg": 1.0, "bo": 1.0, "ca": 1.0, "cn": 1.0, "cs": 1.0, "cy": 1.0, "da": 1.0, "de": 1.0, "el": 1.0, "en": 1.0, "es": 1.0, "et": 0.997396, "eu": 1.0, "fa": 1.0, "fi": 0.994236, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "hu": 1.0, "hv": 1.0, "ir": 1.0, "is": 1.0, "it": 1.0, "ka": 1.0, "kz": 1.0, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 1.0, "mt": 1.0, "nl": 1.0, "no": 1.0, "pl": 1.0, "pt": 1.0, "ro": 1.0, "ru": 0.998276, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 1.0, "tr": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.0537, "ar": 1.417, "az": 0.9519, "be": 0.9046, "bg": 0.6164, "bo": 0.663, "ca": 0.8136, "cn": 1.0357, "cs": 0.7135, "cy": 1.4499, "da": 0.6357, "de": 0.9579, "el": 0.775, "en": 1.1832, "es": 0.6717, "et": 0.9112, "eu": 1.6565, "fa": 1.0707, "fi": 0.8986, "fr": 0.8059, "gl": 1.0503, "he": 1.0876, "hi": 1.2143, "hu": 0.9039, "hv": 0.6254, "ir": 1.1717, "is": 1.0935, "it": 0.677, "ka": 1.535, "kz": 1.8122, "la": 1.2364, "li": 0.9239, "lv": 0.8582, "mk": 1.396, "mt": 1.0128, "nl": 0.535, "no": 0.6558, "pl": 0.8792, "pt": 0.916, "ro": 0.8394, "ru": 1.0052, "sk": 0.8159, "sl": 0.8225, "sq": 0.4967, "sr": 0.6931, "sv": 0.9182, "tr": 0.6269, "uk": 1.028}, "confusion": {"3": {"5": 0.8138, "3": 0.1095, "2": 0.0719, "4": 0.002, "1": 0.0027}, "6": {"5": 0.9351, "6": 0.0509, "2": 0.007, "1": 0.0041, "3": 0.0026, "4": 0.0003}, "5": {"5": 0.9685, "1": 0.0034, "2": 0.0054, "3": 0.0086, "6": 0.0141}, "4": {"5": 0.9327, "3": 0.0343, "2": 0.0311, "4": 0.0013, "1": 
0.0006}, "2": {"5": 0.3548, "3": 0.2338, "2": 0.3613, "1": 0.0349, "4": 0.0078, "6": 0.0074}, "1": {"3": 0.098, "2": 0.4931, "5": 0.1401, "1": 0.2581, "4": 0.0089, "6": 0.0017}}}, {"model": "allenai/Olmo-3.1-32B-Instruct", "avg_exact": 0.377707, "avg_wp": 0.576104, "avg_bias": 0.5894, "avg_parsed": 0.999951, "avg_mae": 0.9557, "total": 20275, "lang_exact": {"ab": 0.392453, "ar": 0.09375, "az": 0.453165, "be": 0.323834, "bg": 0.319865, "bo": 0.448795, "ca": 0.437143, "cn": 0.441176, "cs": 0.505988, "cy": 0.408475, "da": 0.449239, "de": 0.360544, "el": 0.367454, "en": 0.478261, "es": 0.415625, "et": 0.37931, "eu": 0.322635, "fa": 0.392256, "fi": 0.49226, "fr": 0.467213, "gl": 0.322148, "he": 0.326087, "hi": 0.420945, "hu": 0.435013, "hv": 0.419048, "ir": 0.368866, "is": 0.344828, "it": 0.536232, "ka": 0.068182, "kz": 0.347518, "la": 0.269565, "li": 0.333333, "lv": 0.309524, "mk": 0.315789, "mt": 0.380208, "nl": 0.528967, "no": 0.370588, "pl": 0.30099, "pt": 0.412088, "ro": 0.336842, "ru": 0.358047, "sk": 0.387863, "sl": 0.380054, "sq": 0.265574, "sr": 0.438462, "sv": 0.402062, "tr": 0.51671, "uk": 0.21519}, "lang_wp": {"ab": 0.582075, "ar": 0.225446, "az": 0.64557, "be": 0.541451, "bg": 0.557239, "bo": 0.680723, "ca": 0.584286, "cn": 0.650519, "cs": 0.691617, "cy": 0.614407, "da": 0.640863, "de": 0.581633, "el": 0.562992, "en": 0.660535, "es": 0.657813, "et": 0.62069, "eu": 0.518581, "fa": 0.571549, "fi": 0.660991, "fr": 0.643443, "gl": 0.495805, "he": 0.555254, "hi": 0.575975, "hu": 0.616711, "hv": 0.666667, "ir": 0.566836, "is": 0.514368, "it": 0.723188, "ka": 0.201299, "kz": 0.531915, "la": 0.423913, "li": 0.580103, "lv": 0.547619, "mk": 0.489975, "mt": 0.56901, "nl": 0.724181, "no": 0.548529, "pl": 0.543564, "pt": 0.615385, "ro": 0.563158, "ru": 0.55877, "sk": 0.614776, "sl": 0.588949, "sq": 0.459016, "sr": 0.661538, "sv": 0.60567, "tr": 0.727506, "uk": 0.409283}, "lang_parsed": {"ab": 1.0, "ar": 1.0, "az": 1.0, "be": 1.0, "bg": 1.0, "bo": 1.0, "ca": 1.0, "cn": 
1.0, "cs": 1.0, "cy": 1.0, "da": 1.0, "de": 1.0, "el": 1.0, "en": 1.0, "es": 1.0, "et": 1.0, "eu": 1.0, "fa": 1.0, "fi": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "hu": 1.0, "hv": 1.0, "ir": 1.0, "is": 1.0, "it": 1.0, "ka": 1.0, "kz": 1.0, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 1.0, "mt": 1.0, "nl": 1.0, "no": 1.0, "pl": 1.0, "pt": 1.0, "ro": 1.0, "ru": 0.998192, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 1.0, "tr": 1.0, "uk": 1.0}, "lang_mae": {"ab": 0.9642, "ar": 1.9152, "az": 0.7696, "be": 0.9663, "bg": 0.9933, "bo": 0.6867, "ca": 0.9514, "cn": 0.7388, "cs": 0.6677, "cy": 0.8508, "da": 0.7893, "de": 0.8759, "el": 0.9606, "en": 0.7642, "es": 0.7219, "et": 0.7931, "eu": 1.0861, "fa": 0.9259, "fi": 0.774, "fr": 0.7896, "gl": 1.1426, "he": 0.942, "hi": 0.9815, "hu": 0.8674, "hv": 0.6857, "ir": 0.934, "is": 1.5776, "it": 0.5913, "ka": 1.8214, "kz": 1.0993, "la": 1.6674, "li": 0.8889, "lv": 1.1032, "mk": 1.1855, "mt": 0.974, "nl": 0.5894, "no": 1.1235, "pl": 0.998, "pt": 0.8901, "ro": 0.9842, "ru": 0.9348, "sk": 0.7889, "sl": 0.8571, "sq": 1.2, "sr": 0.7, "sv": 0.8351, "tr": 0.5758, "uk": 1.3038}, "confusion": {"1": {"2": 0.2279, "6": 0.0437, "3": 0.302, "1": 0.3751, "4": 0.0391, "5": 0.0122}, "2": {"3": 0.5088, "4": 0.1397, "1": 0.0887, "6": 0.0536, "2": 0.1323, "5": 0.077}, "3": {"3": 0.3729, "4": 0.289, "6": 0.0988, "5": 0.2106, "1": 0.0191, "2": 0.0095}, "4": {"4": 0.2984, "3": 0.1757, "5": 0.2991, "6": 0.2048, "2": 0.0039, "1": 0.0181}, "5": {"5": 0.4112, "6": 0.3921, "4": 0.1305, "3": 0.0565, "1": 0.0091, "2": 0.0007}, "6": {"6": 0.7092, "5": 0.1825, "4": 0.0725, "3": 0.0261, "1": 0.0084, "2": 0.0012}}}, {"model": "meta-llama/Llama-3.3-70B-Instruct", "avg_exact": 0.421966, "avg_wp": 0.552045, "avg_bias": 0.9204, "avg_parsed": 0.999818, "avg_mae": 1.0478, "total": 21952, "lang_exact": {"ab": 0.3, "ar": 0.485, "az": 0.505, "be": 0.4825, "bg": 0.549724, "bo": 0.6125, "ca": 0.481894, "cn": 0.386667, "cs": 0.57, "cy": 0.275, "da": 0.4825, "de": 
0.363333, "el": 0.4175, "en": 0.358333, "es": 0.489489, "et": 0.545, "eu": 0.249581, "fa": 0.341667, "fi": 0.535, "fr": 0.439791, "gl": 0.263333, "he": 0.326667, "hi": 0.301471, "hu": 0.4625, "hv": 0.5825, "ir": 0.335, "is": 0.62, "it": 0.5375, "ka": 0.477987, "kz": 0.228333, "la": 0.463938, "li": 0.4275, "lv": 0.4825, "mk": 0.3325, "mt": 0.4675, "nl": 0.5625, "no": 0.4925, "pl": 0.280702, "pt": 0.5025, "ro": 0.415, "ru": 0.388333, "sk": 0.46, "sl": 0.5875, "sq": 0.495, "sr": 0.45, "sv": 0.425, "tr": 0.5475, "uk": 0.283972}, "lang_wp": {"ab": 0.453333, "ar": 0.565, "az": 0.605, "be": 0.5775, "bg": 0.640884, "bo": 0.70875, "ca": 0.547354, "cn": 0.530833, "cs": 0.665, "cy": 0.454167, "da": 0.6275, "de": 0.519167, "el": 0.55875, "en": 0.5425, "es": 0.605105, "et": 0.695, "eu": 0.38526, "fa": 0.485833, "fi": 0.64125, "fr": 0.552356, "gl": 0.4375, "he": 0.4875, "hi": 0.440257, "hu": 0.58625, "hv": 0.6925, "ir": 0.485833, "is": 0.725, "it": 0.63625, "ka": 0.561321, "kz": 0.376667, "la": 0.615984, "li": 0.55875, "lv": 0.60875, "mk": 0.44125, "mt": 0.59875, "nl": 0.68125, "no": 0.58625, "pl": 0.435088, "pt": 0.60125, "ro": 0.5325, "ru": 0.529167, "sk": 0.58125, "sl": 0.71, "sq": 0.63625, "sr": 0.5775, "sv": 0.5475, "tr": 0.69125, "uk": 0.399826}, "lang_parsed": {"ab": 1.0, "ar": 1.0, "az": 1.0, "be": 1.0, "bg": 1.0, "bo": 1.0, "ca": 1.0, "cn": 1.0, "cs": 1.0, "cy": 1.0, "da": 1.0, "de": 1.0, "el": 1.0, "en": 1.0, "es": 1.0, "et": 1.0, "eu": 1.0, "fa": 1.0, "fi": 0.9925, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "hu": 1.0, "hv": 1.0, "ir": 1.0, "is": 1.0, "it": 1.0, "ka": 1.0, "kz": 1.0, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 1.0, "mt": 1.0, "nl": 1.0, "no": 1.0, "pl": 1.0, "pt": 1.0, "ro": 1.0, "ru": 1.0, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 0.9975, "tr": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.29, "ar": 1.2725, "az": 1.025, "be": 1.0575, "bg": 0.8204, "bo": 0.6125, "ca": 1.2033, "cn": 1.04, "cs": 0.725, "cy": 1.2683, "da": 0.83, "de": 1.035, "el": 1.0475, 
"en": 0.9967, "es": 0.8649, "et": 0.6425, "eu": 1.5394, "fa": 1.2017, "fi": 0.7632, "fr": 1.0471, "gl": 1.385, "he": 1.1683, "hi": 1.3015, "hu": 1.0175, "hv": 0.65, "ir": 1.1633, "is": 0.65, "it": 0.795, "ka": 1.2013, "kz": 1.5567, "la": 0.922, "li": 1.0325, "lv": 0.9475, "mk": 1.5725, "mt": 1.02, "nl": 0.6775, "no": 0.96, "pl": 1.2246, "pt": 0.9025, "ro": 1.1375, "ru": 0.995, "sk": 0.95, "sl": 0.6275, "sq": 0.7875, "sr": 1.0225, "sv": 1.0526, "tr": 0.655, "uk": 1.3432}, "confusion": {"1": {"1": 0.4084, "2": 0.2733, "5": 0.0553, "4": 0.2302, "6": 0.0038, "3": 0.029}, "2": {"4": 0.4936, "5": 0.2946, "2": 0.1358, "6": 0.0159, "1": 0.0455, "3": 0.0146}, "3": {"5": 0.7686, "6": 0.0758, "4": 0.1457, "2": 0.004, "1": 0.0059}, "4": {"5": 0.7606, "6": 0.1988, "4": 0.0381, "1": 0.0013, "2": 0.0013}, "5": {"5": 0.6645, "6": 0.3313, "4": 0.004, "2": 0.0002}, "6": {"6": 0.7575, "5": 0.2392, "4": 0.0029, "1": 0.0004}}}, {"model": "CYFRAGOVPL/Llama-PLLuM-70B-chat-250801", "avg_exact": 0.298105, "avg_wp": 0.518928, "avg_bias": -0.3311, "avg_parsed": 0.996492, "avg_mae": 1.0453, "total": 21952, "lang_exact": {"ab": 0.296667, "ar": 0.395, "az": 0.2425, "be": 0.345, "bg": 0.303867, "bo": 0.33, "ca": 0.169916, "cn": 0.323333, "cs": 0.4025, "cy": 0.361667, "da": 0.3525, "de": 0.335, "el": 0.1825, "en": 0.295, "es": 0.312312, "et": 0.3075, "eu": 0.271357, "fa": 0.315, "fi": 0.2375, "fr": 0.374346, "gl": 0.283333, "he": 0.323333, "hi": 0.395221, "hu": 0.275, "hv": 0.3525, "ir": 0.298333, "is": 0.405, "it": 0.315, "ka": 0.081761, "kz": 0.305, "la": 0.2846, "li": 0.21, "lv": 0.27, "mk": 0.075, "mt": 0.3225, "nl": 0.3125, "no": 0.295, "pl": 0.315789, "pt": 0.2975, "ro": 0.2475, "ru": 0.335, "sk": 0.2675, "sl": 0.2125, "sq": 0.1675, "sr": 0.32, "sv": 0.3375, "tr": 0.2925, "uk": 0.358885}, "lang_wp": {"ab": 0.515833, "ar": 0.61125, "az": 0.4675, "be": 0.5825, "bg": 0.517956, "bo": 0.55875, "ca": 0.4039, "cn": 0.520833, "cs": 0.61875, "cy": 0.583333, "da": 0.5775, "de": 0.5775, "el": 0.41375, 
"en": 0.4825, "es": 0.555556, "et": 0.515, "eu": 0.464824, "fa": 0.515833, "fi": 0.455, "fr": 0.590314, "gl": 0.48, "he": 0.535, "hi": 0.606618, "hu": 0.49875, "hv": 0.585, "ir": 0.465833, "is": 0.63625, "it": 0.53125, "ka": 0.292453, "kz": 0.520833, "la": 0.475634, "li": 0.47375, "lv": 0.515, "mk": 0.305, "mt": 0.58, "nl": 0.55625, "no": 0.495, "pl": 0.534211, "pt": 0.515, "ro": 0.455, "ru": 0.59, "sk": 0.47625, "sl": 0.4625, "sq": 0.40625, "sr": 0.56875, "sv": 0.57875, "tr": 0.5325, "uk": 0.589721}, "lang_parsed": {"ab": 1.0, "ar": 1.0, "az": 1.0, "be": 1.0, "bg": 0.997238, "bo": 0.9975, "ca": 1.0, "cn": 1.0, "cs": 1.0, "cy": 1.0, "da": 0.9975, "de": 1.0, "el": 1.0, "en": 0.995, "es": 1.0, "et": 1.0, "eu": 0.998325, "fa": 1.0, "fi": 0.9925, "fr": 1.0, "gl": 0.998333, "he": 0.998333, "hi": 1.0, "hu": 0.9975, "hv": 1.0, "ir": 0.998333, "is": 1.0, "it": 0.9975, "ka": 0.996855, "kz": 0.995, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 0.9975, "mt": 1.0, "nl": 1.0, "no": 0.9975, "pl": 0.907018, "pt": 1.0, "ro": 0.9975, "ru": 1.0, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 0.9975, "tr": 0.9975, "uk": 1.0}, "lang_mae": {"ab": 1.0233, "ar": 0.885, "az": 1.15, "be": 0.885, "bg": 1.036, "bo": 0.9323, "ca": 1.2897, "cn": 1.0767, "cs": 0.85, "cy": 0.92, "da": 0.8822, "de": 0.905, "el": 1.245, "en": 1.1993, "es": 0.9129, "et": 1.155, "eu": 1.2383, "fa": 1.09, "fi": 1.2368, "fr": 0.8586, "gl": 1.1235, "he": 0.9666, "hi": 0.8199, "hu": 1.0426, "hv": 0.9125, "ir": 1.3556, "is": 0.765, "it": 0.9975, "ka": 1.5331, "kz": 1.0519, "la": 1.3957, "li": 1.065, "lv": 1.0075, "mk": 1.4211, "mt": 0.8875, "nl": 0.92, "no": 1.1378, "pl": 0.8781, "pt": 1.0225, "ro": 1.193, "ru": 0.8533, "sk": 1.1675, "sl": 1.11, "sq": 1.2375, "sr": 0.92, "sv": 0.8972, "tr": 0.9474, "uk": 0.8554}, "confusion": {"1": {"1": 0.6736, "3": 0.2988, "4": 0.0116, "5": 0.0075, "2": 0.0079, "6": 0.0006}, "2": {"3": 0.5543, "1": 0.3632, "5": 0.0304, "4": 0.044, "6": 0.0017, "2": 0.0065}, "3": {"3": 0.5333, "4": 0.1967, 
"1": 0.1281, "5": 0.1406, "6": 0.0007, "2": 0.0007}, "4": {"3": 0.4161, "4": 0.266, "5": 0.2365, "1": 0.0776, "6": 0.0025, "2": 0.0013}, "5": {"4": 0.3081, "3": 0.2347, "5": 0.427, "1": 0.0258, "6": 0.0038, "2": 0.0006}, "6": {"4": 0.2463, "5": 0.539, "3": 0.1553, "1": 0.0383, "6": 0.0209, "2": 0.0002}}}, {"model": "mistralai/Mistral-Small-24B-Instruct-2501", "avg_exact": 0.216916, "avg_wp": 0.501815, "avg_bias": 0.068, "avg_parsed": 1.0, "avg_mae": 1.0172, "total": 20939, "lang_exact": {"ab": 0.265487, "ar": 0.087302, "az": 0.08794, "be": 0.151282, "bg": 0.197452, "bo": 0.210227, "ca": 0.073446, "cn": 0.332198, "cs": 0.169014, "cy": 0.28088, "da": 0.15443, "de": 0.314815, "el": 0.137845, "en": 0.364094, "es": 0.158055, "et": 0.173228, "eu": 0.225589, "fa": 0.242424, "fi": 0.16568, "fr": 0.221053, "gl": 0.229481, "he": 0.340171, "hi": 0.263969, "hu": 0.159269, "hv": 0.277439, "ir": 0.275804, "is": 0.296919, "it": 0.158602, "ka": 0.101587, "kz": 0.290155, "la": 0.422993, "li": 0.164103, "lv": 0.204663, "mk": 0.042607, "mt": 0.101562, "nl": 0.176768, "no": 0.112392, "pl": 0.361905, "pt": 0.082902, "ro": 0.117493, "ru": 0.261246, "sk": 0.265464, "sl": 0.167109, "sq": 0.203647, "sr": 0.164975, "sv": 0.136247, "tr": 0.172589, "uk": 0.254302}, "lang_wp": {"ab": 0.525664, "ar": 0.345238, "az": 0.423367, "be": 0.435897, "bg": 0.490446, "bo": 0.539773, "ca": 0.391243, "cn": 0.596252, "cs": 0.487324, "cy": 0.542301, "da": 0.481013, "de": 0.602694, "el": 0.446115, "en": 0.638423, "es": 0.492401, "et": 0.497375, "eu": 0.463805, "fa": 0.458754, "fi": 0.502959, "fr": 0.530263, "gl": 0.470687, "he": 0.612821, "hi": 0.547206, "hu": 0.466057, "hv": 0.599085, "ir": 0.51692, "is": 0.578431, "it": 0.481183, "ka": 0.380952, "kz": 0.518135, "la": 0.548807, "li": 0.465385, "lv": 0.511658, "mk": 0.318296, "mt": 0.386719, "nl": 0.515152, "no": 0.420749, "pl": 0.612381, "pt": 0.409326, "ro": 0.411227, "ru": 0.563149, "sk": 0.57732, "sl": 0.527851, "sq": 0.56535, "sr": 0.442893, "sv": 
0.447301, "tr": 0.454315, "uk": 0.520076}, "lang_parsed": {"ab": 1.0, "ar": 1.0, "az": 1.0, "be": 1.0, "bg": 1.0, "bo": 1.0, "ca": 1.0, "cn": 1.0, "cs": 1.0, "cy": 1.0, "da": 1.0, "de": 1.0, "el": 1.0, "en": 1.0, "es": 1.0, "et": 1.0, "eu": 1.0, "fa": 1.0, "fi": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "hu": 1.0, "hv": 1.0, "ir": 1.0, "is": 1.0, "it": 1.0, "ka": 1.0, "kz": 1.0, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 1.0, "mt": 1.0, "nl": 1.0, "no": 1.0, "pl": 1.0, "pt": 1.0, "ro": 1.0, "ru": 1.0, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 1.0, "tr": 1.0, "uk": 1.0}, "lang_mae": {"ab": 0.9681, "ar": 1.328, "az": 1.1683, "be": 1.1564, "bg": 1.0414, "bo": 0.9403, "ca": 1.2486, "cn": 0.816, "cs": 1.0394, "cy": 0.9255, "da": 1.0506, "de": 0.7963, "el": 1.1378, "en": 0.7282, "es": 1.0213, "et": 1.0131, "eu": 1.1296, "fa": 1.1044, "fi": 1.0059, "fr": 0.9553, "gl": 1.0787, "he": 0.7863, "hi": 0.9094, "hu": 1.0809, "hv": 0.8018, "ir": 0.9915, "is": 0.8543, "it": 1.0618, "ka": 1.2413, "kz": 1.0017, "la": 1.0738, "li": 1.0769, "lv": 0.9845, "mk": 1.386, "mt": 1.2422, "nl": 0.9722, "no": 1.1758, "pl": 0.8514, "pt": 1.1995, "ro": 1.2324, "ru": 0.8754, "sk": 0.8479, "sl": 0.9443, "sq": 0.8693, "sr": 1.1396, "sv": 1.1105, "tr": 1.1015, "uk": 0.9751}, "confusion": {"3": {"3": 0.3689, "4": 0.5614, "2": 0.0329, "5": 0.0295, "1": 0.0074}, "2": {"4": 0.1826, "3": 0.5406, "2": 0.2634, "5": 0.0013, "1": 0.0122}, "1": {"3": 0.2925, "2": 0.584, "4": 0.0435, "1": 0.0793, "5": 0.0007}, "4": {"4": 0.6457, "3": 0.2184, "5": 0.0959, "2": 0.0248, "1": 0.0152}, "5": {"4": 0.6735, "5": 0.2695, "3": 0.0444, "2": 0.0055, "6": 0.0011, "1": 0.0059}, "6": {"5": 0.5407, "4": 0.409, "6": 0.0237, "3": 0.0232, "2": 0.0021, "1": 0.0013}}}, {"model": "speakleash/Bielik-11B-v2.6-Instruct", "avg_exact": 0.26891, "avg_wp": 0.480144, "avg_bias": 0.4487, "avg_parsed": 1.0, "avg_mae": 1.1156, "total": 21933, "lang_exact": {"ab": 0.297162, "ar": 0.002545, "az": 0.2625, "be": 0.2875, "bg": 0.292818, 
"bo": 0.365, "ca": 0.306407, "cn": 0.274247, "cs": 0.3325, "cy": 0.196667, "da": 0.415, "de": 0.288333, "el": 0.28, "en": 0.301667, "es": 0.42042, "et": 0.28, "eu": 0.232831, "fa": 0.293333, "fi": 0.295, "fr": 0.272251, "gl": 0.258333, "he": 0.215488, "hi": 0.267035, "hu": 0.2325, "hv": 0.2975, "ir": 0.186667, "is": 0.1125, "it": 0.33, "ka": 0.132911, "kz": 0.215, "la": 0.204678, "li": 0.23, "lv": 0.1775, "mk": 0.305, "mt": 0.2825, "nl": 0.45, "no": 0.2725, "pl": 0.292982, "pt": 0.2525, "ro": 0.215, "ru": 0.293333, "sk": 0.32, "sl": 0.38, "sq": 0.16, "sr": 0.355, "sv": 0.3575, "tr": 0.2675, "uk": 0.249129}, "lang_wp": {"ab": 0.488314, "ar": 0.128499, "az": 0.4725, "be": 0.49125, "bg": 0.524862, "bo": 0.58, "ca": 0.493036, "cn": 0.441472, "cs": 0.595, "cy": 0.393333, "da": 0.63125, "de": 0.48, "el": 0.48625, "en": 0.525833, "es": 0.600601, "et": 0.50375, "eu": 0.449749, "fa": 0.506667, "fi": 0.53, "fr": 0.502618, "gl": 0.460833, "he": 0.43266, "hi": 0.443831, "hu": 0.46875, "hv": 0.5525, "ir": 0.360833, "is": 0.2825, "it": 0.58125, "ka": 0.349684, "kz": 0.4175, "la": 0.397661, "li": 0.47, "lv": 0.43125, "mk": 0.49125, "mt": 0.44375, "nl": 0.675, "no": 0.4775, "pl": 0.510526, "pt": 0.47625, "ro": 0.47, "ru": 0.5325, "sk": 0.5675, "sl": 0.6125, "sq": 0.3875, "sr": 0.56125, "sv": 0.5525, "tr": 0.4825, "uk": 0.482578}, "lang_parsed": {"ab": 1.0, "ar": 1.0, "az": 1.0, "be": 1.0, "bg": 1.0, "bo": 1.0, "ca": 1.0, "cn": 1.0, "cs": 1.0, "cy": 1.0, "da": 1.0, "de": 1.0, "el": 1.0, "en": 1.0, "es": 1.0, "et": 1.0, "eu": 1.0, "fa": 1.0, "fi": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "hu": 1.0, "hv": 1.0, "ir": 1.0, "is": 1.0, "it": 1.0, "ka": 1.0, "kz": 1.0, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 1.0, "mt": 1.0, "nl": 1.0, "no": 1.0, "pl": 1.0, "pt": 1.0, "ro": 1.0, "ru": 1.0, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 1.0, "tr": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.1302, "ar": 2.0204, "az": 1.1125, "be": 1.0675, "bg": 0.989, "bo": 0.85, "ca": 1.1783, "cn": 
1.2324, "cs": 0.8525, "cy": 1.3217, "da": 0.76, "de": 1.0733, "el": 1.1475, "en": 0.9983, "es": 0.8228, "et": 1.0075, "eu": 1.1859, "fa": 1.0367, "fi": 0.99, "fr": 1.0183, "gl": 1.16, "he": 1.2121, "hi": 1.2449, "hu": 1.1325, "hv": 0.905, "ir": 1.4883, "is": 1.81, "it": 0.8575, "ka": 1.5158, "kz": 1.2533, "la": 1.3899, "li": 1.08, "lv": 1.1575, "mk": 1.095, "mt": 1.3, "nl": 0.655, "no": 1.175, "pl": 1.0175, "pt": 1.0825, "ro": 1.105, "ru": 0.95, "sk": 0.8725, "sl": 0.7775, "sq": 1.265, "sr": 0.9025, "sv": 0.92, "tr": 1.055, "uk": 1.0418}, "confusion": {"1": {"3": 0.7791, "5": 0.0137, "1": 0.1906, "6": 0.0115, "4": 0.0036, "2": 0.0015}, "2": {"3": 0.8773, "6": 0.0123, "5": 0.0831, "4": 0.0161, "1": 0.0111, "2": 0.0002}, "3": {"5": 0.3681, "3": 0.5488, "6": 0.06, "4": 0.0218, "1": 0.0013}, "4": {"3": 0.4065, "5": 0.4784, "6": 0.0926, "4": 0.0163, "1": 0.0063}, "5": {"5": 0.5982, "3": 0.1713, "6": 0.2082, "4": 0.0213, "1": 0.001}, "6": {"5": 0.5402, "3": 0.1576, "6": 0.2809, "4": 0.0203, "1": 0.0011}}}, {"model": "mistralai/Mistral-7B-Instruct-v0.3", "avg_exact": 0.288776, "avg_wp": 0.464722, "avg_bias": 0.8116, "avg_parsed": 0.997271, "avg_mae": 1.3553, "total": 20154, "lang_exact": {"ab": 0.270541, "ar": 0.332103, "az": 0.356962, "be": 0.310078, "bg": 0.294702, "bo": 0.430303, "ca": 0.367816, "cn": 0.25, "cs": 0.373494, "cy": 0.21562, "da": 0.401535, "de": 0.295337, "el": 0.25, "en": 0.271959, "es": 0.415335, "et": 0.286096, "eu": 0.20339, "fa": 0.310811, "fi": 0.329154, "fr": 0.236842, "gl": 0.220168, "he": 0.240876, "hi": 0.26501, "hu": 0.352785, "hv": 0.278317, "ir": 0.241963, "is": 0.265672, "it": 0.343373, "ka": 0.280255, "kz": 0.211538, "la": 0.247265, "li": 0.294574, "lv": 0.283784, "mk": 0.275689, "mt": 0.234987, "nl": 0.482143, "no": 0.32622, "pl": 0.289738, "pt": 0.329377, "ro": 0.283422, "ru": 0.262877, "sk": 0.236074, "sl": 0.294278, "sq": 0.176667, "sr": 0.369231, "sv": 0.359173, "tr": 0.348052, "uk": 0.198413}, "lang_wp": {"ab": 0.4499, "ar": 0.468635, 
"az": 0.512658, "be": 0.465116, "bg": 0.491722, "bo": 0.607576, "ca": 0.525862, "cn": 0.433621, "cs": 0.591867, "cy": 0.363328, "da": 0.627877, "de": 0.468912, "el": 0.457447, "en": 0.421453, "es": 0.583067, "et": 0.477273, "eu": 0.340678, "fa": 0.471284, "fi": 0.523511, "fr": 0.467836, "gl": 0.373109, "he": 0.437956, "hi": 0.406832, "hu": 0.554377, "hv": 0.503236, "ir": 0.400169, "is": 0.468657, "it": 0.554217, "ka": 0.444268, "kz": 0.38986, "la": 0.392779, "li": 0.425065, "lv": 0.463514, "mk": 0.429825, "mt": 0.370757, "nl": 0.668367, "no": 0.527439, "pl": 0.457746, "pt": 0.507418, "ro": 0.487968, "ru": 0.422735, "sk": 0.442971, "sl": 0.510899, "sq": 0.35, "sr": 0.565385, "sv": 0.550388, "tr": 0.52987, "uk": 0.374008}, "lang_parsed": {"ab": 0.993988, "ar": 0.98524, "az": 1.0, "be": 0.994832, "bg": 1.0, "bo": 0.987879, "ca": 0.997126, "cn": 1.0, "cs": 1.0, "cy": 0.991511, "da": 1.0, "de": 0.996546, "el": 1.0, "en": 0.996622, "es": 1.0, "et": 0.997326, "eu": 0.998305, "fa": 0.998311, "fi": 1.0, "fr": 1.0, "gl": 0.998319, "he": 1.0, "hi": 0.995859, "hu": 0.997347, "hv": 1.0, "ir": 0.996616, "is": 1.0, "it": 0.990964, "ka": 0.996815, "kz": 0.991259, "la": 0.995624, "li": 1.0, "lv": 1.0, "mk": 0.997494, "mt": 1.0, "nl": 1.0, "no": 0.990854, "pl": 1.0, "pt": 0.997033, "ro": 1.0, "ru": 1.0, "sk": 0.992042, "sl": 0.997275, "sq": 1.0, "sr": 0.997436, "sv": 1.0, "tr": 0.994805, "uk": 1.0}, "lang_mae": {"ab": 1.2681, "ar": 1.5693, "az": 1.4785, "be": 1.4987, "bg": 1.2815, "bo": 0.9479, "ca": 1.2882, "cn": 1.2759, "cs": 0.9699, "cy": 1.5993, "da": 0.8849, "de": 1.1976, "el": 1.3484, "en": 1.4729, "es": 0.9489, "et": 1.4424, "eu": 1.8506, "fa": 1.3113, "fi": 1.1411, "fr": 1.2427, "gl": 1.6229, "he": 1.3084, "hi": 1.4449, "hu": 1.0239, "hv": 1.1909, "ir": 1.5586, "is": 1.4985, "it": 0.9757, "ka": 1.4792, "kz": 1.4656, "la": 1.7846, "li": 1.6357, "lv": 1.427, "mk": 1.6683, "mt": 1.9817, "nl": 0.7857, "no": 1.1323, "pl": 1.2113, "pt": 1.2589, "ro": 1.3182, "ru": 1.3908, "sk": 
1.4358, "sl": 1.1585, "sq": 1.6833, "sr": 1.0051, "sv": 1.1421, "tr": 1.094, "uk": 1.4603}, "confusion": {"6": {"5": 0.4509, "3": 0.0545, "6": 0.4353, "4": 0.0255, "2": 0.0084, "1": 0.0255}, "1": {"3": 0.3259, "1": 0.3726, "5": 0.1265, "2": 0.1027, "4": 0.0271, "6": 0.0451}, "5": {"5": 0.4683, "4": 0.0352, "3": 0.081, "6": 0.386, "2": 0.0089, "1": 0.0206}, "4": {"5": 0.4871, "3": 0.1381, "4": 0.0394, "2": 0.0174, "6": 0.2716, "1": 0.0465}, "3": {"4": 0.0485, "5": 0.4675, "3": 0.1757, "6": 0.2468, "1": 0.0451, "2": 0.0164}, "2": {"4": 0.0497, "3": 0.3139, "5": 0.3039, "1": 0.1398, "2": 0.0495, "6": 0.1432}}}, {"model": "allenai/Olmo-3-7B-Instruct", "avg_exact": 0.266683, "avg_wp": 0.461356, "avg_bias": 0.4023, "avg_parsed": 0.999507, "avg_mae": 1.3081, "total": 20275, "lang_exact": {"ab": 0.273585, "ar": 0.160714, "az": 0.058228, "be": 0.19171, "bg": 0.343434, "bo": 0.38253, "ca": 0.457143, "cn": 0.311419, "cs": 0.242515, "cy": 0.201695, "da": 0.439086, "de": 0.295918, "el": 0.186352, "en": 0.337793, "es": 0.34375, "et": 0.177719, "eu": 0.228041, "fa": 0.188552, "fi": 0.235294, "fr": 0.273224, "gl": 0.315436, "he": 0.211957, "hi": 0.275154, "hu": 0.153846, "hv": 0.35873, "ir": 0.153976, "is": 0.063218, "it": 0.289855, "ka": 0.048701, "kz": 0.171986, "la": 0.143478, "li": 0.20155, "lv": 0.335979, "mk": 0.461153, "mt": 0.033854, "nl": 0.551637, "no": 0.326471, "pl": 0.20396, "pt": 0.362637, "ro": 0.365789, "ru": 0.307414, "sk": 0.308707, "sl": 0.191375, "sq": 0.278689, "sr": 0.502564, "sv": 0.404639, "tr": 0.321337, "uk": 0.236287}, "lang_wp": {"ab": 0.438679, "ar": 0.287946, "az": 0.232911, "be": 0.382124, "bg": 0.584175, "bo": 0.561747, "ca": 0.63, "cn": 0.497405, "cs": 0.458084, "cy": 0.382203, "da": 0.64467, "de": 0.514456, "el": 0.370079, "en": 0.551003, "es": 0.626563, "et": 0.334218, "eu": 0.411318, "fa": 0.292929, "fi": 0.428793, "fr": 0.505464, "gl": 0.52349, "he": 0.40942, "hi": 0.474333, "hu": 0.346154, "hv": 0.58254, "ir": 0.331641, "is": 0.275862, "it": 
0.56087, "ka": 0.167208, "kz": 0.380319, "la": 0.365217, "li": 0.392765, "lv": 0.496032, "mk": 0.657895, "mt": 0.192708, "nl": 0.72796, "no": 0.529412, "pl": 0.434653, "pt": 0.605769, "ro": 0.569737, "ru": 0.508137, "sk": 0.470976, "sl": 0.378706, "sq": 0.508197, "sr": 0.697436, "sv": 0.606959, "tr": 0.541131, "uk": 0.420886}, "lang_parsed": {"ab": 1.0, "ar": 0.959821, "az": 1.0, "be": 1.0, "bg": 1.0, "bo": 1.0, "ca": 1.0, "cn": 1.0, "cs": 1.0, "cy": 1.0, "da": 1.0, "de": 1.0, "el": 1.0, "en": 1.0, "es": 1.0, "et": 1.0, "eu": 1.0, "fa": 1.0, "fi": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "hu": 1.0, "hv": 1.0, "ir": 1.0, "is": 1.0, "it": 1.0, "ka": 1.0, "kz": 1.0, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 1.0, "mt": 1.0, "nl": 1.0, "no": 0.997059, "pl": 1.0, "pt": 1.0, "ro": 1.0, "ru": 1.0, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 1.0, "tr": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.3151, "ar": 2.0558, "az": 1.6886, "be": 1.4456, "bg": 1.0303, "bo": 1.1627, "ca": 0.9343, "cn": 1.1644, "cs": 1.494, "cy": 1.4085, "da": 0.8503, "de": 1.051, "el": 1.5801, "en": 0.9849, "es": 0.8187, "et": 1.6101, "eu": 1.3581, "fa": 2.2626, "fi": 1.3003, "fr": 1.1913, "gl": 1.0654, "he": 1.3623, "hi": 1.2053, "hu": 1.5438, "hv": 1.0889, "ir": 1.467, "is": 1.9655, "it": 0.9565, "ka": 2.0714, "kz": 1.4078, "la": 1.5283, "li": 1.4031, "lv": 1.4286, "mk": 0.8521, "mt": 1.776, "nl": 0.6373, "no": 1.1799, "pl": 1.3366, "pt": 0.9148, "ro": 1.0579, "ru": 1.123, "sk": 1.8496, "sl": 1.7008, "sq": 1.1443, "sr": 0.6974, "sv": 0.9562, "tr": 1.1183, "uk": 1.3544}, "confusion": {"2": {"4": 0.1651, "5": 0.1141, "2": 0.4071, "6": 0.0734, "3": 0.2152, "1": 0.0251}, "1": {"5": 0.0335, "3": 0.1996, "4": 0.0719, "2": 0.5602, "6": 0.0635, "1": 0.0713}, "3": {"4": 0.2338, "2": 0.1854, "5": 0.2849, "3": 0.137, "6": 0.1568, "1": 0.002}, "4": {"5": 0.3056, "4": 0.2345, "6": 0.1951, "3": 0.1337, "2": 0.1305, "1": 0.0006}, "5": {"5": 0.282, "4": 0.1729, "6": 0.3272, "3": 0.1108, "2": 0.1038, "1": 
0.0033}, "6": {"4": 0.2044, "5": 0.1873, "2": 0.1228, "6": 0.3807, "3": 0.1005, "1": 0.0044}}}, {"model": "utter-project/EuroLLM-9B-Instruct-2512", "avg_exact": 0.272262, "avg_wp": 0.422382, "avg_bias": 1.5451, "avg_parsed": 0.999904, "avg_mae": 1.7566, "total": 20910, "lang_exact": {"ab": 0.257143, "ar": 0.291498, "az": 0.258228, "be": 0.314433, "bg": 0.242138, "bo": 0.394521, "ca": 0.279661, "cn": 0.312925, "cs": 0.355372, "cy": 0.239389, "da": 0.376884, "de": 0.178451, "el": 0.285, "en": 0.201681, "es": 0.194529, "et": 0.395833, "eu": 0.165821, "fa": 0.279461, "fi": 0.368876, "fr": 0.226064, "gl": 0.184255, "he": 0.259124, "hi": 0.161654, "hu": 0.293506, "hv": 0.335211, "ir": 0.281145, "is": 0.314448, "it": 0.229974, "ka": 0.292994, "kz": 0.227826, "la": 0.251627, "li": 0.269036, "lv": 0.326582, "mk": 0.305764, "mt": 0.305128, "nl": 0.3975, "no": 0.249322, "pl": 0.193309, "pt": 0.244275, "ro": 0.251295, "ru": 0.239655, "sk": 0.276215, "sl": 0.310705, "sq": 0.296053, "sr": 0.386189, "sv": 0.42711, "tr": 0.347716, "uk": 0.158582}, "lang_wp": {"ab": 0.36875, "ar": 0.439271, "az": 0.387342, "be": 0.536082, "bg": 0.477987, "bo": 0.589041, "ca": 0.433616, "cn": 0.416667, "cs": 0.506887, "cy": 0.376061, "da": 0.541457, "de": 0.279461, "el": 0.45375, "en": 0.312605, "es": 0.381459, "et": 0.619792, "eu": 0.250423, "fa": 0.414983, "fi": 0.544669, "fr": 0.398936, "gl": 0.274707, "he": 0.382299, "hi": 0.279135, "hu": 0.449351, "hv": 0.515493, "ir": 0.382997, "is": 0.491501, "it": 0.410853, "ka": 0.452229, "kz": 0.336522, "la": 0.354664, "li": 0.459391, "lv": 0.53038, "mk": 0.477444, "mt": 0.496154, "nl": 0.615, "no": 0.432249, "pl": 0.314126, "pt": 0.436387, "ro": 0.408031, "ru": 0.365517, "sk": 0.430946, "sl": 0.453003, "sq": 0.503289, "sr": 0.586957, "sv": 0.570332, "tr": 0.544416, "uk": 0.279851}, "lang_parsed": {"ab": 1.0, "ar": 1.0, "az": 1.0, "be": 1.0, "bg": 1.0, "bo": 1.0, "ca": 1.0, "cn": 1.0, "cs": 1.0, "cy": 1.0, "da": 1.0, "de": 1.0, "el": 1.0, "en": 1.0, "es": 
1.0, "et": 1.0, "eu": 1.0, "fa": 1.0, "fi": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "hu": 0.997403, "hv": 1.0, "ir": 1.0, "is": 1.0, "it": 1.0, "ka": 1.0, "kz": 1.0, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 1.0, "mt": 1.0, "nl": 1.0, "no": 1.0, "pl": 1.0, "pt": 1.0, "ro": 1.0, "ru": 1.0, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 1.0, "tr": 0.997462, "uk": 1.0}, "lang_mae": {"ab": 1.8375, "ar": 2.1377, "az": 2.3494, "be": 1.268, "bg": 1.4969, "bo": 1.2027, "ca": 1.8079, "cn": 1.6548, "cs": 1.708, "cy": 1.657, "da": 1.4774, "de": 2.2795, "el": 1.7325, "en": 1.9765, "es": 1.8906, "et": 1.0599, "eu": 2.445, "fa": 1.6431, "fi": 1.3833, "fr": 1.7819, "gl": 2.2546, "he": 1.708, "hi": 2.1203, "hu": 1.6641, "hv": 1.5549, "ir": 1.7306, "is": 1.6827, "it": 1.7907, "ka": 1.9108, "kz": 1.9983, "la": 2.013, "li": 1.3426, "lv": 1.3038, "mk": 1.7243, "mt": 1.6205, "nl": 0.9925, "no": 1.7127, "pl": 1.9424, "pt": 1.7074, "ro": 2.0725, "ru": 1.7845, "sk": 2.1432, "sl": 2.0261, "sq": 1.3684, "sr": 1.2404, "sv": 1.3811, "tr": 1.2468, "uk": 2.1138}, "confusion": {"2": {"6": 0.5699, "3": 0.1654, "2": 0.0793, "1": 0.0882, "5": 0.0827, "4": 0.0146}, "4": {"6": 0.8729, "3": 0.0699, "5": 0.0229, "2": 0.0222, "1": 0.0114, "4": 0.0006}, "3": {"6": 0.822, "3": 0.0853, "2": 0.0275, "1": 0.0316, "5": 0.0302, "4": 0.0034}, "1": {"6": 0.3324, "1": 0.3073, "3": 0.1665, "2": 0.1172, "5": 0.0707, "4": 0.0059}, "6": {"6": 0.8873, "3": 0.0209, "1": 0.0078, "4": 0.0109, "5": 0.0693, "2": 0.0039}, "5": {"6": 0.8652, "5": 0.075, "1": 0.0086, "4": 0.0096, "3": 0.0358, "2": 0.0058}}}, {"model": "speakleash/Bielik-4.5B-v3.0-Instruct", "avg_exact": 0.209792, "avg_wp": 0.345769, "avg_bias": 1.0487, "avg_parsed": 0.999786, "avg_mae": 1.9145, "total": 18709, "lang_exact": {"ab": 0.156566, "ar": 0.103604, "az": 0.190722, "be": 0.152439, "bg": 0.195556, "bo": 0.25, "ca": 0.289474, "cn": 0.158879, "cs": 0.256757, "cy": 0.268251, "da": 0.323907, "de": 0.163445, "el": 0.147799, "en": 0.210821, "es": 
0.087948, "et": 0.249315, "eu": 0.176271, "fa": 0.168367, "fi": 0.288026, "fr": 0.215805, "gl": 0.182125, "he": 0.21174, "hi": 0.206128, "hu": 0.195652, "hv": 0.214521, "ir": 0.212585, "is": 0.159851, "it": 0.125786, "ka": 0.226481, "kz": 0.145957, "la": 0.282222, "li": 0.179221, "lv": 0.333333, "mk": 0.082873, "mt": 0.275591, "nl": 0.361257, "no": 0.214953, "pl": 0.149013, "pt": 0.1625, "ro": 0.289326, "ru": 0.164021, "sk": 0.265193, "sl": 0.270423, "sq": 0.133333, "sr": 0.180982, "sv": 0.30504, "tr": 0.267905, "uk": 0.221649}, "lang_wp": {"ab": 0.32702, "ar": 0.195946, "az": 0.335052, "be": 0.303354, "bg": 0.344444, "bo": 0.347222, "ca": 0.421053, "cn": 0.308411, "cs": 0.324324, "cy": 0.434635, "da": 0.430591, "de": 0.287346, "el": 0.312893, "en": 0.357276, "es": 0.236156, "et": 0.361644, "eu": 0.30339, "fa": 0.272109, "fi": 0.381877, "fr": 0.346505, "gl": 0.319562, "he": 0.387841, "hi": 0.348189, "hu": 0.375, "hv": 0.30198, "ir": 0.372449, "is": 0.340149, "it": 0.275157, "ka": 0.39547, "kz": 0.313609, "la": 0.386667, "li": 0.303896, "lv": 0.511019, "mk": 0.227901, "mt": 0.387139, "nl": 0.450262, "no": 0.302181, "pl": 0.318671, "pt": 0.28125, "ro": 0.389045, "ru": 0.34127, "sk": 0.385359, "sl": 0.36338, "sq": 0.333333, "sr": 0.319018, "sv": 0.397878, "tr": 0.395225, "uk": 0.390464}, "lang_parsed": {"ab": 1.0, "ar": 1.0, "az": 1.0, "be": 1.0, "bg": 1.0, "bo": 1.0, "ca": 1.0, "cn": 1.0, "cs": 1.0, "cy": 1.0, "da": 1.0, "de": 1.0, "el": 1.0, "en": 1.0, "es": 1.0, "et": 1.0, "eu": 1.0, "fa": 1.0, "fi": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "hu": 1.0, "hv": 1.0, "ir": 1.0, "is": 1.0, "it": 1.0, "ka": 1.0, "kz": 1.0, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 1.0, "mt": 1.0, "nl": 1.0, "no": 1.0, "pl": 0.992819, "pt": 1.0, "ro": 1.0, "ru": 1.0, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 1.0, "tr": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.6742, "ar": 2.1757, "az": 1.8222, "be": 1.9116, "bg": 1.6311, "bo": 2.1111, "ca": 1.9737, "cn": 1.8879, "cs": 2.2365, 
"cy": 1.3548, "da": 1.8766, "de": 2.0457, "el": 1.7987, "en": 1.6847, "es": 2.8078, "et": 2.0, "eu": 2.039, "fa": 2.2993, "fi": 1.6117, "fr": 1.9939, "gl": 1.9022, "he": 1.5283, "hi": 1.6769, "hu": 1.5978, "hv": 2.5215, "ir": 1.6446, "is": 2.3123, "it": 2.5597, "ka": 1.6167, "kz": 1.8047, "la": 1.7956, "li": 2.0571, "lv": 1.4077, "mk": 2.0083, "mt": 1.916, "nl": 1.856, "no": 2.3333, "pl": 1.8119, "pt": 2.4094, "ro": 2.1067, "ru": 1.5688, "sk": 2.0249, "sl": 2.0282, "sq": 1.5216, "sr": 2.5368, "sv": 2.1989, "tr": 1.7507, "uk": 1.4742}, "confusion": {"1": {"5": 0.4483, "4": 0.1143, "3": 0.1133, "1": 0.1965, "6": 0.1275}, "2": {"1": 0.1132, "4": 0.135, "3": 0.085, "5": 0.5414, "6": 0.1218, "2": 0.0035}, "3": {"5": 0.5204, "4": 0.1431, "3": 0.1343, "6": 0.0423, "1": 0.1599}, "4": {"5": 0.5145, "4": 0.1564, "6": 0.0729, "3": 0.1111, "1": 0.1451}, "5": {"5": 0.4333, "4": 0.1605, "3": 0.0659, "6": 0.2621, "1": 0.0777, "2": 0.0005}, "6": {"5": 0.4508, "4": 0.1181, "3": 0.054, "1": 0.0943, "6": 0.2829}}}, {"model": "speakleash/Bielik-1.5B-v3.0-Instruct", "avg_exact": 0.183655, "avg_wp": 0.320781, "avg_bias": 0.4773, "avg_parsed": 0.999786, "avg_mae": 1.9238, "total": 18709, "lang_exact": {"ab": 0.219697, "ar": 0.153153, "az": 0.193299, "be": 0.240854, "bg": 0.422222, "bo": 0.290123, "ca": 0.125731, "cn": 0.168224, "cs": 0.263514, "cy": 0.142615, "da": 0.107969, "de": 0.224956, "el": 0.283019, "en": 0.242537, "es": 0.120521, "et": 0.178082, "eu": 0.179661, "fa": 0.178571, "fi": 0.210356, "fr": 0.18541, "gl": 0.158516, "he": 0.169811, "hi": 0.197772, "hu": 0.133152, "hv": 0.214521, "ir": 0.170068, "is": 0.156134, "it": 0.154088, "ka": 0.174216, "kz": 0.171598, "la": 0.117778, "li": 0.062338, "lv": 0.112948, "mk": 0.273481, "mt": 0.204724, "nl": 0.104712, "no": 0.137072, "pl": 0.245961, "pt": 0.103125, "ro": 0.151685, "ru": 0.203704, "sk": 0.19337, "sl": 0.250704, "sq": 0.164706, "sr": 0.193252, "sv": 0.169761, "tr": 0.135279, "uk": 0.260309}, "lang_wp": {"ab": 0.390152, "ar": 
0.245495, "az": 0.342784, "be": 0.338415, "bg": 0.52, "bo": 0.376543, "ca": 0.295322, "cn": 0.314953, "cs": 0.351351, "cy": 0.271647, "da": 0.250643, "de": 0.405097, "el": 0.433962, "en": 0.423507, "es": 0.247557, "et": 0.324658, "eu": 0.294068, "fa": 0.267857, "fi": 0.391586, "fr": 0.300912, "gl": 0.340641, "he": 0.313417, "hi": 0.33844, "hu": 0.305707, "hv": 0.292079, "ir": 0.280612, "is": 0.317844, "it": 0.27673, "ka": 0.275261, "kz": 0.295858, "la": 0.193333, "li": 0.218182, "lv": 0.268595, "mk": 0.381215, "mt": 0.332021, "nl": 0.270942, "no": 0.308411, "pl": 0.435368, "pt": 0.267188, "ro": 0.268258, "ru": 0.337302, "sk": 0.30663, "sl": 0.377465, "sq": 0.335294, "sr": 0.308282, "sv": 0.312997, "tr": 0.281167, "uk": 0.398196}, "lang_parsed": {"ab": 1.0, "ar": 1.0, "az": 0.997423, "be": 1.0, "bg": 1.0, "bo": 1.0, "ca": 1.0, "cn": 1.0, "cs": 1.0, "cy": 1.0, "da": 1.0, "de": 1.0, "el": 1.0, "en": 1.0, "es": 1.0, "et": 1.0, "eu": 1.0, "fa": 1.0, "fi": 1.0, "fr": 0.99696, "gl": 1.0, "he": 1.0, "hi": 1.0, "hu": 1.0, "hv": 1.0, "ir": 1.0, "is": 1.0, "it": 1.0, "ka": 1.0, "kz": 1.0, "la": 1.0, "li": 1.0, "lv": 1.0, "mk": 1.0, "mt": 1.0, "nl": 1.0, "no": 1.0, "pl": 0.996409, "pt": 1.0, "ro": 1.0, "ru": 1.0, "sk": 1.0, "sl": 1.0, "sq": 1.0, "sr": 1.0, "sv": 1.0, "tr": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.6818, "ar": 2.473, "az": 1.8682, "be": 2.3323, "bg": 1.5422, "bo": 1.8086, "ca": 1.7135, "cn": 1.8673, "cs": 1.8074, "cy": 2.1613, "da": 1.8483, "de": 1.5378, "el": 1.8176, "en": 1.4907, "es": 1.9902, "et": 1.9726, "eu": 2.1017, "fa": 2.2959, "fi": 1.9644, "fr": 1.8506, "gl": 1.5076, "he": 1.9015, "hi": 1.7772, "hu": 1.9918, "hv": 2.0825, "ir": 2.2313, "is": 2.3903, "it": 1.9214, "ka": 2.4669, "kz": 2.142, "la": 2.4756, "li": 2.0052, "lv": 1.8898, "mk": 2.1077, "mt": 1.9213, "nl": 1.9529, "no": 1.6729, "pl": 1.4739, "pt": 1.8, "ro": 1.8736, "ru": 1.8228, "sk": 2.0994, "sl": 1.938, "sq": 1.9373, "sr": 2.1166, "sv": 1.8064, "tr": 1.7294, "uk": 1.6005}, "confusion": {"2": 
{"1": 0.2285, "5": 0.3408, "2": 0.0522, "4": 0.3362, "3": 0.0123, "6": 0.03}, "4": {"5": 0.3715, "1": 0.3029, "2": 0.0531, "4": 0.2045, "6": 0.0531, "3": 0.0149}, "1": {"1": 0.1946, "5": 0.3845, "2": 0.0389, "4": 0.3288, "6": 0.0431, "3": 0.0102}, "5": {"5": 0.4242, "1": 0.144, "2": 0.0424, "4": 0.3328, "6": 0.0489, "3": 0.0077}, "3": {"5": 0.3657, "1": 0.3044, "4": 0.2131, "2": 0.0606, "6": 0.0423, "3": 0.0139}, "6": {"5": 0.4583, "1": 0.1581, "2": 0.0508, "6": 0.0845, "4": 0.2388, "3": 0.0095}}}, {"model": "CYFRAGOVPL/pllum-12b-nc-chat-250715", "avg_exact": 0.181031, "avg_wp": 0.290794, "avg_bias": -0.1791, "avg_parsed": 0.682125, "avg_mae": 1.7019, "total": 21952, "lang_exact": {"ab": 0.161667, "ar": 0.0975, "az": 0.2425, "be": 0.2625, "bg": 0.198895, "bo": 0.1675, "ca": 0.147632, "cn": 0.131667, "cs": 0.1425, "cy": 0.153333, "da": 0.2825, "de": 0.146667, "el": 0.1975, "en": 0.196667, "es": 0.24024, "et": 0.24, "eu": 0.155779, "fa": 0.16, "fi": 0.195, "fr": 0.180628, "gl": 0.2, "he": 0.166667, "hi": 0.183824, "hu": 0.215, "hv": 0.235, "ir": 0.111667, "is": 0.17, "it": 0.1825, "ka": 0.157233, "kz": 0.136667, "la": 0.251462, "li": 0.215, "lv": 0.155, "mk": 0.1775, "mt": 0.15, "nl": 0.1925, "no": 0.215, "pl": 0.115789, "pt": 0.1625, "ro": 0.2425, "ru": 0.236667, "sk": 0.2175, "sl": 0.2425, "sq": 0.1075, "sr": 0.155, "sv": 0.22, "tr": 0.195, "uk": 0.121951}, "lang_wp": {"ab": 0.265833, "ar": 0.17375, "az": 0.3925, "be": 0.3925, "bg": 0.299724, "bo": 0.24625, "ca": 0.236769, "cn": 0.211667, "cs": 0.2425, "cy": 0.285, "da": 0.4425, "de": 0.2475, "el": 0.335, "en": 0.264167, "es": 0.355856, "et": 0.34, "eu": 0.250419, "fa": 0.263333, "fi": 0.2975, "fr": 0.306283, "gl": 0.345, "he": 0.26, "hi": 0.318015, "hu": 0.3275, "hv": 0.34375, "ir": 0.189167, "is": 0.30125, "it": 0.2575, "ka": 0.295597, "kz": 0.264167, "la": 0.359649, "li": 0.28625, "lv": 0.26125, "mk": 0.2925, "mt": 0.29125, "nl": 0.335, "no": 0.28875, "pl": 0.17193, "pt": 0.25375, "ro": 0.41375, "ru": 0.358333, 
"sk": 0.3175, "sl": 0.39875, "sq": 0.21125, "sr": 0.2725, "sv": 0.39, "tr": 0.29625, "uk": 0.213415}, "lang_parsed": {"ab": 0.63, "ar": 0.4875, "az": 0.8725, "be": 0.6975, "bg": 0.588398, "bo": 0.515, "ca": 0.481894, "cn": 0.588333, "cs": 0.5275, "cy": 0.86, "da": 0.84, "de": 0.57, "el": 0.795, "en": 0.51, "es": 0.672673, "et": 0.7525, "eu": 0.748744, "fa": 0.801667, "fi": 0.645, "fr": 0.612565, "gl": 0.69, "he": 0.643333, "hi": 0.871324, "hu": 0.7525, "hv": 0.63, "ir": 0.588333, "is": 0.7675, "it": 0.5375, "ka": 0.748428, "kz": 0.836667, "la": 0.807018, "li": 0.585, "lv": 0.545, "mk": 0.74, "mt": 0.7725, "nl": 0.7175, "no": 0.6175, "pl": 0.477193, "pt": 0.5375, "ro": 0.855, "ru": 0.816667, "sk": 0.695, "sl": 0.83, "sq": 0.5775, "sr": 0.525, "sv": 0.7975, "tr": 0.76, "uk": 0.721254}, "lang_mae": {"ab": 1.4735, "ar": 2.2154, "az": 1.6848, "be": 1.3728, "bg": 1.4507, "bo": 1.7233, "ca": 1.3873, "cn": 1.9773, "cs": 1.6635, "cy": 1.6667, "da": 1.2232, "de": 1.4474, "el": 2.0472, "en": 1.3954, "es": 1.442, "et": 1.7807, "eu": 2.0045, "fa": 1.9459, "fi": 1.845, "fr": 1.6068, "gl": 1.2367, "he": 1.6632, "hi": 1.7511, "hu": 1.9967, "hv": 1.5238, "ir": 1.9065, "is": 1.8078, "it": 1.6512, "ka": 1.7521, "kz": 2.0159, "la": 1.7101, "li": 1.8077, "lv": 1.6743, "mk": 1.7872, "mt": 1.7508, "nl": 1.331, "no": 1.8462, "pl": 1.7353, "pt": 1.7116, "ro": 1.4123, "ru": 1.4878, "sk": 1.8094, "sl": 1.5361, "sq": 2.0346, "sr": 1.1524, "sv": 1.442, "tr": 2.2368, "uk": 2.1304}, "confusion": {"1": {"6": 0.1641, "2": 0.1494, "1": 0.5374, "3": 0.0636, "5": 0.0381, "4": 0.0473}, "2": {"2": 0.1685, "1": 0.3392, "4": 0.0745, "3": 0.1322, "6": 0.217, "5": 0.0686}, "3": {"4": 0.1226, "3": 0.0934, "1": 0.2472, "6": 0.2585, "5": 0.1802, "2": 0.0981}, "4": {"6": 0.2685, "5": 0.2005, "4": 0.1191, "1": 0.2601, "3": 0.0856, "2": 0.0663}, "5": {"4": 0.1975, "2": 0.032, "6": 0.2114, "5": 0.2705, "1": 0.2123, "3": 0.0763}, "6": {"4": 0.1714, "5": 0.2743, "6": 0.2136, "1": 0.2624, "2": 0.0327, "3": 
0.0457}}}];
  // Language codes covered by the first dataset. The order appears to follow
  // the alphabetical order of the display names in LANG_NAMES
  // (e.g. "sq" = Albanian first, "cy" = Welsh last), not the codes themselves.
  const ALL_LANGS = [
    "sq", "ab", "ar", "az", "eu", "be", "bo", "bg",
    "ca", "cn", "hv", "cs", "da", "nl", "en", "et",
    "fa", "fi", "fr", "gl", "ka", "de", "el", "he",
    "hi", "hu", "is", "ir", "it", "kz", "la", "lv",
    "li", "mk", "mt", "no", "pl", "pt", "ro", "ru",
    "sr", "sk", "sl", "es", "sv", "tr", "uk", "cy",
  ];
  // Maps a dataset language code to its human-readable display name.
  // The codes are dataset-specific and do not always match ISO 639-1
  // (e.g. "ab" is Arabic, "ar" is Armenian, "cn" is Chinese, "kz" is Kazakh),
  // so do not "normalise" keys without checking the data that uses them.
  // Fix: "hv" previously mapped to the country name "Croatia"; every other
  // code listed in ALL_LANGS maps to a language name, so it is now "Croatian".
  // NOTE(review): "bn": "Brunei" also looks like a country rather than a
  // language; it is not listed in ALL_LANGS, so it is left untouched — confirm.
  const LANG_NAMES  = {"af": "Afrikaans", "ab": "Arabic", "az": "Azerbaijani", "ar": "Armenian", "be": "Belarusian", "bo": "Bosnian", "bg": "Bulgarian", "bn": "Brunei", "ca": "Catalan", "cs": "Czech", "cn": "Chinese", "cy": "Welsh", "da": "Danish", "de": "German", "el": "Greek", "en": "English", "eo": "Esperanto", "es": "Spanish", "et": "Estonian", "eu": "Basque", "fa": "Faroese", "fi": "Finnish", "fr": "French", "ga": "Irish", "gl": "Galician", "gu": "Gujarati", "he": "Hebrew", "hi": "Hindi", "hr": "Croatian", "hu": "Hungarian", "hv": "Croatian", "id": "Indonesian", "is": "Icelandic", "it": "Italian", "ir": "Irish", "ja": "Japanese", "ka": "Georgian", "kz": "Kazakh", "km": "Khmer", "kn": "Kannada", "ko": "Korean", "la": "Latin", "li": "Lithuanian", "lv": "Latvian", "mk": "Macedonian", "ml": "Malayalam", "mn": "Mongolian", "mr": "Marathi", "ms": "Malay", "mt": "Maltese", "my": "Burmese", "ne": "Nepali", "nl": "Dutch", "no": "Norwegian", "pa": "Punjabi", "pe": "Persian", "pl": "Polish", "pt": "Portuguese", "ro": "Romanian", "ru": "Russian", "si": "Sinhala", "sk": "Slovak", "sl": "Slovenian", "sq": "Albanian", "sr": "Serbian", "sv": "Swedish", "sw": "Swahili", "ta": "Tamil", "te": "Telugu", "th": "Thai", "tl": "Filipino", "tr": "Turkish", "uk": "Ukrainian", "ur": "Urdu", "uz": "Uzbek", "vi": "Vietnamese", "zh": "Chinese", "zu": "Zulu"};
  // Per-language totals for the first dataset, keyed by language code.
  // Each total equals the sum of that language's entries in LANG_DIST.
  const LANG_COUNTS = {
    "ab": 600, "ar": 400, "az": 400, "be": 400, "bg": 362, "bo": 400,
    "ca": 359, "cn": 600, "cs": 400, "cy": 600, "da": 400, "de": 600,
    "el": 400, "en": 600, "es": 333, "et": 400, "eu": 597, "fa": 600,
    "fi": 400, "fr": 382, "gl": 600, "he": 600, "hi": 544, "hu": 400,
    "hv": 400, "ir": 600, "is": 400, "it": 400, "ka": 318, "kz": 600,
    "la": 513, "li": 400, "lv": 400, "mk": 400, "mt": 400, "nl": 400,
    "no": 400, "pl": 571, "pt": 400, "ro": 400, "ru": 600, "sk": 400,
    "sl": 400, "sq": 400, "sr": 400, "sv": 400, "tr": 400, "uk": 574,
  };
  // Per-language breakdown of item counts, keyed by language code and then by
  // a "1".."6" key (presumably the label/level also used as keys in the
  // per-model "confusion" tables — confirm). Some languages only carry the
  // keys "1", "2", "5" and "6". For every language the values add up to the
  // corresponding LANG_COUNTS entry.
  const LANG_DIST   = {"ab": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "ar": {"1": 100, "2": 100, "5": 100, "6": 100}, "az": {"1": 100, "2": 100, "5": 100, "6": 100}, "be": {"1": 100, "2": 100, "5": 100, "6": 100}, "bg": {"1": 100, "2": 100, "5": 100, "6": 62}, "bo": {"1": 100, "2": 100, "5": 100, "6": 100}, "ca": {"1": 73, "2": 100, "5": 100, "6": 86}, "cn": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "cs": {"1": 100, "2": 100, "5": 100, "6": 100}, "cy": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "da": {"1": 100, "2": 100, "5": 100, "6": 100}, "de": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "el": {"1": 100, "2": 100, "5": 100, "6": 100}, "en": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "es": {"1": 100, "2": 100, "5": 100, "6": 33}, "et": {"1": 100, "2": 100, "5": 100, "6": 100}, "eu": {"1": 97, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "fa": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "fi": {"1": 100, "2": 100, "5": 100, "6": 100}, "fr": {"1": 100, "2": 100, "5": 100, "6": 82}, "gl": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "he": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "hi": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 44}, "hu": {"1": 100, "2": 100, "5": 100, "6": 100}, "hv": {"1": 100, "2": 100, "5": 100, "6": 100}, "ir": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "is": {"1": 100, "2": 100, "5": 100, "6": 100}, "it": {"1": 100, "2": 100, "5": 100, "6": 100}, "ka": {"1": 18, "2": 100, "5": 100, "6": 100}, "kz": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "la": {"1": 100, "2": 96, "3": 17, "4": 100, "5": 100, "6": 100}, "li": {"1": 100, "2": 100, "5": 100, "6": 100}, "lv": {"1": 100, "2": 100, "5": 100, "6": 100}, "mk": {"1": 100, "2": 100, "5": 100, "6": 100}, "mt": {"1": 100, "2": 100, "5": 100, "6": 100}, "nl": {"1": 100, "2": 100, "5": 100, "6": 
100}, "no": {"1": 100, "2": 100, "5": 100, "6": 100}, "pl": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 71}, "pt": {"1": 100, "2": 100, "5": 100, "6": 100}, "ro": {"1": 100, "2": 100, "5": 100, "6": 100}, "ru": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "sk": {"1": 100, "2": 100, "5": 100, "6": 100}, "sl": {"1": 100, "2": 100, "5": 100, "6": 100}, "sq": {"1": 100, "2": 100, "5": 100, "6": 100}, "sr": {"1": 100, "2": 100, "5": 100, "6": 100}, "sv": {"1": 100, "2": 100, "5": 100, "6": 100}, "tr": {"1": 100, "2": 100, "5": 100, "6": 100}, "uk": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 74}};

  // Second dataset.
  // HAS_SECOND flags that a second result set (ALL_ROWS_2, declared right
  // below) is embedded in this file. Presumably consumers check it before
  // reading the *_2 data — confirm against the code that uses it.
  const HAS_SECOND   = true;
  const ALL_ROWS_2   = [{"model": "Qwen/Qwen3.5-397B-A17B-FP8", "avg_exact": 0.467554, "avg_wp": 0.660086, "avg_bias": 0.4369, "avg_parsed": 1.0, "avg_mae": 0.7244, "total": 9092, "lang_exact": {"ab": 0.463333, "cn": 0.438333, "cy": 0.332143, "de": 0.551667, "en": 0.587332, "es": 0.57, "eu": 0.38961, "fr": 0.54, "gl": 0.453846, "he": 0.493333, "hi": 0.513333, "ir": 0.40378, "kz": 0.362805, "la": 0.433862, "pl": 0.423333, "sl": 0.395, "uk": 0.516981}, "lang_wp": {"ab": 0.644167, "cn": 0.640833, "cy": 0.528571, "de": 0.725, "en": 0.759117, "es": 0.7425, "eu": 0.574675, "fr": 0.731667, "gl": 0.646154, "he": 0.695, "hi": 0.703333, "ir": 0.607388, "kz": 0.591463, "la": 0.664021, "pl": 0.606667, "sl": 0.608333, "uk": 0.711321}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 1.0, "de": 1.0, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 0.78, "cn": 0.75, "cy": 1.0071, "de": 0.5783, "en": 0.4952, "es": 0.5383, "eu": 0.9351, "fr": 0.5533, "gl": 0.7654, "he": 0.6333, "hi": 0.6133, "ir": 0.8196, "kz": 0.875, "la": 0.6931, "pl": 0.9333, "sl": 0.83, "uk": 0.5906}, "confusion": {"1": {"1": 0.6019, "2": 0.3257, "4": 0.0426, "6": 0.0162, "3": 0.0068, "5": 0.0068}, "2": {"2": 0.4974, "4": 0.2436, "3": 0.0713, "1": 0.0925, "6": 0.0257, "5": 0.0694}, "3": {"2": 0.2051, "3": 0.0857, "4": 0.4609, "1": 0.0121, "6": 0.0364, "5": 0.1997}, "4": {"2": 0.0587, "5": 0.4404, "6": 0.1144, "4": 0.3651, "3": 0.0202, "1": 0.0012}, "5": {"6": 0.4461, "4": 0.096, "5": 0.4368, "2": 0.0173, "3": 0.0037}, "6": {"6": 0.8223, "5": 0.1526, "4": 0.0221, "2": 0.0031}}}, {"model": "mistralai/Mistral-Small-24B-Instruct-2501", "avg_exact": 0.31779, "avg_wp": 0.560195, "avg_bias": 0.0284, "avg_parsed": 1.0, "avg_mae": 0.9057, "total": 7999, "lang_exact": {"ab": 0.294382, "cn": 0.307407, "cy": 0.321932, "de": 0.336824, "en": 0.415755, "es": 0.382353, "eu": 0.289817, "fr": 0.302867, "gl": 0.305609, 
"he": 0.306641, "hi": 0.241055, "ir": 0.272727, "kz": 0.376923, "la": 0.319444, "pl": 0.341727, "sl": 0.295585, "uk": 0.318681}, "lang_wp": {"ab": 0.529213, "cn": 0.544444, "cy": 0.560362, "de": 0.576789, "en": 0.671772, "es": 0.639706, "eu": 0.523499, "fr": 0.580645, "gl": 0.56383, "he": 0.544922, "hi": 0.461394, "ir": 0.513834, "kz": 0.621154, "la": 0.402778, "pl": 0.570144, "sl": 0.536468, "uk": 0.596703}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 1.0, "de": 1.0, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 0.9843, "cn": 0.963, "cy": 0.8954, "de": 0.8709, "en": 0.6718, "es": 0.7243, "eu": 0.9687, "fr": 0.8548, "gl": 0.8878, "he": 0.9414, "hi": 1.1337, "ir": 1.0099, "kz": 0.7577, "la": 1.3056, "pl": 0.8759, "sl": 0.9539, "uk": 0.8154}, "confusion": {"3": {"4": 0.5322, "2": 0.0946, "3": 0.346, "1": 0.01, "6": 0.0007, "5": 0.0165}, "2": {"4": 0.3167, "2": 0.2579, "3": 0.4001, "1": 0.0219, "5": 0.0034}, "1": {"2": 0.4823, "3": 0.2102, "1": 0.2138, "4": 0.0919, "6": 0.0018}, "4": {"4": 0.6669, "3": 0.2057, "2": 0.0512, "5": 0.0708, "1": 0.0054}, "5": {"4": 0.5988, "5": 0.3031, "3": 0.0791, "2": 0.0132, "6": 0.0059}, "6": {"4": 0.399, "5": 0.5197, "3": 0.0335, "6": 0.0369, "2": 0.0075, "1": 0.0034}}}, {"model": "allenai/Olmo-3.1-32B-Instruct", "avg_exact": 0.35418, "avg_wp": 0.549877, "avg_bias": 0.6144, "avg_parsed": 1.0, "avg_mae": 1.0553, "total": 7739, "lang_exact": {"ab": 0.355499, "cn": 0.368715, "cy": 0.346154, "de": 0.37766, "en": 0.369892, "es": 0.468635, "eu": 0.312668, "fr": 0.4, "gl": 0.321569, "he": 0.281319, "hi": 0.357853, "ir": 0.348606, "kz": 0.164444, "la": 0.464789, "pl": 0.388385, "sl": 0.327485, "uk": 0.295943}, "lang_wp": {"ab": 0.528133, "cn": 0.566108, "cy": 0.552632, "de": 0.575355, "en": 0.563441, "es": 0.664207, "eu": 0.494609, "fr": 0.597297, "gl": 0.52549, "he": 0.497802, "hi": 0.549702, "ir": 0.557769, "kz": 0.386667, 
"la": 0.489437, "pl": 0.595281, "sl": 0.515595, "uk": 0.502387}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 1.0, "de": 1.0, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.202, "cn": 0.9777, "cy": 1.0223, "de": 1.0018, "en": 1.0817, "es": 0.7583, "eu": 1.1968, "fr": 0.9099, "gl": 1.1, "he": 1.1231, "hi": 1.0676, "ir": 0.9622, "kz": 1.44, "la": 1.8592, "pl": 0.9274, "sl": 1.1384, "uk": 1.1074}, "confusion": {"1": {"1": 0.5078, "6": 0.0683, "3": 0.1588, "2": 0.2133, "5": 0.0185, "4": 0.0332}, "2": {"1": 0.1237, "3": 0.3462, "4": 0.1493, "5": 0.0891, "2": 0.1465, "6": 0.1451}, "3": {"1": 0.0211, "2": 0.0605, "6": 0.1355, "3": 0.3336, "4": 0.2658, "5": 0.1835}, "4": {"5": 0.2575, "3": 0.2026, "4": 0.2484, "6": 0.2498, "2": 0.0236, "1": 0.018}, "5": {"4": 0.1315, "6": 0.5058, "5": 0.2848, "3": 0.0623, "1": 0.0086, "2": 0.007}, "6": {"6": 0.718, "5": 0.1694, "4": 0.0613, "3": 0.027, "2": 0.0009, "1": 0.0234}}}, {"model": "speakleash/Bielik-11B-v3.0-Instruct", "avg_exact": 0.319321, "avg_wp": 0.52795, "avg_bias": -0.0152, "avg_parsed": 0.967946, "avg_mae": 1.0378, "total": 9016, "lang_exact": {"ab": 0.263525, "cn": 0.406518, "cy": 0.264286, "de": 0.366667, "en": 0.341651, "es": 0.446667, "eu": 0.244589, "fr": 0.433333, "gl": 0.290385, "he": 0.22031, "hi": 0.282794, "ir": 0.333333, "kz": 0.335366, "la": 0.243386, "pl": 0.193333, "sl": 0.311667, "uk": 0.388679}, "lang_wp": {"ab": 0.426702, "cn": 0.626072, "cy": 0.502679, "de": 0.595, "en": 0.587332, "es": 0.6625, "eu": 0.462121, "fr": 0.665, "gl": 0.524038, "he": 0.36747, "hi": 0.459114, "ir": 0.542955, "kz": 0.573171, "la": 0.402116, "pl": 0.3625, "sl": 0.524167, "uk": 0.619811}, "lang_parsed": {"ab": 0.900524, "cn": 0.96741, "cy": 1.0, "de": 0.996667, "en": 0.994242, "es": 0.995, "eu": 0.980519, "fr": 1.0, "gl": 1.0, "he": 0.977625, "hi": 0.938671, "ir": 0.998282, "kz": 0.990854, "la": 0.994709, "pl": 
0.903333, "sl": 0.933333, "uk": 0.916981}, "lang_mae": {"ab": 1.2868, "cn": 0.7695, "cy": 1.0911, "de": 0.8645, "en": 0.8842, "es": 0.6851, "eu": 1.2075, "fr": 0.6867, "gl": 1.0038, "he": 1.5933, "hi": 1.1887, "ir": 0.9725, "kz": 0.9046, "la": 2.0904, "pl": 1.4963, "sl": 0.9571, "uk": 0.6667}, "confusion": {"1": {"1": 0.748, "5": 0.0537, "2": 0.0661, "6": 0.0018, "4": 0.096, "3": 0.0344}, "2": {"2": 0.1071, "1": 0.2756, "3": 0.1783, "5": 0.128, "4": 0.3109}, "3": {"1": 0.1199, "4": 0.4262, "5": 0.2148, "3": 0.1816, "2": 0.0575}, "4": {"1": 0.0789, "4": 0.3598, "5": 0.428, "3": 0.1086, "2": 0.024, "6": 0.0006}, "5": {"3": 0.0566, "4": 0.2227, "1": 0.0436, "5": 0.6719, "2": 0.0052}, "6": {"5": 0.7356, "1": 0.0781, "3": 0.0374, "4": 0.1422, "2": 0.0053, "6": 0.0013}}}, {"model": "Qwen/Qwen2.5-14B-Instruct", "avg_exact": 0.278385, "avg_wp": 0.516447, "avg_bias": 0.053, "avg_parsed": 1.0, "avg_mae": 0.9821, "total": 7874, "lang_exact": {"ab": 0.253968, "cn": 0.291367, "cy": 0.308316, "de": 0.278571, "en": 0.324561, "es": 0.305556, "eu": 0.248649, "fr": 0.337545, "gl": 0.252964, "he": 0.276062, "hi": 0.226824, "ir": 0.282869, "kz": 0.341772, "la": 0.262411, "pl": 0.21558, "sl": 0.268627, "uk": 0.269142}, "lang_wp": {"ab": 0.503401, "cn": 0.555755, "cy": 0.548682, "de": 0.5125, "en": 0.569079, "es": 0.557407, "eu": 0.467568, "fr": 0.563177, "gl": 0.499012, "he": 0.52027, "hi": 0.455621, "ir": 0.513944, "kz": 0.586498, "la": 0.358156, "pl": 0.447464, "sl": 0.510784, "uk": 0.520882}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 1.0, "de": 1.0, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.0113, "cn": 0.8903, "cy": 0.9148, "de": 0.9839, "en": 0.8684, "es": 0.8889, "eu": 1.0973, "fr": 0.8809, "gl": 1.0158, "he": 0.9807, "hi": 1.1144, "ir": 1.0, "kz": 0.827, "la": 1.3333, "pl": 1.125, "sl": 1.0039, "uk": 0.9606}, "confusion": {"1": {"1": 0.1435, "2": 0.4625, 
"3": 0.3373, "4": 0.0567}, "2": {"3": 0.4684, "2": 0.1699, "4": 0.3432, "1": 0.011, "5": 0.0076}, "3": {"4": 0.5922, "2": 0.049, "3": 0.3429, "5": 0.0144, "1": 0.0007, "6": 0.0007}, "4": {"4": 0.7609, "3": 0.1776, "5": 0.043, "2": 0.0178, "6": 0.0007}, "5": {"4": 0.8014, "5": 0.143, "3": 0.0504, "2": 0.003, "6": 0.0023}, "6": {"5": 0.2314, "4": 0.7293, "6": 0.007, "3": 0.0297, "2": 0.0026}}}, {"model": "utter-project/EuroLLM-22B-Instruct-2512", "avg_exact": 0.298877, "avg_wp": 0.513694, "avg_bias": 0.574, "avg_parsed": 0.999874, "avg_mae": 1.1195, "total": 7923, "lang_exact": {"ab": 0.361751, "cn": 0.33945, "cy": 0.254582, "de": 0.326353, "en": 0.253363, "es": 0.368324, "eu": 0.22372, "fr": 0.389892, "gl": 0.294004, "he": 0.257778, "hi": 0.302026, "ir": 0.351248, "kz": 0.293878, "la": 0.194444, "pl": 0.216696, "sl": 0.272553, "uk": 0.266234}, "lang_wp": {"ab": 0.570276, "cn": 0.569725, "cy": 0.482688, "de": 0.546248, "en": 0.439462, "es": 0.581952, "eu": 0.443396, "fr": 0.591155, "gl": 0.525145, "he": 0.444444, "hi": 0.524862, "ir": 0.56238, "kz": 0.504082, "la": 0.524306, "pl": 0.409414, "sl": 0.482726, "uk": 0.492424}, "lang_parsed": {"ab": 1.0, "cn": 0.998165, "cy": 1.0, "de": 1.0, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 0.9401, "cn": 0.9154, "cy": 1.1527, "de": 1.0489, "en": 1.3161, "es": 0.9134, "eu": 1.3477, "fr": 0.9206, "gl": 1.0387, "he": 1.3711, "hi": 1.0829, "ir": 0.9712, "kz": 1.1796, "la": 1.2083, "pl": 1.4938, "sl": 1.2054, "uk": 1.132}, "confusion": {"2": {"1": 0.068, "5": 0.455, "2": 0.3491, "4": 0.0034, "3": 0.1203, "6": 0.0041}, "1": {"1": 0.3846, "2": 0.4139, "5": 0.1465, "3": 0.0513, "6": 0.0037}, "3": {"5": 0.7158, "2": 0.1568, "1": 0.018, "3": 0.1065, "6": 0.0022, "4": 0.0007}, "4": {"5": 0.8645, "2": 0.0735, "3": 0.049, "4": 0.0027, "6": 0.0027, "1": 0.0075}, "5": {"5": 0.936, "2": 0.0387, "3": 0.0097, "6": 0.0082, "1": 
0.0045, "4": 0.003}, "6": {"5": 0.9403, "1": 0.0094, "6": 0.0256, "2": 0.0179, "4": 0.0017, "3": 0.0051}}}, {"model": "CYFRAGOVPL/Llama-PLLuM-70B-chat-250801", "avg_exact": 0.275562, "avg_wp": 0.474108, "avg_bias": -0.6122, "avg_parsed": 0.996364, "avg_mae": 1.2257, "total": 9076, "lang_exact": {"ab": 0.297479, "cn": 0.248333, "cy": 0.276786, "de": 0.363333, "en": 0.243762, "es": 0.295, "eu": 0.166667, "fr": 0.318333, "gl": 0.201923, "he": 0.243655, "hi": 0.281667, "ir": 0.262887, "kz": 0.303681, "la": 0.216931, "pl": 0.316667, "sl": 0.331667, "uk": 0.245283}, "lang_wp": {"ab": 0.485714, "cn": 0.39, "cy": 0.504464, "de": 0.561667, "en": 0.487524, "es": 0.491667, "eu": 0.376623, "fr": 0.511667, "gl": 0.414423, "he": 0.423012, "hi": 0.471667, "ir": 0.444158, "kz": 0.546012, "la": 0.304233, "pl": 0.520833, "sl": 0.5425, "uk": 0.471698}, "lang_parsed": {"ab": 0.986555, "cn": 1.0, "cy": 0.998214, "de": 1.0, "en": 0.992322, "es": 0.996667, "eu": 0.997835, "fr": 0.998333, "gl": 1.0, "he": 0.99154, "hi": 1.0, "ir": 0.994845, "kz": 0.996933, "la": 1.0, "pl": 0.993333, "sl": 0.996667, "uk": 0.998113}, "lang_mae": {"ab": 1.1806, "cn": 1.5733, "cy": 1.102, "de": 1.0117, "en": 1.1277, "es": 1.1488, "eu": 1.4967, "fr": 1.1002, "gl": 1.3058, "he": 1.3379, "hi": 1.2433, "ir": 1.3368, "kz": 1.0092, "la": 2.3439, "pl": 1.0419, "sl": 0.9916, "uk": 1.2042}, "confusion": {"1": {"3": 0.1541, "1": 0.786, "4": 0.0317, "5": 0.0154, "6": 0.0051, "2": 0.0077}, "2": {"3": 0.3323, "4": 0.1092, "1": 0.4874, "5": 0.0595, "2": 0.0071, "6": 0.0045}, "3": {"1": 0.3083, "3": 0.3807, "5": 0.1244, "4": 0.1765, "2": 0.0054, "6": 0.0047}, "4": {"4": 0.2148, "3": 0.3557, "5": 0.216, "1": 0.2037, "6": 0.0055, "2": 0.0043}, "5": {"5": 0.3859, "4": 0.2643, "3": 0.2506, "1": 0.0842, "2": 0.0044, "6": 0.0106}, "6": {"4": 0.2679, "3": 0.1772, "5": 0.4247, "1": 0.1019, "6": 0.0253, "2": 0.0031}}}, {"model": "mistralai/Mistral-Nemo-Instruct-2407", "avg_exact": 0.282611, "avg_wp": 0.468668, "avg_bias": -0.2413, 
"avg_parsed": 0.963425, "avg_mae": 1.2449, "total": 8011, "lang_exact": {"ab": 0.29148, "cn": 0.3, "cy": 0.181087, "de": 0.279232, "en": 0.323851, "es": 0.310662, "eu": 0.263708, "fr": 0.351254, "gl": 0.257253, "he": 0.289062, "hi": 0.265683, "ir": 0.243083, "kz": 0.296154, "la": 0.180556, "pl": 0.354317, "sl": 0.28215, "uk": 0.248352}, "lang_wp": {"ab": 0.483184, "cn": 0.492593, "cy": 0.343058, "de": 0.507853, "en": 0.507659, "es": 0.511949, "eu": 0.425587, "fr": 0.562724, "gl": 0.434236, "he": 0.487305, "hi": 0.417897, "ir": 0.380435, "kz": 0.457692, "la": 0.322917, "pl": 0.546763, "sl": 0.478887, "uk": 0.465934}, "lang_parsed": {"ab": 0.950673, "cn": 0.983333, "cy": 0.987928, "de": 0.95637, "en": 0.964989, "es": 0.977941, "eu": 0.942559, "fr": 0.958781, "gl": 0.912959, "he": 0.988281, "hi": 0.968635, "ir": 0.966403, "kz": 0.915385, "la": 0.9375, "pl": 0.992806, "sl": 0.955854, "uk": 0.967033}, "lang_mae": {"ab": 1.1085, "cn": 1.2298, "cy": 1.8045, "de": 1.042, "en": 1.0, "es": 1.1015, "eu": 1.3989, "fr": 0.8991, "gl": 1.2712, "he": 1.1739, "hi": 1.499, "ir": 1.5808, "kz": 1.1723, "la": 2.2148, "pl": 1.0145, "sl": 1.2129, "uk": 1.1795}, "confusion": {"1": {"1": 0.4749, "2": 0.3597, "5": 0.0799, "4": 0.0288, "3": 0.0483, "6": 0.0084}, "2": {"4": 0.1264, "2": 0.4316, "5": 0.176, "1": 0.1669, "3": 0.0866, "6": 0.0126}, "3": {"4": 0.2181, "1": 0.0934, "2": 0.3071, "3": 0.1225, "5": 0.248, "6": 0.0109}, "4": {"4": 0.2169, "2": 0.2479, "5": 0.3336, "1": 0.076, "3": 0.1105, "6": 0.0152}, "5": {"4": 0.1957, "2": 0.1995, "5": 0.4612, "1": 0.0491, "6": 0.0269, "3": 0.0675}, "6": {"5": 0.5312, "4": 0.1379, "2": 0.1544, "6": 0.0478, "1": 0.08, "3": 0.0487}}}, {"model": "Qwen/Qwen2.5-7B-Instruct", "avg_exact": 0.285497, "avg_wp": 0.461773, "avg_bias": 0.9564, "avg_parsed": 1.0, "avg_mae": 1.212, "total": 7874, "lang_exact": {"ab": 0.283447, "cn": 0.305755, "cy": 0.219067, "de": 0.341071, "en": 0.296053, "es": 0.362963, "eu": 0.267568, "fr": 0.370036, "gl": 0.304348, "he": 
0.272201, "hi": 0.258383, "ir": 0.213147, "kz": 0.278481, "la": 0.35461, "pl": 0.286232, "sl": 0.190196, "uk": 0.266821}, "lang_wp": {"ab": 0.451247, "cn": 0.48741, "cy": 0.408722, "de": 0.525893, "en": 0.501096, "es": 0.531481, "eu": 0.422973, "fr": 0.546029, "gl": 0.503953, "he": 0.44305, "hi": 0.431953, "ir": 0.376494, "kz": 0.440928, "la": 0.574468, "pl": 0.429348, "sl": 0.347059, "uk": 0.472158}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 1.0, "de": 1.0, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.3107, "cn": 1.1259, "cy": 1.3266, "de": 1.0429, "en": 1.0658, "es": 1.0093, "eu": 1.3081, "fr": 0.9585, "gl": 1.0514, "he": 1.2838, "hi": 1.284, "ir": 1.4422, "kz": 1.1772, "la": 1.0, "pl": 1.4094, "sl": 1.5529, "uk": 1.1439}, "confusion": {"1": {"1": 0.3062, "3": 0.2678, "4": 0.3291, "6": 0.0283, "2": 0.0375, "5": 0.0311}, "2": {"2": 0.0186, "3": 0.1403, "4": 0.6272, "5": 0.1039, "1": 0.0413, "6": 0.0688}, "3": {"4": 0.611, "6": 0.1318, "2": 0.0029, "5": 0.2089, "3": 0.0389, "1": 0.0065}, "4": {"6": 0.2281, "4": 0.4597, "5": 0.2937, "3": 0.0143, "1": 0.0041}, "5": {"6": 0.4387, "4": 0.2092, "5": 0.3506, "3": 0.0015}, "6": {"6": 0.6052, "4": 0.1231, "5": 0.2681, "3": 0.0026, "1": 0.0009}}}, {"model": "speakleash/Bielik-11B-v2.6-Instruct", "avg_exact": 0.261933, "avg_wp": 0.461439, "avg_bias": 0.4995, "avg_parsed": 1.0, "avg_mae": 1.2414, "total": 8506, "lang_exact": {"ab": 0.214533, "cn": 0.250432, "cy": 0.219031, "de": 0.268456, "en": 0.321705, "es": 0.361204, "eu": 0.197397, "fr": 0.301667, "gl": 0.280769, "he": 0.238384, "hi": 0.23689, "ir": 0.183849, "kz": 0.206642, "la": 0.196721, "pl": 0.321667, "sl": 0.26, "uk": 0.280632}, "lang_wp": {"ab": 0.378893, "cn": 0.42228, "cy": 0.40754, "de": 0.466443, "en": 0.543605, "es": 0.574415, "eu": 0.439262, "fr": 0.526667, "gl": 0.523077, "he": 0.437374, "hi": 0.391501, "ir": 0.354811, "kz": 0.47417, 
"la": 0.289617, "pl": 0.498333, "sl": 0.438333, "uk": 0.530632}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 1.0, "de": 1.0, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.4948, "cn": 1.361, "cy": 1.3429, "de": 1.203, "en": 1.0039, "es": 0.903, "eu": 1.282, "fr": 0.9917, "gl": 1.0442, "he": 1.1636, "hi": 1.5461, "ir": 1.5155, "kz": 1.0701, "la": 1.9563, "pl": 1.315, "sl": 1.375, "uk": 1.0119}, "confusion": {"1": {"3": 0.653, "1": 0.2373, "6": 0.0874, "5": 0.0223}, "2": {"3": 0.7263, "5": 0.1183, "6": 0.1079, "1": 0.0195, "4": 0.028}, "3": {"3": 0.5543, "6": 0.1349, "5": 0.2886, "4": 0.0201, "1": 0.0021}, "4": {"3": 0.4415, "6": 0.1742, "5": 0.3612, "4": 0.0219, "1": 0.0013}, "6": {"3": 0.2302, "6": 0.3566, "5": 0.4009, "4": 0.0065, "1": 0.0058}, "5": {"5": 0.4329, "3": 0.2825, "6": 0.2621, "4": 0.0218, "2": 0.0007}}}, {"model": "allenai/Olmo-3-7B-Instruct", "avg_exact": 0.228195, "avg_wp": 0.409937, "avg_bias": 0.5317, "avg_parsed": 0.999612, "avg_mae": 1.4272, "total": 7739, "lang_exact": {"ab": 0.176471, "cn": 0.214153, "cy": 0.204453, "de": 0.23227, "en": 0.292473, "es": 0.287823, "eu": 0.221024, "fr": 0.23964, "gl": 0.264706, "he": 0.178022, "hi": 0.252485, "ir": 0.181275, "kz": 0.213333, "la": 0.211268, "pl": 0.257713, "sl": 0.163743, "uk": 0.250597}, "lang_wp": {"ab": 0.391304, "cn": 0.368715, "cy": 0.385628, "de": 0.437943, "en": 0.489247, "es": 0.511993, "eu": 0.392183, "fr": 0.423423, "gl": 0.431373, "he": 0.36044, "hi": 0.437376, "ir": 0.35757, "kz": 0.397778, "la": 0.31338, "pl": 0.434664, "sl": 0.309942, "uk": 0.435561}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 1.0, "de": 0.998227, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 0.998012, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 0.998185, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.3632, "cn": 1.5736, "cy": 1.4555, "de": 1.3091, "en": 1.1634, "es": 1.0996, "eu": 
1.5553, "fr": 1.3892, "gl": 1.4078, "he": 1.5231, "hi": 1.3625, "ir": 1.498, "kz": 1.3911, "la": 1.7042, "pl": 1.4691, "sl": 1.8226, "uk": 1.3771}, "confusion": {"1": {"3": 0.2114, "4": 0.157, "2": 0.4035, "1": 0.0905, "5": 0.0766, "6": 0.0609}, "2": {"1": 0.027, "3": 0.1728, "4": 0.2122, "5": 0.1638, "6": 0.114, "2": 0.3103}, "3": {"3": 0.1151, "2": 0.2367, "4": 0.2251, "6": 0.1653, "5": 0.2535, "1": 0.0044}, "4": {"4": 0.1985, "3": 0.0985, "6": 0.2415, "2": 0.186, "5": 0.2741, "1": 0.0014}, "5": {"4": 0.1877, "6": 0.3879, "3": 0.0724, "5": 0.2033, "2": 0.148, "1": 0.0008}, "6": {"6": 0.4639, "5": 0.1597, "4": 0.1634, "3": 0.0731, "2": 0.139, "1": 0.0009}}}, {"model": "mistralai/Mistral-7B-Instruct-v0.3", "avg_exact": 0.229035, "avg_wp": 0.398141, "avg_bias": 0.9606, "avg_parsed": 0.994066, "avg_mae": 1.5188, "total": 7584, "lang_exact": {"ab": 0.224599, "cn": 0.208566, "cy": 0.220619, "de": 0.253142, "en": 0.16895, "es": 0.36194, "eu": 0.208914, "fr": 0.261993, "gl": 0.208791, "he": 0.199557, "hi": 0.240481, "ir": 0.197154, "kz": 0.228814, "la": 0.154412, "pl": 0.259191, "sl": 0.182711, "uk": 0.223502}, "lang_wp": {"ab": 0.415775, "cn": 0.388268, "cy": 0.371134, "de": 0.435368, "en": 0.326484, "es": 0.539179, "eu": 0.410864, "fr": 0.445572, "gl": 0.369231, "he": 0.379157, "hi": 0.403808, "ir": 0.327236, "kz": 0.427966, "la": 0.386029, "pl": 0.409007, "sl": 0.319253, "uk": 0.396313}, "lang_parsed": {"ab": 0.994652, "cn": 0.998138, "cy": 0.995876, "de": 0.994614, "en": 0.993151, "es": 0.994403, "eu": 0.997214, "fr": 0.99262, "gl": 0.997802, "he": 0.997783, "hi": 0.98998, "ir": 0.989837, "kz": 0.970339, "la": 0.977941, "pl": 1.0, "sl": 0.996071, "uk": 0.995392}, "lang_mae": {"ab": 1.371, "cn": 1.4981, "cy": 1.6418, "de": 1.37, "en": 1.7586, "es": 1.1032, "eu": 1.486, "fr": 1.3309, "gl": 1.5991, "he": 1.4422, "hi": 1.5668, "ir": 1.8029, "kz": 1.31, "la": 1.6692, "pl": 1.5625, "sl": 1.8679, "uk": 1.4769}, "confusion": {"2": {"4": 0.0677, "5": 0.3656, "1": 0.1214, "3": 
0.2357, "2": 0.0247, "6": 0.1849}, "1": {"3": 0.2608, "1": 0.3499, "2": 0.0582, "5": 0.1839, "6": 0.1126, "4": 0.0347}, "4": {"3": 0.1364, "5": 0.4961, "4": 0.0473, "6": 0.2608, "1": 0.0382, "2": 0.0212}, "5": {"5": 0.5188, "2": 0.016, "3": 0.0721, "6": 0.3443, "4": 0.0328, "1": 0.016}, "3": {"5": 0.4767, "4": 0.0665, "3": 0.1663, "2": 0.0237, "6": 0.2106, "1": 0.0562}, "6": {"5": 0.4899, "4": 0.0308, "3": 0.0645, "6": 0.3744, "2": 0.0115, "1": 0.0289}}}, {"model": "cjvt/GaMS3-12B-Instruct", "avg_exact": 0.223201, "avg_wp": 0.38902, "avg_bias": 0.7368, "avg_parsed": 0.999876, "avg_mae": 1.4632, "total": 8060, "lang_exact": {"ab": 0.301339, "cn": 0.212613, "cy": 0.157258, "de": 0.217014, "en": 0.235931, "es": 0.210238, "eu": 0.18254, "fr": 0.183124, "gl": 0.223938, "he": 0.25, "hi": 0.268116, "ir": 0.200393, "kz": 0.155894, "la": 0.536913, "pl": 0.2375, "sl": 0.193858, "uk": 0.21692}, "lang_wp": {"ab": 0.459821, "cn": 0.364865, "cy": 0.336694, "de": 0.378472, "en": 0.452381, "es": 0.390311, "eu": 0.371693, "fr": 0.35368, "gl": 0.399614, "he": 0.436024, "hi": 0.424819, "ir": 0.311395, "kz": 0.368821, "la": 0.583893, "pl": 0.372321, "sl": 0.350288, "uk": 0.402386}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 1.0, "de": 1.0, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 0.997831}, "lang_mae": {"ab": 1.2612, "cn": 1.5964, "cy": 1.5323, "de": 1.5556, "en": 1.1645, "es": 1.3346, "eu": 1.4603, "fr": 1.4237, "gl": 1.444, "he": 1.2559, "hi": 1.3967, "ir": 1.8468, "kz": 1.4335, "la": 1.255, "pl": 1.7446, "sl": 1.5566, "uk": 1.3543}, "confusion": {"2": {"4": 0.3051, "1": 0.0567, "5": 0.2137, "3": 0.2594, "6": 0.1563, "2": 0.0089}, "3": {"4": 0.269, "5": 0.3915, "6": 0.1516, "3": 0.1715, "1": 0.0114, "2": 0.005}, "1": {"1": 0.3159, "6": 0.1182, "4": 0.1336, "3": 0.3655, "5": 0.0379, "2": 0.0289}, "4": {"4": 0.2047, "6": 0.1785, "3": 0.1604, "5": 0.4396, "1": 0.0114, "2": 0.0054}, "5": 
{"5": 0.3671, "4": 0.2143, "6": 0.2339, "3": 0.1752, "1": 0.0072, "2": 0.0022}, "6": {"6": 0.3165, "5": 0.2314, "4": 0.243, "3": 0.1901, "1": 0.0165, "2": 0.0025}}}, {"model": "swiss-ai/Apertus-8B-Instruct-2509", "avg_exact": 0.226841, "avg_wp": 0.368914, "avg_bias": 1.3785, "avg_parsed": 0.989763, "avg_mae": 1.7385, "total": 8010, "lang_exact": {"ab": 0.238202, "cn": 0.22037, "cy": 0.205231, "de": 0.247818, "en": 0.207877, "es": 0.277574, "eu": 0.224543, "fr": 0.340502, "gl": 0.224371, "he": 0.132812, "hi": 0.208487, "ir": 0.20751, "kz": 0.165385, "la": 0.506944, "pl": 0.223022, "sl": 0.197697, "uk": 0.178022}, "lang_wp": {"ab": 0.347191, "cn": 0.387963, "cy": 0.325956, "de": 0.410122, "en": 0.364333, "es": 0.418199, "eu": 0.390339, "fr": 0.537634, "gl": 0.341393, "he": 0.24707, "hi": 0.353321, "ir": 0.368577, "kz": 0.35, "la": 0.548611, "pl": 0.317446, "sl": 0.330134, "uk": 0.331868}, "lang_parsed": {"ab": 0.966292, "cn": 0.998148, "cy": 0.993964, "de": 1.0, "en": 1.0, "es": 0.996324, "eu": 0.986945, "fr": 0.996416, "gl": 0.996132, "he": 0.986328, "hi": 0.990775, "ir": 0.998024, "kz": 0.903846, "la": 0.972222, "pl": 0.998201, "sl": 0.996161, "uk": 0.984615}, "lang_mae": {"ab": 1.8721, "cn": 1.5158, "cy": 1.913, "de": 1.5445, "en": 1.6893, "es": 1.4668, "eu": 1.6323, "fr": 1.0917, "gl": 1.8699, "he": 2.2693, "hi": 1.8994, "ir": 1.7446, "kz": 1.4723, "la": 1.3929, "pl": 2.1369, "sl": 1.8825, "uk": 1.8438}, "confusion": {"2": {"6": 0.5169, "3": 0.1884, "1": 0.0856, "2": 0.1304, "5": 0.0649, "4": 0.0138}, "1": {"6": 0.3163, "3": 0.2203, "1": 0.2477, "4": 0.0165, "5": 0.0238, "2": 0.1755}, "3": {"6": 0.6273, "3": 0.1288, "1": 0.0317, "2": 0.0791, "4": 0.0187, "5": 0.1144}, "4": {"6": 0.6785, "5": 0.1539, "3": 0.0828, "1": 0.0321, "4": 0.0178, "2": 0.0349}, "5": {"5": 0.1532, "6": 0.7654, "3": 0.0244, "1": 0.0222, "2": 0.0185, "4": 0.0163}, "6": {"6": 0.7995, "5": 0.1261, "2": 0.0068, "4": 0.0161, "3": 0.0279, "1": 0.0237}}}, {"model": 
"speakleash/Bielik-1.5B-v3.0-Instruct", "avg_exact": 0.206902, "avg_wp": 0.363133, "avg_bias": -0.3521, "avg_parsed": 0.999854, "avg_mae": 1.7134, "total": 6868, "lang_exact": {"ab": 0.178694, "cn": 0.230932, "cy": 0.164927, "de": 0.225746, "en": 0.265306, "es": 0.238921, "eu": 0.143258, "fr": 0.210832, "gl": 0.229167, "he": 0.174263, "hi": 0.178082, "ir": 0.164241, "kz": 0.114094, "la": 0.126866, "pl": 0.268078, "sl": 0.268537, "uk": 0.143791}, "lang_wp": {"ab": 0.302405, "cn": 0.377119, "cy": 0.317328, "de": 0.403918, "en": 0.452806, "es": 0.416185, "eu": 0.296348, "fr": 0.404255, "gl": 0.436343, "he": 0.323056, "hi": 0.286301, "ir": 0.277547, "kz": 0.288591, "la": 0.164179, "pl": 0.417989, "sl": 0.428858, "uk": 0.289216}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 1.0, "de": 1.0, "en": 0.997449, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 2.1512, "cn": 1.6589, "cy": 1.9019, "de": 1.5392, "en": 1.2506, "es": 1.4644, "eu": 2.014, "fr": 1.4255, "gl": 1.1968, "he": 1.8365, "hi": 2.2658, "ir": 2.1497, "kz": 2.0738, "la": 2.6493, "pl": 1.5185, "sl": 1.4469, "uk": 1.9869}, "confusion": {"1": {"1": 0.5015, "5": 0.2251, "6": 0.0336, "2": 0.0809, "4": 0.1333, "3": 0.0257}, "2": {"1": 0.4241, "5": 0.1975, "4": 0.2221, "2": 0.1047, "6": 0.0269, "3": 0.0247}, "3": {"1": 0.3597, "2": 0.0854, "5": 0.2182, "4": 0.2877, "3": 0.0253, "6": 0.0237}, "4": {"1": 0.3423, "5": 0.2571, "6": 0.0384, "4": 0.2678, "2": 0.0806, "3": 0.0138}, "5": {"1": 0.2749, "5": 0.3123, "6": 0.0447, "4": 0.2603, "2": 0.0868, "3": 0.021}, "6": {"1": 0.2752, "5": 0.3571, "2": 0.0539, "6": 0.0585, "4": 0.2307, "3": 0.0246}}}, {"model": "speakleash/Bielik-4.5B-v3.0-Instruct", "avg_exact": 0.188847, "avg_wp": 0.338818, "avg_bias": 1.1003, "avg_parsed": 1.0, "avg_mae": 1.7567, "total": 6868, "lang_exact": {"ab": 0.154639, "cn": 0.152542, "cy": 0.17119, "de": 0.169776, "en": 0.191327, "es": 0.22158, 
"eu": 0.129213, "fr": 0.193424, "gl": 0.196759, "he": 0.201072, "hi": 0.191781, "ir": 0.2079, "kz": 0.255034, "la": 0.462687, "pl": 0.17284, "sl": 0.162325, "uk": 0.202614}, "lang_wp": {"ab": 0.286942, "cn": 0.313559, "cy": 0.331942, "de": 0.29291, "en": 0.359694, "es": 0.32948, "eu": 0.29073, "fr": 0.330754, "gl": 0.359954, "he": 0.348525, "hi": 0.349315, "ir": 0.373181, "kz": 0.489933, "la": 0.55597, "pl": 0.324515, "sl": 0.313627, "uk": 0.367647}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 1.0, "de": 1.0, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.8007, "cn": 1.786, "cy": 1.739, "de": 1.9888, "en": 1.5357, "es": 1.9094, "eu": 2.0674, "fr": 1.7988, "gl": 1.5995, "he": 1.6756, "hi": 1.7342, "ir": 1.5738, "kz": 1.1812, "la": 1.2015, "pl": 1.8554, "sl": 1.8858, "uk": 1.6471}, "confusion": {"2": {"4": 0.1429, "5": 0.6133, "1": 0.101, "3": 0.0868, "6": 0.0561}, "3": {"5": 0.6411, "4": 0.1186, "1": 0.0822, "6": 0.0909, "3": 0.0672}, "1": {"1": 0.1598, "4": 0.1696, "5": 0.5276, "3": 0.1016, "6": 0.0414}, "4": {"4": 0.0875, "5": 0.6454, "1": 0.086, "6": 0.1266, "3": 0.0537, "2": 0.0008}, "5": {"5": 0.6694, "4": 0.0502, "6": 0.1763, "3": 0.0247, "1": 0.0795}, "6": {"5": 0.5855, "4": 0.0691, "3": 0.0187, "6": 0.2377, "1": 0.089}}}, {"model": "utter-project/EuroLLM-9B-Instruct-2512", "avg_exact": 0.205604, "avg_wp": 0.316168, "avg_bias": 1.7841, "avg_parsed": 0.999874, "avg_mae": 2.0126, "total": 7923, "lang_exact": {"ab": 0.239631, "cn": 0.212844, "cy": 0.219959, "de": 0.157068, "en": 0.136771, "es": 0.235727, "eu": 0.188679, "fr": 0.194946, "gl": 0.197292, "he": 0.171111, "hi": 0.233886, "ir": 0.253359, "kz": 0.102041, "la": 0.534722, "pl": 0.213144, "sl": 0.195777, "uk": 0.177489}, "lang_wp": {"ab": 0.35023, "cn": 0.317431, "cy": 0.385947, "de": 0.254799, "en": 0.25, "es": 0.346225, "eu": 0.295148, "fr": 0.311372, "gl": 0.296905, "he": 0.278889, 
"hi": 0.3407, "ir": 0.37524, "kz": 0.265306, "la": 0.604167, "pl": 0.29929, "sl": 0.287908, "uk": 0.287879}, "lang_parsed": {"ab": 1.0, "cn": 1.0, "cy": 0.997963, "de": 1.0, "en": 1.0, "es": 1.0, "eu": 1.0, "fr": 1.0, "gl": 1.0, "he": 1.0, "hi": 1.0, "ir": 1.0, "kz": 1.0, "la": 1.0, "pl": 1.0, "sl": 1.0, "uk": 1.0}, "lang_mae": {"ab": 1.9885, "cn": 2.0661, "cy": 1.5184, "de": 2.3874, "en": 2.0987, "es": 1.8858, "eu": 2.0377, "fr": 2.0018, "gl": 2.0406, "he": 2.1822, "hi": 2.0166, "ir": 1.7332, "kz": 1.8612, "la": 1.1319, "pl": 2.2274, "sl": 2.2188, "uk": 2.0649}, "confusion": {"1": {"6": 0.4478, "1": 0.294, "3": 0.0907, "2": 0.0833, "5": 0.0815, "4": 0.0027}, "2": {"1": 0.068, "3": 0.1003, "6": 0.7127, "2": 0.0536, "5": 0.0605, "4": 0.0048}, "3": {"6": 0.8115, "1": 0.0237, "2": 0.0245, "3": 0.0827, "5": 0.0532, "4": 0.0043}, "4": {"6": 0.8735, "2": 0.0102, "1": 0.0177, "3": 0.0633, "5": 0.0327, "4": 0.0027}, "5": {"6": 0.9033, "3": 0.0536, "5": 0.0186, "1": 0.0186, "2": 0.006}, "6": {"6": 0.9274, "1": 0.0111, "3": 0.0384, "5": 0.0179, "2": 0.0017, "4": 0.0034}}}, {"model": "CYFRAGOVPL/pllum-12b-nc-chat-250715", "avg_exact": 0.107897, "avg_wp": 0.186483, "avg_bias": -0.1812, "avg_parsed": 0.554114, "avg_mae": 1.9248, "total": 9092, "lang_exact": {"ab": 0.093333, "cn": 0.11, "cy": 0.1625, "de": 0.111667, "en": 0.099808, "es": 0.11, "eu": 0.077922, "fr": 0.113333, "gl": 0.119231, "he": 0.085, "hi": 0.12, "ir": 0.080756, "kz": 0.088415, "la": 0.15873, "pl": 0.125, "sl": 0.096667, "uk": 0.103774}, "lang_wp": {"ab": 0.17, "cn": 0.180833, "cy": 0.283929, "de": 0.180833, "en": 0.179463, "es": 0.165833, "eu": 0.15368, "fr": 0.199167, "gl": 0.232692, "he": 0.154167, "hi": 0.1975, "ir": 0.150344, "kz": 0.185976, "la": 0.224868, "pl": 0.185833, "sl": 0.168333, "uk": 0.185849}, "lang_parsed": {"ab": 0.49, "cn": 0.528333, "cy": 0.789286, "de": 0.481667, "en": 0.495202, "es": 0.443333, "eu": 0.649351, "fr": 0.551667, "gl": 0.548077, "he": 0.493333, "hi": 0.586667, "ir": 0.594502, 
"kz": 0.631098, "la": 0.666667, "pl": 0.541667, "sl": 0.546667, "uk": 0.520755}, "lang_mae": {"ab": 1.8503, "cn": 2.0158, "cy": 1.7421, "de": 1.8478, "en": 1.8023, "es": 1.7556, "eu": 2.4033, "fr": 1.7976, "gl": 1.5123, "he": 2.0101, "hi": 1.9062, "ir": 2.2168, "kz": 1.9179, "la": 2.3413, "pl": 1.9938, "sl": 2.0305, "uk": 1.7826}, "confusion": {"1": {"6": 0.229, "3": 0.0519, "1": 0.5224, "2": 0.1055, "5": 0.0376, "4": 0.0537}, "2": {"3": 0.0562, "4": 0.1054, "6": 0.2799, "1": 0.363, "5": 0.0843, "2": 0.1112}, "3": {"1": 0.2962, "4": 0.1346, "3": 0.071, "2": 0.0783, "6": 0.295, "5": 0.1248}, "4": {"2": 0.0586, "1": 0.2669, "4": 0.1476, "3": 0.0779, "5": 0.1547, "6": 0.2942}, "5": {"5": 0.1895, "4": 0.1581, "1": 0.2555, "6": 0.3026, "2": 0.0513, "3": 0.0429}, "6": {"4": 0.14, "5": 0.1944, "6": 0.2419, "3": 0.0521, "1": 0.3206, "2": 0.0509}}}, {"model": "mistralai/Mistral-Small-3.1-24B-Instruct-2503", "avg_exact": 0.094012, "avg_wp": 0.168646, "avg_bias": 0.3332, "avg_parsed": 0.36242, "avg_mae": 1.2653, "total": 7999, "lang_exact": {"ab": 0.105618, "cn": 0.092593, "cy": 0.104628, "de": 0.099476, "en": 0.098468, "es": 0.09375, "eu": 0.078329, "fr": 0.129032, "gl": 0.090909, "he": 0.080078, "hi": 0.111111, "ir": 0.088933, "kz": 0.111538, "la": 0.034722, "pl": 0.06295, "sl": 0.101727, "uk": 0.074725}, "lang_wp": {"ab": 0.186517, "cn": 0.167593, "cy": 0.165996, "de": 0.173647, "en": 0.173961, "es": 0.176471, "eu": 0.159269, "fr": 0.206093, "gl": 0.166344, "he": 0.158203, "hi": 0.19774, "ir": 0.153162, "kz": 0.201923, "la": 0.059028, "pl": 0.130396, "sl": 0.177543, "uk": 0.146154}, "lang_parsed": {"ab": 0.4, "cn": 0.366667, "cy": 0.331992, "de": 0.354276, "en": 0.371991, "es": 0.380515, "eu": 0.318538, "fr": 0.399642, "gl": 0.353965, "he": 0.378906, "hi": 0.416196, "ir": 0.326087, "kz": 0.415385, "la": 0.201389, "pl": 0.31295, "sl": 0.414587, "uk": 0.314286}, "lang_mae": {"ab": 1.3034, "cn": 1.2778, "cy": 1.2, "de": 1.1527, "en": 1.2647, "es": 1.2367, "eu": 1.1803, "fr": 
1.148, "gl": 1.2623, "he": 1.3557, "hi": 1.2579, "ir": 1.2485, "kz": 1.2315, "la": 2.0345, "pl": 1.3563, "sl": 1.4074, "uk": 1.1888}, "confusion": {"1": {"5": 0.1048, "1": 0.3238, "3": 0.1683, "4": 0.2127, "6": 0.0381, "2": 0.1524}, "2": {"5": 0.3046, "1": 0.1996, "3": 0.0966, "6": 0.0231, "2": 0.0987, "4": 0.2773}, "3": {"2": 0.0522, "3": 0.0703, "5": 0.3775, "4": 0.3735, "1": 0.0904, "6": 0.0361}, "4": {"5": 0.5109, "4": 0.2945, "1": 0.0782, "2": 0.0382, "6": 0.0455, "3": 0.0327}, "5": {"5": 0.6578, "4": 0.1765, "6": 0.0606, "1": 0.0588, "3": 0.0285, "2": 0.0178}, "6": {"5": 0.7234, "4": 0.1263, "3": 0.006, "1": 0.0441, "2": 0.0261, "6": 0.0741}}}];
  // ── second-dataset constants (served by the ds*() accessors when activeDS is not 1) ──
  // Language codes shown as columns / filter chips for the second dataset.
  const ALL_LANGS_2  = ["ab", "eu", "cn", "en", "fr", "gl", "de", "he", "hi", "ir", "kz", "la", "pl", "sl", "es", "uk", "cy"];
  // Per-language counts, rendered as "n=…" under each language column header.
  const LANG_COUNTS_2 = {"ab": 600, "cn": 600, "cy": 560, "de": 600, "en": 521, "es": 600, "eu": 462, "fr": 600, "gl": 520, "he": 600, "hi": 600, "ir": 582, "kz": 328, "la": 189, "pl": 600, "sl": 600, "uk": 530};
  // Per-language counts keyed by rating "1".."6"; drives the distribution table in renderDist().
  const LANG_DIST_2   = {"ab": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "cn": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "cy": {"1": 60, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "de": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "en": {"1": 21, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "es": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "eu": {"1": 10, "2": 100, "3": 52, "4": 100, "5": 100, "6": 100}, "fr": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "gl": {"1": 20, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "he": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "hi": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "ir": {"1": 82, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "kz": {"1": 6, "2": 53, "3": 37, "4": 100, "5": 100, "6": 32}, "la": {"1": 34, "2": 3, "3": 3, "4": 35, "5": 14, "6": 100}, "pl": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "sl": {"1": 100, "2": 100, "3": 100, "4": 100, "5": 100, "6": 100}, "uk": {"1": 40, "2": 100, "3": 90, "4": 100, "5": 100, "6": 100}};
  // Value returned by dsTotalPreds() for the second dataset (presumably the total
  // number of predictions across all models — confirm against the generator).
  const TOTAL_PREDS_2 = 153253;

  /**
   * Display label for a language code.
   * Uses the LANG_NAMES lookup when it has a truthy entry; otherwise the code
   * itself is shown upper-cased.
   */
  function langName(c) {
    const known = LANG_NAMES[c];
    if (known) return known;
    return c.toUpperCase();
  }

  /**
   * Map a score to a red → yellow → green CSS colour.
   * Below 0.5 red stays at 220 while green ramps up; above 0.5 green stays at
   * 160 while red ramps down. Blue is fixed at 40.
   * @param {number} v score, used by callers with values in [0, 1]
   * @returns {string} an `rgb(r,g,40)` string
   */
  function scoreColor(v) {
    let red = 220;
    if (!(v < 0.5)) {
      red = Math.round(220 - (v - 0.5) * 2 * 160);
    }
    let green = 160;
    if (!(v > 0.5)) {
      green = Math.round(v * 2 * 160);
    }
    return 'rgb(' + red + ',' + green + ',40)';
  }
  /** Foreground colour for a score cell: white above 0.35, light grey otherwise. */
  function textColor(v) {
    if (v > 0.35) return '#fff';
    return '#ccc';
  }

  // ── mutable UI state shared by the render functions below ──
  let selLangs  = [];     // language codes picked via the chips; empty means "All"
  let sortCol   = 'avg';  // column the table is sorted by: 'avg' or a language code
  let sortDir   = -1;     // multiplier applied to the sort comparator: -1 = descending (↓), 1 = ascending (↑)
  let metric    = 'wp';   // metric shown in the table: 'wp', 'ex', 'parsed' or 'mae' (see setMetric/getScore)
  let activeDS  = 1;  // 1 = FineWeb, 2 = FinePDF

  // Accessors for the currently selected dataset: dataset 1 reads the
  // un-suffixed globals, any other activeDS value reads the *_2 variants.

  /** Per-model result rows of the active dataset. */
  function dsRows() {
    if (activeDS === 1) return ALL_ROWS;
    return ALL_ROWS_2;
  }

  /** Language codes of the active dataset. */
  function dsLangs() {
    if (activeDS === 1) return ALL_LANGS;
    return ALL_LANGS_2;
  }

  /** Per-language counts of the active dataset. */
  function dsLangCounts() {
    if (activeDS === 1) return LANG_COUNTS;
    return LANG_COUNTS_2;
  }

  /** Per-language, per-rating counts of the active dataset. */
  function dsLangDist() {
    if (activeDS === 1) return LANG_DIST;
    return LANG_DIST_2;
  }

  /** Total prediction count of the active dataset (dataset 1 is a literal). */
  function dsTotalPreds() {
    if (activeDS === 1) return 292555;
    return TOTAL_PREDS_2;
  }

  /**
   * Switch the page to another dataset and redraw every section.
   *
   * Resets the language filter and the sort settings, re-renders the table and
   * all charts, and rebuilds the confusion-matrix model dropdown so that its
   * option indices match the rows of the newly selected dataset.
   *
   * @param {number|string} ds dataset id: 1 or 2 (numeric strings are accepted).
   *   Any other value, or 2 when HAS_SECOND is falsy, is ignored.
   */
  window.setDataset = function(ds) {
    // Normalise first: the accessors compare activeDS with `=== 1`, so a string
    // or an out-of-range value would otherwise silently select dataset 2.
    const target = Number(ds);
    if (target !== 1 && target !== 2) return;
    if (target === 2 && !HAS_SECOND) return;

    activeDS = target;
    document.getElementById('ds-btn-1').classList.toggle('active', target === 1);
    document.getElementById('ds-btn-2').classList.toggle('active', target === 2);

    selLangs = [];  // reset language filter when switching dataset
    sortCol  = 'avg';
    sortDir  = -1;

    render();
    renderChart();
    renderDist();
    renderBias();
    renderCritical();

    // Rebuild the confusion dropdown for the new dataset: clear the old options,
    // then reuse the shared helper instead of duplicating its loop here.
    const sel = document.getElementById('confModelSelect');
    sel.innerHTML = '';
    populateConfSelect();
    renderConfusion();
  };

  /**
   * Select the metric shown in the leaderboard table, highlight the matching
   * toolbar button and redraw the table.
   * @param {string} m metric key ('wp', 'ex', 'parsed' or 'mae')
   */
  window.setMetric = function(m) {
    metric = m;
    const buttons = [
      ['btn-wp', 'wp'],
      ['btn-ex', 'ex'],
      ['btn-parsed', 'parsed'],
      ['btn-mae', 'mae'],
    ];
    for (const [buttonId, key] of buttons) {
      document.getElementById(buttonId).classList.toggle('active', m === key);
    }
    render();
  };

  /**
   * Read the currently selected metric from a result row.
   *
   * @param {object} row  one model's result record
   * @param {?string} lang language code for the per-language value; a falsy
   *   value selects the row-level average instead
   * @returns {number|undefined} the value, or undefined when the metric is
   *   unknown or the row does not carry it
   */
  function getScore(row, lang) {
    const perLang = Boolean(lang);
    switch (metric) {
      case 'wp':
        return perLang ? row.lang_wp[lang] : row.avg_wp;
      case 'ex':
        return perLang ? row.lang_exact[lang] : row.avg_exact;
      case 'parsed':
        // rows may lack the parsed fields entirely, hence the guards
        if (perLang) return row.lang_parsed ? row.lang_parsed[lang] : undefined;
        return row.avg_parsed ?? undefined;
      case 'mae':
        if (perLang) return row.lang_mae ? row.lang_mae[lang] : undefined;
        return row.avg_mae ?? undefined;
      default:
        return undefined;
    }
  }

  // chips
  const chipsEl = document.getElementById('chips');

  /**
   * Rebuild the language filter chips: an "All" chip followed by one chip per
   * language of the active dataset. Clicking a chip updates selLangs and
   * re-renders the table.
   */
  function renderChips() {
    chipsEl.innerHTML = '';

    const addChip = (label, key, active) => {
      const chip = document.createElement('span');
      chip.className = active ? 'chip active' : 'chip';
      chip.textContent = label;
      chip.onclick = () => {
        if (key === '__all__') {
          // "All" clears the filter
          selLangs = [];
        } else {
          // toggle this language in the selection
          const at = selLangs.indexOf(key);
          if (at >= 0) {
            selLangs.splice(at, 1);
          } else {
            selLangs.push(key);
          }
        }
        render();
      };
      chipsEl.appendChild(chip);
    };

    addChip('All', '__all__', selLangs.length === 0);
    for (const code of dsLangs()) {
      addChip(langName(code), code, selLangs.includes(code));
    }
  }

  // global chart
  let chartInstance = null;

  /**
   * Draw the global horizontal bar chart (weighted score and exact accuracy per
   * model, best weighted score first) for the active dataset, replacing any
   * chart drawn by a previous call.
   */
  function renderChart() {
    const ranked = [...dsRows()].sort((a, b) => b.avg_wp - a.avg_wp);
    const toPercent = fraction => Number((fraction * 100).toFixed(1));
    const labels = ranked.map(r => r.model);
    const wpData = ranked.map(r => toPercent(r.avg_wp));
    const exData = ranked.map(r => toPercent(r.avg_exact));

    const canvas = document.getElementById('globalChart');
    const ctx = canvas.getContext('2d');
    if (chartInstance) {
      chartInstance.destroy();
    }

    // grow the canvas with the number of models, never below 320px
    const heightPx = Math.max(320, ranked.length * 32 + 80);
    canvas.style.height = `${heightPx}px`;

    const monoFont = size => ({ family: 'JetBrains Mono', size });
    const series = (label, data, backgroundColor) => ({
      label, data, backgroundColor, borderRadius: 3, barPercentage: 0.72,
    });

    const plugins = {
      legend: {
        position: 'bottom',
        labels: { color: '#94a3b8', font: monoFont(11), boxWidth: 14, padding: 20 },
      },
      tooltip: {
        backgroundColor: '#1e2a3a',
        titleColor: '#e2e8f0',
        bodyColor: '#94a3b8',
        callbacks: { label: item => ` ${item.dataset.label}: ${item.parsed.x}%` },
      },
    };

    const scales = {
      x: {
        min: 0,
        max: 108,
        grid: { color: '#1a2236' },
        ticks: { color: '#64748b', font: monoFont(10), callback: tick => `${tick}%` },
        title: { display: true, text: 'Percent (%)', color: '#64748b', font: monoFont(11) },
      },
      y: {
        grid: { display: false },
        ticks: { color: '#cbd5e1', font: monoFont(11) },
      },
    };

    chartInstance = new Chart(ctx, {
      type: 'bar',
      data: {
        labels,
        datasets: [
          series('Weighted Score', wpData, '#2563eb'),
          series('Exact Accuracy', exData, '#16a34a'),
        ],
      },
      options: {
        indexAxis: 'y',
        responsive: true,
        maintainAspectRatio: false,
        animation: { duration: 500 },
        plugins,
        scales,
      },
    });
  }

  // table
  /**
   * Redraw the language chips, the leaderboard table (header + body) and the
   * footer line for the active dataset, metric, language filter and sort state.
   *
   * With no language selected the "avg" column is the row-level average of the
   * metric; with a selection it is the mean over the selected languages that
   * have a value. Rows without any average for the current metric are omitted.
   *
   * NOTE(review): for the MAE metric lower is better, yet the default
   * descending sort still lists the largest error first — confirm intent.
   */
  function render() {
    renderChips();
    const byName = (a, b) => langName(a).localeCompare(langName(b));
    const visLangs = (selLangs.length > 0 ? [...selLangs] : [...dsLangs()]).sort(byName);
    const langCounts = dsLangCounts();
    const isMae = metric === 'mae';

    const rows = dsRows().map(row => {
      const ls = {};
      visLangs.forEach(l => {
        const v = getScore(row, l);
        if (v != null) ls[l] = v;
      });
      const vals = Object.values(ls);
      let avg;
      if (selLangs.length === 0) {
        avg = getScore(row, null);
      } else {
        avg = vals.length ? vals.reduce((a, b) => a + b, 0) / vals.length : null;
      }
      return { ...row, _avg: avg, _ls: ls };
      // `!= null` also drops undefined averages (e.g. a row without avg_mae),
      // which would otherwise reach toFixed() below and throw.
    }).filter(r => r._avg != null);

    rows.sort((a, b) => {
      // rows missing the sorted language sink below every real value
      const va = sortCol === 'avg' ? a._avg : (a._ls[sortCol] ?? -1);
      const vb = sortCol === 'avg' ? b._avg : (b._ls[sortCol] ?? -1);
      return sortDir * (va - vb);
    });

    const head = document.getElementById('lb-head');
    const mkBtn = (label, col) => {
      const active = sortCol === col;
      const arrow  = active ? (sortDir === -1 ? ' ↓' : ' ↑') : '';
      return `<button class="sort-btn${active ? ' active' : ''}" data-col="${col}">${label}${arrow}</button>`;
    };
    const countLabel = l => (langCounts[l] !== undefined ? 'n=' + langCounts[l] : '');
    const ml = metric === 'wp' ? 'Wtd Score' : metric === 'ex' ? 'Exact Acc' : metric === 'parsed' ? 'Parse Rate' : 'MAE';
    head.innerHTML = `<tr>
      <th class="rank-col"></th>
      <th class="model-col"><span style="font-size:10px;text-transform:uppercase;letter-spacing:.08em;color:#475569">Model</span></th>
      <th class="avg-col">${mkBtn(ml + ' avg', 'avg')}</th>
      ${visLangs.map(l => `<th class="lang-col">${mkBtn(langName(l), l)}<br><span style="font-size:9px;color:#475569;font-weight:400;letter-spacing:0">${countLabel(l)}</span></th>`).join('')}
    </tr>`;
    head.querySelectorAll('.sort-btn').forEach(btn => {
      btn.onclick = () => {
        const col = btn.dataset.col;
        if (sortCol === col) {
          sortDir = -sortDir;   // same column: flip direction
        } else {
          sortCol = col;        // new column: start descending
          sortDir = -1;
        }
        render();
      };
    });

    const fmtVal = v => (isMae ? v.toFixed(2) : (v * 100).toFixed(1) + '%');
    // MAE: lower is better so invert the colour key (0=best=green, 5=worst=red)
    const colorKey = v => (isMae ? 1 - Math.min(v, 5) / 5 : v);

    const body = document.getElementById('lb-body');
    body.innerHTML = rows.map((row, i) => {
      const langCells = visLangs.map(l => {
        const v = row._ls[l];
        if (v == null) return `<td class="empty">—</td>`;
        const key = colorKey(v);
        return `<td class="score" style="background:${scoreColor(key)};color:${textColor(key)}">${fmtVal(v)}</td>`;
      }).join('');
      const avgKey = colorKey(row._avg);
      return `<tr>
        <td class="rank">${i + 1}</td>
        <td class="model" title="${row.model}">${row.model}</td>
        <td class="score" style="background:${scoreColor(avgKey)};color:${textColor(avgKey)};font-size:13px;font-weight:800">${fmtVal(row._avg)}</td>
        ${langCells}
      </tr>`;
    }).join('');

    // use the active dataset's total rather than a number fixed to dataset 1
    document.getElementById('footer').textContent =
      `${dsTotalPreds()} predictions · ${dsLangs().length} languages · ${dsRows().length} models`;
  }

  // ── dataset distribution ──
  /**
   * Render the "dataset distribution" table: one row per language of the active
   * dataset with the count for each rating 1–6, a small bar scaled to the
   * largest cell, and the row total. Languages whose total is 0 are omitted.
   * When the active dataset has no distribution data a placeholder row is shown
   * instead and the body is cleared.
   */
  function renderDist() {
    const scores = [1, 2, 3, 4, 5, 6];
    const langs  = [...dsLangs()];
    // Look at the ACTIVE dataset's distribution, not always the first one.
    const dist   = dsLangDist();
    const headEl = document.getElementById('dist-head');
    const bodyEl = document.getElementById('dist-body');

    if (!dist || Object.keys(dist).length === 0) {
      headEl.innerHTML =
        '<tr><th class="lang-h" colspan="8" style="color:#475569;text-align:center;padding:20px">No source directory provided — run with --source-dir to enable this section.</th></tr>';
      // drop rows left over from a previously rendered dataset
      bodyEl.innerHTML = '';
      return;
    }

    // largest single cell, used to scale the bars
    let maxCount = 0;
    langs.forEach(l => {
      const d = dist[l] || {};
      scores.forEach(s => { if ((d[s] || 0) > maxCount) maxCount = d[s] || 0; });
    });

    headEl.innerHTML = `<tr>
      <th class="lang-h">Language</th>
      ${scores.map(s => `<th class="score-h">Rating ${s}</th>`).join('')}
      <th class="total-h">Total</th>
    </tr>`;

    bodyEl.innerHTML = langs.map(lang => {
      const d     = dist[lang] || {};
      const total = Object.values(d).reduce((a, b) => a + b, 0);
      if (total === 0) return '';
      const cells = scores.map(s => {
        const n   = d[s] || 0;
        const bar = maxCount > 0 ? Math.round((n / maxCount) * 48) : 0;  // bar is at most 48px wide
        return `<td class="count-d">${
          n > 0
            ? `${n}<span class="dist-bar" style="width:${bar}px"></span>`
            : '<span style="color:#2d3748">—</span>'
        }</td>`;
      }).join('');
      return `<tr>
        <td class="lang-d">${langName(lang)}</td>
        ${cells}
        <td class="total-d">${total.toLocaleString()}</td>
      </tr>`;
    }).join('');
  }

  // ── bias lollipop ──
  // Chart drawn by the last renderBias() call; kept so it can be destroyed
  // before the canvas is reused (same pattern as the global chart).
  let biasChartInstance = null;

  /**
   * Draw the per-model mean-error ("bias") bar chart for the active dataset,
   * sorted from most negative to most positive. Non-negative bars are green,
   * negative bars red. Any chart from a previous call is destroyed first so
   * switching datasets does not stack charts on the same canvas.
   */
  function renderBias() {
    const sorted = [...dsRows()].sort((a, b) => a.avg_bias - b.avg_bias);
    const labels = sorted.map(r => r.model);
    const values = sorted.map(r => +(r.avg_bias).toFixed(3));
    const colors = values.map(v => v >= 0 ? '#16a34a' : '#dc2626');

    const ctx = document.getElementById('biasChart').getContext('2d');
    if (biasChartInstance) biasChartInstance.destroy();

    const h   = Math.max(260, sorted.length * 26 + 40);
    document.getElementById('biasChartContainer').style.height = h + 'px';

    biasChartInstance = new Chart(ctx, {
      type: 'bar',
      data: { labels, datasets: [{
        label: 'Mean Error', data: values, backgroundColor: colors,
        borderRadius: 3, barPercentage: 0.45,
      }] },
      options: {
        indexAxis: 'y', responsive: true, maintainAspectRatio: false,
        animation: { duration: 400 },
        plugins: {
          legend: { display: false },
          tooltip: { backgroundColor: '#1e2a3a', callbacks: {
            // explicit "+" so positive and negative bias read symmetrically
            label: item => ` Bias: ${item.parsed.x > 0 ? '+' : ''}${item.parsed.x.toFixed(3)}`
          } }
        },
        scales: {
          x: { grid: { color: '#1a2236' },
            ticks: { color: '#64748b', font: { family: 'JetBrains Mono', size: 10 } },
            title: { display: true, text: 'Mean Error (pred − gt)', color: '#64748b', font: { family: 'JetBrains Mono', size: 10 } }
          },
          y: { grid: { display: false },
            ticks: { color: '#cbd5e1', font: { family: 'JetBrains Mono', size: 10 } }
          }
        }
      }
    });
  }

  // ── critical confusion ──
  // Chart drawn by the last renderCritical() call; kept so it can be destroyed
  // before the canvas is reused.
  let criticalChartInstance = null;

  /**
   * Draw the "critical confusion" chart for the active dataset: for each model
   * (best weighted score first) the percentage of true-low items (ratings 1–2)
   * predicted as high (5–6), and of true-high items predicted as low.
   *
   * Each confusion row is weighted by the sum of its own entries, so the result
   * is the weighted mean over the ground-truth rows in the group. Models
   * without data for a group get 0. Any chart from a previous call is destroyed
   * first so switching datasets does not stack charts on the same canvas.
   */
  function renderCritical() {
    const LOW  = new Set([1, 2]);
    const HIGH = new Set([5, 6]);
    const sorted = [...dsRows()].sort((a, b) => b.avg_wp - a.avg_wp);
    const labels = sorted.map(r => r.model);
    const lh = [];
    const hl = [];

    sorted.forEach(row => {
      let lhNumer = 0, lhDenom = 0, hlNumer = 0, hlDenom = 0;
      Object.entries(row.confusion || {}).forEach(([gtStr, preds]) => {
        const gt = Number.parseInt(gtStr, 10);
        const rowTotal = Object.values(preds).reduce((a, b) => a + b, 0);
        if (LOW.has(gt)) {
          lhDenom += rowTotal;
          Object.entries(preds).forEach(([pStr, v]) => {
            if (HIGH.has(Number.parseInt(pStr, 10))) lhNumer += v * rowTotal;
          });
        }
        if (HIGH.has(gt)) {
          hlDenom += rowTotal;
          Object.entries(preds).forEach(([pStr, v]) => {
            if (LOW.has(Number.parseInt(pStr, 10))) hlNumer += v * rowTotal;
          });
        }
      });
      lh.push(lhDenom > 0 ? +(lhNumer / lhDenom * 100).toFixed(1) : 0);
      hl.push(hlDenom > 0 ? +(hlNumer / hlDenom * 100).toFixed(1) : 0);
    });

    const ctx = document.getElementById('criticalChart').getContext('2d');
    if (criticalChartInstance) criticalChartInstance.destroy();

    const h   = Math.max(260, sorted.length * 26 + 60);
    document.getElementById('criticalChartContainer').style.height = h + 'px';

    criticalChartInstance = new Chart(ctx, {
      type: 'bar',
      data: { labels, datasets: [
        { label: 'Low→High (1–2 pred as 5–6)', data: lh, backgroundColor: '#dc2626', borderRadius: 3, barPercentage: 0.7 },
        { label: 'High→Low (5–6 pred as 1–2)', data: hl, backgroundColor: '#f97316', borderRadius: 3, barPercentage: 0.7 },
      ] },
      options: {
        indexAxis: 'y', responsive: true, maintainAspectRatio: false,
        animation: { duration: 400 },
        plugins: {
          legend: { position: 'bottom', labels: { color: '#94a3b8', font: { family: 'JetBrains Mono', size: 10 }, boxWidth: 12, padding: 16 } },
          tooltip: { backgroundColor: '#1e2a3a', callbacks: { label: item => ` ${item.dataset.label}: ${item.parsed.x}%` } }
        },
        scales: {
          x: { min: 0, grid: { color: '#1a2236' },
            ticks: { color: '#64748b', font: { family: 'JetBrains Mono', size: 10 }, callback: v => v + '%' },
            title: { display: true, text: '% of predictions within true class', color: '#64748b', font: { family: 'JetBrains Mono', size: 10 } }
          },
          y: { grid: { display: false },
            ticks: { color: '#cbd5e1', font: { family: 'JetBrains Mono', size: 10 } }
          }
        }
      }
    });
  }

  // ── confusion heatmap with dropdown ──
  let confChartInstance = null;

  /**
   * Append one <option> per model of the active dataset to the confusion-matrix
   * dropdown. The option value is the model's index in dsRows(). Existing
   * options are not removed.
   */
  function populateConfSelect() {
    const dropdown = document.getElementById('confModelSelect');
    for (const [index, { model }] of dsRows().entries()) {
      const option = document.createElement('option');
      option.value = index;
      option.textContent = model;
      dropdown.appendChild(option);
    }
  }

  /**
   * Draw the 6×6 confusion heatmap (ground truth on Y, prediction on X) for the
   * model selected in the #confModelSelect dropdown.
   *
   * The model is looked up in the ACTIVE dataset, matching how the dropdown is
   * populated. The cells are painted by a custom `afterDraw` plugin on top of an
   * invisible scatter chart: diagonal cells are green, cells three or more
   * ratings off are red, the rest blue, with opacity driven by the cell value.
   */
  window.renderConfusion = function() {
    const rows   = dsRows();
    const picked = Number.parseInt(document.getElementById('confModelSelect').value, 10);
    const idx    = Number.isNaN(picked) ? 0 : picked;
    // fall back to the first model if the dropdown index is stale/out of range
    const row    = rows[idx] ?? rows[0];
    const scores = [1, 2, 3, 4, 5, 6];

    const ctx = document.getElementById('confusionChart').getContext('2d');
    if (confChartInstance) {
      confChartInstance.destroy();
      confChartInstance = null;
    }
    if (!row) return;  // dataset has no models: nothing to draw

    const conf = row.confusion || {};
    const data = [];
    scores.forEach((gt, ri) => {
      const preds  = conf[gt] || {};
      const rowSum = Object.values(preds).reduce((a, b) => a + b, 0);
      scores.forEach((pred, ci) => {
        const v = rowSum > 0 ? (preds[pred] || 0) : 0;
        data.push({ x: ci, y: ri, v });
      });
    });

    document.getElementById('confusionChart').style.height = '340px';

    function cellColor(ri, ci, v) {
      if (ri === ci)          return `rgba(22,163,74,${0.15 + v * 0.85})`;
      if (Math.abs(ri-ci)>=3) return `rgba(220,38,38,${v * 0.9})`;
      return                         `rgba(37,99,235,${v * 0.75})`;
    }

    // axis ticks sit on cell centres 0..5; anything else gets no label
    const tickLabel = prefix => v =>
      (Number.isInteger(v) && v >= 0 && v <= 5) ? prefix + scores[v] : '';

    confChartInstance = new Chart(ctx, {
      type: 'scatter',
      data: { datasets: [{ data, pointRadius: 0 }] },
      options: {
        responsive: true, maintainAspectRatio: false, animation: { duration: 300 },
        plugins: {
          legend: { display: false },
          tooltip: { backgroundColor: '#1e2a3a', callbacks: {
            title: items => `GT ${scores[items[0].raw.y]} → Pred ${scores[items[0].raw.x]}`,
            label: item  => ` ${(item.raw.v * 100).toFixed(1)}% of true-class predictions`
          } }
        },
        scales: {
          x: {
            type: 'linear', min: -0.5, max: 5.5,
            ticks: {
              stepSize: 1,
              callback: tickLabel('Pred '),
              color: '#64748b', font: { family: 'JetBrains Mono', size: 10 }
            },
            grid: { color: '#1a2236' },
            position: 'top'
          },
          y: {
            type: 'linear', min: -0.5, max: 5.5,
            reverse: true,
            ticks: {
              stepSize: 1,
              callback: tickLabel('GT '),
              color: '#64748b', font: { family: 'JetBrains Mono', size: 10 }
            },
            grid: { color: '#1a2236' },
          }
        }
      },
      plugins: [{
        id: 'heatmap',
        afterDraw(chart) {
          const {ctx, scales: {x, y}} = chart;
          const cellW = Math.abs(x.getPixelForValue(1) - x.getPixelForValue(0));
          const cellH = Math.abs(y.getPixelForValue(1) - y.getPixelForValue(0));
          data.forEach(d => {
            const cx = x.getPixelForValue(d.x);
            const cy = y.getPixelForValue(d.y);
            ctx.fillStyle = cellColor(d.y, d.x, d.v);
            // 1px inset on every side leaves a visible gap between cells
            ctx.fillRect(cx - cellW/2 + 1, cy - cellH/2 + 1, cellW - 2, cellH - 2);
            if (d.v > 0.005) {
              ctx.fillStyle = d.v > 0.3 ? '#fff' : '#94a3b8';
              ctx.font = `bold 11px JetBrains Mono, monospace`;
              ctx.textAlign = 'center';
              ctx.textBaseline = 'middle';
              ctx.fillText((d.v * 100).toFixed(0) + '%', cx, cy);
            }
          });
        }
      }]
    });
  };

  // Grey out the second-dataset button when only one dataset is embedded.
  if (!HAS_SECOND) {
    const secondBtn = document.getElementById('ds-btn-2');
    secondBtn.disabled = true;
    secondBtn.style.opacity = '0.35';
    secondBtn.title = 'FinePDF dataset not loaded';
  }

  // Initial paint: table first, then each chart section, and finally the
  // confusion heatmap once its model dropdown has been filled.
  [
    render,
    renderChart,
    renderDist,
    renderBias,
    renderCritical,
    populateConfSelect,
    renderConfusion,
  ].forEach(step => step());
})();
</script>
</body>
</html>