Update benchmark leaderboard
Browse files- index.html +260 -4
index.html
CHANGED
|
@@ -20,6 +20,11 @@
|
|
| 20 |
letter-spacing: -0.02em; color: #f1f5f9; margin-bottom: 6px;
|
| 21 |
}
|
| 22 |
.subtitle { color: #64748b; font-size: 12px; margin-bottom: 12px; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 23 |
.scoring-note {
|
| 24 |
display: inline-flex; gap: 16px; flex-wrap: wrap;
|
| 25 |
background: #131820; border: 1px solid #1e2a3a;
|
|
@@ -110,6 +115,31 @@
|
|
| 110 |
display: inline-block; height: 6px; border-radius: 3px;
|
| 111 |
background: #2563eb; vertical-align: middle; margin-left: 4px; opacity: 0.7;
|
| 112 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
.footer { margin-top: 20px; font-size: 11px; color: #2d3748; text-align: right; }
|
| 114 |
::-webkit-scrollbar { height: 5px; background: #0d1117; }
|
| 115 |
::-webkit-scrollbar-thumb { background: #2d3748; border-radius: 3px; }
|
|
@@ -119,7 +149,7 @@
|
|
| 119 |
<h1>Text Quality Rating Benchmark</h1>
|
| 120 |
<p class="meta-subtitle">
|
| 121 |
LLM accuracy at rating text quality on a 1–6 scale across multiple languages
|
| 122 |
-
<span class="sep">·</span> Labeled by DeepSeek V3.2 & judged by Gemini 3
|
| 123 |
<span class="sep">·</span> Documents sourced from FineWeb dataset
|
| 124 |
</p>
|
| 125 |
|
|
@@ -144,7 +174,7 @@
|
|
| 144 |
</table>
|
| 145 |
</div>
|
| 146 |
|
| 147 |
-
DISTRIBUTION SECTION (disabled for testing)
|
| 148 |
<p class="section-title">Dataset Distribution</p>
|
| 149 |
<p class="subtitle" style="margin-bottom:20px">Number of unique texts per rating score (1–6) for each language</p>
|
| 150 |
<div class="dist-wrap">
|
|
@@ -153,6 +183,7 @@
|
|
| 153 |
<tbody id="dist-body"></tbody>
|
| 154 |
</table>
|
| 155 |
</div>
|
|
|
|
| 156 |
|
| 157 |
<p class="section-title" style="margin-top:52px">Global Model Comparison</p>
|
| 158 |
<p class="subtitle" style="margin-bottom:20px">Weighted Score vs Exact Accuracy — all languages combined, sorted by Weighted Score</p>
|
|
@@ -160,11 +191,47 @@
|
|
| 160 |
<canvas id="globalChart"></canvas>
|
| 161 |
</div>
|
| 162 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 163 |
<div class="footer" id="footer"></div>
|
| 164 |
|
| 165 |
<script>
|
| 166 |
(function() {
|
| 167 |
-
const ALL_ROWS = [{"model": "Qwen/Qwen3.5-397B-A17B-FP8", "avg_exact": 0.656148, "avg_wp": 0.808234, "total": 17112, "lang_exact": {"ar": 0.675, "az": 0.7025, "be": 0.785, "bg": 0.759669, "bo": 0.735, "ca": 0.732591, "cn": 0.628333, "cs": 0.7425, "da": 0.565, "el": 0.6075, "en": 0.46, "es": 0.777778, "et": 0.6575, "eu": 0.474037, "fa": 0.565, "fi": 0.77, "fr": 0.769634, "gl": 0.521667, "hu": 0.6475, "hv": 0.7175, "is": 0.715, "it": 0.78, "ka": 0.688679, "la": 0.662768, "li": 0.6, "lv": 0.725, "mk": 0.54, "mt": 0.74, "nl": 0.58, "no": 0.695, "pl": 0.486865, "pt": 0.695, "ro": 0.625, "ru": 0.759076, "sk": 0.63, "sl": 0.715, "sq": 0.8025, "sr": 0.5625, "sv": 0.66, "tr": 0.55, "uk": 0.764706}, "lang_wp": {"ar": 0.8225, "az": 0.83375, "be": 0.87375, "bg": 0.870166, "bo": 0.85625, "ca": 0.85376, "cn": 0.7875, "cs": 0.85125, "da": 0.77125, "el": 0.79625, "en": 0.71125, "es": 0.876877, "et": 0.8175, "eu": 0.68593, "fa": 0.758333, "fi": 0.87125, "fr": 0.870419, "gl": 0.721667, "hu": 0.79625, "hv": 0.85625, "is": 0.8475, "it": 0.87875, "ka": 0.794025, "la": 0.798246, "li": 0.78875, "lv": 0.85625, "mk": 0.75125, "mt": 0.8475, "nl": 0.78, "no": 0.8075, "pl": 0.707531, "pt": 0.80625, "ro": 0.7825, "ru": 0.866337, "sk": 0.8, "sl": 0.85, "sq": 0.89375, "sr": 0.77375, "sv": 0.82125, "tr": 0.755, "uk": 0.874332}}, {"model": "google/gemini-3-flash-preview", "avg_exact": 0.577296, "avg_wp": 0.760411, "total": 1753, "lang_exact": {"ar": 0.575, "az": 0.55, "be": 0.625, "bg": 0.75, "bo": 0.575, "ca": 0.725, "cn": 0.566667, "cs": 0.525, "da": 0.475, "el": 0.575, "en": 0.4, "es": 0.825, "et": 0.625, "eu": 0.416667, "fa": 0.516667, "fi": 0.625, "fr": 0.675, "gl": 0.533333, "hu": 0.575, "hv": 0.7, "is": 0.55, "it": 0.725, "ka": 0.6, "la": 0.5, "li": 0.5, "lv": 0.6, "mk": 0.525, "mt": 0.7, "nl": 0.45, "no": 0.6, "pl": 0.366667, "pt": 0.525, "ro": 0.625, "ru": 0.69697, "sk": 0.525, "sl": 0.7, "sq": 0.675, "sr": 0.475, "sv": 0.6, "tr": 0.6, "uk": 0.6}, "lang_wp": {"ar": 0.7875, "az": 0.6875, 
"be": 0.8125, "bg": 0.8625, "bo": 0.775, "ca": 0.8375, "cn": 0.783333, "cs": 0.725, "da": 0.725, "el": 0.775, "en": 0.65, "es": 0.9125, "et": 0.8125, "eu": 0.608333, "fa": 0.725, "fi": 0.775, "fr": 0.8125, "gl": 0.7, "hu": 0.7875, "hv": 0.85, "is": 0.7125, "it": 0.85, "ka": 0.8, "la": 0.658333, "li": 0.725, "lv": 0.7875, "mk": 0.7125, "mt": 0.8, "nl": 0.7, "no": 0.775, "pl": 0.641667, "pt": 0.75, "ro": 0.75, "ru": 0.833333, "sk": 0.7625, "sl": 0.8375, "sq": 0.825, "sr": 0.7375, "sv": 0.8, "tr": 0.7625, "uk": 0.7875}}, {"model": "openai/gpt-4o-mini", "avg_exact": 0.596461, "avg_wp": 0.757991, "total": 1752, "lang_exact": {"ar": 0.625, "az": 0.5, "be": 0.725, "bg": 0.675, "bo": 0.675, "ca": 0.475, "cn": 0.7, "cs": 0.6, "da": 0.525, "el": 0.525, "en": 0.425, "es": 0.8, "et": 0.875, "eu": 0.35, "fa": 0.533333, "fi": 0.65, "fr": 0.575, "gl": 0.433333, "hu": 0.65, "hv": 0.7, "is": 0.6, "it": 0.725, "ka": 0.475, "la": 0.4, "li": 0.675, "lv": 0.725, "mk": 0.4, "mt": 0.6, "nl": 0.475, "no": 0.675, "pl": 0.383333, "pt": 0.75, "ro": 0.5, "ru": 0.848485, "sk": 0.7, "sl": 0.7, "sq": 0.7, "sr": 0.475, "sv": 0.725, "tr": 0.65, "uk": 0.692308}, "lang_wp": {"ar": 0.7875, "az": 0.725, "be": 0.8625, "bg": 0.8125, "bo": 0.8375, "ca": 0.675, "cn": 0.841667, "cs": 0.7625, "da": 0.725, "el": 0.7375, "en": 0.5875, "es": 0.9, "et": 0.9375, "eu": 0.583333, "fa": 0.733333, "fi": 0.775, "fr": 0.7625, "gl": 0.666667, "hu": 0.8125, "hv": 0.825, "is": 0.7875, "it": 0.8375, "ka": 0.6625, "la": 0.566667, "li": 0.8125, "lv": 0.8625, "mk": 0.575, "mt": 0.7875, "nl": 0.7, "no": 0.7875, "pl": 0.566667, "pt": 0.8625, "ro": 0.7, "ru": 0.893939, "sk": 0.825, "sl": 0.8375, "sq": 0.825, "sr": 0.6875, "sv": 0.8375, "tr": 0.8, "uk": 0.833333}}, {"model": "qwen/qwen3-235b-a22b-2507", "avg_exact": 0.496292, "avg_wp": 0.693953, "total": 1753, "lang_exact": {"ar": 0.6, "az": 0.475, "be": 0.55, "bg": 0.65, "bo": 0.55, "ca": 0.525, "cn": 0.433333, "cs": 0.425, "da": 0.45, "el": 0.55, "en": 0.425, "es": 0.525, 
"et": 0.5, "eu": 0.416667, "fa": 0.616667, "fi": 0.55, "fr": 0.6, "gl": 0.433333, "hu": 0.65, "hv": 0.625, "is": 0.475, "it": 0.65, "ka": 0.325, "la": 0.433333, "li": 0.4, "lv": 0.575, "mk": 0.35, "mt": 0.475, "nl": 0.35, "no": 0.4, "pl": 0.316667, "pt": 0.575, "ro": 0.5, "ru": 0.454545, "sk": 0.45, "sl": 0.575, "sq": 0.6, "sr": 0.5, "sv": 0.475, "tr": 0.45, "uk": 0.625}, "lang_wp": {"ar": 0.775, "az": 0.7, "be": 0.75, "bg": 0.8, "bo": 0.75, "ca": 0.7, "cn": 0.616667, "cs": 0.6375, "da": 0.6625, "el": 0.725, "en": 0.55, "es": 0.725, "et": 0.75, "eu": 0.608333, "fa": 0.783333, "fi": 0.725, "fr": 0.7625, "gl": 0.625, "hu": 0.8125, "hv": 0.8, "is": 0.7, "it": 0.8125, "ka": 0.6, "la": 0.558333, "li": 0.6625, "lv": 0.775, "mk": 0.575, "mt": 0.675, "nl": 0.6, "no": 0.65, "pl": 0.458333, "pt": 0.775, "ro": 0.7375, "ru": 0.712121, "sk": 0.7, "sl": 0.775, "sq": 0.7625, "sr": 0.725, "sv": 0.725, "tr": 0.675, "uk": 0.8}}, {"model": "deepseek/deepseek-v3.2", "avg_exact": 0.403879, "avg_wp": 0.629492, "total": 1753, "lang_exact": {"ar": 0.375, "az": 0.325, "be": 0.475, "bg": 0.475, "bo": 0.475, "ca": 0.425, "cn": 0.533333, "cs": 0.5, "da": 0.275, "el": 0.25, "en": 0.525, "es": 0.425, "et": 0.425, "eu": 0.333333, "fa": 0.35, "fi": 0.375, "fr": 0.45, "gl": 0.483333, "hu": 0.4, "hv": 0.475, "is": 0.575, "it": 0.425, "ka": 0.35, "la": 0.3, "li": 0.375, "lv": 0.4, "mk": 0.325, "mt": 0.375, "nl": 0.325, "no": 0.4, "pl": 0.366667, "pt": 0.475, "ro": 0.25, "ru": 0.484848, "sk": 0.375, "sl": 0.6, "sq": 0.375, "sr": 0.3, "sv": 0.375, "tr": 0.375, "uk": 0.425}, "lang_wp": {"ar": 0.625, "az": 0.5625, "be": 0.725, "bg": 0.675, "bo": 0.7, "ca": 0.6375, "cn": 0.741667, "cs": 0.6875, "da": 0.5125, "el": 0.475, "en": 0.6875, "es": 0.7, "et": 0.7, "eu": 0.541667, "fa": 0.633333, "fi": 0.5875, "fr": 0.6875, "gl": 0.683333, "hu": 0.65, "hv": 0.7125, "is": 0.7625, "it": 0.6875, "ka": 0.575, "la": 0.5, "li": 0.5375, "lv": 0.6625, "mk": 0.55, "mt": 0.5625, "nl": 0.5625, "no": 0.625, "pl": 0.558333, 
"pt": 0.7, "ro": 0.4625, "ru": 0.712121, "sk": 0.625, "sl": 0.775, "sq": 0.6625, "sr": 0.6, "sv": 0.6, "tr": 0.625, "uk": 0.6125}}, {"model": "z-ai/glm-4-32b", "avg_exact": 0.432402, "avg_wp": 0.620936, "total": 1753, "lang_exact": {"ar": 0.375, "az": 0.5, "be": 0.625, "bg": 0.375, "bo": 0.625, "ca": 0.35, "cn": 0.383333, "cs": 0.625, "da": 0.35, "el": 0.55, "en": 0.3, "es": 0.35, "et": 0.525, "eu": 0.3, "fa": 0.383333, "fi": 0.525, "fr": 0.525, "gl": 0.25, "hu": 0.5, "hv": 0.625, "is": 0.35, "it": 0.4, "ka": 0.5, "la": 0.433333, "li": 0.425, "lv": 0.525, "mk": 0.225, "mt": 0.35, "nl": 0.425, "no": 0.475, "pl": 0.35, "pt": 0.25, "ro": 0.3, "ru": 0.515152, "sk": 0.525, "sl": 0.475, "sq": 0.7, "sr": 0.375, "sv": 0.4, "tr": 0.425, "uk": 0.525}, "lang_wp": {"ar": 0.525, "az": 0.7, "be": 0.775, "bg": 0.5375, "bo": 0.7625, "ca": 0.5, "cn": 0.583333, "cs": 0.775, "da": 0.6, "el": 0.7375, "en": 0.45, "es": 0.575, "et": 0.725, "eu": 0.533333, "fa": 0.608333, "fi": 0.6375, "fr": 0.6625, "gl": 0.375, "hu": 0.65, "hv": 0.8125, "is": 0.5875, "it": 0.6, "ka": 0.65, "la": 0.558333, "li": 0.65, "lv": 0.7125, "mk": 0.4125, "mt": 0.5625, "nl": 0.675, "no": 0.675, "pl": 0.575, "pt": 0.4375, "ro": 0.525, "ru": 0.69697, "sk": 0.7375, "sl": 0.6875, "sq": 0.8375, "sr": 0.6, "sv": 0.625, "tr": 0.675, "uk": 0.7125}}, {"model": "speakleash/Bielik-11B-v3.0-Instruct", "avg_exact": 0.41462, "avg_wp": 0.601942, "total": 1751, "lang_exact": {"ar": 0.025, "az": 0.525, "be": 0.2, "bg": 0.525, "bo": 0.675, "ca": 0.325, "cn": 0.271186, "cs": 0.5, "da": 0.425, "el": 0.325, "en": 0.5, "es": 0.475, "et": 0.55, "eu": 0.25, "fa": 0.366667, "fi": 0.425, "fr": 0.5, "gl": 0.4, "hu": 0.475, "hv": 0.525, "is": 0.225, "it": 0.475, "ka": 0.230769, "la": 0.3, "li": 0.45, "lv": 0.425, "mk": 0.275, "mt": 0.375, "nl": 0.45, "no": 0.475, "pl": 0.366667, "pt": 0.475, "ro": 0.425, "ru": 0.606061, "sk": 0.475, "sl": 0.55, "sq": 0.5, "sr": 0.45, "sv": 0.425, "tr": 0.45, "uk": 0.625}, "lang_wp": {"ar": 0.15, "az": 
0.6875, "be": 0.275, "bg": 0.675, "bo": 0.7875, "ca": 0.6125, "cn": 0.40678, "cs": 0.6125, "da": 0.675, "el": 0.5625, "en": 0.65, "es": 0.7125, "et": 0.7375, "eu": 0.483333, "fa": 0.583333, "fi": 0.5375, "fr": 0.725, "gl": 0.608333, "hu": 0.6625, "hv": 0.675, "is": 0.3875, "it": 0.6875, "ka": 0.371795, "la": 0.483333, "li": 0.675, "lv": 0.675, "mk": 0.575, "mt": 0.575, "nl": 0.7125, "no": 0.6875, "pl": 0.541667, "pt": 0.6875, "ro": 0.65, "ru": 0.727273, "sk": 0.6125, "sl": 0.6625, "sq": 0.6625, "sr": 0.6, "sv": 0.7, "tr": 0.7, "uk": 0.75}}, {"model": "google/gemini-2.0-flash-lite-001", "avg_exact": 0.385054, "avg_wp": 0.586423, "total": 1753, "lang_exact": {"ar": 0.325, "az": 0.325, "be": 0.55, "bg": 0.6, "bo": 0.4, "ca": 0.275, "cn": 0.45, "cs": 0.3, "da": 0.4, "el": 0.225, "en": 0.525, "es": 0.4, "et": 0.55, "eu": 0.333333, "fa": 0.516667, "fi": 0.4, "fr": 0.425, "gl": 0.383333, "hu": 0.45, "hv": 0.425, "is": 0.2, "it": 0.425, "ka": 0.3, "la": 0.216667, "li": 0.425, "lv": 0.475, "mk": 0.2, "mt": 0.425, "nl": 0.4, "no": 0.325, "pl": 0.366667, "pt": 0.375, "ro": 0.275, "ru": 0.606061, "sk": 0.425, "sl": 0.375, "sq": 0.425, "sr": 0.25, "sv": 0.425, "tr": 0.4, "uk": 0.275}, "lang_wp": {"ar": 0.525, "az": 0.6, "be": 0.75, "bg": 0.7625, "bo": 0.5875, "ca": 0.525, "cn": 0.608333, "cs": 0.5, "da": 0.55, "el": 0.5, "en": 0.7125, "es": 0.65, "et": 0.6875, "eu": 0.566667, "fa": 0.691667, "fi": 0.6375, "fr": 0.6125, "gl": 0.633333, "hu": 0.6375, "hv": 0.625, "is": 0.45, "it": 0.6125, "ka": 0.525, "la": 0.383333, "li": 0.575, "lv": 0.7, "mk": 0.4625, "mt": 0.65, "nl": 0.6125, "no": 0.4875, "pl": 0.55, "pt": 0.525, "ro": 0.4375, "ru": 0.712121, "sk": 0.6, "sl": 0.6125, "sq": 0.6125, "sr": 0.4875, "sv": 0.6375, "tr": 0.5875, "uk": 0.525}}, {"model": "google/gemma-3-12b-it", "avg_exact": 0.337707, "avg_wp": 0.573873, "total": 1753, "lang_exact": {"ar": 0.275, "az": 0.35, "be": 0.35, "bg": 0.475, "bo": 0.45, "ca": 0.225, "cn": 0.383333, "cs": 0.45, "da": 0.375, "el": 0.275, "en": 
0.45, "es": 0.325, "et": 0.4, "eu": 0.233333, "fa": 0.25, "fi": 0.4, "fr": 0.425, "gl": 0.166667, "hu": 0.35, "hv": 0.4, "is": 0.475, "it": 0.325, "ka": 0.3, "la": 0.3, "li": 0.25, "lv": 0.425, "mk": 0.275, "mt": 0.25, "nl": 0.45, "no": 0.375, "pl": 0.366667, "pt": 0.25, "ro": 0.25, "ru": 0.575758, "sk": 0.275, "sl": 0.35, "sq": 0.35, "sr": 0.325, "sv": 0.175, "tr": 0.325, "uk": 0.375}, "lang_wp": {"ar": 0.5, "az": 0.625, "be": 0.6125, "bg": 0.6625, "bo": 0.675, "ca": 0.4875, "cn": 0.583333, "cs": 0.625, "da": 0.5875, "el": 0.5125, "en": 0.5625, "es": 0.5875, "et": 0.625, "eu": 0.466667, "fa": 0.533333, "fi": 0.625, "fr": 0.675, "gl": 0.375, "hu": 0.6125, "hv": 0.6375, "is": 0.6875, "it": 0.5375, "ka": 0.475, "la": 0.525, "li": 0.5875, "lv": 0.6625, "mk": 0.55, "mt": 0.5375, "nl": 0.725, "no": 0.5625, "pl": 0.575, "pt": 0.525, "ro": 0.4875, "ru": 0.787879, "sk": 0.5625, "sl": 0.6, "sq": 0.5125, "sr": 0.5875, "sv": 0.5125, "tr": 0.6125, "uk": 0.575}}, {"model": "mistralai/mistral-nemo", "avg_exact": 0.309184, "avg_wp": 0.499715, "total": 1753, "lang_exact": {"ar": 0.325, "az": 0.45, "be": 0.475, "bg": 0.325, "bo": 0.375, "ca": 0.25, "cn": 0.383333, "cs": 0.425, "da": 0.375, "el": 0.3, "en": 0.25, "es": 0.35, "et": 0.225, "eu": 0.216667, "fa": 0.266667, "fi": 0.35, "fr": 0.275, "gl": 0.283333, "hu": 0.2, "hv": 0.425, "is": 0.3, "it": 0.2, "ka": 0.425, "la": 0.183333, "li": 0.325, "lv": 0.425, "mk": 0.375, "mt": 0.325, "nl": 0.35, "no": 0.375, "pl": 0.25, "pt": 0.35, "ro": 0.2, "ru": 0.212121, "sk": 0.375, "sl": 0.35, "sq": 0.3, "sr": 0.25, "sv": 0.175, "tr": 0.275, "uk": 0.25}, "lang_wp": {"ar": 0.5, "az": 0.6125, "be": 0.675, "bg": 0.4625, "bo": 0.5, "ca": 0.5125, "cn": 0.608333, "cs": 0.5125, "da": 0.5375, "el": 0.4625, "en": 0.5, "es": 0.575, "et": 0.4875, "eu": 0.441667, "fa": 0.5, "fi": 0.525, "fr": 0.5625, "gl": 0.475, "hu": 0.425, "hv": 0.5875, "is": 0.4625, "it": 0.4, "ka": 0.5875, "la": 0.333333, "li": 0.525, "lv": 0.6625, "mk": 0.5375, "mt": 0.425, "nl": 
0.4875, "no": 0.5375, "pl": 0.466667, "pt": 0.525, "ro": 0.3, "ru": 0.484848, "sk": 0.6, "sl": 0.575, "sq": 0.425, "sr": 0.45, "sv": 0.4375, "tr": 0.45, "uk": 0.4375}}, {"model": "z-ai/glm-4.5-air", "avg_exact": 0.36203, "avg_wp": 0.498575, "total": 1754, "lang_exact": {"ar": 0.317073, "az": 0.4, "be": 0.525, "bg": 0.475, "bo": 0.45, "ca": 0.375, "cn": 0.4, "cs": 0.35, "da": 0.325, "el": 0.325, "en": 0.275, "es": 0.35, "et": 0.6, "eu": 0.3, "fa": 0.283333, "fi": 0.45, "fr": 0.3, "gl": 0.383333, "hu": 0.375, "hv": 0.175, "is": 0.25, "it": 0.125, "ka": 0.35, "la": 0.15, "li": 0.175, "lv": 0.1, "mk": 0.2, "mt": 0.275, "nl": 0.225, "no": 0.375, "pl": 0.35, "pt": 0.525, "ro": 0.4, "ru": 0.484848, "sk": 0.55, "sl": 0.625, "sq": 0.65, "sr": 0.325, "sv": 0.5, "tr": 0.45, "uk": 0.5}, "lang_wp": {"ar": 0.52439, "az": 0.55, "be": 0.6375, "bg": 0.6375, "bo": 0.575, "ca": 0.475, "cn": 0.6, "cs": 0.5, "da": 0.4875, "el": 0.5125, "en": 0.4375, "es": 0.5, "et": 0.75, "eu": 0.433333, "fa": 0.525, "fi": 0.5125, "fr": 0.475, "gl": 0.541667, "hu": 0.425, "hv": 0.1875, "is": 0.3625, "it": 0.15, "ka": 0.4125, "la": 0.208333, "li": 0.25, "lv": 0.2, "mk": 0.3375, "mt": 0.4375, "nl": 0.375, "no": 0.5125, "pl": 0.5, "pt": 0.6625, "ro": 0.5375, "ru": 0.621212, "sk": 0.7, "sl": 0.775, "sq": 0.75, "sr": 0.55, "sv": 0.625, "tr": 0.6375, "uk": 0.6625}}, {"model": "meta-llama/llama-4-scout", "avg_exact": 0.380137, "avg_wp": 0.497717, "total": 1752, "lang_exact": {"ar": 0.325, "az": 0.475, "be": 0.3, "bg": 0.375, "bo": 0.425, "ca": 0.3, "cn": 0.25, "cs": 0.525, "da": 0.375, "el": 0.275, "en": 0.225, "es": 0.475, "et": 0.425, "eu": 0.254237, "fa": 0.4, "fi": 0.5, "fr": 0.375, "gl": 0.2, "hu": 0.45, "hv": 0.45, "is": 0.55, "it": 0.425, "ka": 0.3, "la": 0.316667, "li": 0.35, "lv": 0.45, "mk": 0.275, "mt": 0.35, "nl": 0.325, "no": 0.475, "pl": 0.35, "pt": 0.5, "ro": 0.325, "ru": 0.545455, "sk": 0.5, "sl": 0.4, "sq": 0.575, "sr": 0.275, "sv": 0.425, "tr": 0.425, "uk": 0.35}, "lang_wp": {"ar": 0.4625, 
"az": 0.5625, "be": 0.4, "bg": 0.45, "bo": 0.5375, "ca": 0.3875, "cn": 0.441667, "cs": 0.6, "da": 0.4875, "el": 0.4, "en": 0.325, "es": 0.5875, "et": 0.5875, "eu": 0.389831, "fa": 0.55, "fi": 0.625, "fr": 0.4375, "gl": 0.433333, "hu": 0.5375, "hv": 0.525, "is": 0.65, "it": 0.4625, "ka": 0.4125, "la": 0.425, "li": 0.45, "lv": 0.575, "mk": 0.425, "mt": 0.5125, "nl": 0.475, "no": 0.6125, "pl": 0.441667, "pt": 0.6, "ro": 0.475, "ru": 0.575758, "sk": 0.55, "sl": 0.525, "sq": 0.6625, "sr": 0.4125, "sv": 0.5375, "tr": 0.6, "uk": 0.4625}}, {"model": "meta-llama/llama-3.3-70b-instruct", "avg_exact": 0.366589, "avg_wp": 0.49652, "total": 1724, "lang_exact": {"ar": 0.384615, "az": 0.394737, "be": 0.475, "bg": 0.4, "bo": 0.45, "ca": 0.25, "cn": 0.305085, "cs": 0.525, "da": 0.358974, "el": 0.447368, "en": 0.25641, "es": 0.324324, "et": 0.512821, "eu": 0.293103, "fa": 0.310345, "fi": 0.538462, "fr": 0.384615, "gl": 0.183333, "hu": 0.45, "hv": 0.4, "is": 0.45, "it": 0.384615, "ka": 0.35, "la": 0.310345, "li": 0.25641, "lv": 0.425, "mk": 0.225, "mt": 0.275, "nl": 0.425, "no": 0.375, "pl": 0.4, "pt": 0.358974, "ro": 0.131579, "ru": 0.515152, "sk": 0.425, "sl": 0.435897, "sq": 0.282051, "sr": 0.333333, "sv": 0.410256, "tr": 0.475, "uk": 0.35}, "lang_wp": {"ar": 0.5, "az": 0.513158, "be": 0.5625, "bg": 0.5375, "bo": 0.5375, "ca": 0.3625, "cn": 0.466102, "cs": 0.5875, "da": 0.5, "el": 0.5, "en": 0.435897, "es": 0.405405, "et": 0.679487, "eu": 0.431034, "fa": 0.465517, "fi": 0.653846, "fr": 0.448718, "gl": 0.383333, "hu": 0.5375, "hv": 0.5125, "is": 0.625, "it": 0.512821, "ka": 0.4625, "la": 0.431034, "li": 0.423077, "lv": 0.6, "mk": 0.3625, "mt": 0.3875, "nl": 0.5875, "no": 0.475, "pl": 0.558333, "pt": 0.474359, "ro": 0.315789, "ru": 0.530303, "sk": 0.6375, "sl": 0.551282, "sq": 0.487179, "sr": 0.448718, "sv": 0.512821, "tr": 0.625, "uk": 0.4375}}, {"model": "openai/gpt-4.1-nano", "avg_exact": 0.293212, "avg_wp": 0.494295, "total": 1753, "lang_exact": {"ar": 0.25, "az": 0.275, "be": 
0.25, "bg": 0.35, "bo": 0.325, "ca": 0.125, "cn": 0.4, "cs": 0.35, "da": 0.375, "el": 0.225, "en": 0.1, "es": 0.275, "et": 0.45, "eu": 0.2, "fa": 0.366667, "fi": 0.5, "fr": 0.325, "gl": 0.3, "hu": 0.25, "hv": 0.35, "is": 0.3, "it": 0.3, "ka": 0.125, "la": 0.2, "li": 0.275, "lv": 0.425, "mk": 0.125, "mt": 0.25, "nl": 0.3, "no": 0.275, "pl": 0.3, "pt": 0.225, "ro": 0.25, "ru": 0.393939, "sk": 0.275, "sl": 0.3, "sq": 0.325, "sr": 0.325, "sv": 0.375, "tr": 0.325, "uk": 0.325}, "lang_wp": {"ar": 0.5, "az": 0.5125, "be": 0.4875, "bg": 0.55, "bo": 0.525, "ca": 0.4, "cn": 0.616667, "cs": 0.55, "da": 0.425, "el": 0.475, "en": 0.4125, "es": 0.4625, "et": 0.6125, "eu": 0.391667, "fa": 0.6, "fi": 0.6, "fr": 0.5125, "gl": 0.5, "hu": 0.475, "hv": 0.5375, "is": 0.55, "it": 0.5125, "ka": 0.3125, "la": 0.316667, "li": 0.5375, "lv": 0.6125, "mk": 0.3125, "mt": 0.3875, "nl": 0.4125, "no": 0.425, "pl": 0.541667, "pt": 0.425, "ro": 0.4625, "ru": 0.651515, "sk": 0.4625, "sl": 0.575, "sq": 0.5625, "sr": 0.5375, "sv": 0.5, "tr": 0.4875, "uk": 0.5625}}, {"model": "google/gemma-3-27b-it", "avg_exact": 0.252139, "avg_wp": 0.488591, "total": 1753, "lang_exact": {"ar": 0.3, "az": 0.15, "be": 0.225, "bg": 0.3, "bo": 0.325, "ca": 0.175, "cn": 0.333333, "cs": 0.325, "da": 0.1, "el": 0.15, "en": 0.5, "es": 0.3, "et": 0.225, "eu": 0.233333, "fa": 0.266667, "fi": 0.225, "fr": 0.2, "gl": 0.25, "hu": 0.25, "hv": 0.25, "is": 0.375, "it": 0.225, "ka": 0.175, "la": 0.366667, "li": 0.1, "lv": 0.325, "mk": 0.15, "mt": 0.225, "nl": 0.325, "no": 0.175, "pl": 0.416667, "pt": 0.25, "ro": 0.05, "ru": 0.454545, "sk": 0.2, "sl": 0.275, "sq": 0.25, "sr": 0.1, "sv": 0.025, "tr": 0.325, "uk": 0.325}, "lang_wp": {"ar": 0.4875, "az": 0.4, "be": 0.4625, "bg": 0.4875, "bo": 0.5875, "ca": 0.4, "cn": 0.575, "cs": 0.4625, "da": 0.375, "el": 0.325, "en": 0.65, "es": 0.5125, "et": 0.5125, "eu": 0.475, "fa": 0.55, "fi": 0.475, "fr": 0.475, "gl": 0.466667, "hu": 0.55, "hv": 0.5, "is": 0.5625, "it": 0.4875, "ka": 0.375, "la": 
0.525, "li": 0.4125, "lv": 0.5875, "mk": 0.275, "mt": 0.5125, "nl": 0.5875, "no": 0.3875, "pl": 0.591667, "pt": 0.4625, "ro": 0.35, "ru": 0.666667, "sk": 0.5375, "sl": 0.5125, "sq": 0.475, "sr": 0.4125, "sv": 0.3625, "tr": 0.5625, "uk": 0.5625}}, {"model": "qwen/qwen-2.5-7b-instruct", "avg_exact": 0.26526, "avg_wp": 0.484598, "total": 1753, "lang_exact": {"ar": 0.2, "az": 0.15, "be": 0.25, "bg": 0.35, "bo": 0.4, "ca": 0.225, "cn": 0.316667, "cs": 0.3, "da": 0.325, "el": 0.2, "en": 0.4, "es": 0.325, "et": 0.25, "eu": 0.216667, "fa": 0.3, "fi": 0.275, "fr": 0.35, "gl": 0.366667, "hu": 0.225, "hv": 0.3, "is": 0.325, "it": 0.3, "ka": 0.125, "la": 0.183333, "li": 0.2, "lv": 0.35, "mk": 0.15, "mt": 0.225, "nl": 0.275, "no": 0.3, "pl": 0.216667, "pt": 0.25, "ro": 0.2, "ru": 0.393939, "sk": 0.325, "sl": 0.275, "sq": 0.1, "sr": 0.05, "sv": 0.35, "tr": 0.3, "uk": 0.275}, "lang_wp": {"ar": 0.3625, "az": 0.4, "be": 0.5125, "bg": 0.575, "bo": 0.625, "ca": 0.425, "cn": 0.525, "cs": 0.4875, "da": 0.525, "el": 0.425, "en": 0.6125, "es": 0.5625, "et": 0.5, "eu": 0.425, "fa": 0.466667, "fi": 0.5125, "fr": 0.5375, "gl": 0.566667, "hu": 0.475, "hv": 0.525, "is": 0.5125, "it": 0.5, "ka": 0.3125, "la": 0.383333, "li": 0.3875, "lv": 0.65, "mk": 0.3375, "mt": 0.4125, "nl": 0.525, "no": 0.5125, "pl": 0.425, "pt": 0.475, "ro": 0.4125, "ru": 0.621212, "sk": 0.5625, "sl": 0.5, "sq": 0.35, "sr": 0.35, "sv": 0.6, "tr": 0.5625, "uk": 0.5125}}, {"model": "meta-llama/llama-4-maverick", "avg_exact": 0.268291, "avg_wp": 0.47344, "total": 17112, "lang_exact": {"ar": 0.27, "az": 0.235, "be": 0.2225, "bg": 0.273481, "bo": 0.285, "ca": 0.259053, "cn": 0.388333, "cs": 0.285, "da": 0.225, "el": 0.26, "en": 0.28, "es": 0.267267, "et": 0.315, "eu": 0.221106, "fa": 0.278333, "fi": 0.2425, "fr": 0.225131, "gl": 0.271667, "hu": 0.24, "hv": 0.31, "is": 0.41, "it": 0.2775, "ka": 0.113208, "la": 0.397661, "li": 0.2375, "lv": 0.2725, "mk": 0.19, "mt": 0.2125, "nl": 0.3575, "no": 0.185, "pl": 0.355517, "pt": 0.235, 
"ro": 0.1525, "ru": 0.330033, "sk": 0.2025, "sl": 0.2675, "sq": 0.2825, "sr": 0.2, "sv": 0.185, "tr": 0.395, "uk": 0.248663}, "lang_wp": {"ar": 0.435, "az": 0.38125, "be": 0.475, "bg": 0.476519, "bo": 0.54, "ca": 0.415042, "cn": 0.620833, "cs": 0.52125, "da": 0.37125, "el": 0.4625, "en": 0.4425, "es": 0.462462, "et": 0.5675, "eu": 0.403685, "fa": 0.490833, "fi": 0.5075, "fr": 0.454188, "gl": 0.45, "hu": 0.42875, "hv": 0.53875, "is": 0.57625, "it": 0.48875, "ka": 0.278302, "la": 0.562378, "li": 0.43375, "lv": 0.485, "mk": 0.3675, "mt": 0.40125, "nl": 0.59, "no": 0.38125, "pl": 0.573555, "pt": 0.425, "ro": 0.38625, "ru": 0.617162, "sk": 0.42375, "sl": 0.48125, "sq": 0.56625, "sr": 0.3875, "sv": 0.3425, "tr": 0.60875, "uk": 0.471925}}, {"model": "google/gemma-3-4b-it", "avg_exact": 0.222031, "avg_wp": 0.460126, "total": 1743, "lang_exact": {"ar": 0.1, "az": 0.15, "be": 0.225, "bg": 0.2, "bo": 0.125, "ca": 0.051282, "cn": 0.298246, "cs": 0.394737, "da": 0.25, "el": 0.175, "en": 0.425, "es": 0.225, "et": 0.375, "eu": 0.216667, "fa": 0.216667, "fi": 0.25, "fr": 0.125, "gl": 0.25, "hu": 0.125, "hv": 0.225, "is": 0.15, "it": 0.275, "ka": 0.1, "la": 0.166667, "li": 0.175, "lv": 0.2, "mk": 0.15, "mt": 0.025, "nl": 0.2, "no": 0.25, "pl": 0.316667, "pt": 0.461538, "ro": 0.125, "ru": 0.272727, "sk": 0.263158, "sl": 0.225, "sq": 0.358974, "sr": 0.225, "sv": 0.25, "tr": 0.2, "uk": 0.275}, "lang_wp": {"ar": 0.425, "az": 0.5, "be": 0.5, "bg": 0.4125, "bo": 0.45, "ca": 0.294872, "cn": 0.54386, "cs": 0.539474, "da": 0.45, "el": 0.375, "en": 0.6375, "es": 0.425, "et": 0.5875, "eu": 0.441667, "fa": 0.45, "fi": 0.525, "fr": 0.35, "gl": 0.541667, "hu": 0.4, "hv": 0.475, "is": 0.375, "it": 0.4875, "ka": 0.325, "la": 0.35, "li": 0.4875, "lv": 0.525, "mk": 0.3625, "mt": 0.325, "nl": 0.4375, "no": 0.475, "pl": 0.516667, "pt": 0.615385, "ro": 0.3375, "ru": 0.545455, "sk": 0.526316, "sl": 0.425, "sq": 0.538462, "sr": 0.4875, "sv": 0.5, "tr": 0.4375, "uk": 0.45}}, {"model": 
"mistralai/mixtral-8x7b-instruct", "avg_exact": 0.245143, "avg_wp": 0.453429, "total": 1750, "lang_exact": {"ar": 0.175, "az": 0.2, "be": 0.15, "bg": 0.225, "bo": 0.25, "ca": 0.175, "cn": 0.254237, "cs": 0.4, "da": 0.25, "el": 0.2, "en": 0.3, "es": 0.3, "et": 0.275, "eu": 0.25, "fa": 0.183333, "fi": 0.275, "fr": 0.25, "gl": 0.216667, "hu": 0.25, "hv": 0.225, "is": 0.15, "it": 0.225, "ka": 0.153846, "la": 0.283333, "li": 0.2, "lv": 0.25, "mk": 0.225, "mt": 0.225, "nl": 0.3, "no": 0.325, "pl": 0.283333, "pt": 0.225, "ro": 0.3, "ru": 0.34375, "sk": 0.225, "sl": 0.325, "sq": 0.2, "sr": 0.25, "sv": 0.225, "tr": 0.275, "uk": 0.275}, "lang_wp": {"ar": 0.2875, "az": 0.3875, "be": 0.45, "bg": 0.4375, "bo": 0.425, "ca": 0.3375, "cn": 0.466102, "cs": 0.5625, "da": 0.5125, "el": 0.4, "en": 0.5125, "es": 0.5125, "et": 0.4875, "eu": 0.408333, "fa": 0.408333, "fi": 0.525, "fr": 0.4125, "gl": 0.408333, "hu": 0.4875, "hv": 0.475, "is": 0.4, "it": 0.4125, "ka": 0.269231, "la": 0.466667, "li": 0.4375, "lv": 0.5, "mk": 0.3875, "mt": 0.3625, "nl": 0.5625, "no": 0.55, "pl": 0.45, "pt": 0.475, "ro": 0.525, "ru": 0.53125, "sk": 0.4625, "sl": 0.5, "sq": 0.4, "sr": 0.475, "sv": 0.55, "tr": 0.4875, "uk": 0.55}}, {"model": "mistralai/mistral-small-3.2-24b-instruct", "avg_exact": 0.25029, "avg_wp": 0.450929, "total": 1722, "lang_exact": {"ar": 0.2, "az": 0.25, "be": 0.275, "bg": 0.225, "bo": 0.275, "ca": 0.225, "cn": 0.316667, "cs": 0.225, "da": 0.35, "el": 0.25, "en": 0.3, "es": 0.3, "et": 0.15, "eu": 0.183333, "fa": 0.333333, "fi": 0.275, "fr": 0.25, "gl": 0.166667, "hu": 0.275, "hv": 0.275, "is": 0.375, "it": 0.275, "ka": 0.2, "la": 0.4, "li": 0.205128, "lv": 0.325, "mk": 0.2, "mt": 0.1, "nl": 0.225, "no": 0.25, "pl": 0.3, "pt": 0.25, "ro": 0.222222, "ru": 0.3, "sk": 0.142857, "sl": 0.193548, "sq": 0.157895, "sr": 0.2, "sv": 0.3, "tr": 0.175, "uk": 0.225}, "lang_wp": {"ar": 0.4125, "az": 0.4375, "be": 0.4875, "bg": 0.425, "bo": 0.45, "ca": 0.425, "cn": 0.55, "cs": 0.3875, "da": 0.55, "el": 
0.3625, "en": 0.4125, "es": 0.425, "et": 0.4, "eu": 0.458333, "fa": 0.575, "fi": 0.3875, "fr": 0.425, "gl": 0.408333, "hu": 0.4875, "hv": 0.5125, "is": 0.6, "it": 0.4125, "ka": 0.375, "la": 0.566667, "li": 0.423077, "lv": 0.575, "mk": 0.3625, "mt": 0.35, "nl": 0.4125, "no": 0.375, "pl": 0.483333, "pt": 0.4, "ro": 0.375, "ru": 0.416667, "sk": 0.410714, "sl": 0.467742, "sq": 0.513158, "sr": 0.4625, "sv": 0.4375, "tr": 0.5125, "uk": 0.3875}}, {"model": "mistralai/mistral-7b-instruct", "avg_exact": 0.243871, "avg_wp": 0.418065, "total": 1550, "lang_exact": {"ar": 0.058824, "az": 0.27027, "be": 0.382353, "bg": 0.243243, "bo": 0.314286, "ca": 0.285714, "cn": 0.339286, "cs": 0.428571, "da": 0.28, "el": 0.194444, "en": 0.125, "es": 0.241379, "et": 0.482759, "eu": 0.226415, "fa": 0.355932, "fi": 0.269231, "fr": 0.184211, "gl": 0.206897, "hu": 0.138889, "hv": 0.361111, "is": 0.166667, "it": 0.28125, "ka": 0.277778, "la": 0.241379, "li": 0.171429, "lv": 0.142857, "mk": 0.205128, "mt": 0.162162, "nl": 0.285714, "no": 0.205128, "pl": 0.272727, "pt": 0.189189, "ro": 0.138889, "ru": 0.354839, "sk": 0.236842, "sl": 0.289474, "sq": 0.27027, "sr": 0.282051, "sv": 0.210526, "tr": 0.102564, "uk": 0.175}, "lang_wp": {"ar": 0.191176, "az": 0.445946, "be": 0.558824, "bg": 0.432432, "bo": 0.542857, "ca": 0.457143, "cn": 0.508929, "cs": 0.52381, "da": 0.4, "el": 0.388889, "en": 0.265625, "es": 0.396552, "et": 0.62069, "eu": 0.349057, "fa": 0.542373, "fi": 0.365385, "fr": 0.381579, "gl": 0.387931, "hu": 0.319444, "hv": 0.513889, "is": 0.416667, "it": 0.5, "ka": 0.416667, "la": 0.431034, "li": 0.285714, "lv": 0.328571, "mk": 0.397436, "mt": 0.337838, "nl": 0.5, "no": 0.384615, "pl": 0.445455, "pt": 0.378378, "ro": 0.333333, "ru": 0.467742, "sk": 0.434211, "sl": 0.394737, "sq": 0.486486, "sr": 0.487179, "sv": 0.421053, "tr": 0.320513, "uk": 0.35}}, {"model": "mistralai/mistral-small-24b-instruct-2501", "avg_exact": 0.179897, "avg_wp": 0.363221, "total": 1751, "lang_exact": {"ar": 0.05, "az": 
0.25, "be": 0.075, "bg": 0.2, "bo": 0.25, "ca": 0.25, "cn": 0.254237, "cs": 0.15, "da": 0.225, "el": 0.2, "en": 0.25, "es": 0.25, "et": 0.25, "eu": 0.2, "fa": 0.2, "fi": 0.225, "fr": 0.225, "gl": 0.183333, "hu": 0.125, "hv": 0.175, "is": 0.15, "it": 0.25, "ka": 0.025, "la": 0.35, "li": 0.05, "lv": 0.125, "mk": 0.225, "mt": 0.25, "nl": 0.175, "no": 0.225, "pl": 0.233333, "pt": 0.225, "ro": 0.1, "ru": 0.0, "sk": 0.15, "sl": 0.225, "sq": 0.025, "sr": 0.0, "sv": 0.25, "tr": 0.075, "uk": 0.075}, "lang_wp": {"ar": 0.225, "az": 0.45, "be": 0.3125, "bg": 0.4375, "bo": 0.3875, "ca": 0.3625, "cn": 0.457627, "cs": 0.3375, "da": 0.375, "el": 0.35, "en": 0.4125, "es": 0.4125, "et": 0.4125, "eu": 0.358333, "fa": 0.4, "fi": 0.3375, "fr": 0.375, "gl": 0.291667, "hu": 0.275, "hv": 0.35, "is": 0.375, "it": 0.4, "ka": 0.2625, "la": 0.516667, "li": 0.3, "lv": 0.3625, "mk": 0.375, "mt": 0.4, "nl": 0.3375, "no": 0.3625, "pl": 0.416667, "pt": 0.4125, "ro": 0.225, "ru": 0.28125, "sk": 0.325, "sl": 0.475, "sq": 0.375, "sr": 0.225, "sv": 0.4, "tr": 0.275, "uk": 0.325}}, {"model": "mistralai/ministral-14b-2512", "avg_exact": 0.196235, "avg_wp": 0.353394, "total": 1753, "lang_exact": {"ar": 0.15, "az": 0.225, "be": 0.175, "bg": 0.2, "bo": 0.25, "ca": 0.225, "cn": 0.233333, "cs": 0.25, "da": 0.175, "el": 0.25, "en": 0.275, "es": 0.175, "et": 0.075, "eu": 0.266667, "fa": 0.216667, "fi": 0.125, "fr": 0.25, "gl": 0.216667, "hu": 0.175, "hv": 0.2, "is": 0.325, "it": 0.125, "ka": 0.05, "la": 0.416667, "li": 0.125, "lv": 0.1, "mk": 0.175, "mt": 0.05, "nl": 0.1, "no": 0.125, "pl": 0.233333, "pt": 0.15, "ro": 0.175, "ru": 0.181818, "sk": 0.175, "sl": 0.125, "sq": 0.175, "sr": 0.225, "sv": 0.275, "tr": 0.225, "uk": 0.2}, "lang_wp": {"ar": 0.3125, "az": 0.3625, "be": 0.35, "bg": 0.4125, "bo": 0.35, "ca": 0.4, "cn": 0.425, "cs": 0.4125, "da": 0.325, "el": 0.375, "en": 0.4125, "es": 0.35, "et": 0.225, "eu": 0.425, "fa": 0.441667, "fi": 0.275, "fr": 0.375, "gl": 0.383333, "hu": 0.3, "hv": 0.3375, "is": 
0.4875, "it": 0.3125, "ka": 0.2125, "la": 0.533333, "li": 0.2625, "lv": 0.2875, "mk": 0.3375, "mt": 0.175, "nl": 0.225, "no": 0.225, "pl": 0.433333, "pt": 0.2875, "ro": 0.3375, "ru": 0.333333, "sk": 0.3375, "sl": 0.325, "sq": 0.3375, "sr": 0.325, "sv": 0.4, "tr": 0.425, "uk": 0.375}}, {"model": "meta-llama/llama-3.1-8b-instruct", "avg_exact": 0.209262, "avg_wp": 0.341052, "total": 1749, "lang_exact": {"ar": 0.135135, "az": 0.2, "be": 0.3, "bg": 0.175, "bo": 0.2, "ca": 0.225, "cn": 0.3, "cs": 0.25, "da": 0.25, "el": 0.225, "en": 0.275, "es": 0.075, "et": 0.275, "eu": 0.116667, "fa": 0.083333, "fi": 0.333333, "fr": 0.1, "gl": 0.133333, "hu": 0.175, "hv": 0.375, "is": 0.25, "it": 0.2, "ka": 0.225, "la": 0.116667, "li": 0.2, "lv": 0.225, "mk": 0.275, "mt": 0.2, "nl": 0.225, "no": 0.225, "pl": 0.083333, "pt": 0.225, "ro": 0.075, "ru": 0.242424, "sk": 0.225, "sl": 0.25, "sq": 0.225, "sr": 0.25, "sv": 0.375, "tr": 0.275, "uk": 0.225}, "lang_wp": {"ar": 0.297297, "az": 0.275, "be": 0.4625, "bg": 0.3375, "bo": 0.3125, "ca": 0.35, "cn": 0.441667, "cs": 0.4, "da": 0.3875, "el": 0.325, "en": 0.4, "es": 0.2125, "et": 0.3875, "eu": 0.191667, "fa": 0.216667, "fi": 0.5, "fr": 0.2375, "gl": 0.266667, "hu": 0.3375, "hv": 0.5375, "is": 0.375, "it": 0.325, "ka": 0.35, "la": 0.325, "li": 0.3, "lv": 0.4, "mk": 0.4, "mt": 0.3, "nl": 0.3625, "no": 0.3375, "pl": 0.258333, "pt": 0.35, "ro": 0.2, "ru": 0.30303, "sk": 0.3625, "sl": 0.375, "sq": 0.3375, "sr": 0.375, "sv": 0.475, "tr": 0.4, "uk": 0.3625}}, {"model": "speakleash/Bielik-11B-v2.6-Instruct", "avg_exact": 0.1502, "avg_wp": 0.340091, "total": 1751, "lang_exact": {"ar": 0.05, "az": 0.075, "be": 0.0, "bg": 0.175, "bo": 0.2, "ca": 0.175, "cn": 0.186441, "cs": 0.1, "da": 0.225, "el": 0.275, "en": 0.025, "es": 0.2, "et": 0.2, "eu": 0.166667, "fa": 0.266667, "fi": 0.125, "fr": 0.225, "gl": 0.216667, "hu": 0.2, "hv": 0.225, "is": 0.0, "it": 0.125, "ka": 0.0, "la": 0.2, "li": 0.225, "lv": 0.125, "mk": 0.2, "mt": 0.15, "nl": 0.225, "no": 
0.15, "pl": 0.183333, "pt": 0.075, "ro": 0.075, "ru": 0.090909, "sk": 0.125, "sl": 0.225, "sq": 0.0, "sr": 0.05, "sv": 0.15, "tr": 0.225, "uk": 0.075}, "lang_wp": {"ar": 0.175, "az": 0.25, "be": 0.1875, "bg": 0.3875, "bo": 0.4, "ca": 0.425, "cn": 0.29661, "cs": 0.3, "da": 0.4125, "el": 0.45, "en": 0.2, "es": 0.4625, "et": 0.4375, "eu": 0.333333, "fa": 0.433333, "fi": 0.35, "fr": 0.4375, "gl": 0.333333, "hu": 0.3375, "hv": 0.4375, "is": 0.1125, "it": 0.375, "ka": 0.128205, "la": 0.308333, "li": 0.4375, "lv": 0.3875, "mk": 0.4125, "mt": 0.35, "nl": 0.4625, "no": 0.4125, "pl": 0.4, "pt": 0.4125, "ro": 0.2625, "ru": 0.30303, "sk": 0.2875, "sl": 0.4375, "sq": 0.2, "sr": 0.2, "sv": 0.3375, "tr": 0.375, "uk": 0.25}}, {"model": "CYFRAGOVPL/Llama-PLLuM-70B-chat-250801", "avg_exact": 0.227169, "avg_wp": 0.335616, "total": 1752, "lang_exact": {"ar": 0.15, "az": 0.275, "be": 0.25, "bg": 0.175, "bo": 0.25, "ca": 0.175, "cn": 0.152542, "cs": 0.225, "da": 0.175, "el": 0.225, "en": 0.225, "es": 0.125, "et": 0.25, "eu": 0.1, "fa": 0.35, "fi": 0.25, "fr": 0.225, "gl": 0.133333, "hu": 0.175, "hv": 0.375, "is": 0.4, "it": 0.2, "ka": 0.175, "la": 0.3, "li": 0.25, "lv": 0.325, "mk": 0.225, "mt": 0.275, "nl": 0.25, "no": 0.25, "pl": 0.1, "pt": 0.25, "ro": 0.25, "ru": 0.30303, "sk": 0.2, "sl": 0.175, "sq": 0.25, "sr": 0.325, "sv": 0.25, "tr": 0.275, "uk": 0.15}, "lang_wp": {"ar": 0.35, "az": 0.4, "be": 0.3625, "bg": 0.275, "bo": 0.4, "ca": 0.2375, "cn": 0.194915, "cs": 0.375, "da": 0.2875, "el": 0.3125, "en": 0.2375, "es": 0.125, "et": 0.375, "eu": 0.183333, "fa": 0.508333, "fi": 0.3625, "fr": 0.35, "gl": 0.2, "hu": 0.2125, "hv": 0.55, "is": 0.5875, "it": 0.275, "ka": 0.2375, "la": 0.5, "li": 0.35, "lv": 0.4625, "mk": 0.275, "mt": 0.4625, "nl": 0.3625, "no": 0.3125, "pl": 0.141667, "pt": 0.3625, "ro": 0.3625, "ru": 0.439394, "sk": 0.325, "sl": 0.2875, "sq": 0.375, "sr": 0.475, "sv": 0.375, "tr": 0.4125, "uk": 0.2375}}, {"model": "CYFRAGOVPL/pllum-12b-nc-chat-250715", "avg_exact": 
0.146689, "avg_wp": 0.234018, "total": 1752, "lang_exact": {"ar": 0.075, "az": 0.175, "be": 0.275, "bg": 0.125, "bo": 0.325, "ca": 0.275, "cn": 0.101695, "cs": 0.075, "da": 0.125, "el": 0.175, "en": 0.15, "es": 0.2, "et": 0.075, "eu": 0.083333, "fa": 0.216667, "fi": 0.075, "fr": 0.25, "gl": 0.1, "hu": 0.025, "hv": 0.325, "is": 0.15, "it": 0.125, "ka": 0.075, "la": 0.133333, "li": 0.15, "lv": 0.1, "mk": 0.075, "mt": 0.1, "nl": 0.075, "no": 0.2, "pl": 0.116667, "pt": 0.1, "ro": 0.35, "ru": 0.212121, "sk": 0.15, "sl": 0.15, "sq": 0.075, "sr": 0.175, "sv": 0.125, "tr": 0.15, "uk": 0.075}, "lang_wp": {"ar": 0.0875, "az": 0.3375, "be": 0.3625, "bg": 0.2, "bo": 0.4875, "ca": 0.3625, "cn": 0.144068, "cs": 0.1125, "da": 0.2125, "el": 0.25, "en": 0.275, "es": 0.275, "et": 0.15, "eu": 0.116667, "fa": 0.35, "fi": 0.1375, "fr": 0.3875, "gl": 0.183333, "hu": 0.125, "hv": 0.475, "is": 0.225, "it": 0.225, "ka": 0.1125, "la": 0.308333, "li": 0.2125, "lv": 0.2375, "mk": 0.15, "mt": 0.2, "nl": 0.15, "no": 0.275, "pl": 0.141667, "pt": 0.1375, "ro": 0.425, "ru": 0.30303, "sk": 0.2625, "sl": 0.3, "sq": 0.1375, "sr": 0.25, "sv": 0.1625, "tr": 0.2625, "uk": 0.175}}];
|
| 168 |
// Language codes covered by the leaderboard. The same codes are used as keys of
// each row's `lang_exact` / `lang_wp` maps and of LANG_COUNTS.
// NOTE(review): several codes (e.g. "cn", "hv", "li") do not look like ISO 639-1;
// presumably they are dataset-specific identifiers — confirm before normalizing.
const ALL_LANGS = ["ar", "az", "be", "bg", "bo", "ca", "cn", "cs", "da", "el", "en", "es", "et", "eu", "fa", "fi", "fr", "gl", "hu", "hv", "is", "it", "ka", "la", "li", "lv", "mk", "mt", "nl", "no", "pl", "pt", "ro", "ru", "sk", "sl", "sq", "sr", "sv", "tr", "uk"];
|
| 169 |
// Maps a language code to its human-readable English language name.
// Contains more codes than ALL_LANGS; unused entries are harmless.
// NOTE(review): "bo", "fa", "li", "cn" and "pe" do not follow ISO 639-1
// (where bo = Tibetan, fa = Persian, li = Limburgish); they are presumably
// dataset-specific codes, so their labels are left as-is — confirm with the
// dataset owner.
const LANG_NAMES = {
  "af": "Afrikaans", "ar": "Arabic", "az": "Azerbaijani", "be": "Belarusian",
  "bo": "Bosnian", "bg": "Bulgarian",
  // Was "Bulgarian" (duplicate of "bg"); "bn" is Bengali.
  "bn": "Bengali",
  "ca": "Catalan", "cs": "Czech", "cn": "Chinese", "cy": "Welsh",
  "da": "Danish", "de": "German", "el": "Greek", "en": "English",
  "eo": "Esperanto", "es": "Spanish", "et": "Estonian", "eu": "Basque",
  "fa": "Faroese", "fi": "Finnish", "fr": "French", "ga": "Irish",
  "gl": "Galician", "gu": "Gujarati", "he": "Hebrew", "hi": "Hindi",
  "hr": "Croatian", "hu": "Hungarian", "hy": "Armenian",
  // Was "Croatia" (a country name); every other value is a language name.
  "hv": "Croatian",
  "id": "Indonesian", "is": "Icelandic", "it": "Italian", "ja": "Japanese",
  "ka": "Georgian", "kk": "Kazakh", "km": "Khmer", "kn": "Kannada",
  "ko": "Korean", "la": "Latin", "li": "Lithuanian", "lv": "Latvian",
  "mk": "Macedonian", "ml": "Malayalam", "mn": "Mongolian", "mr": "Marathi",
  "ms": "Malay", "mt": "Maltese", "my": "Burmese", "ne": "Nepali",
  "nl": "Dutch", "no": "Norwegian", "pa": "Punjabi", "pe": "Persian",
  "pl": "Polish", "pt": "Portuguese", "ro": "Romanian", "ru": "Russian",
  "si": "Sinhala", "sk": "Slovak", "sl": "Slovenian", "sq": "Albanian",
  "sr": "Serbian", "sv": "Swedish", "sw": "Swahili", "ta": "Tamil",
  "te": "Telugu", "th": "Thai", "tl": "Filipino", "tr": "Turkish",
  "uk": "Ukrainian", "ur": "Urdu", "uz": "Uzbek", "vi": "Vietnamese",
  "zh": "Chinese", "zu": "Zulu"
};
|
| 170 |
// Per-language integer counts, keyed by the same codes as ALL_LANGS.
// NOTE(review): presumably the number of texts available per language in the
// dataset — the unit is not visible here; confirm against the generator script.
const LANG_COUNTS = {"ar": 1790, "az": 1795, "be": 1794, "bg": 1721, "bo": 1795, "ca": 1712, "cn": 2686, "cs": 1779, "da": 1784, "el": 1794, "en": 1791, "es": 1652, "et": 1788, "eu": 2684, "fa": 2697, "fi": 1784, "fr": 1761, "gl": 2698, "hu": 1796, "hv": 1796, "is": 1790, "it": 1791, "ka": 1629, "la": 2522, "li": 1793, "lv": 1795, "mk": 1799, "mt": 1797, "nl": 1795, "no": 1799, "pl": 2637, "pt": 1795, "ro": 1790, "ru": 1424, "sk": 1784, "sl": 1788, "sq": 1793, "sr": 1798, "sv": 1797, "tr": 1799, "uk": 1747};
|
|
@@ -408,9 +475,198 @@
|
|
| 408 |
`77760 predictions · ${ALL_LANGS.length} languages · ${ALL_ROWS.length} models`;
|
| 409 |
}
|
| 410 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 411 |
render();
|
| 412 |
renderChart();
|
| 413 |
-
renderDist(); // disabled for testing
|
|
|
|
|
|
|
|
|
|
|
|
|
| 414 |
})();
|
| 415 |
</script>
|
| 416 |
</body>
|
|
|
|
| 20 |
letter-spacing: -0.02em; color: #f1f5f9; margin-bottom: 6px;
|
| 21 |
}
|
| 22 |
.subtitle { color: #64748b; font-size: 12px; margin-bottom: 12px; }
|
| 23 |
+
.meta-subtitle {
|
| 24 |
+
color: #64748b; font-size: 12px; margin-bottom: 20px;
|
| 25 |
+
line-height: 1.8;
|
| 26 |
+
}
|
| 27 |
+
.sep { color: #334155; margin: 0 8px; }
|
| 28 |
.scoring-note {
|
| 29 |
display: inline-flex; gap: 16px; flex-wrap: wrap;
|
| 30 |
background: #131820; border: 1px solid #1e2a3a;
|
|
|
|
| 115 |
display: inline-block; height: 6px; border-radius: 3px;
|
| 116 |
background: #2563eb; vertical-align: middle; margin-left: 4px; opacity: 0.7;
|
| 117 |
}
|
| 118 |
+
/* ── analysis sections ── */
|
| 119 |
+
.analysis-grid {
|
| 120 |
+
display: grid;
|
| 121 |
+
grid-template-columns: 1fr 1fr;
|
| 122 |
+
gap: 24px;
|
| 123 |
+
margin-bottom: 52px;
|
| 124 |
+
}
|
| 125 |
+
@media (max-width: 900px) { .analysis-grid { grid-template-columns: 1fr; } }
|
| 126 |
+
.analysis-card {
|
| 127 |
+
background: #111827; border: 1px solid #1e2a3a;
|
| 128 |
+
border-radius: 10px; padding: 22px 20px;
|
| 129 |
+
}
|
| 130 |
+
.analysis-card h3 {
|
| 131 |
+
font-family: 'Syne', sans-serif; font-size: 14px; font-weight: 700;
|
| 132 |
+
color: #f1f5f9; margin-bottom: 4px;
|
| 133 |
+
}
|
| 134 |
+
.analysis-card .card-sub {
|
| 135 |
+
font-size: 11px; color: #475569; margin-bottom: 16px; line-height: 1.5;
|
| 136 |
+
}
|
| 137 |
+
.model-select {
|
| 138 |
+
background: #1a2236; border: 1px solid #2d3748; border-radius: 6px;
|
| 139 |
+
color: #cbd5e1; font: inherit; font-size: 11px;
|
| 140 |
+
padding: 5px 10px; margin-bottom: 14px; cursor: pointer; width: 100%;
|
| 141 |
+
}
|
| 142 |
+
.model-select:focus { outline: none; border-color: #38bdf8; }
|
| 143 |
.footer { margin-top: 20px; font-size: 11px; color: #2d3748; text-align: right; }
|
| 144 |
::-webkit-scrollbar { height: 5px; background: #0d1117; }
|
| 145 |
::-webkit-scrollbar-thumb { background: #2d3748; border-radius: 3px; }
|
|
|
|
| 149 |
<h1>Text Quality Rating Benchmark</h1>
|
| 150 |
<p class="meta-subtitle">
|
| 151 |
LLM accuracy at rating text quality on a 1–6 scale across multiple languages
|
| 152 |
+
<span class="sep">·</span> Labeled by DeepSeek V3.2 & judged by Gemini 3 Flash
|
| 153 |
<span class="sep">·</span> Documents sourced from FineWeb dataset
|
| 154 |
</p>
|
| 155 |
|
|
|
|
| 174 |
</table>
|
| 175 |
</div>
|
| 176 |
|
| 177 |
+
<!-- DISTRIBUTION SECTION (disabled for testing)
|
| 178 |
<p class="section-title">Dataset Distribution</p>
|
| 179 |
<p class="subtitle" style="margin-bottom:20px">Number of unique texts per rating score (1–6) for each language</p>
|
| 180 |
<div class="dist-wrap">
|
|
|
|
| 183 |
<tbody id="dist-body"></tbody>
|
| 184 |
</table>
|
| 185 |
</div>
|
| 186 |
+
-->
|
| 187 |
|
| 188 |
<p class="section-title" style="margin-top:52px">Global Model Comparison</p>
|
| 189 |
<p class="subtitle" style="margin-bottom:20px">Weighted Score vs Exact Accuracy — all languages combined, sorted by Weighted Score</p>
|
|
|
|
| 191 |
<canvas id="globalChart"></canvas>
|
| 192 |
</div>
|
| 193 |
|
| 194 |
+
<p class="section-title" style="margin-bottom:4px;margin-top:52px">Model Error Analysis</p>
|
| 195 |
+
<p class="subtitle" style="margin-bottom:20px">Bias, critical misclassifications and confusion patterns</p>
|
| 196 |
+
|
| 197 |
+
<div class="analysis-grid">
|
| 198 |
+
|
| 199 |
+
<!-- Bias lollipop -->
|
| 200 |
+
<div class="analysis-card">
|
| 201 |
+
<h3>Prediction Bias</h3>
|
| 202 |
+
<p class="card-sub">Average error (predicted − ground truth). Negative = underestimation, positive = overestimation.</p>
|
| 203 |
+
<div style="position:relative">
|
| 204 |
+
<canvas id="biasChart"></canvas>
|
| 205 |
+
</div>
|
| 206 |
+
</div>
|
| 207 |
+
|
| 208 |
+
<!-- Critical confusion 1/2 vs 5/6 -->
|
| 209 |
+
<div class="analysis-card">
|
| 210 |
+
<h3>Critical Confusion Rate</h3>
|
| 211 |
+
<p class="card-sub">
|
| 212 |
+
% of low-quality texts (rating 1–2) predicted as high-quality (5–6) and vice versa.
|
| 213 |
+
These are the most dangerous misclassifications.
|
| 214 |
+
</p>
|
| 215 |
+
<canvas id="criticalChart"></canvas>
|
| 216 |
+
</div>
|
| 217 |
+
|
| 218 |
+
</div>
|
| 219 |
+
|
| 220 |
+
<!-- Full confusion heatmap with model dropdown -->
|
| 221 |
+
<div class="analysis-card" style="margin-bottom:52px">
|
| 222 |
+
<h3>Confusion Matrix</h3>
|
| 223 |
+
<p class="card-sub">Row = ground truth rating, column = predicted rating. Values show % of predictions within each true class.</p>
|
| 224 |
+
<select class="model-select" id="confModelSelect" onchange="renderConfusion()"></select>
|
| 225 |
+
<div id="confusionWrap" style="overflow-x:auto">
|
| 226 |
+
<canvas id="confusionChart"></canvas>
|
| 227 |
+
</div>
|
| 228 |
+
</div>
|
| 229 |
+
|
| 230 |
<div class="footer" id="footer"></div>
|
| 231 |
|
| 232 |
<script>
|
| 233 |
(function() {
|
| 234 |
+
const ALL_ROWS = [{"model": "Qwen/Qwen3.5-397B-A17B-FP8", "avg_exact": 0.656148, "avg_wp": 0.808234, "avg_bias": -0.1102, "total": 17112, "lang_exact": {"ar": 0.675, "az": 0.7025, "be": 0.785, "bg": 0.759669, "bo": 0.735, "ca": 0.732591, "cn": 0.628333, "cs": 0.7425, "da": 0.565, "el": 0.6075, "en": 0.46, "es": 0.777778, "et": 0.6575, "eu": 0.474037, "fa": 0.565, "fi": 0.77, "fr": 0.769634, "gl": 0.521667, "hu": 0.6475, "hv": 0.7175, "is": 0.715, "it": 0.78, "ka": 0.688679, "la": 0.662768, "li": 0.6, "lv": 0.725, "mk": 0.54, "mt": 0.74, "nl": 0.58, "no": 0.695, "pl": 0.486865, "pt": 0.695, "ro": 0.625, "ru": 0.759076, "sk": 0.63, "sl": 0.715, "sq": 0.8025, "sr": 0.5625, "sv": 0.66, "tr": 0.55, "uk": 0.764706}, "lang_wp": {"ar": 0.8225, "az": 0.83375, "be": 0.87375, "bg": 0.870166, "bo": 0.85625, "ca": 0.85376, "cn": 0.7875, "cs": 0.85125, "da": 0.77125, "el": 0.79625, "en": 0.71125, "es": 0.876877, "et": 0.8175, "eu": 0.68593, "fa": 0.758333, "fi": 0.87125, "fr": 0.870419, "gl": 0.721667, "hu": 0.79625, "hv": 0.85625, "is": 0.8475, "it": 0.87875, "ka": 0.794025, "la": 0.798246, "li": 0.78875, "lv": 0.85625, "mk": 0.75125, "mt": 0.8475, "nl": 0.78, "no": 0.8075, "pl": 0.707531, "pt": 0.80625, "ro": 0.7825, "ru": 0.866337, "sk": 0.8, "sl": 0.85, "sq": 0.89375, "sr": 0.77375, "sv": 0.82125, "tr": 0.755, "uk": 0.874332}, "confusion": {"1": {"1": 0.7778, "2": 0.2149, "3": 0.0035, "4": 0.003, "5": 0.0003, "6": 0.0005}, "2": {"2": 0.6939, "1": 0.239, "3": 0.0258, "4": 0.0335, "5": 0.002, "6": 0.0058}, "5": {"5": 0.659, "4": 0.1832, "6": 0.1293, "2": 0.0205, "3": 0.008}, "6": {"5": 0.3482, "6": 0.5891, "3": 0.0008, "4": 0.0455, "2": 0.0154, "1": 0.0011}, "3": {"4": 0.376, "2": 0.3614, "5": 0.0535, "3": 0.1977, "1": 0.0081, "6": 0.0032}, "4": {"4": 0.49, "5": 0.2871, "3": 0.0871, "2": 0.1114, "6": 0.02, "1": 0.0043}}}, {"model": "google/gemini-3-flash-preview", "avg_exact": 0.577296, "avg_wp": 0.760411, "avg_bias": 0.0496, "total": 1753, "lang_exact": {"ar": 0.575, "az": 
0.55, "be": 0.625, "bg": 0.75, "bo": 0.575, "ca": 0.725, "cn": 0.566667, "cs": 0.525, "da": 0.475, "el": 0.575, "en": 0.4, "es": 0.825, "et": 0.625, "eu": 0.416667, "fa": 0.516667, "fi": 0.625, "fr": 0.675, "gl": 0.533333, "hu": 0.575, "hv": 0.7, "is": 0.55, "it": 0.725, "ka": 0.6, "la": 0.5, "li": 0.5, "lv": 0.6, "mk": 0.525, "mt": 0.7, "nl": 0.45, "no": 0.6, "pl": 0.366667, "pt": 0.525, "ro": 0.625, "ru": 0.69697, "sk": 0.525, "sl": 0.7, "sq": 0.675, "sr": 0.475, "sv": 0.6, "tr": 0.6, "uk": 0.6}, "lang_wp": {"ar": 0.7875, "az": 0.6875, "be": 0.8125, "bg": 0.8625, "bo": 0.775, "ca": 0.8375, "cn": 0.783333, "cs": 0.725, "da": 0.725, "el": 0.775, "en": 0.65, "es": 0.9125, "et": 0.8125, "eu": 0.608333, "fa": 0.725, "fi": 0.775, "fr": 0.8125, "gl": 0.7, "hu": 0.7875, "hv": 0.85, "is": 0.7125, "it": 0.85, "ka": 0.8, "la": 0.658333, "li": 0.725, "lv": 0.7875, "mk": 0.7125, "mt": 0.8, "nl": 0.7, "no": 0.775, "pl": 0.641667, "pt": 0.75, "ro": 0.75, "ru": 0.833333, "sk": 0.7625, "sl": 0.8375, "sq": 0.825, "sr": 0.7375, "sv": 0.8, "tr": 0.7625, "uk": 0.7875}, "confusion": {"2": {"2": 0.435, "1": 0.3325, "3": 0.135, "5": 0.03, "4": 0.06, "6": 0.0075}, "1": {"2": 0.1463, "1": 0.822, "4": 0.0073, "5": 0.0073, "3": 0.0171}, "5": {"5": 0.5268, "6": 0.3537, "4": 0.0976, "3": 0.0195, "2": 0.0024}, "6": {"6": 0.6336, "5": 0.3333, "4": 0.028, "3": 0.0025, "2": 0.0025}, "3": {"4": 0.3286, "3": 0.2286, "2": 0.2429, "5": 0.1571, "1": 0.0429}, "4": {"4": 0.2857, "5": 0.4286, "3": 0.1286, "6": 0.1, "2": 0.0571}}}, {"model": "openai/gpt-4o-mini", "avg_exact": 0.596461, "avg_wp": 0.757991, "avg_bias": -0.1427, "total": 1752, "lang_exact": {"ar": 0.625, "az": 0.5, "be": 0.725, "bg": 0.675, "bo": 0.675, "ca": 0.475, "cn": 0.7, "cs": 0.6, "da": 0.525, "el": 0.525, "en": 0.425, "es": 0.8, "et": 0.875, "eu": 0.35, "fa": 0.533333, "fi": 0.65, "fr": 0.575, "gl": 0.433333, "hu": 0.65, "hv": 0.7, "is": 0.6, "it": 0.725, "ka": 0.475, "la": 0.4, "li": 0.675, "lv": 0.725, "mk": 0.4, "mt": 0.6, "nl": 
0.475, "no": 0.675, "pl": 0.383333, "pt": 0.75, "ro": 0.5, "ru": 0.848485, "sk": 0.7, "sl": 0.7, "sq": 0.7, "sr": 0.475, "sv": 0.725, "tr": 0.65, "uk": 0.692308}, "lang_wp": {"ar": 0.7875, "az": 0.725, "be": 0.8625, "bg": 0.8125, "bo": 0.8375, "ca": 0.675, "cn": 0.841667, "cs": 0.7625, "da": 0.725, "el": 0.7375, "en": 0.5875, "es": 0.9, "et": 0.9375, "eu": 0.583333, "fa": 0.733333, "fi": 0.775, "fr": 0.7625, "gl": 0.666667, "hu": 0.8125, "hv": 0.825, "is": 0.7875, "it": 0.8375, "ka": 0.6625, "la": 0.566667, "li": 0.8125, "lv": 0.8625, "mk": 0.575, "mt": 0.7875, "nl": 0.7, "no": 0.7875, "pl": 0.566667, "pt": 0.8625, "ro": 0.7, "ru": 0.893939, "sk": 0.825, "sl": 0.8375, "sq": 0.825, "sr": 0.6875, "sv": 0.8375, "tr": 0.8, "uk": 0.833333}, "confusion": {"5": {"5": 0.6537, "4": 0.1439, "6": 0.1561, "2": 0.0146, "3": 0.0244, "1": 0.0073}, "2": {"3": 0.155, "1": 0.2625, "4": 0.0275, "2": 0.54, "5": 0.0125, "6": 0.0025}, "1": {"2": 0.1951, "1": 0.7488, "3": 0.0512, "4": 0.0049}, "6": {"5": 0.3316, "6": 0.5306, "4": 0.0561, "3": 0.0281, "2": 0.0306, "1": 0.023}, "3": {"4": 0.2286, "3": 0.3286, "2": 0.2429, "1": 0.1714, "5": 0.0286}, "4": {"4": 0.3286, "3": 0.1429, "5": 0.3286, "2": 0.1286, "6": 0.0286, "1": 0.0429}}}, {"model": "qwen/qwen3-235b-a22b-2507", "avg_exact": 0.496292, "avg_wp": 0.693953, "avg_bias": -0.4398, "total": 1753, "lang_exact": {"ar": 0.6, "az": 0.475, "be": 0.55, "bg": 0.65, "bo": 0.55, "ca": 0.525, "cn": 0.433333, "cs": 0.425, "da": 0.45, "el": 0.55, "en": 0.425, "es": 0.525, "et": 0.5, "eu": 0.416667, "fa": 0.616667, "fi": 0.55, "fr": 0.6, "gl": 0.433333, "hu": 0.65, "hv": 0.625, "is": 0.475, "it": 0.65, "ka": 0.325, "la": 0.433333, "li": 0.4, "lv": 0.575, "mk": 0.35, "mt": 0.475, "nl": 0.35, "no": 0.4, "pl": 0.316667, "pt": 0.575, "ro": 0.5, "ru": 0.454545, "sk": 0.45, "sl": 0.575, "sq": 0.6, "sr": 0.5, "sv": 0.475, "tr": 0.45, "uk": 0.625}, "lang_wp": {"ar": 0.775, "az": 0.7, "be": 0.75, "bg": 0.8, "bo": 0.75, "ca": 0.7, "cn": 0.616667, "cs": 
0.6375, "da": 0.6625, "el": 0.725, "en": 0.55, "es": 0.725, "et": 0.75, "eu": 0.608333, "fa": 0.783333, "fi": 0.725, "fr": 0.7625, "gl": 0.625, "hu": 0.8125, "hv": 0.8, "is": 0.7, "it": 0.8125, "ka": 0.6, "la": 0.558333, "li": 0.6625, "lv": 0.775, "mk": 0.575, "mt": 0.675, "nl": 0.6, "no": 0.65, "pl": 0.458333, "pt": 0.775, "ro": 0.7375, "ru": 0.712121, "sk": 0.7, "sl": 0.775, "sq": 0.7625, "sr": 0.725, "sv": 0.725, "tr": 0.675, "uk": 0.8}, "confusion": {"1": {"1": 0.9293, "2": 0.0439, "3": 0.0244, "4": 0.0024}, "2": {"4": 0.02, "3": 0.095, "1": 0.81, "6": 0.0025, "2": 0.0725}, "5": {"5": 0.4171, "4": 0.2585, "3": 0.061, "6": 0.2073, "1": 0.0463, "2": 0.0098}, "6": {"6": 0.6438, "4": 0.0662, "5": 0.2239, "3": 0.0382, "2": 0.0051, "1": 0.0229}, "3": {"2": 0.0857, "1": 0.5714, "3": 0.2143, "4": 0.1286}, "4": {"1": 0.2714, "4": 0.3, "2": 0.1143, "6": 0.0429, "3": 0.1714, "5": 0.1}}}, {"model": "deepseek/deepseek-v3.2", "avg_exact": 0.403879, "avg_wp": 0.629492, "avg_bias": -0.5031, "total": 1753, "lang_exact": {"ar": 0.375, "az": 0.325, "be": 0.475, "bg": 0.475, "bo": 0.475, "ca": 0.425, "cn": 0.533333, "cs": 0.5, "da": 0.275, "el": 0.25, "en": 0.525, "es": 0.425, "et": 0.425, "eu": 0.333333, "fa": 0.35, "fi": 0.375, "fr": 0.45, "gl": 0.483333, "hu": 0.4, "hv": 0.475, "is": 0.575, "it": 0.425, "ka": 0.35, "la": 0.3, "li": 0.375, "lv": 0.4, "mk": 0.325, "mt": 0.375, "nl": 0.325, "no": 0.4, "pl": 0.366667, "pt": 0.475, "ro": 0.25, "ru": 0.484848, "sk": 0.375, "sl": 0.6, "sq": 0.375, "sr": 0.3, "sv": 0.375, "tr": 0.375, "uk": 0.425}, "lang_wp": {"ar": 0.625, "az": 0.5625, "be": 0.725, "bg": 0.675, "bo": 0.7, "ca": 0.6375, "cn": 0.741667, "cs": 0.6875, "da": 0.5125, "el": 0.475, "en": 0.6875, "es": 0.7, "et": 0.7, "eu": 0.541667, "fa": 0.633333, "fi": 0.5875, "fr": 0.6875, "gl": 0.683333, "hu": 0.65, "hv": 0.7125, "is": 0.7625, "it": 0.6875, "ka": 0.575, "la": 0.5, "li": 0.5375, "lv": 0.6625, "mk": 0.55, "mt": 0.5625, "nl": 0.5625, "no": 0.625, "pl": 0.558333, "pt": 0.7, 
"ro": 0.4625, "ru": 0.712121, "sk": 0.625, "sl": 0.775, "sq": 0.6625, "sr": 0.6, "sv": 0.6, "tr": 0.625, "uk": 0.6125}, "confusion": {"1": {"1": 0.8829, "2": 0.0683, "3": 0.0195, "4": 0.022, "6": 0.0024, "5": 0.0049}, "2": {"1": 0.5925, "2": 0.205, "3": 0.11, "4": 0.0725, "6": 0.01, "5": 0.01}, "5": {"4": 0.5195, "6": 0.0317, "5": 0.3366, "3": 0.0829, "2": 0.0171, "1": 0.0122}, "6": {"4": 0.2316, "6": 0.1832, "5": 0.5242, "1": 0.0305, "3": 0.0254, "2": 0.0051}, "3": {"3": 0.2286, "4": 0.3286, "2": 0.1429, "1": 0.2571, "5": 0.0429}, "4": {"3": 0.1571, "4": 0.5429, "5": 0.0857, "1": 0.1714, "2": 0.0429}}}, {"model": "z-ai/glm-4-32b", "avg_exact": 0.432402, "avg_wp": 0.620936, "avg_bias": 0.0877, "total": 1753, "lang_exact": {"ar": 0.375, "az": 0.5, "be": 0.625, "bg": 0.375, "bo": 0.625, "ca": 0.35, "cn": 0.383333, "cs": 0.625, "da": 0.35, "el": 0.55, "en": 0.3, "es": 0.35, "et": 0.525, "eu": 0.3, "fa": 0.383333, "fi": 0.525, "fr": 0.525, "gl": 0.25, "hu": 0.5, "hv": 0.625, "is": 0.35, "it": 0.4, "ka": 0.5, "la": 0.433333, "li": 0.425, "lv": 0.525, "mk": 0.225, "mt": 0.35, "nl": 0.425, "no": 0.475, "pl": 0.35, "pt": 0.25, "ro": 0.3, "ru": 0.515152, "sk": 0.525, "sl": 0.475, "sq": 0.7, "sr": 0.375, "sv": 0.4, "tr": 0.425, "uk": 0.525}, "lang_wp": {"ar": 0.525, "az": 0.7, "be": 0.775, "bg": 0.5375, "bo": 0.7625, "ca": 0.5, "cn": 0.583333, "cs": 0.775, "da": 0.6, "el": 0.7375, "en": 0.45, "es": 0.575, "et": 0.725, "eu": 0.533333, "fa": 0.608333, "fi": 0.6375, "fr": 0.6625, "gl": 0.375, "hu": 0.65, "hv": 0.8125, "is": 0.5875, "it": 0.6, "ka": 0.65, "la": 0.558333, "li": 0.65, "lv": 0.7125, "mk": 0.4125, "mt": 0.5625, "nl": 0.675, "no": 0.675, "pl": 0.575, "pt": 0.4375, "ro": 0.525, "ru": 0.69697, "sk": 0.7375, "sl": 0.6875, "sq": 0.8375, "sr": 0.6, "sv": 0.625, "tr": 0.675, "uk": 0.7125}, "confusion": {"1": {"1": 0.7217, "3": 0.2389, "5": 0.0369, "4": 0.0025}, "2": {"3": 0.5682, "5": 0.0732, "1": 0.3207, "4": 0.0278, "6": 0.0101}, "5": {"5": 0.5479, "6": 0.1818, "3": 
0.1425, "4": 0.1057, "1": 0.0221}, "6": {"6": 0.553, "5": 0.3488, "3": 0.0646, "4": 0.0181, "1": 0.0155}, "3": {"3": 0.2429, "4": 0.1571, "5": 0.3857, "1": 0.1857, "6": 0.0286}, "4": {"4": 0.1618, "5": 0.4853, "3": 0.1912, "1": 0.1176, "6": 0.0441}}}, {"model": "speakleash/Bielik-11B-v3.0-Instruct", "avg_exact": 0.41462, "avg_wp": 0.601942, "avg_bias": -0.2942, "total": 1751, "lang_exact": {"ar": 0.025, "az": 0.525, "be": 0.2, "bg": 0.525, "bo": 0.675, "ca": 0.325, "cn": 0.271186, "cs": 0.5, "da": 0.425, "el": 0.325, "en": 0.5, "es": 0.475, "et": 0.55, "eu": 0.25, "fa": 0.366667, "fi": 0.425, "fr": 0.5, "gl": 0.4, "hu": 0.475, "hv": 0.525, "is": 0.225, "it": 0.475, "ka": 0.230769, "la": 0.3, "li": 0.45, "lv": 0.425, "mk": 0.275, "mt": 0.375, "nl": 0.45, "no": 0.475, "pl": 0.366667, "pt": 0.475, "ro": 0.425, "ru": 0.606061, "sk": 0.475, "sl": 0.55, "sq": 0.5, "sr": 0.45, "sv": 0.425, "tr": 0.45, "uk": 0.625}, "lang_wp": {"ar": 0.15, "az": 0.6875, "be": 0.275, "bg": 0.675, "bo": 0.7875, "ca": 0.6125, "cn": 0.40678, "cs": 0.6125, "da": 0.675, "el": 0.5625, "en": 0.65, "es": 0.7125, "et": 0.7375, "eu": 0.483333, "fa": 0.583333, "fi": 0.5375, "fr": 0.725, "gl": 0.608333, "hu": 0.6625, "hv": 0.675, "is": 0.3875, "it": 0.6875, "ka": 0.371795, "la": 0.483333, "li": 0.675, "lv": 0.675, "mk": 0.575, "mt": 0.575, "nl": 0.7125, "no": 0.6875, "pl": 0.541667, "pt": 0.6875, "ro": 0.65, "ru": 0.727273, "sk": 0.6125, "sl": 0.6625, "sq": 0.6625, "sr": 0.6, "sv": 0.7, "tr": 0.7, "uk": 0.75}, "confusion": {"1": {"3": 0.0442, "5": 0.026, "1": 0.8623, "2": 0.0338, "4": 0.0338}, "2": {"1": 0.5652, "3": 0.1576, "5": 0.0408, "2": 0.1087, "4": 0.1277}, "5": {"3": 0.0281, "5": 0.8235, "1": 0.0358, "4": 0.1125}, "6": {"3": 0.0278, "5": 0.8056, "4": 0.0741, "1": 0.0833, "6": 0.0093}, "3": {"1": 0.2687, "3": 0.0597, "5": 0.0746, "4": 0.5821, "2": 0.0149}, "4": {"3": 0.0909, "5": 0.3939, "1": 0.1061, "2": 0.0303, "4": 0.3788}}}, {"model": "google/gemini-2.0-flash-lite-001", "avg_exact": 
0.385054, "avg_wp": 0.586423, "avg_bias": -0.2835, "total": 1753, "lang_exact": {"ar": 0.325, "az": 0.325, "be": 0.55, "bg": 0.6, "bo": 0.4, "ca": 0.275, "cn": 0.45, "cs": 0.3, "da": 0.4, "el": 0.225, "en": 0.525, "es": 0.4, "et": 0.55, "eu": 0.333333, "fa": 0.516667, "fi": 0.4, "fr": 0.425, "gl": 0.383333, "hu": 0.45, "hv": 0.425, "is": 0.2, "it": 0.425, "ka": 0.3, "la": 0.216667, "li": 0.425, "lv": 0.475, "mk": 0.2, "mt": 0.425, "nl": 0.4, "no": 0.325, "pl": 0.366667, "pt": 0.375, "ro": 0.275, "ru": 0.606061, "sk": 0.425, "sl": 0.375, "sq": 0.425, "sr": 0.25, "sv": 0.425, "tr": 0.4, "uk": 0.275}, "lang_wp": {"ar": 0.525, "az": 0.6, "be": 0.75, "bg": 0.7625, "bo": 0.5875, "ca": 0.525, "cn": 0.608333, "cs": 0.5, "da": 0.55, "el": 0.5, "en": 0.7125, "es": 0.65, "et": 0.6875, "eu": 0.566667, "fa": 0.691667, "fi": 0.6375, "fr": 0.6125, "gl": 0.633333, "hu": 0.6375, "hv": 0.625, "is": 0.45, "it": 0.6125, "ka": 0.525, "la": 0.383333, "li": 0.575, "lv": 0.7, "mk": 0.4625, "mt": 0.65, "nl": 0.6125, "no": 0.4875, "pl": 0.55, "pt": 0.525, "ro": 0.4375, "ru": 0.712121, "sk": 0.6, "sl": 0.6125, "sq": 0.6125, "sr": 0.4875, "sv": 0.6375, "tr": 0.5875, "uk": 0.525}, "confusion": {"2": {"3": 0.3775, "1": 0.2725, "2": 0.28, "4": 0.0525, "6": 0.0175}, "1": {"3": 0.1098, "1": 0.7171, "2": 0.1585, "4": 0.0098, "6": 0.0049}, "5": {"4": 0.5293, "3": 0.1341, "5": 0.178, "6": 0.1561, "1": 0.0024}, "6": {"6": 0.3333, "4": 0.3868, "5": 0.1323, "3": 0.1349, "1": 0.0102, "2": 0.0025}, "3": {"6": 0.0571, "3": 0.5143, "4": 0.2143, "5": 0.0714, "1": 0.0571, "2": 0.0857}, "4": {"4": 0.4143, "6": 0.1714, "3": 0.3571, "2": 0.0286, "5": 0.0286}}}, {"model": "google/gemma-3-12b-it", "avg_exact": 0.337707, "avg_wp": 0.573873, "avg_bias": -0.0331, "total": 1753, "lang_exact": {"ar": 0.275, "az": 0.35, "be": 0.35, "bg": 0.475, "bo": 0.45, "ca": 0.225, "cn": 0.383333, "cs": 0.45, "da": 0.375, "el": 0.275, "en": 0.45, "es": 0.325, "et": 0.4, "eu": 0.233333, "fa": 0.25, "fi": 0.4, "fr": 0.425, "gl": 
0.166667, "hu": 0.35, "hv": 0.4, "is": 0.475, "it": 0.325, "ka": 0.3, "la": 0.3, "li": 0.25, "lv": 0.425, "mk": 0.275, "mt": 0.25, "nl": 0.45, "no": 0.375, "pl": 0.366667, "pt": 0.25, "ro": 0.25, "ru": 0.575758, "sk": 0.275, "sl": 0.35, "sq": 0.35, "sr": 0.325, "sv": 0.175, "tr": 0.325, "uk": 0.375}, "lang_wp": {"ar": 0.5, "az": 0.625, "be": 0.6125, "bg": 0.6625, "bo": 0.675, "ca": 0.4875, "cn": 0.583333, "cs": 0.625, "da": 0.5875, "el": 0.5125, "en": 0.5625, "es": 0.5875, "et": 0.625, "eu": 0.466667, "fa": 0.533333, "fi": 0.625, "fr": 0.675, "gl": 0.375, "hu": 0.6125, "hv": 0.6375, "is": 0.6875, "it": 0.5375, "ka": 0.475, "la": 0.525, "li": 0.5875, "lv": 0.6625, "mk": 0.55, "mt": 0.5375, "nl": 0.725, "no": 0.5625, "pl": 0.575, "pt": 0.525, "ro": 0.4875, "ru": 0.787879, "sk": 0.5625, "sl": 0.6, "sq": 0.5125, "sr": 0.5875, "sv": 0.5125, "tr": 0.6125, "uk": 0.575}, "confusion": {"1": {"2": 0.4366, "4": 0.0195, "1": 0.3195, "3": 0.2024, "6": 0.0122, "5": 0.0098}, "2": {"4": 0.0675, "3": 0.4425, "2": 0.24, "5": 0.045, "1": 0.1825, "6": 0.0225}, "5": {"5": 0.7171, "3": 0.0268, "4": 0.0854, "1": 0.1244, "6": 0.0439, "2": 0.0024}, "6": {"6": 0.1148, "5": 0.6862, "3": 0.0459, "4": 0.0995, "1": 0.0434, "2": 0.0102}, "3": {"3": 0.2429, "4": 0.3143, "5": 0.2, "1": 0.1857, "2": 0.0571}, "4": {"5": 0.6286, "4": 0.1286, "3": 0.1, "1": 0.1286, "6": 0.0143}}}, {"model": "mistralai/mistral-nemo", "avg_exact": 0.309184, "avg_wp": 0.499715, "avg_bias": -0.3506, "total": 1753, "lang_exact": {"ar": 0.325, "az": 0.45, "be": 0.475, "bg": 0.325, "bo": 0.375, "ca": 0.25, "cn": 0.383333, "cs": 0.425, "da": 0.375, "el": 0.3, "en": 0.25, "es": 0.35, "et": 0.225, "eu": 0.216667, "fa": 0.266667, "fi": 0.35, "fr": 0.275, "gl": 0.283333, "hu": 0.2, "hv": 0.425, "is": 0.3, "it": 0.2, "ka": 0.425, "la": 0.183333, "li": 0.325, "lv": 0.425, "mk": 0.375, "mt": 0.325, "nl": 0.35, "no": 0.375, "pl": 0.25, "pt": 0.35, "ro": 0.2, "ru": 0.212121, "sk": 0.375, "sl": 0.35, "sq": 0.3, "sr": 0.25, "sv": 0.175, 
"tr": 0.275, "uk": 0.25}, "lang_wp": {"ar": 0.5, "az": 0.6125, "be": 0.675, "bg": 0.4625, "bo": 0.5, "ca": 0.5125, "cn": 0.608333, "cs": 0.5125, "da": 0.5375, "el": 0.4625, "en": 0.5, "es": 0.575, "et": 0.4875, "eu": 0.441667, "fa": 0.5, "fi": 0.525, "fr": 0.5625, "gl": 0.475, "hu": 0.425, "hv": 0.5875, "is": 0.4625, "it": 0.4, "ka": 0.5875, "la": 0.333333, "li": 0.525, "lv": 0.6625, "mk": 0.5375, "mt": 0.425, "nl": 0.4875, "no": 0.5375, "pl": 0.466667, "pt": 0.525, "ro": 0.3, "ru": 0.484848, "sk": 0.6, "sl": 0.575, "sq": 0.425, "sr": 0.45, "sv": 0.4375, "tr": 0.45, "uk": 0.4375}, "confusion": {"2": {"1": 0.2982, "5": 0.1028, "2": 0.3759, "4": 0.0602, "3": 0.1604, "6": 0.0025}, "1": {"2": 0.2689, "1": 0.4572, "5": 0.11, "3": 0.1149, "4": 0.0342, "6": 0.0147}, "5": {"4": 0.2836, "5": 0.4132, "2": 0.0856, "1": 0.066, "3": 0.1467, "6": 0.0049}, "6": {"5": 0.5109, "2": 0.0656, "1": 0.1148, "4": 0.1967, "6": 0.0137, "3": 0.0984}, "3": {"2": 0.2429, "4": 0.2143, "3": 0.1857, "5": 0.2714, "1": 0.0857}, "4": {"5": 0.4143, "2": 0.1429, "3": 0.1286, "4": 0.2571, "1": 0.0571}}}, {"model": "z-ai/glm-4.5-air", "avg_exact": 0.36203, "avg_wp": 0.498575, "avg_bias": -0.5908, "total": 1754, "lang_exact": {"ar": 0.317073, "az": 0.4, "be": 0.525, "bg": 0.475, "bo": 0.45, "ca": 0.375, "cn": 0.4, "cs": 0.35, "da": 0.325, "el": 0.325, "en": 0.275, "es": 0.35, "et": 0.6, "eu": 0.3, "fa": 0.283333, "fi": 0.45, "fr": 0.3, "gl": 0.383333, "hu": 0.375, "hv": 0.175, "is": 0.25, "it": 0.125, "ka": 0.35, "la": 0.15, "li": 0.175, "lv": 0.1, "mk": 0.2, "mt": 0.275, "nl": 0.225, "no": 0.375, "pl": 0.35, "pt": 0.525, "ro": 0.4, "ru": 0.484848, "sk": 0.55, "sl": 0.625, "sq": 0.65, "sr": 0.325, "sv": 0.5, "tr": 0.45, "uk": 0.5}, "lang_wp": {"ar": 0.52439, "az": 0.55, "be": 0.6375, "bg": 0.6375, "bo": 0.575, "ca": 0.475, "cn": 0.6, "cs": 0.5, "da": 0.4875, "el": 0.5125, "en": 0.4375, "es": 0.5, "et": 0.75, "eu": 0.433333, "fa": 0.525, "fi": 0.5125, "fr": 0.475, "gl": 0.541667, "hu": 0.425, "hv": 
0.1875, "is": 0.3625, "it": 0.15, "ka": 0.4125, "la": 0.208333, "li": 0.25, "lv": 0.2, "mk": 0.3375, "mt": 0.4375, "nl": 0.375, "no": 0.5125, "pl": 0.5, "pt": 0.6625, "ro": 0.5375, "ru": 0.621212, "sk": 0.7, "sl": 0.775, "sq": 0.75, "sr": 0.55, "sv": 0.625, "tr": 0.6375, "uk": 0.6625}, "confusion": {"1": {"1": 0.7601, "3": 0.0665, "2": 0.1676, "6": 0.0029, "5": 0.0029}, "2": {"1": 0.3848, "3": 0.2182, "2": 0.3909, "6": 0.003, "5": 0.003}, "5": {"4": 0.0496, "6": 0.0496, "5": 0.3469, "3": 0.5364, "2": 0.0117, "1": 0.0058}, "6": {"6": 0.2669, "5": 0.4141, "3": 0.2638, "4": 0.0337, "2": 0.0031, "1": 0.0184}, "3": {"3": 0.6, "2": 0.1167, "1": 0.2833}, "4": {"3": 0.661, "5": 0.1186, "2": 0.1017, "6": 0.0339, "1": 0.0678, "4": 0.0169}}}, {"model": "meta-llama/llama-4-scout", "avg_exact": 0.380137, "avg_wp": 0.497717, "avg_bias": 1.0869, "total": 1752, "lang_exact": {"ar": 0.325, "az": 0.475, "be": 0.3, "bg": 0.375, "bo": 0.425, "ca": 0.3, "cn": 0.25, "cs": 0.525, "da": 0.375, "el": 0.275, "en": 0.225, "es": 0.475, "et": 0.425, "eu": 0.254237, "fa": 0.4, "fi": 0.5, "fr": 0.375, "gl": 0.2, "hu": 0.45, "hv": 0.45, "is": 0.55, "it": 0.425, "ka": 0.3, "la": 0.316667, "li": 0.35, "lv": 0.45, "mk": 0.275, "mt": 0.35, "nl": 0.325, "no": 0.475, "pl": 0.35, "pt": 0.5, "ro": 0.325, "ru": 0.545455, "sk": 0.5, "sl": 0.4, "sq": 0.575, "sr": 0.275, "sv": 0.425, "tr": 0.425, "uk": 0.35}, "lang_wp": {"ar": 0.4625, "az": 0.5625, "be": 0.4, "bg": 0.45, "bo": 0.5375, "ca": 0.3875, "cn": 0.441667, "cs": 0.6, "da": 0.4875, "el": 0.4, "en": 0.325, "es": 0.5875, "et": 0.5875, "eu": 0.389831, "fa": 0.55, "fi": 0.625, "fr": 0.4375, "gl": 0.433333, "hu": 0.5375, "hv": 0.525, "is": 0.65, "it": 0.4625, "ka": 0.4125, "la": 0.425, "li": 0.45, "lv": 0.575, "mk": 0.425, "mt": 0.5125, "nl": 0.475, "no": 0.6125, "pl": 0.441667, "pt": 0.6, "ro": 0.475, "ru": 0.575758, "sk": 0.55, "sl": 0.525, "sq": 0.6625, "sr": 0.4125, "sv": 0.5375, "tr": 0.6, "uk": 0.4625}, "confusion": {"5": {"5": 0.6634, "6": 0.2317, 
"4": 0.0537, "1": 0.0488, "2": 0.0024}, "2": {"4": 0.1253, "5": 0.5965, "2": 0.0251, "6": 0.0877, "1": 0.1404, "3": 0.0251}, "1": {"1": 0.357, "6": 0.1296, "4": 0.0733, "5": 0.3741, "2": 0.0416, "3": 0.0244}, "6": {"5": 0.3766, "6": 0.5903, "4": 0.0305, "1": 0.0025}, "3": {"4": 0.2, "5": 0.6571, "6": 0.0571, "1": 0.0714, "3": 0.0143}, "4": {"5": 0.7246, "6": 0.1739, "4": 0.0725, "1": 0.0145, "2": 0.0145}}}, {"model": "meta-llama/llama-3.3-70b-instruct", "avg_exact": 0.366589, "avg_wp": 0.49652, "avg_bias": 0.846, "total": 1724, "lang_exact": {"ar": 0.384615, "az": 0.394737, "be": 0.475, "bg": 0.4, "bo": 0.45, "ca": 0.25, "cn": 0.305085, "cs": 0.525, "da": 0.358974, "el": 0.447368, "en": 0.25641, "es": 0.324324, "et": 0.512821, "eu": 0.293103, "fa": 0.310345, "fi": 0.538462, "fr": 0.384615, "gl": 0.183333, "hu": 0.45, "hv": 0.4, "is": 0.45, "it": 0.384615, "ka": 0.35, "la": 0.310345, "li": 0.25641, "lv": 0.425, "mk": 0.225, "mt": 0.275, "nl": 0.425, "no": 0.375, "pl": 0.4, "pt": 0.358974, "ro": 0.131579, "ru": 0.515152, "sk": 0.425, "sl": 0.435897, "sq": 0.282051, "sr": 0.333333, "sv": 0.410256, "tr": 0.475, "uk": 0.35}, "lang_wp": {"ar": 0.5, "az": 0.513158, "be": 0.5625, "bg": 0.5375, "bo": 0.5375, "ca": 0.3625, "cn": 0.466102, "cs": 0.5875, "da": 0.5, "el": 0.5, "en": 0.435897, "es": 0.405405, "et": 0.679487, "eu": 0.431034, "fa": 0.465517, "fi": 0.653846, "fr": 0.448718, "gl": 0.383333, "hu": 0.5375, "hv": 0.5125, "is": 0.625, "it": 0.512821, "ka": 0.4625, "la": 0.431034, "li": 0.423077, "lv": 0.6, "mk": 0.3625, "mt": 0.3875, "nl": 0.5875, "no": 0.475, "pl": 0.558333, "pt": 0.474359, "ro": 0.315789, "ru": 0.530303, "sk": 0.6375, "sl": 0.551282, "sq": 0.487179, "sr": 0.448718, "sv": 0.512821, "tr": 0.625, "uk": 0.4375}, "confusion": {"1": {"5": 0.041, "4": 0.4641, "1": 0.3026, "3": 0.0436, "2": 0.1462, "6": 0.0026}, "2": {"4": 0.6972, "5": 0.1349, "2": 0.0636, "6": 0.0229, "3": 0.0433, "1": 0.0382}, "5": {"4": 0.06, "6": 0.2025, "5": 0.7325, "1": 0.005}, "6": 
{"5": 0.5026, "6": 0.4689, "4": 0.0259, "1": 0.0026}, "3": {"5": 0.5397, "4": 0.3016, "6": 0.0952, "1": 0.0476, "2": 0.0159}, "4": {"5": 0.5797, "6": 0.1884, "4": 0.2174, "1": 0.0145}}}, {"model": "openai/gpt-4.1-nano", "avg_exact": 0.293212, "avg_wp": 0.494295, "avg_bias": -0.6606, "total": 1753, "lang_exact": {"ar": 0.25, "az": 0.275, "be": 0.25, "bg": 0.35, "bo": 0.325, "ca": 0.125, "cn": 0.4, "cs": 0.35, "da": 0.375, "el": 0.225, "en": 0.1, "es": 0.275, "et": 0.45, "eu": 0.2, "fa": 0.366667, "fi": 0.5, "fr": 0.325, "gl": 0.3, "hu": 0.25, "hv": 0.35, "is": 0.3, "it": 0.3, "ka": 0.125, "la": 0.2, "li": 0.275, "lv": 0.425, "mk": 0.125, "mt": 0.25, "nl": 0.3, "no": 0.275, "pl": 0.3, "pt": 0.225, "ro": 0.25, "ru": 0.393939, "sk": 0.275, "sl": 0.3, "sq": 0.325, "sr": 0.325, "sv": 0.375, "tr": 0.325, "uk": 0.325}, "lang_wp": {"ar": 0.5, "az": 0.5125, "be": 0.4875, "bg": 0.55, "bo": 0.525, "ca": 0.4, "cn": 0.616667, "cs": 0.55, "da": 0.425, "el": 0.475, "en": 0.4125, "es": 0.4625, "et": 0.6125, "eu": 0.391667, "fa": 0.6, "fi": 0.6, "fr": 0.5125, "gl": 0.5, "hu": 0.475, "hv": 0.5375, "is": 0.55, "it": 0.5125, "ka": 0.3125, "la": 0.316667, "li": 0.5375, "lv": 0.6125, "mk": 0.3125, "mt": 0.3875, "nl": 0.4125, "no": 0.425, "pl": 0.541667, "pt": 0.425, "ro": 0.4625, "ru": 0.651515, "sk": 0.4625, "sl": 0.575, "sq": 0.5625, "sr": 0.5375, "sv": 0.5, "tr": 0.4875, "uk": 0.5625}, "confusion": {"1": {"2": 0.6683, "4": 0.0244, "1": 0.2659, "3": 0.0415}, "2": {"2": 0.7325, "4": 0.035, "1": 0.0575, "3": 0.1675, "5": 0.0075}, "5": {"4": 0.5927, "2": 0.0951, "5": 0.0439, "3": 0.2463, "6": 0.0195, "1": 0.0024}, "6": {"6": 0.1018, "5": 0.0763, "4": 0.4148, "2": 0.2188, "3": 0.1705, "1": 0.0178}, "3": {"3": 0.4, "2": 0.3429, "4": 0.1571, "1": 0.0857, "5": 0.0143}, "4": {"4": 0.3714, "3": 0.3286, "2": 0.2571, "5": 0.0286, "1": 0.0143}}}, {"model": "google/gemma-3-27b-it", "avg_exact": 0.252139, "avg_wp": 0.488591, "avg_bias": 0.3009, "total": 1753, "lang_exact": {"ar": 0.3, "az": 0.15, 
"be": 0.225, "bg": 0.3, "bo": 0.325, "ca": 0.175, "cn": 0.333333, "cs": 0.325, "da": 0.1, "el": 0.15, "en": 0.5, "es": 0.3, "et": 0.225, "eu": 0.233333, "fa": 0.266667, "fi": 0.225, "fr": 0.2, "gl": 0.25, "hu": 0.25, "hv": 0.25, "is": 0.375, "it": 0.225, "ka": 0.175, "la": 0.366667, "li": 0.1, "lv": 0.325, "mk": 0.15, "mt": 0.225, "nl": 0.325, "no": 0.175, "pl": 0.416667, "pt": 0.25, "ro": 0.05, "ru": 0.454545, "sk": 0.2, "sl": 0.275, "sq": 0.25, "sr": 0.1, "sv": 0.025, "tr": 0.325, "uk": 0.325}, "lang_wp": {"ar": 0.4875, "az": 0.4, "be": 0.4625, "bg": 0.4875, "bo": 0.5875, "ca": 0.4, "cn": 0.575, "cs": 0.4625, "da": 0.375, "el": 0.325, "en": 0.65, "es": 0.5125, "et": 0.5125, "eu": 0.475, "fa": 0.55, "fi": 0.475, "fr": 0.475, "gl": 0.466667, "hu": 0.55, "hv": 0.5, "is": 0.5625, "it": 0.4875, "ka": 0.375, "la": 0.525, "li": 0.4125, "lv": 0.5875, "mk": 0.275, "mt": 0.5125, "nl": 0.5875, "no": 0.3875, "pl": 0.591667, "pt": 0.4625, "ro": 0.35, "ru": 0.666667, "sk": 0.5375, "sl": 0.5125, "sq": 0.475, "sr": 0.4125, "sv": 0.3625, "tr": 0.5625, "uk": 0.5625}, "confusion": {"1": {"3": 0.4415, "1": 0.1951, "4": 0.0976, "2": 0.2537, "5": 0.0049, "6": 0.0073}, "2": {"4": 0.3175, "3": 0.5875, "2": 0.0675, "5": 0.0175, "1": 0.01}, "5": {"4": 0.3716, "5": 0.6186, "3": 0.0073, "6": 0.0024}, "6": {"4": 0.2519, "5": 0.6889, "3": 0.0206, "6": 0.036, "1": 0.0026}, "3": {"4": 0.6429, "5": 0.0714, "3": 0.2571, "1": 0.0286}, "4": {"4": 0.7143, "5": 0.2143, "3": 0.0714}}}, {"model": "qwen/qwen-2.5-7b-instruct", "avg_exact": 0.26526, "avg_wp": 0.484598, "avg_bias": -0.2856, "total": 1753, "lang_exact": {"ar": 0.2, "az": 0.15, "be": 0.25, "bg": 0.35, "bo": 0.4, "ca": 0.225, "cn": 0.316667, "cs": 0.3, "da": 0.325, "el": 0.2, "en": 0.4, "es": 0.325, "et": 0.25, "eu": 0.216667, "fa": 0.3, "fi": 0.275, "fr": 0.35, "gl": 0.366667, "hu": 0.225, "hv": 0.3, "is": 0.325, "it": 0.3, "ka": 0.125, "la": 0.183333, "li": 0.2, "lv": 0.35, "mk": 0.15, "mt": 0.225, "nl": 0.275, "no": 0.3, "pl": 0.216667, 
"pt": 0.25, "ro": 0.2, "ru": 0.393939, "sk": 0.325, "sl": 0.275, "sq": 0.1, "sr": 0.05, "sv": 0.35, "tr": 0.3, "uk": 0.275}, "lang_wp": {"ar": 0.3625, "az": 0.4, "be": 0.5125, "bg": 0.575, "bo": 0.625, "ca": 0.425, "cn": 0.525, "cs": 0.4875, "da": 0.525, "el": 0.425, "en": 0.6125, "es": 0.5625, "et": 0.5, "eu": 0.425, "fa": 0.466667, "fi": 0.5125, "fr": 0.5375, "gl": 0.566667, "hu": 0.475, "hv": 0.525, "is": 0.5125, "it": 0.5, "ka": 0.3125, "la": 0.383333, "li": 0.3875, "lv": 0.65, "mk": 0.3375, "mt": 0.4125, "nl": 0.525, "no": 0.5125, "pl": 0.425, "pt": 0.475, "ro": 0.4125, "ru": 0.621212, "sk": 0.5625, "sl": 0.5, "sq": 0.35, "sr": 0.35, "sv": 0.6, "tr": 0.5625, "uk": 0.5125}, "confusion": {"1": {"3": 0.2073, "1": 0.6415, "4": 0.0854, "2": 0.061, "5": 0.0049}, "2": {"4": 0.1859, "3": 0.3593, "1": 0.397, "2": 0.0427, "5": 0.0126, "6": 0.0025}, "5": {"4": 0.622, "6": 0.0732, "5": 0.2195, "3": 0.0683, "1": 0.0146, "2": 0.0024}, "6": {"4": 0.5725, "6": 0.0992, "3": 0.056, "5": 0.2595, "1": 0.0051, "2": 0.0076}, "3": {"3": 0.2286, "4": 0.4571, "1": 0.1857, "2": 0.0429, "5": 0.0714, "6": 0.0143}, "4": {"2": 0.0429, "4": 0.5714, "3": 0.2143, "5": 0.0857, "6": 0.0143, "1": 0.0714}}}, {"model": "meta-llama/llama-4-maverick", "avg_exact": 0.268291, "avg_wp": 0.47344, "avg_bias": -0.0631, "total": 17112, "lang_exact": {"ar": 0.27, "az": 0.235, "be": 0.2225, "bg": 0.273481, "bo": 0.285, "ca": 0.259053, "cn": 0.388333, "cs": 0.285, "da": 0.225, "el": 0.26, "en": 0.28, "es": 0.267267, "et": 0.315, "eu": 0.221106, "fa": 0.278333, "fi": 0.2425, "fr": 0.225131, "gl": 0.271667, "hu": 0.24, "hv": 0.31, "is": 0.41, "it": 0.2775, "ka": 0.113208, "la": 0.397661, "li": 0.2375, "lv": 0.2725, "mk": 0.19, "mt": 0.2125, "nl": 0.3575, "no": 0.185, "pl": 0.355517, "pt": 0.235, "ro": 0.1525, "ru": 0.330033, "sk": 0.2025, "sl": 0.2675, "sq": 0.2825, "sr": 0.2, "sv": 0.185, "tr": 0.395, "uk": 0.248663}, "lang_wp": {"ar": 0.435, "az": 0.38125, "be": 0.475, "bg": 0.476519, "bo": 0.54, "ca": 
0.415042, "cn": 0.620833, "cs": 0.52125, "da": 0.37125, "el": 0.4625, "en": 0.4425, "es": 0.462462, "et": 0.5675, "eu": 0.403685, "fa": 0.490833, "fi": 0.5075, "fr": 0.454188, "gl": 0.45, "hu": 0.42875, "hv": 0.53875, "is": 0.57625, "it": 0.48875, "ka": 0.278302, "la": 0.562378, "li": 0.43375, "lv": 0.485, "mk": 0.3675, "mt": 0.40125, "nl": 0.59, "no": 0.38125, "pl": 0.573555, "pt": 0.425, "ro": 0.38625, "ru": 0.617162, "sk": 0.42375, "sl": 0.48125, "sq": 0.56625, "sr": 0.3875, "sv": 0.3425, "tr": 0.60875, "uk": 0.471925}, "confusion": {"1": {"4": 0.2138, "1": 0.6218, "3": 0.0839, "2": 0.0678, "5": 0.0101, "6": 0.0026}, "2": {"4": 0.477, "3": 0.1474, "2": 0.0717, "1": 0.2827, "5": 0.0186, "6": 0.0026}, "5": {"4": 0.6117, "5": 0.2445, "6": 0.038, "1": 0.0987, "3": 0.0065, "2": 0.0005}, "6": {"5": 0.5201, "4": 0.3008, "6": 0.1339, "1": 0.0377, "3": 0.005, "2": 0.0025}, "3": {"3": 0.0877, "4": 0.7083, "5": 0.1298, "2": 0.0253, "1": 0.0455, "6": 0.0034}, "4": {"4": 0.6123, "5": 0.2996, "3": 0.0206, "6": 0.0206, "2": 0.0132, "1": 0.0338}}}, {"model": "google/gemma-3-4b-it", "avg_exact": 0.222031, "avg_wp": 0.460126, "avg_bias": -0.1819, "total": 1743, "lang_exact": {"ar": 0.1, "az": 0.15, "be": 0.225, "bg": 0.2, "bo": 0.125, "ca": 0.051282, "cn": 0.298246, "cs": 0.394737, "da": 0.25, "el": 0.175, "en": 0.425, "es": 0.225, "et": 0.375, "eu": 0.216667, "fa": 0.216667, "fi": 0.25, "fr": 0.125, "gl": 0.25, "hu": 0.125, "hv": 0.225, "is": 0.15, "it": 0.275, "ka": 0.1, "la": 0.166667, "li": 0.175, "lv": 0.2, "mk": 0.15, "mt": 0.025, "nl": 0.2, "no": 0.25, "pl": 0.316667, "pt": 0.461538, "ro": 0.125, "ru": 0.272727, "sk": 0.263158, "sl": 0.225, "sq": 0.358974, "sr": 0.225, "sv": 0.25, "tr": 0.2, "uk": 0.275}, "lang_wp": {"ar": 0.425, "az": 0.5, "be": 0.5, "bg": 0.4125, "bo": 0.45, "ca": 0.294872, "cn": 0.54386, "cs": 0.539474, "da": 0.45, "el": 0.375, "en": 0.6375, "es": 0.425, "et": 0.5875, "eu": 0.441667, "fa": 0.45, "fi": 0.525, "fr": 0.35, "gl": 0.541667, "hu": 0.4, "hv": 
0.475, "is": 0.375, "it": 0.4875, "ka": 0.325, "la": 0.35, "li": 0.4875, "lv": 0.525, "mk": 0.3625, "mt": 0.325, "nl": 0.4375, "no": 0.475, "pl": 0.516667, "pt": 0.615385, "ro": 0.3375, "ru": 0.545455, "sk": 0.526316, "sl": 0.425, "sq": 0.538462, "sr": 0.4875, "sv": 0.5, "tr": 0.4375, "uk": 0.45}, "confusion": {"1": {"2": 0.5575, "3": 0.2812, "1": 0.1174, "6": 0.0147, "4": 0.022, "5": 0.0073}, "2": {"5": 0.0126, "4": 0.0578, "6": 0.0075, "3": 0.6131, "2": 0.2864, "1": 0.0226}, "5": {"4": 0.3985, "6": 0.0196, "5": 0.3496, "3": 0.2274, "2": 0.0049}, "6": {"5": 0.2828, "4": 0.2699, "3": 0.329, "6": 0.0617, "2": 0.0206, "1": 0.036}, "3": {"3": 0.6029, "5": 0.1176, "4": 0.1471, "2": 0.1029, "1": 0.0294}, "4": {"4": 0.2429, "3": 0.4286, "5": 0.3, "2": 0.0143, "1": 0.0143}}}, {"model": "mistralai/mixtral-8x7b-instruct", "avg_exact": 0.245143, "avg_wp": 0.453429, "avg_bias": 0.4636, "total": 1750, "lang_exact": {"ar": 0.175, "az": 0.2, "be": 0.15, "bg": 0.225, "bo": 0.25, "ca": 0.175, "cn": 0.254237, "cs": 0.4, "da": 0.25, "el": 0.2, "en": 0.3, "es": 0.3, "et": 0.275, "eu": 0.25, "fa": 0.183333, "fi": 0.275, "fr": 0.25, "gl": 0.216667, "hu": 0.25, "hv": 0.225, "is": 0.15, "it": 0.225, "ka": 0.153846, "la": 0.283333, "li": 0.2, "lv": 0.25, "mk": 0.225, "mt": 0.225, "nl": 0.3, "no": 0.325, "pl": 0.283333, "pt": 0.225, "ro": 0.3, "ru": 0.34375, "sk": 0.225, "sl": 0.325, "sq": 0.2, "sr": 0.25, "sv": 0.225, "tr": 0.275, "uk": 0.275}, "lang_wp": {"ar": 0.2875, "az": 0.3875, "be": 0.45, "bg": 0.4375, "bo": 0.425, "ca": 0.3375, "cn": 0.466102, "cs": 0.5625, "da": 0.5125, "el": 0.4, "en": 0.5125, "es": 0.5125, "et": 0.4875, "eu": 0.408333, "fa": 0.408333, "fi": 0.525, "fr": 0.4125, "gl": 0.408333, "hu": 0.4875, "hv": 0.475, "is": 0.4, "it": 0.4125, "ka": 0.269231, "la": 0.466667, "li": 0.4375, "lv": 0.5, "mk": 0.3875, "mt": 0.3625, "nl": 0.5625, "no": 0.55, "pl": 0.45, "pt": 0.475, "ro": 0.525, "ru": 0.53125, "sk": 0.4625, "sl": 0.5, "sq": 0.4, "sr": 0.475, "sv": 0.55, "tr": 
0.4875, "uk": 0.55}, "confusion": {"1": {"4": 0.1744, "3": 0.398, "2": 0.2875, "5": 0.0491, "1": 0.0909}, "2": {"4": 0.3501, "5": 0.1511, "1": 0.063, "3": 0.3753, "2": 0.0605}, "5": {"5": 0.8, "3": 0.0366, "4": 0.1415, "1": 0.0122, "6": 0.0098}, "6": {"5": 0.7621, "3": 0.0588, "4": 0.1228, "2": 0.0128, "6": 0.0281, "1": 0.0153}, "3": {"3": 0.1143, "5": 0.3714, "4": 0.4429, "1": 0.0429, "2": 0.0286}, "4": {"5": 0.5857, "4": 0.3, "6": 0.0286, "1": 0.0286, "3": 0.0571}}}, {"model": "mistralai/mistral-small-3.2-24b-instruct", "avg_exact": 0.25029, "avg_wp": 0.450929, "avg_bias": 0.6868, "total": 1722, "lang_exact": {"ar": 0.2, "az": 0.25, "be": 0.275, "bg": 0.225, "bo": 0.275, "ca": 0.225, "cn": 0.316667, "cs": 0.225, "da": 0.35, "el": 0.25, "en": 0.3, "es": 0.3, "et": 0.15, "eu": 0.183333, "fa": 0.333333, "fi": 0.275, "fr": 0.25, "gl": 0.166667, "hu": 0.275, "hv": 0.275, "is": 0.375, "it": 0.275, "ka": 0.2, "la": 0.4, "li": 0.205128, "lv": 0.325, "mk": 0.2, "mt": 0.1, "nl": 0.225, "no": 0.25, "pl": 0.3, "pt": 0.25, "ro": 0.222222, "ru": 0.3, "sk": 0.142857, "sl": 0.193548, "sq": 0.157895, "sr": 0.2, "sv": 0.3, "tr": 0.175, "uk": 0.225}, "lang_wp": {"ar": 0.4125, "az": 0.4375, "be": 0.4875, "bg": 0.425, "bo": 0.45, "ca": 0.425, "cn": 0.55, "cs": 0.3875, "da": 0.55, "el": 0.3625, "en": 0.4125, "es": 0.425, "et": 0.4, "eu": 0.458333, "fa": 0.575, "fi": 0.3875, "fr": 0.425, "gl": 0.408333, "hu": 0.4875, "hv": 0.5125, "is": 0.6, "it": 0.4125, "ka": 0.375, "la": 0.566667, "li": 0.423077, "lv": 0.575, "mk": 0.3625, "mt": 0.35, "nl": 0.4125, "no": 0.375, "pl": 0.483333, "pt": 0.4, "ro": 0.375, "ru": 0.416667, "sk": 0.410714, "sl": 0.467742, "sq": 0.513158, "sr": 0.4625, "sv": 0.4375, "tr": 0.5125, "uk": 0.3875}, "confusion": {"1": {"3": 0.5664, "4": 0.1228, "2": 0.0902, "1": 0.0902, "5": 0.1303}, "2": {"3": 0.4444, "4": 0.2879, "5": 0.2348, "2": 0.0126, "1": 0.0202}, "5": {"4": 0.1156, "5": 0.8668, "1": 0.005, "6": 0.0075, "3": 0.005}, "6": {"5": 0.8892, "4": 0.0722, "6": 
0.0335, "3": 0.0052}, "3": {"4": 0.4143, "5": 0.4, "3": 0.1714, "6": 0.0143}, "4": {"5": 0.5857, "4": 0.2857, "3": 0.1, "6": 0.0286}}}, {"model": "mistralai/mistral-7b-instruct", "avg_exact": 0.243871, "avg_wp": 0.418065, "avg_bias": 0.3484, "total": 1550, "lang_exact": {"ar": 0.058824, "az": 0.27027, "be": 0.382353, "bg": 0.243243, "bo": 0.314286, "ca": 0.285714, "cn": 0.339286, "cs": 0.428571, "da": 0.28, "el": 0.194444, "en": 0.125, "es": 0.241379, "et": 0.482759, "eu": 0.226415, "fa": 0.355932, "fi": 0.269231, "fr": 0.184211, "gl": 0.206897, "hu": 0.138889, "hv": 0.361111, "is": 0.166667, "it": 0.28125, "ka": 0.277778, "la": 0.241379, "li": 0.171429, "lv": 0.142857, "mk": 0.205128, "mt": 0.162162, "nl": 0.285714, "no": 0.205128, "pl": 0.272727, "pt": 0.189189, "ro": 0.138889, "ru": 0.354839, "sk": 0.236842, "sl": 0.289474, "sq": 0.27027, "sr": 0.282051, "sv": 0.210526, "tr": 0.102564, "uk": 0.175}, "lang_wp": {"ar": 0.191176, "az": 0.445946, "be": 0.558824, "bg": 0.432432, "bo": 0.542857, "ca": 0.457143, "cn": 0.508929, "cs": 0.52381, "da": 0.4, "el": 0.388889, "en": 0.265625, "es": 0.396552, "et": 0.62069, "eu": 0.349057, "fa": 0.542373, "fi": 0.365385, "fr": 0.381579, "gl": 0.387931, "hu": 0.319444, "hv": 0.513889, "is": 0.416667, "it": 0.5, "ka": 0.416667, "la": 0.431034, "li": 0.285714, "lv": 0.328571, "mk": 0.397436, "mt": 0.337838, "nl": 0.5, "no": 0.384615, "pl": 0.445455, "pt": 0.378378, "ro": 0.333333, "ru": 0.467742, "sk": 0.434211, "sl": 0.394737, "sq": 0.486486, "sr": 0.487179, "sv": 0.421053, "tr": 0.320513, "uk": 0.35}, "confusion": {"1": {"6": 0.0292, "3": 0.414, "5": 0.0962, "4": 0.1224, "2": 0.1341, "1": 0.2041}, "2": {"5": 0.2367, "3": 0.3373, "6": 0.0355, "2": 0.071, "4": 0.2219, "1": 0.0976}, "5": {"6": 0.0875, "4": 0.1545, "1": 0.0875, "5": 0.6093, "3": 0.0554, "2": 0.0058}, "6": {"5": 0.5989, "1": 0.0917, "6": 0.1519, "3": 0.0544, "4": 0.0974, "2": 0.0057}, "3": {"5": 0.4308, "2": 0.1077, "3": 0.2, "1": 0.0923, "4": 0.1385, "6": 0.0308}, 
"4": {"5": 0.5152, "4": 0.1364, "1": 0.1364, "6": 0.0758, "3": 0.0758, "2": 0.0606}}}, {"model": "mistralai/mistral-small-24b-instruct-2501", "avg_exact": 0.179897, "avg_wp": 0.363221, "avg_bias": -0.8472, "total": 1751, "lang_exact": {"ar": 0.05, "az": 0.25, "be": 0.075, "bg": 0.2, "bo": 0.25, "ca": 0.25, "cn": 0.254237, "cs": 0.15, "da": 0.225, "el": 0.2, "en": 0.25, "es": 0.25, "et": 0.25, "eu": 0.2, "fa": 0.2, "fi": 0.225, "fr": 0.225, "gl": 0.183333, "hu": 0.125, "hv": 0.175, "is": 0.15, "it": 0.25, "ka": 0.025, "la": 0.35, "li": 0.05, "lv": 0.125, "mk": 0.225, "mt": 0.25, "nl": 0.175, "no": 0.225, "pl": 0.233333, "pt": 0.225, "ro": 0.1, "ru": 0.0, "sk": 0.15, "sl": 0.225, "sq": 0.025, "sr": 0.0, "sv": 0.25, "tr": 0.075, "uk": 0.075}, "lang_wp": {"ar": 0.225, "az": 0.45, "be": 0.3125, "bg": 0.4375, "bo": 0.3875, "ca": 0.3625, "cn": 0.457627, "cs": 0.3375, "da": 0.375, "el": 0.35, "en": 0.4125, "es": 0.4125, "et": 0.4125, "eu": 0.358333, "fa": 0.4, "fi": 0.3375, "fr": 0.375, "gl": 0.291667, "hu": 0.275, "hv": 0.35, "is": 0.375, "it": 0.4, "ka": 0.2625, "la": 0.516667, "li": 0.3, "lv": 0.3625, "mk": 0.375, "mt": 0.4, "nl": 0.3375, "no": 0.3625, "pl": 0.416667, "pt": 0.4125, "ro": 0.225, "ru": 0.28125, "sk": 0.325, "sl": 0.475, "sq": 0.375, "sr": 0.225, "sv": 0.4, "tr": 0.275, "uk": 0.325}, "confusion": {"1": {"4": 0.1324, "2": 0.1961, "3": 0.1225, "1": 0.5319, "6": 0.0123, "5": 0.0049}, "2": {"4": 0.25, "3": 0.1725, "2": 0.0875, "1": 0.4775, "5": 0.0075, "6": 0.005}, "5": {"4": 0.527, "2": 0.0098, "3": 0.0343, "1": 0.4093, "5": 0.0172, "6": 0.0025}, "6": {"4": 0.6752, "5": 0.0997, "6": 0.0153, "1": 0.179, "3": 0.0307}, "3": {"3": 0.1571, "4": 0.4571, "5": 0.0143, "6": 0.0143, "1": 0.2571, "2": 0.1}, "4": {"4": 0.5571, "6": 0.0143, "1": 0.2714, "2": 0.0429, "3": 0.1143}}}, {"model": "mistralai/ministral-14b-2512", "avg_exact": 0.196235, "avg_wp": 0.353394, "avg_bias": 0.71, "total": 1753, "lang_exact": {"ar": 0.15, "az": 0.225, "be": 0.175, "bg": 0.2, "bo": 0.25, 
"ca": 0.225, "cn": 0.233333, "cs": 0.25, "da": 0.175, "el": 0.25, "en": 0.275, "es": 0.175, "et": 0.075, "eu": 0.266667, "fa": 0.216667, "fi": 0.125, "fr": 0.25, "gl": 0.216667, "hu": 0.175, "hv": 0.2, "is": 0.325, "it": 0.125, "ka": 0.05, "la": 0.416667, "li": 0.125, "lv": 0.1, "mk": 0.175, "mt": 0.05, "nl": 0.1, "no": 0.125, "pl": 0.233333, "pt": 0.15, "ro": 0.175, "ru": 0.181818, "sk": 0.175, "sl": 0.125, "sq": 0.175, "sr": 0.225, "sv": 0.275, "tr": 0.225, "uk": 0.2}, "lang_wp": {"ar": 0.3125, "az": 0.3625, "be": 0.35, "bg": 0.4125, "bo": 0.35, "ca": 0.4, "cn": 0.425, "cs": 0.4125, "da": 0.325, "el": 0.375, "en": 0.4125, "es": 0.35, "et": 0.225, "eu": 0.425, "fa": 0.441667, "fi": 0.275, "fr": 0.375, "gl": 0.383333, "hu": 0.3, "hv": 0.3375, "is": 0.4875, "it": 0.3125, "ka": 0.2125, "la": 0.533333, "li": 0.2625, "lv": 0.2875, "mk": 0.3375, "mt": 0.175, "nl": 0.225, "no": 0.225, "pl": 0.433333, "pt": 0.2875, "ro": 0.3375, "ru": 0.333333, "sk": 0.3375, "sl": 0.325, "sq": 0.3375, "sr": 0.325, "sv": 0.4, "tr": 0.425, "uk": 0.375}, "confusion": {"1": {"3": 0.3, "5": 0.2293, "4": 0.3268, "6": 0.0537, "1": 0.0902}, "2": {"4": 0.335, "5": 0.29, "3": 0.225, "6": 0.0475, "1": 0.1025}, "5": {"4": 0.2296, "5": 0.5506, "3": 0.0543, "6": 0.079, "1": 0.0864}, "6": {"5": 0.5872, "4": 0.1795, "6": 0.1385, "1": 0.0615, "3": 0.0333}, "3": {"5": 0.4143, "4": 0.3143, "3": 0.1571, "6": 0.0714, "1": 0.0429}, "4": {"5": 0.5571, "4": 0.2714, "3": 0.0714, "6": 0.1}}}, {"model": "meta-llama/llama-3.1-8b-instruct", "avg_exact": 0.209262, "avg_wp": 0.341052, "avg_bias": -0.1998, "total": 1749, "lang_exact": {"ar": 0.135135, "az": 0.2, "be": 0.3, "bg": 0.175, "bo": 0.2, "ca": 0.225, "cn": 0.3, "cs": 0.25, "da": 0.25, "el": 0.225, "en": 0.275, "es": 0.075, "et": 0.275, "eu": 0.116667, "fa": 0.083333, "fi": 0.333333, "fr": 0.1, "gl": 0.133333, "hu": 0.175, "hv": 0.375, "is": 0.25, "it": 0.2, "ka": 0.225, "la": 0.116667, "li": 0.2, "lv": 0.225, "mk": 0.275, "mt": 0.2, "nl": 0.225, "no": 0.225, 
"pl": 0.083333, "pt": 0.225, "ro": 0.075, "ru": 0.242424, "sk": 0.225, "sl": 0.25, "sq": 0.225, "sr": 0.25, "sv": 0.375, "tr": 0.275, "uk": 0.225}, "lang_wp": {"ar": 0.297297, "az": 0.275, "be": 0.4625, "bg": 0.3375, "bo": 0.3125, "ca": 0.35, "cn": 0.441667, "cs": 0.4, "da": 0.3875, "el": 0.325, "en": 0.4, "es": 0.2125, "et": 0.3875, "eu": 0.191667, "fa": 0.216667, "fi": 0.5, "fr": 0.2375, "gl": 0.266667, "hu": 0.3375, "hv": 0.5375, "is": 0.375, "it": 0.325, "ka": 0.35, "la": 0.325, "li": 0.3, "lv": 0.4, "mk": 0.4, "mt": 0.3, "nl": 0.3625, "no": 0.3375, "pl": 0.258333, "pt": 0.35, "ro": 0.2, "ru": 0.30303, "sk": 0.3625, "sl": 0.375, "sq": 0.3375, "sr": 0.375, "sv": 0.475, "tr": 0.4, "uk": 0.3625}, "confusion": {"1": {"1": 0.388, "2": 0.0651, "5": 0.2161, "4": 0.2396, "6": 0.0755, "3": 0.0156}, "2": {"1": 0.4027, "4": 0.1973, "5": 0.2453, "6": 0.104, "3": 0.0027, "2": 0.048}, "5": {"5": 0.2825, "4": 0.155, "1": 0.415, "2": 0.005, "6": 0.1425}, "6": {"4": 0.1362, "1": 0.3316, "2": 0.0051, "5": 0.3111, "6": 0.2159}, "3": {"5": 0.3857, "6": 0.1143, "1": 0.3714, "4": 0.1, "2": 0.0286}, "4": {"4": 0.029, "5": 0.4783, "6": 0.087, "1": 0.3478, "2": 0.029, "3": 0.029}}}, {"model": "speakleash/Bielik-11B-v2.6-Instruct", "avg_exact": 0.1502, "avg_wp": 0.340091, "avg_bias": 0.5597, "total": 1751, "lang_exact": {"ar": 0.05, "az": 0.075, "be": 0.0, "bg": 0.175, "bo": 0.2, "ca": 0.175, "cn": 0.186441, "cs": 0.1, "da": 0.225, "el": 0.275, "en": 0.025, "es": 0.2, "et": 0.2, "eu": 0.166667, "fa": 0.266667, "fi": 0.125, "fr": 0.225, "gl": 0.216667, "hu": 0.2, "hv": 0.225, "is": 0.0, "it": 0.125, "ka": 0.0, "la": 0.2, "li": 0.225, "lv": 0.125, "mk": 0.2, "mt": 0.15, "nl": 0.225, "no": 0.15, "pl": 0.183333, "pt": 0.075, "ro": 0.075, "ru": 0.090909, "sk": 0.125, "sl": 0.225, "sq": 0.0, "sr": 0.05, "sv": 0.15, "tr": 0.225, "uk": 0.075}, "lang_wp": {"ar": 0.175, "az": 0.25, "be": 0.1875, "bg": 0.3875, "bo": 0.4, "ca": 0.425, "cn": 0.29661, "cs": 0.3, "da": 0.4125, "el": 0.45, "en": 0.2, 
"es": 0.4625, "et": 0.4375, "eu": 0.333333, "fa": 0.433333, "fi": 0.35, "fr": 0.4375, "gl": 0.333333, "hu": 0.3375, "hv": 0.4375, "is": 0.1125, "it": 0.375, "ka": 0.128205, "la": 0.308333, "li": 0.4375, "lv": 0.3875, "mk": 0.4125, "mt": 0.35, "nl": 0.4625, "no": 0.4125, "pl": 0.4, "pt": 0.4125, "ro": 0.2625, "ru": 0.30303, "sk": 0.2875, "sl": 0.4375, "sq": 0.2, "sr": 0.2, "sv": 0.3375, "tr": 0.375, "uk": 0.25}, "confusion": {"1": {"3": 0.8971, "2": 0.0049, "6": 0.076, "5": 0.0074, "1": 0.0147}, "2": {"3": 0.84, "6": 0.13, "5": 0.0225, "4": 0.005, "1": 0.0025}, "5": {"3": 0.3659, "6": 0.4439, "4": 0.0512, "5": 0.1366, "1": 0.0024}, "6": {"3": 0.2901, "6": 0.4148, "4": 0.0712, "5": 0.2239}, "3": {"6": 0.4143, "5": 0.0429, "3": 0.5429}, "4": {"6": 0.5, "3": 0.4571, "5": 0.0429}}}, {"model": "CYFRAGOVPL/Llama-PLLuM-70B-chat-250801", "avg_exact": 0.227169, "avg_wp": 0.335616, "avg_bias": -1.8853, "total": 1752, "lang_exact": {"ar": 0.15, "az": 0.275, "be": 0.25, "bg": 0.175, "bo": 0.25, "ca": 0.175, "cn": 0.152542, "cs": 0.225, "da": 0.175, "el": 0.225, "en": 0.225, "es": 0.125, "et": 0.25, "eu": 0.1, "fa": 0.35, "fi": 0.25, "fr": 0.225, "gl": 0.133333, "hu": 0.175, "hv": 0.375, "is": 0.4, "it": 0.2, "ka": 0.175, "la": 0.3, "li": 0.25, "lv": 0.325, "mk": 0.225, "mt": 0.275, "nl": 0.25, "no": 0.25, "pl": 0.1, "pt": 0.25, "ro": 0.25, "ru": 0.30303, "sk": 0.2, "sl": 0.175, "sq": 0.25, "sr": 0.325, "sv": 0.25, "tr": 0.275, "uk": 0.15}, "lang_wp": {"ar": 0.35, "az": 0.4, "be": 0.3625, "bg": 0.275, "bo": 0.4, "ca": 0.2375, "cn": 0.194915, "cs": 0.375, "da": 0.2875, "el": 0.3125, "en": 0.2375, "es": 0.125, "et": 0.375, "eu": 0.183333, "fa": 0.508333, "fi": 0.3625, "fr": 0.35, "gl": 0.2, "hu": 0.2125, "hv": 0.55, "is": 0.5875, "it": 0.275, "ka": 0.2375, "la": 0.5, "li": 0.35, "lv": 0.4625, "mk": 0.275, "mt": 0.4625, "nl": 0.3625, "no": 0.3125, "pl": 0.141667, "pt": 0.3625, "ro": 0.3625, "ru": 0.439394, "sk": 0.325, "sl": 0.2875, "sq": 0.375, "sr": 0.475, "sv": 0.375, "tr": 
0.4125, "uk": 0.2375}, "confusion": {"1": {"1": 0.9601, "2": 0.0114, "3": 0.0199, "5": 0.0028, "6": 0.0057}, "2": {"4": 0.016, "1": 0.9169, "3": 0.0479, "5": 0.016, "2": 0.0032}, "5": {"4": 0.0897, "5": 0.1063, "1": 0.7774, "3": 0.0199, "6": 0.0066}, "6": {"5": 0.0915, "4": 0.0881, "1": 0.7695, "6": 0.0441, "3": 0.0068}, "3": {"1": 0.7407, "3": 0.2037, "4": 0.0556}, "4": {"1": 0.5652, "5": 0.1304, "3": 0.1957, "4": 0.087, "6": 0.0217}}}, {"model": "CYFRAGOVPL/pllum-12b-nc-chat-250715", "avg_exact": 0.146689, "avg_wp": 0.234018, "avg_bias": -1.0019, "total": 1752, "lang_exact": {"ar": 0.075, "az": 0.175, "be": 0.275, "bg": 0.125, "bo": 0.325, "ca": 0.275, "cn": 0.101695, "cs": 0.075, "da": 0.125, "el": 0.175, "en": 0.15, "es": 0.2, "et": 0.075, "eu": 0.083333, "fa": 0.216667, "fi": 0.075, "fr": 0.25, "gl": 0.1, "hu": 0.025, "hv": 0.325, "is": 0.15, "it": 0.125, "ka": 0.075, "la": 0.133333, "li": 0.15, "lv": 0.1, "mk": 0.075, "mt": 0.1, "nl": 0.075, "no": 0.2, "pl": 0.116667, "pt": 0.1, "ro": 0.35, "ru": 0.212121, "sk": 0.15, "sl": 0.15, "sq": 0.075, "sr": 0.175, "sv": 0.125, "tr": 0.15, "uk": 0.075}, "lang_wp": {"ar": 0.0875, "az": 0.3375, "be": 0.3625, "bg": 0.2, "bo": 0.4875, "ca": 0.3625, "cn": 0.144068, "cs": 0.1125, "da": 0.2125, "el": 0.25, "en": 0.275, "es": 0.275, "et": 0.15, "eu": 0.116667, "fa": 0.35, "fi": 0.1375, "fr": 0.3875, "gl": 0.183333, "hu": 0.125, "hv": 0.475, "is": 0.225, "it": 0.225, "ka": 0.1125, "la": 0.308333, "li": 0.2125, "lv": 0.2375, "mk": 0.15, "mt": 0.2, "nl": 0.15, "no": 0.275, "pl": 0.141667, "pt": 0.1375, "ro": 0.425, "ru": 0.30303, "sk": 0.2625, "sl": 0.3, "sq": 0.1375, "sr": 0.25, "sv": 0.1625, "tr": 0.2625, "uk": 0.175}, "confusion": {"1": {"1": 0.6739, "4": 0.0739, "3": 0.0783, "2": 0.0565, "6": 0.1, "5": 0.0174}, "5": {"1": 0.5176, "3": 0.0211, "6": 0.1373, "4": 0.1585, "5": 0.162, "2": 0.0035}, "6": {"1": 0.5607, "5": 0.1308, "4": 0.1402, "2": 0.028, "6": 0.1168, "3": 0.0234}, "2": {"1": 0.5738, "5": 0.0779, "2": 0.0738, "6": 
0.0779, "4": 0.1189, "3": 0.0779}, "3": {"3": 0.1316, "1": 0.3158, "6": 0.1579, "2": 0.1053, "4": 0.2632, "5": 0.0263}, "4": {"1": 0.3415, "6": 0.2683, "5": 0.0976, "4": 0.1951, "3": 0.0976}}}];
|
| 235 |
// Language codes covered by the benchmark; these are the same keys used by each
// model row's `lang_exact` / `lang_wp` maps. Several codes are dataset-specific
// rather than ISO 639-1 (e.g. "cn", "hv", "li").
const ALL_LANGS = ["ar", "az", "be", "bg", "bo", "ca", "cn", "cs", "da", "el", "en", "es", "et", "eu", "fa", "fi", "fr", "gl", "hu", "hv", "is", "it", "ka", "la", "li", "lv", "mk", "mt", "nl", "no", "pl", "pt", "ro", "ru", "sk", "sl", "sq", "sr", "sv", "tr", "uk"];
|
| 236 |
// Display names for language codes. The benchmark uses some non-ISO codes
// ("bo" Bosnian, "cn" Chinese, "fa" Faroese, "hv" Croatian, "li" Lithuanian,
// "pe" Persian), so do not "correct" keys against ISO 639-1 without checking the dataset.
// NOTE(review): "bn" maps to "Bulgarian" (same as "bg"); it is not in ALL_LANGS — confirm
// whether it is an intentional alias or should read "Bengali".
const LANG_NAMES = {"af": "Afrikaans", "ar": "Arabic", "az": "Azerbaijani", "be": "Belarusian", "bo": "Bosnian", "bg": "Bulgarian", "bn": "Bulgarian", "ca": "Catalan", "cs": "Czech", "cn": "Chinese", "cy": "Welsh", "da": "Danish", "de": "German", "el": "Greek", "en": "English", "eo": "Esperanto", "es": "Spanish", "et": "Estonian", "eu": "Basque", "fa": "Faroese", "fi": "Finnish", "fr": "French", "ga": "Irish", "gl": "Galician", "gu": "Gujarati", "he": "Hebrew", "hi": "Hindi", "hr": "Croatian", "hu": "Hungarian", "hy": "Armenian", "hv": "Croatian", "id": "Indonesian", "is": "Icelandic", "it": "Italian", "ja": "Japanese", "ka": "Georgian", "kk": "Kazakh", "km": "Khmer", "kn": "Kannada", "ko": "Korean", "la": "Latin", "li": "Lithuanian", "lv": "Latvian", "mk": "Macedonian", "ml": "Malayalam", "mn": "Mongolian", "mr": "Marathi", "ms": "Malay", "mt": "Maltese", "my": "Burmese", "ne": "Nepali", "nl": "Dutch", "no": "Norwegian", "pa": "Punjabi", "pe": "Persian", "pl": "Polish", "pt": "Portuguese", "ro": "Romanian", "ru": "Russian", "si": "Sinhala", "sk": "Slovak", "sl": "Slovenian", "sq": "Albanian", "sr": "Serbian", "sv": "Swedish", "sw": "Swahili", "ta": "Tamil", "te": "Telugu", "th": "Thai", "tl": "Filipino", "tr": "Turkish", "uk": "Ukrainian", "ur": "Urdu", "uz": "Uzbek", "vi": "Vietnamese", "zh": "Chinese", "zu": "Zulu"};
|
| 237 |
// Per-language totals keyed by the same codes as ALL_LANGS.
// NOTE(review): presumably the number of texts/predictions per language — the
// consumer of this table is not visible here, confirm before relabeling.
const LANG_COUNTS = {"ar": 1790, "az": 1795, "be": 1794, "bg": 1721, "bo": 1795, "ca": 1712, "cn": 2686, "cs": 1779, "da": 1784, "el": 1794, "en": 1791, "es": 1652, "et": 1788, "eu": 2684, "fa": 2697, "fi": 1784, "fr": 1761, "gl": 2698, "hu": 1796, "hv": 1796, "is": 1790, "it": 1791, "ka": 1629, "la": 2522, "li": 1793, "lv": 1795, "mk": 1799, "mt": 1797, "nl": 1795, "no": 1799, "pl": 2637, "pt": 1795, "ro": 1790, "ru": 1424, "sk": 1784, "sl": 1788, "sq": 1793, "sr": 1798, "sv": 1797, "tr": 1799, "uk": 1747};
|
|
|
|
| 475 |
`77760 predictions · ${ALL_LANGS.length} languages · ${ALL_ROWS.length} models`;
|
| 476 |
}
|
| 477 |
|
| 478 |
+
// ── bias lollipop ──
/**
 * Draws the horizontal mean-error ("bias") bar chart on the #biasChart canvas.
 *
 * Rows of ALL_ROWS are copied and ordered by ascending `avg_bias`; each bar is the
 * model's `avg_bias` rounded to three decimals, green when >= 0 and red when negative.
 * The canvas height grows with the number of models (26px per row, 260px minimum).
 *
 * Returns without drawing when the canvas is not in the document, so a section
 * that has been removed from the markup does not abort the renderers that follow.
 */
function renderBias() {
  const canvas = document.getElementById('biasChart');
  if (!canvas) return;

  // Copy before sorting: ALL_ROWS order is shared with the other views.
  const sorted = [...ALL_ROWS].sort((a, b) => a.avg_bias - b.avg_bias);
  const labels = sorted.map(r => r.model);
  const values = sorted.map(r => +(r.avg_bias).toFixed(3));
  const colors = values.map(v => v >= 0 ? '#16a34a' : '#dc2626');

  const ctx = canvas.getContext('2d');
  const h = Math.max(260, sorted.length * 26 + 40);
  canvas.style.height = h + 'px';

  const axisFont = { family: 'JetBrains Mono', size: 10 };

  new Chart(ctx, {
    type: 'bar',
    data: { labels, datasets: [{
      label: 'Mean Error', data: values, backgroundColor: colors,
      borderRadius: 3, barPercentage: 0.45,
    }] },
    options: {
      // indexAxis 'y' makes the bars horizontal, so the value lives on parsed.x.
      indexAxis: 'y', responsive: true, maintainAspectRatio: false,
      animation: { duration: 400 },
      plugins: {
        legend: { display: false },
        tooltip: { backgroundColor: '#1e2a3a', callbacks: {
          // Explicit "+" for positive values; negatives already carry their sign.
          label: item => ` Bias: ${item.parsed.x > 0 ? '+' : ''}${item.parsed.x.toFixed(3)}`
        } }
      },
      scales: {
        x: { grid: { color: '#1a2236' },
          ticks: { color: '#64748b', font: axisFont },
          title: { display: true, text: 'Mean Error (pred − gt)', color: '#64748b', font: axisFont }
        },
        y: { grid: { display: false },
          ticks: { color: '#cbd5e1', font: axisFont }
        }
      }
    }
  });
}
|
| 516 |
+
|
| 517 |
+
// ── critical confusion ──
/**
 * Draws the grouped horizontal bar chart of "critical" confusions on #criticalChart.
 *
 * For every model (ordered by descending `avg_wp`) two percentages are plotted:
 *   - Low→High: share of ground-truth 1–2 rows predicted as 5–6
 *   - High→Low: share of ground-truth 5–6 rows predicted as 1–2
 * Each confusion row is weighted by the sum of its own entries; in the data visible
 * in this file rows are fractions summing to ~1, so this is effectively the mean of
 * the contributing rows. Models without a `confusion` table plot as 0.
 *
 * Returns without drawing when the canvas is not in the document, so a section
 * that has been removed from the markup does not abort the renderers that follow.
 */
function renderCritical() {
  const canvas = document.getElementById('criticalChart');
  if (!canvas) return;

  const LOW = new Set([1, 2]);
  const HIGH = new Set([5, 6]);
  const sorted = [...ALL_ROWS].sort((a, b) => b.avg_wp - a.avg_wp);
  const labels = sorted.map(r => r.model);

  // Percentage (one decimal) of the weight in ground-truth classes `from`
  // that was predicted as a class in `to`; 0 when no `from` row exists.
  const crossRate = (confusion, from, to) => {
    let numer = 0;
    let denom = 0;
    Object.entries(confusion || {}).forEach(([gtStr, preds]) => {
      if (!from.has(parseInt(gtStr, 10))) return;
      const rowTotal = Object.values(preds).reduce((a, b) => a + b, 0);
      denom += rowTotal;
      Object.entries(preds).forEach(([pStr, v]) => {
        if (to.has(parseInt(pStr, 10))) numer += v * rowTotal;
      });
    });
    return denom > 0 ? +(numer / denom * 100).toFixed(1) : 0;
  };

  const lh = sorted.map(row => crossRate(row.confusion, LOW, HIGH));
  const hl = sorted.map(row => crossRate(row.confusion, HIGH, LOW));

  const ctx = canvas.getContext('2d');
  const h = Math.max(260, sorted.length * 26 + 60);
  canvas.style.height = h + 'px';

  const axisFont = { family: 'JetBrains Mono', size: 10 };

  new Chart(ctx, {
    type: 'bar',
    data: { labels, datasets: [
      { label: 'Low→High (1–2 pred as 5–6)', data: lh, backgroundColor: '#dc2626', borderRadius: 3, barPercentage: 0.7 },
      { label: 'High→Low (5–6 pred as 1–2)', data: hl, backgroundColor: '#f97316', borderRadius: 3, barPercentage: 0.7 },
    ] },
    options: {
      // indexAxis 'y' makes the bars horizontal, so the value lives on parsed.x.
      indexAxis: 'y', responsive: true, maintainAspectRatio: false,
      animation: { duration: 400 },
      plugins: {
        legend: { position: 'bottom', labels: { color: '#94a3b8', font: axisFont, boxWidth: 12, padding: 16 } },
        tooltip: { backgroundColor: '#1e2a3a', callbacks: { label: item => ` ${item.dataset.label}: ${item.parsed.x}%` } }
      },
      scales: {
        x: { min: 0, grid: { color: '#1a2236' },
          ticks: { color: '#64748b', font: axisFont, callback: v => v + '%' },
          title: { display: true, text: '% of predictions within true class', color: '#64748b', font: axisFont }
        },
        y: { grid: { display: false },
          ticks: { color: '#cbd5e1', font: axisFont }
        }
      }
    }
  });
}
|
| 576 |
+
|
| 577 |
+
// ── confusion heatmap with dropdown ──
|
| 578 |
+
// Chart currently drawn on #confusionChart (null until the first render).
// renderConfusion destroys it before building a replacement for the newly selected model.
let confChartInstance = null;
|
| 579 |
+
|
| 580 |
+
/**
 * Appends one <option> per ALL_ROWS entry to the #confModelSelect dropdown.
 * The option value is the row's index in ALL_ROWS (renderConfusion reads it back
 * to pick the row) and the visible text is the model name.
 *
 * Returns without doing anything when the dropdown is not in the document.
 */
function populateConfSelect() {
  const sel = document.getElementById('confModelSelect');
  if (!sel) return;

  ALL_ROWS.forEach((row, i) => {
    const opt = document.createElement('option');
    opt.value = i;
    // textContent, not innerHTML: model names are data and must not be parsed as markup.
    opt.textContent = row.model;
    sel.appendChild(opt);
  });
}
|
| 589 |
+
|
| 590 |
+
/**
 * (Re)draws the 6×6 confusion heatmap for the model selected in #confModelSelect.
 *
 * The chart is a scatter chart with invisible points; the cells and their
 * percentage labels are painted by an inline `afterDraw` plugin. Rows are
 * ground-truth scores 1–6 (y axis), columns are predicted scores 1–6 (x axis),
 * and a cell's value is `row.confusion[gt][pred]` (0 when absent).
 *
 * Assigned to `window` so it is reachable from outside this script's closure
 * (presumably from a change handler on the dropdown — the markup is not visible here).
 *
 * Returns without drawing when the canvas or dropdown is missing, or when the
 * selected index does not resolve to a row (e.g. ALL_ROWS is empty).
 */
window.renderConfusion = function() {
  const canvas = document.getElementById('confusionChart');
  const select = document.getElementById('confModelSelect');
  if (!canvas || !select) return;

  // An empty select yields '' -> fall back to the first model.
  const idx = parseInt(select.value || '0', 10);
  const row = ALL_ROWS[idx];
  if (!row) return;

  const conf = row.confusion || {};
  const scores = [1, 2, 3, 4, 5, 6];

  // One datum per cell: x = predicted-score index, y = ground-truth index, v = cell value.
  const data = [];
  scores.forEach((gt, ri) => {
    const preds = conf[gt] || {};
    scores.forEach((pred, ci) => {
      data.push({ x: ci, y: ri, v: preds[pred] || 0 });
    });
  });

  const ctx = canvas.getContext('2d');
  // A canvas can host only one Chart at a time; drop the previous model's chart first.
  if (confChartInstance) confChartInstance.destroy();
  canvas.style.height = '340px';

  // Green on the diagonal (correct), red when off by 3+ scores, blue otherwise;
  // opacity scales with the cell value.
  function cellColor(ri, ci, v) {
    if (ri === ci) return `rgba(22,163,74,${0.15 + v * 0.85})`;
    if (Math.abs(ri - ci) >= 3) return `rgba(220,38,38,${v * 0.9})`;
    return `rgba(37,99,235,${v * 0.75})`;
  }

  const axisFont = { family: 'JetBrains Mono', size: 10 };

  confChartInstance = new Chart(ctx, {
    type: 'scatter',
    data: { datasets: [{ data, pointRadius: 0 }] },
    options: {
      responsive: true, maintainAspectRatio: false, animation: { duration: 300 },
      plugins: {
        legend: { display: false },
        tooltip: { backgroundColor: '#1e2a3a', callbacks: {
          title: items => `GT ${scores[items[0].raw.y]} → Pred ${scores[items[0].raw.x]}`,
          label: item => ` ${(item.raw.v * 100).toFixed(1)}% of true-class predictions`
        } }
      },
      // Axes run from -0.5 to 5.5 so each integer index sits in the middle of its cell.
      // NOTE(review): scores[v] only resolves for integer tick values; confirm that the
      // generated ticks land on 0..5 rather than on the half-integer bounds.
      scales: {
        x: { type: 'linear', min: -0.5, max: 5.5,
          ticks: { stepSize: 1, callback: v => 'Pred ' + (scores[v] || ''), color: '#64748b', font: axisFont },
          grid: { color: '#1a2236' },
        },
        y: { type: 'linear', min: -0.5, max: 5.5,
          ticks: { stepSize: 1, callback: v => 'GT ' + (scores[v] || ''), color: '#64748b', font: axisFont },
          grid: { color: '#1a2236' },
        }
      }
    },
    plugins: [{
      id: 'heatmap',
      afterDraw(chart) {
        const { ctx: g, scales: { x, y } } = chart;
        // Pixel size of one cell; the y axis grows upward, hence 0 minus 1.
        const cellW = x.getPixelForValue(1) - x.getPixelForValue(0);
        const cellH = y.getPixelForValue(0) - y.getPixelForValue(1);
        data.forEach(d => {
          const cx = x.getPixelForValue(d.x);
          const cy = y.getPixelForValue(d.y);
          g.fillStyle = cellColor(d.y, d.x, d.v);
          // 1px inset on every side leaves a gap between neighbouring cells.
          g.fillRect(cx - cellW / 2 + 1, cy - cellH / 2 + 1, cellW - 2, cellH - 2);
          if (d.v > 0.005) {
            g.fillStyle = d.v > 0.3 ? '#fff' : '#94a3b8';
            g.font = `bold 11px JetBrains Mono, monospace`;
            g.textAlign = 'center';
            g.textBaseline = 'middle';
            g.fillText((d.v * 100).toFixed(0) + '%', cx, cy);
          }
        });
      }
    }]
  });
};
|
| 662 |
+
|
| 663 |
// Initial page render: draw every section once, in document order.
render();
renderChart();
// renderDist(); // disabled for testing
renderBias();
renderCritical();
// The dropdown must be filled before the first heatmap render reads its value.
populateConfSelect();
renderConfusion();
|
| 670 |
})();
|
| 671 |
</script>
|
| 672 |
</body>
|