Improve org page: remove stats/leaderboard/dialect/methodology, add logo, update team with emails
Browse files- index.html +22 -427
index.html
CHANGED
|
@@ -48,50 +48,13 @@
|
|
| 48 |
pointer-events: none;
|
| 49 |
}
|
| 50 |
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
width: 72px;
|
| 54 |
-
height: 72px;
|
| 55 |
-
margin: 0 auto 20px;
|
| 56 |
-
border-radius: 18px;
|
| 57 |
-
background: linear-gradient(135deg, rgba(74,222,128,0.15), rgba(34,197,94,0.08));
|
| 58 |
-
border: 1.5px solid rgba(74,222,128,0.25);
|
| 59 |
-
display: flex;
|
| 60 |
-
align-items: center;
|
| 61 |
-
justify-content: center;
|
| 62 |
-
overflow: hidden;
|
| 63 |
-
}
|
| 64 |
-
.hero-icon img {
|
| 65 |
-
width: 52px;
|
| 66 |
-
height: 52px;
|
| 67 |
-
object-fit: contain;
|
| 68 |
display: block;
|
| 69 |
-
|
| 70 |
-
|
| 71 |
-
.hero-icon-fallback {
|
| 72 |
-
font-size: 2rem;
|
| 73 |
-
color: var(--brand);
|
| 74 |
-
}
|
| 75 |
-
|
| 76 |
-
/* "Haakkim" wordmark */
|
| 77 |
-
.hero-wordmark {
|
| 78 |
-
font-size: clamp(2.8rem, 7vw, 4.5rem);
|
| 79 |
-
font-weight: 900;
|
| 80 |
-
letter-spacing: -0.02em;
|
| 81 |
-
color: var(--brand);
|
| 82 |
-
line-height: 1;
|
| 83 |
-
margin-bottom: 10px;
|
| 84 |
}
|
| 85 |
|
| 86 |
-
/* Arabic subtitle */
|
| 87 |
-
.hero-ar {
|
| 88 |
-
font-size: 2rem;
|
| 89 |
-
font-weight: 700;
|
| 90 |
-
color: rgba(74,222,128,0.7);
|
| 91 |
-
direction: rtl;
|
| 92 |
-
margin-bottom: 20px;
|
| 93 |
-
letter-spacing: 0.05em;
|
| 94 |
-
}
|
| 95 |
.hero p {
|
| 96 |
font-size: 1.1rem;
|
| 97 |
color: var(--muted);
|
|
@@ -152,112 +115,6 @@
|
|
| 152 |
margin-bottom: 32px;
|
| 153 |
}
|
| 154 |
|
| 155 |
-
/* ββ Stats grid ββββββββββββββββββββββββββββ */
|
| 156 |
-
.stats-grid {
|
| 157 |
-
display: flex;
|
| 158 |
-
flex-wrap: wrap;
|
| 159 |
-
justify-content: center;
|
| 160 |
-
gap: 16px;
|
| 161 |
-
}
|
| 162 |
-
.stat-card {
|
| 163 |
-
background: var(--surface);
|
| 164 |
-
border: 1px solid var(--border);
|
| 165 |
-
border-radius: var(--radius);
|
| 166 |
-
padding: 24px 20px;
|
| 167 |
-
text-align: center;
|
| 168 |
-
transition: border-color 0.2s, transform 0.2s;
|
| 169 |
-
min-width: 155px;
|
| 170 |
-
flex: 1 1 155px;
|
| 171 |
-
max-width: 200px;
|
| 172 |
-
}
|
| 173 |
-
.stat-card:hover { border-color: var(--brand); transform: translateY(-2px); }
|
| 174 |
-
.stat-value {
|
| 175 |
-
font-size: 2.2rem;
|
| 176 |
-
font-weight: 800;
|
| 177 |
-
color: var(--brand);
|
| 178 |
-
line-height: 1;
|
| 179 |
-
margin-bottom: 6px;
|
| 180 |
-
}
|
| 181 |
-
.stat-label {
|
| 182 |
-
font-size: 0.8rem;
|
| 183 |
-
color: var(--muted);
|
| 184 |
-
text-transform: uppercase;
|
| 185 |
-
letter-spacing: 0.05em;
|
| 186 |
-
}
|
| 187 |
-
|
| 188 |
-
/* ββ Leaderboard table βββββββββββββββββββββ */
|
| 189 |
-
.table-wrap {
|
| 190 |
-
overflow-x: auto;
|
| 191 |
-
border-radius: var(--radius);
|
| 192 |
-
border: 1px solid var(--border);
|
| 193 |
-
}
|
| 194 |
-
table {
|
| 195 |
-
width: 100%;
|
| 196 |
-
border-collapse: collapse;
|
| 197 |
-
font-size: 0.9rem;
|
| 198 |
-
}
|
| 199 |
-
thead th {
|
| 200 |
-
background: var(--surface2);
|
| 201 |
-
color: var(--muted);
|
| 202 |
-
font-size: 0.78rem;
|
| 203 |
-
text-transform: uppercase;
|
| 204 |
-
letter-spacing: 0.06em;
|
| 205 |
-
padding: 12px 16px;
|
| 206 |
-
text-align: left;
|
| 207 |
-
white-space: nowrap;
|
| 208 |
-
}
|
| 209 |
-
tbody tr {
|
| 210 |
-
border-top: 1px solid var(--border);
|
| 211 |
-
transition: background 0.15s;
|
| 212 |
-
}
|
| 213 |
-
tbody tr:hover { background: var(--surface2); }
|
| 214 |
-
td {
|
| 215 |
-
padding: 12px 16px;
|
| 216 |
-
vertical-align: middle;
|
| 217 |
-
}
|
| 218 |
-
.rank-badge {
|
| 219 |
-
display: inline-flex;
|
| 220 |
-
align-items: center;
|
| 221 |
-
justify-content: center;
|
| 222 |
-
width: 28px;
|
| 223 |
-
height: 28px;
|
| 224 |
-
border-radius: 50%;
|
| 225 |
-
font-weight: 700;
|
| 226 |
-
font-size: 0.85rem;
|
| 227 |
-
}
|
| 228 |
-
.rank-1 { background: linear-gradient(135deg, #f59e0b, #fbbf24); color: #000; }
|
| 229 |
-
.rank-2 { background: linear-gradient(135deg, #94a3b8, #cbd5e1); color: #000; }
|
| 230 |
-
.rank-3 { background: linear-gradient(135deg, #cd7c2f, #e09050); color: #000; }
|
| 231 |
-
.rank-other { background: var(--surface2); color: var(--muted); }
|
| 232 |
-
.model-name {
|
| 233 |
-
font-family: 'SFMono-Regular', Consolas, monospace;
|
| 234 |
-
font-size: 0.83rem;
|
| 235 |
-
color: var(--text);
|
| 236 |
-
}
|
| 237 |
-
.model-org {
|
| 238 |
-
color: var(--muted);
|
| 239 |
-
font-size: 0.78rem;
|
| 240 |
-
}
|
| 241 |
-
.score-val {
|
| 242 |
-
font-weight: 700;
|
| 243 |
-
color: var(--brand);
|
| 244 |
-
font-variant-numeric: tabular-nums;
|
| 245 |
-
}
|
| 246 |
-
.ci-val {
|
| 247 |
-
color: var(--muted);
|
| 248 |
-
font-size: 0.78rem;
|
| 249 |
-
font-variant-numeric: tabular-nums;
|
| 250 |
-
}
|
| 251 |
-
.battles-badge {
|
| 252 |
-
display: inline-flex;
|
| 253 |
-
align-items: center;
|
| 254 |
-
background: var(--surface2);
|
| 255 |
-
border-radius: 99px;
|
| 256 |
-
padding: 2px 10px;
|
| 257 |
-
font-size: 0.78rem;
|
| 258 |
-
color: var(--muted);
|
| 259 |
-
}
|
| 260 |
-
|
| 261 |
/* ββ Cards βββββββββββββββββββββββββββββββββ */
|
| 262 |
.cards-grid {
|
| 263 |
display: grid;
|
|
@@ -290,57 +147,6 @@
|
|
| 290 |
color: var(--muted);
|
| 291 |
}
|
| 292 |
|
| 293 |
-
/* ββ Dialect pills βββββββββββββββββββββββββ */
|
| 294 |
-
.dialect-list {
|
| 295 |
-
display: flex;
|
| 296 |
-
flex-wrap: wrap;
|
| 297 |
-
gap: 10px;
|
| 298 |
-
}
|
| 299 |
-
.dialect-pill {
|
| 300 |
-
background: var(--surface2);
|
| 301 |
-
border: 1px solid var(--border);
|
| 302 |
-
border-radius: 99px;
|
| 303 |
-
padding: 6px 14px;
|
| 304 |
-
font-size: 0.82rem;
|
| 305 |
-
display: flex;
|
| 306 |
-
align-items: center;
|
| 307 |
-
gap: 8px;
|
| 308 |
-
}
|
| 309 |
-
.dialect-pill .bar {
|
| 310 |
-
height: 4px;
|
| 311 |
-
border-radius: 99px;
|
| 312 |
-
background: var(--brand);
|
| 313 |
-
min-width: 4px;
|
| 314 |
-
}
|
| 315 |
-
|
| 316 |
-
/* ββ Scoring steps βββββββββββββββββββββββββ */
|
| 317 |
-
.method-steps {
|
| 318 |
-
display: grid;
|
| 319 |
-
grid-template-columns: repeat(auto-fit, minmax(220px, 1fr));
|
| 320 |
-
gap: 16px;
|
| 321 |
-
}
|
| 322 |
-
.step {
|
| 323 |
-
background: var(--surface);
|
| 324 |
-
border: 1px solid var(--border);
|
| 325 |
-
border-radius: var(--radius);
|
| 326 |
-
padding: 20px;
|
| 327 |
-
}
|
| 328 |
-
.step-num {
|
| 329 |
-
width: 32px;
|
| 330 |
-
height: 32px;
|
| 331 |
-
border-radius: 50%;
|
| 332 |
-
background: var(--brand);
|
| 333 |
-
color: #0a0f0a;
|
| 334 |
-
font-weight: 800;
|
| 335 |
-
font-size: 0.9rem;
|
| 336 |
-
display: flex;
|
| 337 |
-
align-items: center;
|
| 338 |
-
justify-content: center;
|
| 339 |
-
margin-bottom: 12px;
|
| 340 |
-
}
|
| 341 |
-
.step h4 { font-size: 0.92rem; font-weight: 700; margin-bottom: 6px; }
|
| 342 |
-
.step p { font-size: 0.82rem; color: var(--muted); }
|
| 343 |
-
|
| 344 |
/* ββ Dataset callout βββββββββββββββββββββββ */
|
| 345 |
.dataset-callout {
|
| 346 |
background: linear-gradient(135deg, rgba(74,222,128,0.06), rgba(99,102,241,0.06));
|
|
@@ -386,48 +192,21 @@
|
|
| 386 |
font-weight: 800;
|
| 387 |
color: #0a0f0a;
|
| 388 |
margin: 0 auto 14px;
|
| 389 |
-
overflow: hidden;
|
| 390 |
flex-shrink: 0;
|
| 391 |
}
|
| 392 |
-
.team-avatar img {
|
| 393 |
-
width: 100%;
|
| 394 |
-
height: 100%;
|
| 395 |
-
object-fit: cover;
|
| 396 |
-
border-radius: 50%;
|
| 397 |
-
}
|
| 398 |
.team-name { font-weight: 700; font-size: 1rem; margin-bottom: 3px; }
|
| 399 |
.team-role { font-size: 0.78rem; color: var(--brand); font-weight: 600; margin-bottom: 4px; text-transform: uppercase; letter-spacing: 0.04em; }
|
| 400 |
.team-affil { font-size: 0.76rem; color: var(--muted); margin-bottom: 10px; }
|
| 401 |
-
.team-bio {
|
| 402 |
-
font-size: 0.8rem;
|
| 403 |
-
color: var(--muted);
|
| 404 |
-
line-height: 1.55;
|
| 405 |
-
text-align: left;
|
| 406 |
-
margin-bottom: 12px;
|
| 407 |
-
flex: 1;
|
| 408 |
-
}
|
| 409 |
.team-links { display: flex; gap: 10px; flex-wrap: wrap; justify-content: center; }
|
| 410 |
-
.team-
|
| 411 |
display: inline-flex;
|
| 412 |
align-items: center;
|
| 413 |
gap: 4px;
|
| 414 |
font-size: 0.76rem;
|
| 415 |
-
color: var(--brand);
|
| 416 |
-
text-decoration: none;
|
| 417 |
-
}
|
| 418 |
-
.team-hf:hover { text-decoration: underline; }
|
| 419 |
-
|
| 420 |
-
/* ββ Note box ββββββββββββββββββββββββββββββ */
|
| 421 |
-
.note-box {
|
| 422 |
-
background: rgba(74,222,128,0.05);
|
| 423 |
-
border: 1px solid rgba(74,222,128,0.2);
|
| 424 |
-
border-radius: 8px;
|
| 425 |
-
padding: 14px 18px;
|
| 426 |
-
font-size: 0.85rem;
|
| 427 |
color: var(--muted);
|
| 428 |
-
|
| 429 |
}
|
| 430 |
-
.
|
| 431 |
|
| 432 |
/* ββ Footer ββββββββββββββββββββββββββββββββ */
|
| 433 |
footer {
|
|
@@ -452,14 +231,13 @@
|
|
| 452 |
HERO
|
| 453 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββ -->
|
| 454 |
<div class="hero">
|
| 455 |
-
<
|
| 456 |
-
|
| 457 |
-
|
| 458 |
-
|
| 459 |
-
|
| 460 |
-
|
| 461 |
-
<div
|
| 462 |
-
<div class="hero-ar">ΨΩΩΩΩΩ
</div>
|
| 463 |
|
| 464 |
<p>An open arena-style human preference evaluation platform for Arabic large language models — built from the ground up for Arabic.</p>
|
| 465 |
|
|
@@ -472,157 +250,6 @@
|
|
| 472 |
|
| 473 |
<div class="container">
|
| 474 |
|
| 475 |
-
<!-- βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 476 |
-
STATS
|
| 477 |
-
ββββββββββββββββββββββββββββββββββββββββββββββββββββ -->
|
| 478 |
-
<section>
|
| 479 |
-
<div class="section-title">Current Snapshot — v1.0</div>
|
| 480 |
-
<div class="section-sub">Statistics from the first public release of Haakkim battle data</div>
|
| 481 |
-
<div class="stats-grid">
|
| 482 |
-
<div class="stat-card">
|
| 483 |
-
<div class="stat-value">1,273</div>
|
| 484 |
-
<div class="stat-label">Total Battles</div>
|
| 485 |
-
</div>
|
| 486 |
-
<div class="stat-card">
|
| 487 |
-
<div class="stat-value">831</div>
|
| 488 |
-
<div class="stat-label">BT-Ranked Battles</div>
|
| 489 |
-
</div>
|
| 490 |
-
<div class="stat-card">
|
| 491 |
-
<div class="stat-value">67</div>
|
| 492 |
-
<div class="stat-label">Models Ranked</div>
|
| 493 |
-
</div>
|
| 494 |
-
<div class="stat-card">
|
| 495 |
-
<div class="stat-value">11</div>
|
| 496 |
-
<div class="stat-label">Arabic Dialects</div>
|
| 497 |
-
</div>
|
| 498 |
-
<div class="stat-card">
|
| 499 |
-
<div class="stat-value">582</div>
|
| 500 |
-
<div class="stat-label">ESS (Clamped)</div>
|
| 501 |
-
</div>
|
| 502 |
-
<div class="stat-card">
|
| 503 |
-
<div class="stat-value">0.35</div>
|
| 504 |
-
<div class="stat-label">Graph Density</div>
|
| 505 |
-
</div>
|
| 506 |
-
</div>
|
| 507 |
-
</section>
|
| 508 |
-
|
| 509 |
-
<!-- βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 510 |
-
MSA LEADERBOARD TOP 10
|
| 511 |
-
ββββββββββββββββββββββββββββββββββββββββββββββββββββ -->
|
| 512 |
-
<section>
|
| 513 |
-
<div class="section-title">MSA Leaderboard — Top 10</div>
|
| 514 |
-
<div class="section-sub">Bradley–Terry scores (1000-centered log-odds). Full 67-model leaderboard at <a href="https://haakkim.tech/#leaderboard" style="color:var(--brand)">haakkim.tech</a></div>
|
| 515 |
-
<div class="table-wrap">
|
| 516 |
-
<table>
|
| 517 |
-
<thead>
|
| 518 |
-
<tr>
|
| 519 |
-
<th>Rank</th>
|
| 520 |
-
<th>Model</th>
|
| 521 |
-
<th>BT Score</th>
|
| 522 |
-
<th>95% CI</th>
|
| 523 |
-
<th>Battles</th>
|
| 524 |
-
</tr>
|
| 525 |
-
</thead>
|
| 526 |
-
<tbody>
|
| 527 |
-
<tr>
|
| 528 |
-
<td><span class="rank-badge rank-1">1</span></td>
|
| 529 |
-
<td><span class="model-org">mistralai/</span><span class="model-name">ministral-3b-2512</span></td>
|
| 530 |
-
<td><span class="score-val">1001.75</span></td>
|
| 531 |
-
<td><span class="ci-val">[1001.20, 1002.93]</span></td>
|
| 532 |
-
<td><span class="battles-badge">40</span></td>
|
| 533 |
-
</tr>
|
| 534 |
-
<tr>
|
| 535 |
-
<td><span class="rank-badge rank-2">2</span></td>
|
| 536 |
-
<td><span class="model-org">mistralai/</span><span class="model-name">ministral-8b-2512</span></td>
|
| 537 |
-
<td><span class="score-val">1001.61</span></td>
|
| 538 |
-
<td><span class="ci-val">[1000.72, 1002.97]</span></td>
|
| 539 |
-
<td><span class="battles-badge">43</span></td>
|
| 540 |
-
</tr>
|
| 541 |
-
<tr>
|
| 542 |
-
<td><span class="rank-badge rank-3">3</span></td>
|
| 543 |
-
<td><span class="model-org">Qwen/</span><span class="model-name">Qwen3-235B-A22B-Thinking-2507</span></td>
|
| 544 |
-
<td><span class="score-val">1001.21</span></td>
|
| 545 |
-
<td><span class="ci-val">[1000.47, 1002.00]</span></td>
|
| 546 |
-
<td><span class="battles-badge">38</span></td>
|
| 547 |
-
</tr>
|
| 548 |
-
<tr>
|
| 549 |
-
<td><span class="rank-badge rank-other">4</span></td>
|
| 550 |
-
<td><span class="model-org">Qwen/</span><span class="model-name">Qwen3-30B-A3B-Instruct-2507</span></td>
|
| 551 |
-
<td><span class="score-val">1001.14</span></td>
|
| 552 |
-
<td><span class="ci-val">[999.96, 1002.83]</span></td>
|
| 553 |
-
<td><span class="battles-badge">31</span></td>
|
| 554 |
-
</tr>
|
| 555 |
-
<tr>
|
| 556 |
-
<td><span class="rank-badge rank-other">5</span></td>
|
| 557 |
-
<td><span class="model-org">deepseek/</span><span class="model-name">deepseek-v3.2-exp</span></td>
|
| 558 |
-
<td><span class="score-val">1001.13</span></td>
|
| 559 |
-
<td><span class="ci-val">[1000.27, 1002.16]</span></td>
|
| 560 |
-
<td><span class="battles-badge">38</span></td>
|
| 561 |
-
</tr>
|
| 562 |
-
<tr>
|
| 563 |
-
<td><span class="rank-badge rank-other">6</span></td>
|
| 564 |
-
<td><span class="model-org">deepseek/</span><span class="model-name">deepseek-v3.1</span></td>
|
| 565 |
-
<td><span class="score-val">1000.99</span></td>
|
| 566 |
-
<td><span class="ci-val">[999.81, 1002.07]</span></td>
|
| 567 |
-
<td><span class="battles-badge">29</span></td>
|
| 568 |
-
</tr>
|
| 569 |
-
<tr>
|
| 570 |
-
<td><span class="rank-badge rank-other">7</span></td>
|
| 571 |
-
<td><span class="model-org">Qwen/</span><span class="model-name">Qwen3-235B-A22B-Instruct-2507</span></td>
|
| 572 |
-
<td><span class="score-val">1000.98</span></td>
|
| 573 |
-
<td><span class="ci-val">[1000.12, 1002.08]</span></td>
|
| 574 |
-
<td><span class="battles-badge">39</span></td>
|
| 575 |
-
</tr>
|
| 576 |
-
<tr>
|
| 577 |
-
<td><span class="rank-badge rank-other">8</span></td>
|
| 578 |
-
<td><span class="model-org">deepseek/</span><span class="model-name">deepseek-r1-0528</span></td>
|
| 579 |
-
<td><span class="score-val">1000.93</span></td>
|
| 580 |
-
<td><span class="ci-val">[1000.10, 1002.14]</span></td>
|
| 581 |
-
<td><span class="battles-badge">38</span></td>
|
| 582 |
-
</tr>
|
| 583 |
-
<tr>
|
| 584 |
-
<td><span class="rank-badge rank-other">9</span></td>
|
| 585 |
-
<td><span class="model-org">openai/</span><span class="model-name">gpt-oss-120b</span></td>
|
| 586 |
-
<td><span class="score-val">1000.93</span></td>
|
| 587 |
-
<td><span class="ci-val">[1000.04, 1002.58]</span></td>
|
| 588 |
-
<td><span class="battles-badge">25</span></td>
|
| 589 |
-
</tr>
|
| 590 |
-
<tr>
|
| 591 |
-
<td><span class="rank-badge rank-other">10</span></td>
|
| 592 |
-
<td><span class="model-org">deepseek/</span><span class="model-name">deepseek-v3.2</span></td>
|
| 593 |
-
<td><span class="score-val">1000.89</span></td>
|
| 594 |
-
<td><span class="ci-val">[999.86, 1002.25]</span></td>
|
| 595 |
-
<td><span class="battles-badge">31</span></td>
|
| 596 |
-
</tr>
|
| 597 |
-
</tbody>
|
| 598 |
-
</table>
|
| 599 |
-
</div>
|
| 600 |
-
<div class="note-box">
|
| 601 |
-
<strong>Score scale:</strong> Haakkim uses unscaled log-odds units centered at 1000 — a 1-point gap corresponds to win odds of e¹ ≈ 2.7:1, producing a ~4-point spread across 67 models. Chatbot Arena-style Elo (×173.7) encodes identical win probabilities with hundreds-of-points spreads.
|
| 602 |
-
</div>
|
| 603 |
-
</section>
|
| 604 |
-
|
| 605 |
-
<!-- βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 606 |
-
DIALECT COVERAGE
|
| 607 |
-
ββββββββββββββββββββββββββββββββββββββββββββββββββββ -->
|
| 608 |
-
<section>
|
| 609 |
-
<div class="section-title">Arabic Dialect Coverage</div>
|
| 610 |
-
<div class="section-sub">11 varieties — from Modern Standard Arabic to regional dialects across the Arab world</div>
|
| 611 |
-
<div class="dialect-list">
|
| 612 |
-
<div class="dialect-pill">MSA <div class="bar" style="width:80px"></div> 77.5%</div>
|
| 613 |
-
<div class="dialect-pill">Tunisian <div class="bar" style="width:22px"></div> 9.0%</div>
|
| 614 |
-
<div class="dialect-pill">Saudi <div class="bar" style="width:16px"></div> 6.5%</div>
|
| 615 |
-
<div class="dialect-pill">Egyptian <div class="bar" style="width:9px"></div> 3.5%</div>
|
| 616 |
-
<div class="dialect-pill">Levantine <div class="bar" style="width:5px"></div> 1.7%</div>
|
| 617 |
-
<div class="dialect-pill">Sudanese <div class="bar" style="width:4px"></div> 0.9%</div>
|
| 618 |
-
<div class="dialect-pill">Omani <div class="bar" style="width:4px"></div> 0.4%</div>
|
| 619 |
-
<div class="dialect-pill">Iraqi <div class="bar" style="width:4px"></div> 0.2%</div>
|
| 620 |
-
<div class="dialect-pill">Moroccan <div class="bar" style="width:4px"></div> <0.1%</div>
|
| 621 |
-
<div class="dialect-pill">Libyan <div class="bar" style="width:4px"></div> <0.1%</div>
|
| 622 |
-
<div class="dialect-pill">Algerian <div class="bar" style="width:4px"></div> <0.1%</div>
|
| 623 |
-
</div>
|
| 624 |
-
</section>
|
| 625 |
-
|
| 626 |
<!-- βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 627 |
EVALUATION MODES
|
| 628 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββ -->
|
|
@@ -651,36 +278,6 @@
|
|
| 651 |
</div>
|
| 652 |
</section>
|
| 653 |
|
| 654 |
-
<!-- βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 655 |
-
SCORING METHODOLOGY
|
| 656 |
-
ββββββββββββββββββββββββββββββββββββββββββββββββββββ -->
|
| 657 |
-
<section>
|
| 658 |
-
<div class="section-title">Scoring Methodology</div>
|
| 659 |
-
<div class="section-sub">Statistically rigorous Bradley–Terry model with four key components</div>
|
| 660 |
-
<div class="method-steps">
|
| 661 |
-
<div class="step">
|
| 662 |
-
<div class="step-num">1</div>
|
| 663 |
-
<h4>Inverse-Probability Weighting</h4>
|
| 664 |
-
<p>Corrects for non-uniform model exposure using ε-greedy adaptive sampling weights, clamped to [P1, P99].</p>
|
| 665 |
-
</div>
|
| 666 |
-
<div class="step">
|
| 667 |
-
<div class="step-num">2</div>
|
| 668 |
-
<h4>Bootstrap Confidence Intervals</h4>
|
| 669 |
-
<p>200 vote-level resamples per run to produce 95% CIs on every model's BT score.</p>
|
| 670 |
-
</div>
|
| 671 |
-
<div class="step">
|
| 672 |
-
<div class="step-num">3</div>
|
| 673 |
-
<h4>Rankability Gate</h4>
|
| 674 |
-
<p>BT scores published only when the comparison graph is fully connected and ESS is sufficient; otherwise win-rate fallback is shown.</p>
|
| 675 |
-
</div>
|
| 676 |
-
<div class="step">
|
| 677 |
-
<div class="step-num">4</div>
|
| 678 |
-
<h4>Log-odds Scale</h4>
|
| 679 |
-
<p>1000-centered unscaled log-odds. A 1-point gap ≈ 2.7:1 win odds. Full reproducibility: pipeline and dataset are open.</p>
|
| 680 |
-
</div>
|
| 681 |
-
</div>
|
| 682 |
-
</section>
|
| 683 |
-
|
| 684 |
<!-- βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 685 |
DATASET CALLOUT
|
| 686 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββ -->
|
|
@@ -703,36 +300,34 @@
|
|
| 703 |
<div class="section-sub">College of Computing, Umm Al-Qura University — Mecca, Saudi Arabia</div>
|
| 704 |
<div class="team-grid">
|
| 705 |
|
| 706 |
-
<!-- Mourad Mars -->
|
| 707 |
<div class="team-card">
|
| 708 |
<div class="team-avatar">MM</div>
|
| 709 |
<div class="team-name">Dr. Mourad Mars</div>
|
| 710 |
-
<div class="team-role">
|
| 711 |
-
<div class="team-affil">
|
| 712 |
<div class="team-links">
|
| 713 |
-
<a href="
|
| 714 |
-
<a href="mailto:msmars@uqu.edu.sa" class="team-hf">✉ msmars@uqu.edu.sa</a>
|
| 715 |
</div>
|
| 716 |
</div>
|
| 717 |
|
| 718 |
-
<!-- Hassan Barmandah -->
|
| 719 |
<div class="team-card">
|
| 720 |
<div class="team-avatar">HB</div>
|
| 721 |
<div class="team-name">Hassan Barmandah</div>
|
| 722 |
<div class="team-role">AI Researcher</div>
|
| 723 |
-
<div class="team-affil">
|
| 724 |
<div class="team-links">
|
| 725 |
-
<a href="
|
| 726 |
-
<a href="https://github.com/HasanBGIt" class="team-hf">β₯ HasanBGIt</a>
|
| 727 |
</div>
|
| 728 |
</div>
|
| 729 |
|
| 730 |
-
<!-- Abdulrhman Alassaf -->
|
| 731 |
<div class="team-card">
|
| 732 |
<div class="team-avatar">AA</div>
|
| 733 |
<div class="team-name">Abdulrhman Alassaf</div>
|
| 734 |
<div class="team-role">Software Engineer</div>
|
| 735 |
<div class="team-affil">Umm Al-Qura University</div>
|
|
|
|
|
|
|
|
|
|
| 736 |
</div>
|
| 737 |
|
| 738 |
</div>
|
|
|
|
| 48 |
pointer-events: none;
|
| 49 |
}
|
| 50 |
|
| 51 |
+
.hero-logo {
|
| 52 |
+
height: 90px;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 53 |
display: block;
|
| 54 |
+
margin: 0 auto 28px;
|
| 55 |
+
object-fit: contain;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
}
|
| 57 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 58 |
.hero p {
|
| 59 |
font-size: 1.1rem;
|
| 60 |
color: var(--muted);
|
|
|
|
| 115 |
margin-bottom: 32px;
|
| 116 |
}
|
| 117 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
/* ββ Cards βββββββββββββββββββββββββββββββββ */
|
| 119 |
.cards-grid {
|
| 120 |
display: grid;
|
|
|
|
| 147 |
color: var(--muted);
|
| 148 |
}
|
| 149 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 150 |
/* ββ Dataset callout βββββββββββββββββββββββ */
|
| 151 |
.dataset-callout {
|
| 152 |
background: linear-gradient(135deg, rgba(74,222,128,0.06), rgba(99,102,241,0.06));
|
|
|
|
| 192 |
font-weight: 800;
|
| 193 |
color: #0a0f0a;
|
| 194 |
margin: 0 auto 14px;
|
|
|
|
| 195 |
flex-shrink: 0;
|
| 196 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 197 |
.team-name { font-weight: 700; font-size: 1rem; margin-bottom: 3px; }
|
| 198 |
.team-role { font-size: 0.78rem; color: var(--brand); font-weight: 600; margin-bottom: 4px; text-transform: uppercase; letter-spacing: 0.04em; }
|
| 199 |
.team-affil { font-size: 0.76rem; color: var(--muted); margin-bottom: 10px; }
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
.team-links { display: flex; gap: 10px; flex-wrap: wrap; justify-content: center; }
|
| 201 |
+
.team-email {
|
| 202 |
display: inline-flex;
|
| 203 |
align-items: center;
|
| 204 |
gap: 4px;
|
| 205 |
font-size: 0.76rem;
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 206 |
color: var(--muted);
|
| 207 |
+
text-decoration: none;
|
| 208 |
}
|
| 209 |
+
.team-email:hover { color: var(--brand); text-decoration: underline; }
|
| 210 |
|
| 211 |
/* ββ Footer ββββββββββββββββββββββββββββββββ */
|
| 212 |
footer {
|
|
|
|
| 231 |
HERO
|
| 232 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββ -->
|
| 233 |
<div class="hero">
|
| 234 |
+
<img
|
| 235 |
+
src="haakkim-logo-withname.png"
|
| 236 |
+
alt="Haakkim"
|
| 237 |
+
class="hero-logo"
|
| 238 |
+
onerror="this.style.display='none'; document.getElementById('hero-fallback').style.display='block';"
|
| 239 |
+
/>
|
| 240 |
+
<div id="hero-fallback" style="display:none; font-size:2.8rem; font-weight:900; color:var(--brand); margin-bottom:20px;">Haakkim ΨΩΩΩΩΩ
</div>
|
|
|
|
| 241 |
|
| 242 |
<p>An open arena-style human preference evaluation platform for Arabic large language models — built from the ground up for Arabic.</p>
|
| 243 |
|
|
|
|
| 250 |
|
| 251 |
<div class="container">
|
| 252 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 253 |
<!-- βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 254 |
EVALUATION MODES
|
| 255 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββ -->
|
|
|
|
| 278 |
</div>
|
| 279 |
</section>
|
| 280 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 281 |
<!-- βββββββββββββββββββββββββββββββββββββββββββββββββββ
|
| 282 |
DATASET CALLOUT
|
| 283 |
ββββββββββββββββββββββββββββββββββββββββββββββββββββ -->
|
|
|
|
| 300 |
<div class="section-sub">College of Computing, Umm Al-Qura University — Mecca, Saudi Arabia</div>
|
| 301 |
<div class="team-grid">
|
| 302 |
|
|
|
|
| 303 |
<div class="team-card">
|
| 304 |
<div class="team-avatar">MM</div>
|
| 305 |
<div class="team-name">Dr. Mourad Mars</div>
|
| 306 |
+
<div class="team-role">Principal Investigator</div>
|
| 307 |
+
<div class="team-affil">Umm Al-Qura University</div>
|
| 308 |
<div class="team-links">
|
| 309 |
+
<a href="mailto:msmars@uqu.edu.sa" class="team-email">✉ msmars@uqu.edu.sa</a>
|
|
|
|
| 310 |
</div>
|
| 311 |
</div>
|
| 312 |
|
|
|
|
| 313 |
<div class="team-card">
|
| 314 |
<div class="team-avatar">HB</div>
|
| 315 |
<div class="team-name">Hassan Barmandah</div>
|
| 316 |
<div class="team-role">AI Researcher</div>
|
| 317 |
+
<div class="team-affil">Umm Al-Qura University</div>
|
| 318 |
<div class="team-links">
|
| 319 |
+
<a href="mailto:hassanhbarmandah@gmail.com" class="team-email">✉ hassanhbarmandah@gmail.com</a>
|
|
|
|
| 320 |
</div>
|
| 321 |
</div>
|
| 322 |
|
|
|
|
| 323 |
<div class="team-card">
|
| 324 |
<div class="team-avatar">AA</div>
|
| 325 |
<div class="team-name">Abdulrhman Alassaf</div>
|
| 326 |
<div class="team-role">Software Engineer</div>
|
| 327 |
<div class="team-affil">Umm Al-Qura University</div>
|
| 328 |
+
<div class="team-links">
|
| 329 |
+
<a href="mailto:aaalassaf@outlook.com" class="team-email">✉ aaalassaf@outlook.com</a>
|
| 330 |
+
</div>
|
| 331 |
</div>
|
| 332 |
|
| 333 |
</div>
|