Spaces:
Running
Running
Update index.html
Browse files- index.html +36 -33
index.html
CHANGED
|
@@ -367,59 +367,59 @@
|
|
| 367 |
},
|
| 368 |
{
|
| 369 |
rank: 13,
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 370 |
name: "Falcon-H1-1.5B-Instruct",
|
| 371 |
score: 81,
|
| 372 |
strengths: "Good at logic, math, and factual questions.",
|
| 373 |
weaknesses: "Fails translation completely and often gives blank/junk answers."
|
| 374 |
},
|
| 375 |
-
{
|
| 376 |
-
rank: 14,
|
| 377 |
-
name: "lfm2-700m",
|
| 378 |
-
score: 75.5,
|
| 379 |
-
strengths: "Handles sentiment, math, and logic correctly.",
|
| 380 |
-
weaknesses: "Many failures in reasoning (cause/effect), tool use, synonyms, and grammar."
|
| 381 |
-
},
|
| 382 |
{
|
| 383 |
rank: 15,
|
| 384 |
-
name: "
|
| 385 |
-
score:
|
| 386 |
-
strengths: "
|
| 387 |
-
weaknesses: "
|
| 388 |
},
|
| 389 |
{
|
| 390 |
rank: 16,
|
| 391 |
-
name: "
|
| 392 |
-
score:
|
| 393 |
-
strengths: "
|
| 394 |
-
weaknesses: "
|
| 395 |
},
|
| 396 |
{
|
| 397 |
rank: 17,
|
| 398 |
-
name: "
|
| 399 |
-
score:
|
| 400 |
-
strengths: "
|
| 401 |
-
weaknesses: "
|
| 402 |
},
|
| 403 |
{
|
| 404 |
rank: 18,
|
| 405 |
-
name: "
|
| 406 |
-
score:
|
| 407 |
-
strengths: "
|
| 408 |
-
weaknesses: "
|
| 409 |
},
|
| 410 |
{
|
| 411 |
rank: 19,
|
| 412 |
-
name: "qwen2.5-0.
|
| 413 |
-
score:
|
| 414 |
-
strengths: "
|
| 415 |
-
weaknesses: "
|
| 416 |
},
|
| 417 |
{
|
| 418 |
rank: 20,
|
| 419 |
-
name: "
|
| 420 |
-
score:
|
| 421 |
-
strengths: "
|
| 422 |
-
weaknesses: "
|
| 423 |
}
|
| 424 |
];
|
| 425 |
|
|
@@ -435,7 +435,10 @@
|
|
| 435 |
function populateTable() {
|
| 436 |
const tbody = document.querySelector('#performanceTable tbody');
|
| 437 |
|
| 438 |
-
|
|
|
|
|
|
|
|
|
|
| 439 |
const percentage = (model.score / maxScore) * 100;
|
| 440 |
|
| 441 |
const row = document.createElement('tr');
|
|
@@ -446,7 +449,7 @@
|
|
| 446 |
<td class="rank">#${model.rank}</td>
|
| 447 |
<td class="model-name">${model.name}</td>
|
| 448 |
<td>
|
| 449 |
-
<div class="score">${model.score} / ${maxScore}</div>
|
| 450 |
<div class="progress-container">
|
| 451 |
<div class="progress-bar" style="width: ${percentage}%"></div>
|
| 452 |
</div>
|
|
|
|
| 367 |
},
|
| 368 |
{
|
| 369 |
rank: 13,
|
| 370 |
+
name: "arco-3",
|
| 371 |
+
score: 83,
|
| 372 |
+
strengths: "One of the most powerful 0.6b models; perfect at code gen, sentiment, math, and core knowledge.",
|
| 373 |
+
weaknesses: "Fails completely at summarization (hallucinations), sequencing, and rhyming. Poor reasoning."
|
| 374 |
+
},
|
| 375 |
+
{
|
| 376 |
+
rank: 14,
|
| 377 |
name: "Falcon-H1-1.5B-Instruct",
|
| 378 |
score: 81,
|
| 379 |
strengths: "Good at logic, math, and factual questions.",
|
| 380 |
weaknesses: "Fails translation completely and often gives blank/junk answers."
|
| 381 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
{
|
| 383 |
rank: 15,
|
| 384 |
+
name: "Llama-3.2-SUN-HDIC-1B-Instruct.Q8_0.gguf",
|
| 385 |
+
score: 79,
|
| 386 |
+
strengths: "Strong in synonyms, math, and factual recall; decent at core NLP.",
|
| 387 |
+
weaknesses: "Complete failure at summarization and misconception correction; bad factual hallucinations."
|
| 388 |
},
|
| 389 |
{
|
| 390 |
rank: 16,
|
| 391 |
+
name: "Piaget-0.6B.Q8_0.gguf",
|
| 392 |
+
score: 78,
|
| 393 |
+
strengths: "Excellent at core knowledge tasks: Sentiment, Object Location, Antonyms, Categorization, Math, Factual QA.",
|
| 394 |
+
weaknesses: "Complete failure at Summarization, Sequencing, and Rhyming. Very poor at Grammar and Misconception Correction."
|
| 395 |
},
|
| 396 |
{
|
| 397 |
rank: 17,
|
| 398 |
+
name: "lfm2-700m",
|
| 399 |
+
score: 75.5,
|
| 400 |
+
strengths: "Handles sentiment, math, and logic correctly.",
|
| 401 |
+
weaknesses: "Many failures in reasoning (cause/effect), tool use, synonyms, and grammar."
|
| 402 |
},
|
| 403 |
{
|
| 404 |
rank: 18,
|
| 405 |
+
name: "Qwen3-psychological-reasoning-0.6B.Q8_0.gguf",
|
| 406 |
+
score: 73,
|
| 407 |
+
strengths: "Excels at factual recall and classification (Sentiment, Object Location, Math, Factual QA, NER).",
|
| 408 |
+
weaknesses: "Very poor at reasoning and creativity; complete failure in summarization, sequencing, and rhyming."
|
| 409 |
},
|
| 410 |
{
|
| 411 |
rank: 19,
|
| 412 |
+
name: "qwen2.5-0.5b-instruct",
|
| 413 |
+
score: 72,
|
| 414 |
+
strengths: "Decent at math, basic commands, and some logic.",
|
| 415 |
+
weaknesses: "Fails creative tasks (rhyming, synonyms) and suffers major headline hallucinations."
|
| 416 |
},
|
| 417 |
{
|
| 418 |
rank: 20,
|
| 419 |
+
name: "qwen3-0.6b-notetaker-q8_0.gguf",
|
| 420 |
+
score: 71,
|
| 421 |
+
strengths: "Excels at a wide range of core knowledge and classification tasks (sentiment, math, NER, factual QA).",
|
| 422 |
+
weaknesses: "Complete failure at complex reasoning, creativity, and nuanced language (cause/effect, idioms, sequencing)."
|
| 423 |
}
|
| 424 |
];
|
| 425 |
|
|
|
|
| 435 |
function populateTable() {
|
| 436 |
const tbody = document.querySelector('#performanceTable tbody');
|
| 437 |
|
| 438 |
+
// Filter to top 20 for display
|
| 439 |
+
const top20Models = models.slice(0, 20);
|
| 440 |
+
|
| 441 |
+
top20Models.forEach((model, index) => {
|
| 442 |
const percentage = (model.score / maxScore) * 100;
|
| 443 |
|
| 444 |
const row = document.createElement('tr');
|
|
|
|
| 449 |
<td class="rank">#${model.rank}</td>
|
| 450 |
<td class="model-name">${model.name}</td>
|
| 451 |
<td>
|
| 452 |
+
<div class="score">${model.score.toFixed(1)} / ${maxScore}</div>
|
| 453 |
<div class="progress-container">
|
| 454 |
<div class="progress-bar" style="width: ${percentage}%"></div>
|
| 455 |
</div>
|