appvoid committed on
Commit
1b51b33
·
verified ·
1 Parent(s): bfd97f5

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +36 -33
index.html CHANGED
@@ -367,59 +367,59 @@
367
  },
368
  {
369
  rank: 13,
 
 
 
 
 
 
 
370
  name: "Falcon-H1-1.5B-Instruct",
371
  score: 81,
372
  strengths: "Good at logic, math, and factual questions.",
373
  weaknesses: "Fails translation completely and often gives blank/junk answers."
374
  },
375
- {
376
- rank: 14,
377
- name: "lfm2-700m",
378
- score: 75.5,
379
- strengths: "Handles sentiment, math, and logic correctly.",
380
- weaknesses: "Many failures in reasoning (cause/effect), tool use, synonyms, and grammar."
381
- },
382
  {
383
  rank: 15,
384
- name: "qwen2.5-0.5b-instruct",
385
- score: 72,
386
- strengths: "Decent at math, basic commands, and some logic.",
387
- weaknesses: "Fails creative tasks (rhyming, synonyms) and suffers major headline hallucinations."
388
  },
389
  {
390
  rank: 16,
391
- name: "Dolphin3.0-Qwen2.5-0.5B",
392
- score: 69.5,
393
- strengths: "Best of the small models; handles math and antonyms well.",
394
- weaknesses: "Completely fails synonym generation and most grammar correction tasks."
395
  },
396
  {
397
  rank: 17,
398
- name: "qwen3-0.6B",
399
- score: 67,
400
- strengths: "Correct on basic math and antonyms.",
401
- weaknesses: "Riddled with bizarre, nonsensical answers (e.g., '3D', '2D Notation')."
402
  },
403
  {
404
  rank: 18,
405
- name: "Auto-Completer-0.2.Q8_0.gguf",
406
- score: 60,
407
- strengths: "Perfect in Antonyms, Translation, Math, and Logic.",
408
- weaknesses: "Complete failure in most other areas; reinforces misconceptions, cannot follow sequences."
409
  },
410
  {
411
  rank: 19,
412
- name: "qwen2.5-0.5B",
413
- score: 60,
414
- strengths: "Passes basic math and antonym tasks.",
415
- weaknesses: "Very unreliable; outputs long numbers for text tasks, fails creative tasks."
416
  },
417
  {
418
  rank: 20,
419
- name: "NxMobileLM-1.5B-SFT",
420
- score: 59.5,
421
- strengths: "Passes math and some grammar/logic.",
422
- weaknesses: "Extremely unreliable, with frequent junk ('{', '1', emojis) or non-English outputs."
423
  }
424
  ];
425
 
@@ -435,7 +435,10 @@
435
  function populateTable() {
436
  const tbody = document.querySelector('#performanceTable tbody');
437
 
438
- models.forEach((model, index) => {
 
 
 
439
  const percentage = (model.score / maxScore) * 100;
440
 
441
  const row = document.createElement('tr');
@@ -446,7 +449,7 @@
446
  <td class="rank">#${model.rank}</td>
447
  <td class="model-name">${model.name}</td>
448
  <td>
449
- <div class="score">${model.score} / ${maxScore}</div>
450
  <div class="progress-container">
451
  <div class="progress-bar" style="width: ${percentage}%"></div>
452
  </div>
 
367
  },
368
  {
369
  rank: 13,
370
+ name: "arco-3",
371
+ score: 83,
372
+ strengths: "One of the most powerful 0.6b models; perfect at code gen, sentiment, math, and core knowledge.",
373
+ weaknesses: "Fails completely at summarization (hallucinations), sequencing, and rhyming. Poor reasoning."
374
+ },
375
+ {
376
+ rank: 14,
377
  name: "Falcon-H1-1.5B-Instruct",
378
  score: 81,
379
  strengths: "Good at logic, math, and factual questions.",
380
  weaknesses: "Fails translation completely and often gives blank/junk answers."
381
  },
 
 
 
 
 
 
 
382
  {
383
  rank: 15,
384
+ name: "Llama-3.2-SUN-HDIC-1B-Instruct.Q8_0.gguf",
385
+ score: 79,
386
+ strengths: "Strong in synonyms, math, and factual recall; decent at core NLP.",
387
+ weaknesses: "Complete failure at summarization and misconception correction; bad factual hallucinations."
388
  },
389
  {
390
  rank: 16,
391
+ name: "Piaget-0.6B.Q8_0.gguf",
392
+ score: 78,
393
+ strengths: "Excellent at core knowledge tasks: Sentiment, Object Location, Antonyms, Categorization, Math, Factual QA.",
394
+ weaknesses: "Complete failure at Summarization, Sequencing, and Rhyming. Very poor at Grammar and Misconception Correction."
395
  },
396
  {
397
  rank: 17,
398
+ name: "lfm2-700m",
399
+ score: 75.5,
400
+ strengths: "Handles sentiment, math, and logic correctly.",
401
+ weaknesses: "Many failures in reasoning (cause/effect), tool use, synonyms, and grammar."
402
  },
403
  {
404
  rank: 18,
405
+ name: "Qwen3-psychological-reasoning-0.6B.Q8_0.gguf",
406
+ score: 73,
407
+ strengths: "Excels at factual recall and classification (Sentiment, Object Location, Math, Factual QA, NER).",
408
+ weaknesses: "Very poor at reasoning and creativity; complete failure in summarization, sequencing, and rhyming."
409
  },
410
  {
411
  rank: 19,
412
+ name: "qwen2.5-0.5b-instruct",
413
+ score: 72,
414
+ strengths: "Decent at math, basic commands, and some logic.",
415
+ weaknesses: "Fails creative tasks (rhyming, synonyms) and suffers major headline hallucinations."
416
  },
417
  {
418
  rank: 20,
419
+ name: "qwen3-0.6b-notetaker-q8_0.gguf",
420
+ score: 71,
421
+ strengths: "Excels at a wide range of core knowledge and classification tasks (sentiment, math, NER, factual QA).",
422
+ weaknesses: "Complete failure at complex reasoning, creativity, and nuanced language (cause/effect, idioms, sequencing)."
423
  }
424
  ];
425
 
 
435
  function populateTable() {
436
  const tbody = document.querySelector('#performanceTable tbody');
437
 
438
+ // Filter to top 20 for display
439
+ const top20Models = models.slice(0, 20);
440
+
441
+ top20Models.forEach((model, index) => {
442
  const percentage = (model.score / maxScore) * 100;
443
 
444
  const row = document.createElement('tr');
 
449
  <td class="rank">#${model.rank}</td>
450
  <td class="model-name">${model.name}</td>
451
  <td>
452
+ <div class="score">${model.score.toFixed(1)} / ${maxScore}</div>
453
  <div class="progress-container">
454
  <div class="progress-bar" style="width: ${percentage}%"></div>
455
  </div>