Spaces:
Running
Running
| <html lang="en"> | |
| <head> | |
| <meta charset="UTF-8"> | |
| <meta name="viewport" content="width=device-width,initial-scale=1.0"> | |
| <title>ποΈ Smol AI WorldCup 2026 β SHIFT Benchmark</title> | |
| <link href="https://fonts.googleapis.com/css2?family=Teko:wght@300;400;500;600;700&family=Outfit:wght@300;400;500;600;700;800&family=JetBrains+Mono:wght@400;500;600;700&display=swap" rel="stylesheet"> | |
| <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/4.4.1/chart.umd.min.js"> | |
| // βββ TOOLTIP βββ | |
| const ttEl=document.getElementById('tooltip'); | |
| document.querySelectorAll('th[data-tip]').forEach(th=>{ | |
| th.addEventListener('mouseenter',function(e){ | |
| ttEl.innerHTML=this.getAttribute('data-tip'); | |
| ttEl.classList.add('show'); | |
| const r=this.getBoundingClientRect(); | |
| ttEl.style.left=Math.min(r.left+r.width/2-ttEl.offsetWidth/2, window.innerWidth-ttEl.offsetWidth-10)+'px'; | |
| ttEl.style.top=(r.top-ttEl.offsetHeight-8)+'px'; | |
| }); | |
| th.addEventListener('mouseleave',function(){ttEl.classList.remove('show');}); | |
| }); | |
| // βββ INSIGHTS TAB βββ | |
| function showIns(id,el){document.querySelectorAll('.ins-sub').forEach(d=>d.style.display='none'); | |
| document.getElementById(id).style.display=''; | |
| document.querySelectorAll('#insTabs div').forEach(d=>d.classList.remove('on'));el.classList.add('on');} | |
| // βββ BADGES βββ | |
| function getBadges(r){ | |
| let b='';const n=r[0]; | |
| // Precompute all PIRs to find rankings | |
| const allWCS=D.map(x=>({n:x[0],w:calcWCS(x)})).sort((a,b)=>b.w-a.w); | |
| const allPIR=D.map(x=>({n:x[0],p:calcPIR(x)})).sort((a,b)=>b.p-a.p); | |
| const allSHIFT=D.map(x=>({n:x[0],s:x[25]})).sort((a,b)=>b.s-a.s); | |
| const allSpeed=D.map(x=>({n:x[0],t:x[8]})).sort((a,b)=>b.t-a.t); | |
| const wcsRank=allWCS.findIndex(x=>x.n===n); | |
| const pirRank=allPIR.findIndex(x=>x.n===n); | |
| const shiftRank=allSHIFT.findIndex(x=>x.n===n); | |
| const speedRank=allSpeed.findIndex(x=>x.n===n); | |
| if(wcsRank===0) b+='<span class="badge badge-best" title="π #1 WorldCup Score = best quality x efficiency">π #1 WCS</span>'; | |
| if(shiftRank===0) b+='<span class="badge badge-best" title="β Highest SHIFT composite score">β #1 SHIFT</span>'; | |
| if(pirRank===0) b+='<span class="badge badge-value" title="π Highest PIR β most efficient pound-for-pound">π₯ #1 PIR</span>'; | |
| else if(pirRank<3) b+='<span class="badge badge-value" title="Top 3 PIR efficiency">π₯ Top3</span>'; | |
| if(speedRank===0) b+='<span class="badge badge-fast" title="β‘ Fastest model (highest tok/s)">β‘ Fastest</span>'; | |
| if(r[12]>=85) b+='<span class="badge badge-brain" title="π‘ Honesty score β₯85 β exceptional hallucination resistance">π‘ Honest</span>'; | |
| if(r[24]!=null&&r[24]>=55) b+='<span class="badge badge-phone" title="π Union β₯55 β over 78% of SOTA frontier models">π SOTA78%+</span>'; | |
| if(r[13]<=30) b+='<span class="badge badge-warn" title="β H1 Hallucination trap β€30 β high fabrication risk">β H1 Risk</span>'; | |
| return b; | |
| } | |
| </script> | |
| <style> | |
| *{margin:0;padding:0;box-sizing:border-box;} | |
| :root{ | |
| --pitch:#0a1f0f;--pitch2:#0d2912;--board:#0c1a0c;--board-alt:#101f10; | |
| --border:#1a3a1a;--border-hi:#2a5a2a;--pitch-line:rgba(255,255,255,.06); | |
| --gold:#ffd700;--gold2:#ffec3d;--white:#e8f0e0;--silver:#a8b8a0;--muted:#5a7a52; | |
| --red:#ff4444;--blue:#44aaff;--green:#44ff88;--amber:#ffaa22;--purple:#9b59b6; | |
| --shadow:0 4px 20px rgba(0,0,0,.5); | |
| --font:'Outfit',sans-serif;--font-score:'Teko',sans-serif;--font-mono:'JetBrains Mono',monospace; | |
| --lc1:#b4b4b4;--lc2:#ff6432;--lc3:#9b59b6;--lc4:#ffd700;--lc5:#a78bfa; | |
| } | |
| html{scroll-behavior:smooth;-webkit-overflow-scrolling:touch;overflow-y:auto;height:auto;touch-action:manipulation;} | |
| body{font-family:var(--font);background:var(--pitch);color:var(--white);min-height:100vh;overflow-x:hidden;overflow-y:auto;font-size:13px;height:auto;-webkit-overflow-scrolling:touch;touch-action:pan-y pan-x;overscroll-behavior:auto;} | |
| body::before{content:"";position:fixed;inset:0;z-index:0;pointer-events:none;touch-action:none; | |
| background:repeating-linear-gradient(90deg,transparent,transparent 80px,var(--pitch-line) 80px,var(--pitch-line) 81px), | |
| repeating-linear-gradient(0deg,transparent,transparent 80px,var(--pitch-line) 80px,var(--pitch-line) 81px), | |
| radial-gradient(ellipse 120% 60% at 50% -10%,rgba(255,215,0,.03),transparent 60%);} | |
| body::after{content:"";position:fixed;top:0;left:50%;transform:translateX(-50%);width:200px;height:200px;border-radius:50%;border:1px solid var(--pitch-line);z-index:0;pointer-events:none;touch-action:none;} | |
| .wrap{position:relative;z-index:1;max-width:1400px;margin:0 auto;padding:20px 16px 60px;touch-action:pan-y pan-x;} | |
| ::-webkit-scrollbar{width:5px;height:4px;}::-webkit-scrollbar-thumb{background:rgba(255,215,0,.2);border-radius:10px;} | |
| /* βββ HEADER βββ */ | |
| header{text-align:center;margin-bottom:24px;padding:28px 20px 22px;background:linear-gradient(180deg,rgba(10,31,15,.95),rgba(13,41,18,.9),rgba(10,31,15,.95)); | |
| border:1px solid var(--border);border-radius:4px;position:relative;overflow:hidden;box-shadow:0 0 60px rgba(255,215,0,.05),inset 0 1px 0 rgba(255,215,0,.1);} | |
| header::before{content:"";position:absolute;top:0;left:0;right:0;height:3px;background:linear-gradient(90deg,transparent,var(--gold),transparent);} | |
| header::after{content:"";position:absolute;bottom:0;left:0;right:0;height:3px;background:linear-gradient(90deg,transparent,var(--green),transparent);} | |
| .match-day{font-family:var(--font-score);font-size:14px;letter-spacing:8px;text-transform:uppercase;color:var(--gold);margin-bottom:4px;font-weight:300;} | |
| h1{font-family:var(--font-score);font-size:clamp(40px,6vw,68px);font-weight:700;letter-spacing:2px;line-height:.95;color:var(--gold);text-shadow:0 0 40px rgba(255,215,0,.3);} | |
| h1 span{font-size:.42em;color:var(--green);text-shadow:none;display:block;letter-spacing:4px;font-weight:400;margin-top:2px;} | |
| .tagline{font-size:11px;color:var(--silver);letter-spacing:1px;margin-bottom:12px;font-weight:300;} | |
| .tagline b{color:var(--gold);font-weight:600;} | |
| .score-strip{display:flex;gap:0;justify-content:center;margin-bottom:14px;} | |
| .ss{padding:8px 16px;text-align:center;border:1px solid var(--border);background:linear-gradient(180deg,#0a180a,#0d1f0d);} | |
| .ss:first-child{border-radius:4px 0 0 4px;}.ss:last-child{border-radius:0 4px 4px 0;} | |
| .ss-n{font-family:var(--font-score);font-size:28px;font-weight:700;line-height:1;color:var(--gold);text-shadow:0 0 10px rgba(255,215,0,.3);} | |
| .ss-l{font-size:7.5px;text-transform:uppercase;letter-spacing:2px;color:var(--muted);margin-top:2px;} | |
| .links{display:flex;gap:8px;justify-content:center;flex-wrap:wrap;} | |
| .links a{font-family:var(--font-mono);font-size:8px;font-weight:700;padding:4px 12px;border-radius:3px;text-decoration:none;border:1px solid;transition:all .2s;} | |
| .link-hf{background:rgba(255,215,0,.1);border-color:rgba(255,215,0,.3)!important;color:var(--gold);} | |
| .link-fb{background:rgba(68,170,255,.08);border-color:rgba(68,170,255,.3)!important;color:var(--blue);} | |
| /* βββ TABS βββ */ | |
| .group-bar{display:flex;gap:0;border:1px solid var(--border);border-radius:4px;overflow-x:auto;margin-bottom:16px;background:linear-gradient(180deg,#0a180a,#0d1f0d);} | |
| .gtab{padding:10px 18px;font-family:var(--font-score);font-size:15px;font-weight:500;letter-spacing:1px;color:var(--muted);cursor:pointer;border-right:1px solid var(--border);transition:all .15s;white-space:nowrap;position:relative;} | |
| .gtab:last-child{border-right:none;}.gtab:hover{color:var(--white);background:rgba(255,255,255,.03);} | |
| .gtab.on{color:var(--gold);background:rgba(255,215,0,.06);} | |
| .gtab.on::after{content:"";position:absolute;bottom:0;left:0;right:0;height:2px;background:var(--gold);} | |
| .tpane{display:none;}.tpane.on{display:block;} | |
| /* βββ FILTER βββ */ | |
| .filter-bar{display:flex;gap:6px;flex-wrap:wrap;margin-bottom:12px;align-items:center;} | |
| .filter-bar input{padding:6px 12px;border:1px solid var(--border);border-radius:3px;background:var(--board);font-family:var(--font-mono);font-size:10px;color:var(--white);outline:none;min-width:180px;} | |
| .filter-bar input:focus{border-color:var(--gold);box-shadow:0 0 0 2px rgba(255,215,0,.1);} | |
| .fb{padding:5px 12px;border:1px solid var(--border);border-radius:3px;background:var(--board);color:var(--silver);font-family:var(--font-score);font-size:13px;cursor:pointer;transition:all .15s;} | |
| .fb:hover{border-color:var(--border-hi);color:var(--white);}.fb.on{background:rgba(255,215,0,.12);border-color:var(--gold);color:var(--gold);font-weight:600;} | |
| /* βββ TABLE βββ */ | |
| .scoreboard{background:var(--board);border:1px solid var(--border);border-radius:4px;overflow-x:auto;box-shadow:var(--shadow),inset 0 1px 0 rgba(255,215,0,.05);-webkit-overflow-scrolling:touch;touch-action:pan-x pan-y;} | |
| table{width:100%;border-collapse:collapse;} | |
| thead{background:linear-gradient(180deg,#0a150a,#0c1a0c);} | |
| thead tr{border-bottom:2px solid var(--gold);} | |
| th{padding:9px 5px;text-align:center;font-family:var(--font-score);font-size:11px;font-weight:500;letter-spacing:1px;text-transform:uppercase;color:var(--gold);white-space:nowrap;cursor:pointer;vertical-align:bottom;} | |
| th.c-model{text-align:left;padding-left:14px;min-width:160px;position:sticky;left:0;background:linear-gradient(180deg,#0a150a,#0c1a0c);z-index:4;} | |
| th:hover{color:var(--gold2);text-shadow:0 0 8px rgba(255,215,0,.3);} | |
| .sa{opacity:.4;font-size:7px;margin-left:2px;} | |
| tbody tr{border-bottom:1px solid rgba(255,255,255,.04);transition:background .15s;} | |
| tbody tr:hover{background:rgba(255,215,0,.03);} | |
| td{padding:7px 5px;text-align:center;vertical-align:middle;} | |
| td.c-model{text-align:left;padding-left:14px;position:sticky;left:0;background:var(--board);z-index:1;} | |
| tbody tr:hover td.c-model{background:rgba(255,215,0,.03);} | |
| .rank{font-family:var(--font-score);font-size:20px;font-weight:700;color:var(--gold);min-width:28px;text-align:center;} | |
| .rank.r1{text-shadow:0 0 12px rgba(255,215,0,.5);}.rank.r2{color:#c0c0c0;}.rank.r3{color:#cd7f32;} | |
| .m-name{font-weight:700;font-size:12px;color:var(--white);}.m-info{display:flex;gap:4px;align-items:center;margin-top:2px;} | |
| .m-prov{font-family:var(--font-mono);font-size:8px;color:var(--muted);}.m-dot{width:5px;height:5px;border-radius:50%;} | |
| .jersey{display:inline-flex;align-items:center;gap:3px;padding:2px 7px;border-radius:3px;font-family:var(--font-score);font-size:11px;font-weight:600;letter-spacing:.5px;border:1px solid;} | |
| .j-nano{background:rgba(180,180,180,.1);border-color:rgba(180,180,180,.3);color:var(--lc1);} | |
| .j-micro{background:rgba(255,100,50,.1);border-color:rgba(255,100,50,.3);color:var(--lc2);} | |
| .j-light{background:rgba(155,89,182,.1);border-color:rgba(155,89,182,.3);color:var(--lc3);} | |
| .j-edge{background:rgba(255,215,0,.1);border-color:rgba(255,215,0,.3);color:var(--lc4);} | |
| .j-darwin{background:rgba(109,40,217,.15);border-color:rgba(109,40,217,.4);color:#a78bfa;} | |
| .led{font-family:var(--font-score);font-weight:600;font-size:17px;letter-spacing:1px;} | |
| .led-sm{font-size:14px;} | |
| .mbar{width:34px;height:3px;background:rgba(255,255,255,.06);border-radius:2px;margin:2px auto 0;overflow:hidden;} | |
| .mbar-f{height:100%;border-radius:2px;} | |
| .pir-mega{font-family:var(--font-score);font-size:24px;font-weight:700;letter-spacing:1px;} | |
| .wcs-mega{font-family:var(--font-score);font-size:26px;font-weight:900;letter-spacing:1px;text-shadow:0 0 12px rgba(255,215,0,.3);} | |
| .wcs-1{color:var(--gold);text-shadow:0 0 20px rgba(255,215,0,.5);}.wcs-2{color:#c0c0c0;text-shadow:0 0 14px rgba(192,192,192,.4);} | |
| .wcs-3{color:#cd7f32;text-shadow:0 0 14px rgba(205,127,50,.4);}.wcs-good{color:var(--green);}.wcs-avg{color:var(--blue);}.wcs-low{color:var(--muted);} | |
| .pir-glow{color:var(--gold);text-shadow:0 0 12px rgba(255,215,0,.4);} | |
| .pir-good{color:var(--green);text-shadow:0 0 8px rgba(68,255,136,.3);} | |
| .pir-avg{color:var(--silver);}.pir-low{color:var(--muted);} | |
| .na{color:var(--muted);font-family:var(--font-mono);font-size:9px;} | |
| /* βββ CHARTS βββ */ | |
| .cpanel{background:var(--board);border:1px solid var(--border);border-radius:4px;padding:18px;box-shadow:var(--shadow);margin-bottom:14px;} | |
| .cpanel h3{font-family:var(--font-score);font-size:17px;font-weight:500;color:var(--gold);letter-spacing:1px;margin-bottom:3px;} | |
| .cpanel p{font-size:9px;color:var(--muted);margin-bottom:12px;} | |
| .cgrid{display:grid;grid-template-columns:1fr 1fr;gap:14px;} | |
| .shift-tabs{display:flex;gap:0;border:1px solid var(--border);border-radius:4px;overflow:hidden;margin-bottom:14px;} | |
| .stab{flex:1;padding:8px;text-align:center;font-family:var(--font-score);font-size:14px;letter-spacing:1px;color:var(--muted);cursor:pointer;border-right:1px solid var(--border);transition:all .15s;} | |
| .stab:last-child{border-right:none;}.stab:hover{color:var(--white);background:rgba(255,255,255,.03);} | |
| .stab.on{color:var(--gold);background:rgba(255,215,0,.06);} | |
| /* INFO */ | |
| .info-cards{display:grid;grid-template-columns:repeat(auto-fit,minmax(280px,1fr));gap:12px;} | |
| .icard{background:var(--board);border:1px solid var(--border);border-radius:4px;padding:16px;position:relative;overflow:hidden;} | |
| .icard::before{content:"";position:absolute;top:0;left:0;right:0;height:2px;background:var(--gold);} | |
| .icard h4{font-family:var(--font-score);font-size:16px;font-weight:500;color:var(--gold);letter-spacing:1px;margin-bottom:8px;} | |
| .icard p{font-size:10px;color:var(--silver);line-height:1.8;} | |
| .legend{display:flex;flex-wrap:wrap;gap:10px;padding:8px 0;font-size:9px;color:var(--muted);font-family:var(--font-mono);} | |
| /* BADGES */ | |
| .badge{display:inline-block;font-size:7px;font-weight:700;padding:1px 4px;border-radius:2px;margin-left:3px;vertical-align:middle;letter-spacing:0.5px;cursor:help;} | |
| .badge-best{background:rgba(255,215,0,.15);border:1px solid rgba(255,215,0,.4);color:var(--gold);} | |
| .badge-fast{background:rgba(68,255,136,.12);border:1px solid rgba(68,255,136,.3);color:var(--green);} | |
| .badge-value{background:rgba(16,163,127,.12);border:1px solid rgba(16,163,127,.3);color:#10a37f;} | |
| .badge-brain{background:rgba(99,102,241,.12);border:1px solid rgba(99,102,241,.3);color:#6366f1;} | |
| .badge-phone{background:rgba(66,133,244,.12);border:1px solid rgba(66,133,244,.3);color:#4285f4;} | |
| .badge-warn{background:rgba(239,68,68,.1);border:1px solid rgba(239,68,68,.3);color:#ef4444;} | |
| /* INSIGHTS TAB */ | |
| .ins-grid{display:grid;grid-template-columns:1fr 1fr;gap:14px;} | |
| .ins-card{background:var(--board);border:1px solid var(--border);border-radius:6px;padding:16px;position:relative;overflow:hidden;} | |
| .ins-card h4{font-family:var(--font-score);font-size:15px;color:var(--gold);letter-spacing:1px;margin-bottom:8px;} | |
| .ins-card p{font-size:10px;color:var(--silver);line-height:1.6;margin-bottom:6px;} | |
| .ins-card .ins-num{font-family:var(--font-score);font-size:32px;font-weight:700;letter-spacing:2px;} | |
| .ins-bar{display:flex;align-items:center;gap:6px;padding:3px 0;font-size:10px;} | |
| .ins-bar-name{width:130px;color:var(--silver);overflow:hidden;text-overflow:ellipsis;white-space:nowrap;} | |
| .ins-bar-fill{flex:1;height:8px;background:rgba(255,255,255,.06);border-radius:4px;overflow:hidden;} | |
| .ins-bar-f{height:100%;border-radius:4px;} | |
| .ins-bar-val{width:40px;text-align:right;font-weight:700;font-family:var(--font-mono);} | |
| .ins-rec{background:linear-gradient(135deg,rgba(255,215,0,.05),rgba(68,255,136,.05));border:1px solid rgba(255,215,0,.2);border-radius:6px;padding:14px;margin:6px 0;} | |
| .ins-rec h5{font-family:var(--font-score);font-size:13px;color:var(--gold);margin-bottom:6px;} | |
| .ins-rec .rec-model{display:flex;align-items:center;gap:8px;padding:6px 0;border-bottom:1px solid rgba(255,255,255,.04);} | |
| .ins-rec .rec-emoji{font-size:20px;} | |
| .ins-rec .rec-name{font-weight:700;font-size:12px;} | |
| .ins-rec .rec-desc{font-size:9px;color:var(--muted);} | |
| .ins-stab{display:flex;gap:0;border:1px solid var(--border);border-radius:4px;overflow:hidden;margin-bottom:14px;} | |
| .ins-stab div{flex:1;padding:7px;text-align:center;font-family:var(--font-score);font-size:11px;letter-spacing:1px;color:var(--muted);cursor:pointer;border-right:1px solid var(--border);} | |
| .ins-stab div:last-child{border-right:none;} | |
| .ins-stab div:hover{color:var(--white);background:rgba(255,255,255,.03);} | |
| .ins-stab div.on{color:var(--gold);background:rgba(255,215,0,.06);} | |
| /* TOOLTIP β fixed position via JS */ | |
| #tooltip{display:none;position:fixed;z-index:9999;pointer-events:none; | |
| background:rgba(10,31,15,.96);border:1px solid var(--gold);border-radius:4px;padding:7px 10px; | |
| font-size:10px;font-weight:400;letter-spacing:0;text-transform:none;color:var(--silver); | |
| white-space:nowrap;box-shadow:0 4px 16px rgba(0,0,0,.6);font-family:var(--font);line-height:1.5;max-width:360px;} | |
| #tooltip.show{display:block;} | |
| #tooltip::after{content:"";position:absolute;top:100%;left:50%;transform:translateX(-50%); | |
| border:5px solid transparent;border-top-color:rgba(10,31,15,.96);} | |
| @media(max-width:768px),(max-height:500px){ | |
| html,body{overflow-y:auto!important;height:auto!important;-webkit-overflow-scrolling:touch!important;overscroll-behavior:auto;position:relative!important;touch-action:pan-y pan-x!important;} | |
| body::before,body::after{display:none!important;} | |
| .wrap{padding:10px 8px 40px;overflow:visible!important;} | |
| .scoreboard{-webkit-overflow-scrolling:touch;} | |
| h1{font-size:28px!important;}h1 span{font-size:9px!important;} | |
| .tagline{font-size:9px!important;margin-bottom:4px!important;} | |
| .match-day{font-size:7px!important;margin-bottom:2px!important;} | |
| header{padding:8px 10px 6px!important;} | |
| /* Collapse WCS formula banner on mobile */ | |
| header>div[style*="max-width:680px"]{display:none!important;} | |
| /* Compact score strip */ | |
| .score-strip{margin-bottom:6px!important;gap:0!important;}.ss{padding:2px 8px!important;}.ss-n{font-size:18px!important;}.ss-l{font-size:6px!important;} | |
| /* Compact links */ | |
| .links{margin-bottom:4px!important;gap:4px!important;}.links a{font-size:7px!important;padding:2px 8px!important;} | |
| /* Compact tab bar */ | |
| .gtab{padding:6px 8px!important;font-size:10px!important;} | |
| .group-bar{margin-bottom:8px!important;} | |
| /* Compact filter bar β single row, horizontal scroll */ | |
| .filter-bar{flex-direction:row!important;flex-wrap:nowrap!important;overflow-x:auto!important;gap:4px!important;margin-bottom:6px!important;padding-bottom:4px!important;} | |
| .filter-bar input{min-width:120px!important;width:auto!important;padding:4px 8px!important;font-size:9px!important;} | |
| .fb{padding:3px 8px!important;font-size:9px!important;white-space:nowrap!important;} | |
| .filter-bar span{font-size:9px!important;} | |
| /* Table */ | |
| table{font-size:9px;}th,td{padding:4px 2px;}th.c-model,td.c-model{min-width:110px!important;position:static!important;} | |
| .cgrid{grid-template-columns:1fr;}.info-cards{grid-template-columns:1fr;}.ins-grid{grid-template-columns:1fr;}} | |
| @media(orientation:landscape) and (max-height:500px){ | |
| body::before,body::after{display:none!important;} | |
| .wrap{padding:10px 8px 40px;} | |
| h1{font-size:28px!important;} | |
| header{padding:14px 10px 10px;} | |
| .score-strip .ss{padding:4px 10px;} | |
| .ss-n{font-size:20px!important;} | |
| } | |
| </style> | |
| </head> | |
| <body> | |
| <div id="tooltip"></div> | |
| <div class="wrap"> | |
| <header> | |
| <div class="match-day">Season 1 Β· March 2026 Β· SHIFT Framework</div> | |
| <h1>SMOL AI WORLDCUP<span>Size Β· Honesty Β· Intelligence Β· Fast Β· Thrift</span></h1> | |
| <div class="tagline">World's first <b>5-axis benchmark</b> for small AI Β· 125 questions Β· <b>7 languages</b> Β· π₯ League One Β· β½ La Liga Β· π Premier Β· π Champions</div> | |
| <div style="margin:14px auto 10px;max-width:680px;background:linear-gradient(135deg,rgba(255,215,0,.06),rgba(68,255,136,.04));border:1px solid rgba(255,215,0,.2);border-radius:8px;padding:14px 20px;text-align:center"> | |
| <div style="font-family:var(--font-score);font-size:11px;color:var(--gold);letter-spacing:3px;margin-bottom:8px">π OFFICIAL RANKING FORMULA</div> | |
| <div style="font-family:Georgia,'Times New Roman',serif;font-size:28px;color:var(--white);letter-spacing:1px;line-height:1.3"> | |
| WCS = <span style="font-size:22px;vertical-align:top">β</span><span style="border-top:2px solid var(--gold);padding:0 6px;color:var(--gold)">SHIFT Γ PIR<sub style="font-size:14px">norm</sub></span> | |
| </div> | |
| <div style="display:flex;justify-content:center;gap:24px;margin-top:10px;flex-wrap:wrap"> | |
| <div style="text-align:center"> | |
| <div style="font-size:8px;color:var(--muted);letter-spacing:1px">QUALITY</div> | |
| <div style="font-family:var(--font-score);font-size:14px;color:var(--green)">SHIFT</div> | |
| <div style="font-size:8px;color:var(--silver)">HΓ0.4 + IΓ0.6</div> | |
| </div> | |
| <div style="font-size:18px;color:var(--muted);align-self:center">Γ</div> | |
| <div style="text-align:center"> | |
| <div style="font-size:8px;color:var(--muted);letter-spacing:1px">EFFICIENCY</div> | |
| <div style="font-family:var(--font-score);font-size:14px;color:var(--blue)">PIR<sub style="font-size:8px">norm</sub></div> | |
| <div style="font-size:8px;color:var(--silver)">(IΓHΓF) Γ· (SΓT) β log scale</div> | |
| </div> | |
| </div> | |
| <div style="margin-top:8px;font-size:9px;color:var(--silver);line-height:1.5"> | |
| <b style="color:var(--gold)">Both quality AND efficiency must be high.</b> A model that's smart but huge, or tiny but dumb, ranks low.<br> | |
| <span style="color:var(--muted)">S</span>ize Β· <span style="color:var(--green)">H</span>onesty Β· <span style="color:var(--blue)">I</span>ntelligence Β· <span style="color:var(--green)">F</span>ast Β· <span style="color:var(--silver)">T</span>hrift β all 5 axes matter. | |
| </div> | |
| </div> | |
| <div class="score-strip"> | |
| <div class="ss"><div class="ss-n">18</div><div class="ss-l">Models</div></div> | |
| <div class="ss"><div class="ss-n">125</div><div class="ss-l">Questions</div></div> | |
| <div class="ss"><div class="ss-n" style="color:var(--green)">7</div><div class="ss-l">Languages</div></div> | |
| <div class="ss"><div class="ss-n">5</div><div class="ss-l">SHIFT Axes</div></div> | |
| <div class="ss"><div class="ss-n">4</div><div class="ss-l">Leagues</div></div> | |
| </div> | |
| <div class="links"> | |
| <a href="https://huggingface.co/spaces/ginigen-ai/smol-worldcup" target="_blank" style="background:rgba(68,255,136,.12);border:1px solid rgba(68,255,136,.3)!important;color:var(--green);font-family:var(--font-mono);font-size:8px;font-weight:700;padding:4px 12px;border-radius:3px;text-decoration:none">β½ Evaluate Model</a> | |
| <a href="https://huggingface.co/datasets/ginigen-ai/smol-worldcup" target="_blank" class="link-hf">π€ HF Dataset</a> | |
| <a href="https://huggingface.co/spaces/FINAL-Bench/all-bench-leaderboard" target="_blank" style="background:rgba(255,215,0,.08);border:1px solid rgba(255,215,0,.25)!important;color:var(--gold);font-family:var(--font-mono);font-size:8px;font-weight:700;padding:4px 12px;border-radius:3px;text-decoration:none">π ALL Bench Leaderboard</a> | |
| </div> | |
| </header> | |
| <div class="group-bar"> | |
| <div class="gtab on" onclick="showTab('tbl',this)">π STANDINGS</div> | |
| <div class="gtab" onclick="showTab('shift5',this)">π SHIFT AXES</div> | |
| <div class="gtab" onclick="showTab('value',this)">π° VALUE</div> | |
| <div class="gtab" onclick="showTab('sota',this)">π VS SOTA</div> | |
| <div class="gtab" onclick="showTab('matchup',this)">β MATCHUP</div> | |
| <div class="gtab" onclick="showTab('insights',this)">π¬ INSIGHTS</div> | |
| <div class="gtab" onclick="showTab('info',this)">π RULES</div> | |
| </div> | |
| <!-- βββ TAB: STANDINGS βββ --> | |
| <div id="tbl" class="tpane on"> | |
| <div class="filter-bar"> | |
| <input type="text" id="searchBox" placeholder="π Search models..." oninput="doSearch(this.value)"> | |
| <span style="font-family:var(--font-score);font-size:12px;color:var(--muted);letter-spacing:1px">LEAGUE:</span> | |
| <button class="fb on" onclick="flt('all',this)">ALL</button> | |
| <button class="fb" onclick="flt('nano',this)">π₯ League One</button> | |
| <button class="fb" onclick="flt('micro',this)">β½ La Liga</button> | |
| <button class="fb" onclick="flt('light',this)">π Premier</button> | |
| <button class="fb" onclick="flt('edge',this)">π Champions</button> | |
| <button class="fb" onclick="flt('darwin',this)">𧬠Darwin</button> | |
| <span style="color:var(--border)">β</span> | |
| <button class="fb" onclick="flt('moe',this)">β‘ MoE</button> | |
| <button class="fb" onclick="flt('thinking',this)">π§ Thinking</button> | |
| </div> | |
| <div class="scoreboard"><table id="T"><thead><tr> | |
| <th style="min-width:36px" data-tip="Ranking<br>π Champion Β· π₯π₯ Podium<br>β½ Top 8 Β· π― Top 16 Β· π Top 32">#</th> | |
| <th class="c-model" onclick="srt('name')">PLAYER<span class="sa">β</span></th> | |
| <th data-tip="WorldCup Score = β(SHIFT Γ PIR_norm)<br>Combined quality + efficiency ranking.<br>THE primary ranking metric." onclick="srt('wcs')" style="min-width:60px;color:var(--gold)">π WCS<span class="sa">β</span></th> | |
| <th data-tip="Punch Index Ratio<br>(IΓHΓF)Γ·(SΓT) β higher = more efficient" onclick="srt('pir')" style="min-width:50px;color:var(--silver)">π₯ PIR<span class="sa">β</span></th> | |
| <th data-tip="SHIFT Composite Score<br>Weighted avg of Honesty + Intelligence" onclick="srt('shift')" style="min-width:50px">β SHIFT<span class="sa">β</span></th> | |
| <th data-tip="Honesty Score (0-100)<br>Avg of TrapΒ·CalibrationΒ·RefusalΒ·Correction" onclick="srt('honesty')" style="min-width:54px;color:var(--green)">π‘ Honesty<span class="sa">β</span></th> | |
| <th data-tip="Intelligence Score (0-100)<br>Avg of ReasonΒ·MathΒ·CodeΒ·MultilingualΒ·Meta" onclick="srt('intelligence')" style="min-width:54px;color:var(--blue)">π§ Intel<span class="sa">β</span></th> | |
| <th data-tip="π UNION EVAL β 19 cross-benchmark Qs<br>Same test given to frontier SOTA models:<br>ββββββββββββββββββββββ<br>π₯ Claude Sonnet 4.6 Β·Β· 69.9<br>π₯ Claude Opus 4.6 Β·Β·Β·Β· 69.3<br>π₯ GPT-5.4 Β·Β·Β·Β·Β·Β·Β·Β·Β·Β·Β· 62.4<br> 4 DeepSeek V3.2 Β·Β·Β·Β· 60.3<br> 5 GPT-OSS-120B Β·Β·Β·Β·Β· 60.0<br> 6 Qwen3.5-397B Β·Β·Β·Β·Β· 57.1<br>ββββββββββββββββββββββ<br>Smol best: Gemma-3-12B = 57.1 (82% of SOTA!)<br>A 12B model matching a 397B giant." onclick="srt('union')" style="min-width:50px;color:var(--gold)">π Union<span class="sa">β</span></th> | |
| <th data-tip="Football League Tier by RAM<br>π₯ <2GB Β· β½ 2-4GB Β· π 4-8GB Β· π 8GB+ Β· 𧬠Darwin (MoE 3B active)" onclick="srt('league')" style="min-width:76px">β League<span class="sa">β</span></th> | |
| <th data-tip="Active Parameters (B)<br>MoE models: only active experts counted" onclick="srt('active')" style="min-width:50px">π Params<span class="sa">β</span></th> | |
| <th data-tip="Speed: Tokens per second<br>Thinking models are slower (reasoning overhead)" onclick="srt('fast')" style="min-width:48px;color:var(--green)">β‘ tok/s<span class="sa">β</span></th> | |
| <th data-tip="Peak VRAM/RAM (GB) β Total model footprint at Q4<br>MoE models: full weights loaded, not just active experts<br>See Active Params for per-token compute" onclick="srt('thrift')" style="min-width:46px">πΎ RAM<span class="sa">β</span></th> | |
| <th data-tip="H1: Hallucination Trap (10Q)<br>Can it refuse to fabricate fake facts?" onclick="srt('h1')" style="min-width:44px">πͺ€ Trap<span class="sa">β</span></th> | |
| <th data-tip="H2: Confidence Calibration (10Q)<br>Does confidence match actual accuracy?" onclick="srt('h2')" style="min-width:44px">π Calib<span class="sa">β</span></th> | |
| <th data-tip="H3: Refusal Balance (10Q)<br>Refuses impossible, answers possible" onclick="srt('h3')" style="min-width:44px">π« Refuse<span class="sa">β</span></th> | |
| <th data-tip="H4: Self-Correction (10Q)<br>Catches own errors and fixes them" onclick="srt('h4')" style="min-width:44px">π Fix<span class="sa">β</span></th> | |
| <th data-tip="I1: Reasoning & Logic (15Q)<br>Syllogisms, puzzles, pattern recognition" onclick="srt('i1')" style="min-width:44px">π§© Logic<span class="sa">β</span></th> | |
| <th data-tip="I2: Math & Quantitative (10Q)<br>Arithmetic to compound interest" onclick="srt('i2')" style="min-width:44px">π’ Math<span class="sa">β</span></th> | |
| <th data-tip="I3: Coding (10Q)<br>Python functions with test cases" onclick="srt('i3')" style="min-width:44px">π» Code<span class="sa">β</span></th> | |
| <th data-tip="I4: Multilingual (35Q, 7 languages)<br>KOΒ·ARΒ·PTΒ·TRΒ·BNΒ·TH β idioms, culture" onclick="srt('i4')" style="min-width:44px">π Lang<span class="sa">β</span></th> | |
| <th data-tip="I5: Knowledge & Synthesis (10Q)<br>Constrained explanations, critical thinking" onclick="srt('i5')" style="min-width:44px">π Know<span class="sa">β</span></th> | |
| <th data-tip="I6: Metacognition (5Q)<br>Self-awareness, knowledge boundaries, claim analysis" onclick="srt('i6')" style="min-width:44px">𧬠Meta<span class="sa">β</span></th> | |
| </tr></thead><tbody id="TB"></tbody></table></div> | |
| <div class="legend"> | |
| <span style="color:var(--gold)">π₯ PIR = (IΓHΓF)/(SΓT)</span>β | |
| <span style="color:var(--lc1)">π₯ League One(<2GB)</span> | |
| <span style="color:var(--lc2)">β½ La Liga(2-4GB)</span> | |
| <span style="color:var(--lc3)">π Premier(4-8GB)</span> | |
| <span style="color:var(--lc4)">π Champions(8GB+)</span> | |
| <span style="color:var(--lc5)">𧬠Darwin(MoE 3B)</span> | |
| </div> | |
| </div> | |
| <!-- βββ TAB: SHIFT AXES βββ --> | |
| <div id="shift5" class="tpane"> | |
| <div style="text-align:center;margin-bottom:14px"><span style="font-family:var(--font-score);font-size:22px;color:var(--gold);letter-spacing:2px">SHIFT 5-AXIS DEEP DIVE</span></div> | |
| <div class="shift-tabs"> | |
| <div class="stab on" onclick="drawAxis('S',this)">π¦ SIZE</div> | |
| <div class="stab" onclick="drawAxis('H',this)">π‘ HONESTY</div> | |
| <div class="stab" onclick="drawAxis('I',this)">π§ INTEL</div> | |
| <div class="stab" onclick="drawAxis('F',this)">β‘ FAST</div> | |
| <div class="stab" onclick="drawAxis('T',this)">πΎ THRIFT</div> | |
| </div> | |
| <div class="cpanel"><canvas id="cAxis" height="350"></canvas></div> | |
| <div class="cgrid" id="axisDetail"></div> | |
| </div> | |
| <!-- βββ TAB: VALUE βββ --> | |
| <div id="value" class="tpane"> | |
| <div style="text-align:center;margin-bottom:14px"><span style="font-family:var(--font-score);font-size:22px;color:var(--gold);letter-spacing:2px">π° BEST VALUE β GIANT KILLERS</span> | |
| <div style="font-size:10px;color:var(--muted);margin-top:4px">Which models deliver the most intelligence per GB of RAM?</div></div> | |
| <div class="cpanel"><h3>π INTELLIGENCE vs RESOURCE β WHO PUNCHES UP?</h3> | |
| <p>Upper-left = best value (high performance, low resource). Dot size = PIR. π₯ League One models in the upper-left are Giant Killers.</p> | |
| <div style="position:relative;height:400px"><canvas id="cValue"></canvas></div> | |
| <div id="valueRank" style="margin-top:12px"></div></div> | |
| <div class="cgrid"> | |
| <div class="cpanel"><h3>π₯ LEAGUE CHAMPIONS β SHIFT RADAR</h3><p>Best model from each league, compared on 5 SHIFT axes. Outer = better.</p> | |
| <div style="max-width:400px;margin:0 auto"><canvas id="cLeagueRadar" width="400" height="400"></canvas></div></div> | |
| <div class="cpanel"><h3>β‘ SPEED EFFICIENCY β tok/s PER GB</h3><p>Who squeezes the most speed from each GB of RAM? Bigger slice = more efficient.</p> | |
| <div style="max-width:400px;margin:0 auto"><canvas id="cSpeedEff" width="400" height="400"></canvas></div></div> | |
| </div> | |
| </div> | |
| <!-- βββ TAB: VS SOTA βββ --> | |
| <div id="sota" class="tpane"> | |
| <div style="text-align:center;margin-bottom:14px"><span style="font-family:var(--font-score);font-size:22px;color:var(--gold);letter-spacing:2px">π GIANT KILLING INDEX</span> | |
| <div style="font-size:10px;color:var(--muted);margin-top:4px">Small models vs Frontier giants β same Union Eval questions</div></div> | |
| <div class="cpanel"><h3>π SMOL vs SOTA β SCATTER MAP</h3><p>Red zone = Frontier giants. Colored dots = Smol challengers. Closer to red = closer to SOTA.</p> | |
| <canvas id="cSOTA" height="360"></canvas> | |
| <div style="margin-top:12px;padding:10px;background:rgba(255,215,0,.05);border:1px solid rgba(255,215,0,.15);border-radius:4px" id="sotaInsight"></div> | |
| </div> | |
| </div> | |
| <!-- βββ TAB: MATCHUP βββ --> | |
| <div id="matchup" class="tpane"> | |
| <div style="text-align:center;margin-bottom:14px"><span style="font-family:var(--font-score);font-size:22px;color:var(--gold);letter-spacing:2px">β TALE OF THE TAPE</span></div> | |
| <div style="display:flex;gap:16px;justify-content:center;margin-bottom:14px;flex-wrap:wrap"> | |
| <div style="text-align:center"><div style="font-family:var(--font-score);font-size:12px;color:var(--blue);letter-spacing:2px;margin-bottom:4px">π΅ BLUE CORNER</div> | |
| <select id="radarA" onchange="drawRadar()" style="padding:8px 12px;border:1px solid var(--border);border-radius:3px;background:var(--board);color:var(--white);font-family:var(--font-mono);font-size:11px;min-width:180px"></select></div> | |
| <div style="font-family:var(--font-score);font-size:28px;color:var(--gold);align-self:center">VS</div> | |
| <div style="text-align:center"><div style="font-family:var(--font-score);font-size:12px;color:var(--red);letter-spacing:2px;margin-bottom:4px">π΄ RED CORNER</div> | |
| <select id="radarB" onchange="drawRadar()" style="padding:8px 12px;border:1px solid var(--border);border-radius:3px;background:var(--board);color:var(--white);font-family:var(--font-mono);font-size:11px;min-width:180px"></select></div> | |
| </div> | |
| <div style="max-width:480px;margin:0 auto"><canvas id="cRadar" height="360"></canvas></div> | |
| </div> | |
| <!-- βββ TAB: INSIGHTS βββ --> | |
| <div id="insights" class="tpane"> | |
| <div style="text-align:center;margin-bottom:10px"><span style="font-family:var(--font-score);font-size:20px;color:var(--gold);letter-spacing:2px">π¬ KEY DISCOVERIES</span> | |
| <div style="font-size:9px;color:var(--muted);margin-top:2px">Data-driven insights from SHIFT 125Q + Union 19Q + Speed measurement on 19 models</div></div> | |
| <div class="ins-stab" id="insTabs"> | |
| <div class="on" onclick="showIns('ins1',this)">π₯ 4B vs 8B</div> | |
| <div onclick="showIns('ins2',this)">π MoE Edge</div> | |
| <div onclick="showIns('ins3',this)">π§ Thinking</div> | |
| <div onclick="showIns('ins4',this)">πͺ€ Hallucination</div> | |
| <div onclick="showIns('ins5',this)">β‘ Speed</div> | |
| <div onclick="showIns('ins6',this)">π Recommend</div> | |
| </div> | |
| <!-- ββ Discovery 1: 4B beats 8B ββ --> | |
| <div id="ins1" class="ins-sub"> | |
| <div class="ins-grid"> | |
| <div class="ins-card"> | |
| <h4>π₯ "4B BEATS 8B"</h4> | |
| <p>A 4B model using only 2GB RAM achieves higher SHIFT scores than most 8B models requiring 5.5GB. <b style="color:var(--gold)">Doubling parameters β doubling performance.</b></p> | |
| <div style="margin-top:10px"> | |
| <div class="ins-bar"><span class="ins-bar-name" style="color:#4285f4">β½ Gemma-3n-E4B (2GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:77.3%;background:var(--gold)"></div></div><span class="ins-bar-val" style="color:var(--gold)">77.3</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name" style="color:#f97316">β½ Qwen3-4B (2.8GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:76.8%;background:var(--gold)"></div></div><span class="ins-bar-val" style="color:var(--gold)">76.8</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name" style="color:#f97316">π Qwen3-8B (5.5GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:76.9%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">76.9</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name" style="color:#0668E1">π Llama-3.1-8B (5.5GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:61%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">61.0</span></div> | |
| </div> | |
| <p style="margin-top:8px;color:var(--gold);font-weight:700;font-size:9px">β SHIFT gap: 0.1 points for 2.75Γ more RAM</p> | |
| </div> | |
| <div class="ins-card"> | |
| <h4>π 1.7B REBELLION</h4> | |
| <p><b style="color:var(--green)">Qwen3-1.7B (1.2GB)</b> outscores three 7-14B models. Latest architecture + small size > old architecture + big size.</p> | |
| <div style="margin-top:10px"> | |
| <div class="ins-bar"><span class="ins-bar-name" style="color:#f97316">π₯ Qwen3-1.7B (1.2GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:66.8%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">66.8</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name" style="color:#ff7000">π Mistral-7B (5GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:60.6%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">60.6</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name" style="color:#0668E1">π Llama-3.1-8B (5.5GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:61%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">61.0</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name" style="color:#6366f1">π DeepSeek-R1-14B (9.5GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:59.8%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">59.8</span></div> | |
| </div> | |
| <p style="margin-top:8px;color:var(--green);font-weight:700;font-size:9px">β 1.7B beats 7B, 8B, and 14B models</p> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- ββ Discovery 2: MoE Revolution ββ --> | |
| <div id="ins2" class="ins-sub" style="display:none"> | |
| <div class="ins-grid"> | |
| <div class="ins-card"> | |
| <h4>π MoE = COMPUTE EFFICIENCY REVOLUTION</h4> | |
| <p>MoE models activate only a fraction of their parameters per token. GPT-OSS-20B has 21B total params but uses only <b style="color:var(--gold)">3.6B active per token</b> β achieving <b style="color:var(--green)">Champions-level quality</b> with <b style="color:var(--gold)">6Γ less compute</b> than equivalent dense models.</p> | |
| <div style="margin-top:10px"> | |
| <div class="ins-bar"><span class="ins-bar-name">π GPT-OSS-20B (3.6B active)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:76.9%;background:var(--gold)"></div></div><span class="ins-bar-val" style="color:var(--gold)">76.9</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">π Qwen3.5-35B-A3B (12GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:75.3%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">75.3</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">β½ GLM-4.7-Flash (3.5GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:74.8%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">74.8</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">π Llama-4-Scout (10GB)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:74.2%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">74.2</span></div> | |
| </div> | |
| </div> | |
| <div class="ins-card"> | |
| <h4>π MoE vs Dense: COMPUTE PER TOKEN</h4> | |
| <p>At similar SHIFT scores (~75), MoE activates <b style="color:var(--gold)">3-6Γ fewer parameters per token</b>. Note: total model file is larger.</p> | |
| <div style="display:flex;gap:16px;margin-top:10px;text-align:center"> | |
| <div style="flex:1;padding:12px;background:rgba(16,163,127,.08);border-radius:6px"><div style="font-size:8px;color:var(--muted)">GPT-OSS-20B (MoE)</div><div class="ins-num" style="color:#10a37f">3.6<span style="font-size:14px">B active</span></div><div style="font-size:10px;color:var(--green)">SHIFT 76.9 Β· 14GB total</div></div> | |
| <div style="flex:1;padding:12px;background:rgba(255,255,255,.03);border-radius:6px"><div style="font-size:8px;color:var(--muted)">Gemma-3-12B (Dense)</div><div class="ins-num" style="color:var(--muted)">8.5<span style="font-size:14px">GB</span></div><div style="font-size:10px;color:var(--green)">SHIFT 75.7</div></div> | |
| </div> | |
| <p style="margin-top:8px;font-size:9px;color:var(--gold)">β Same quality, 5.7Γ less RAM</p> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- ββ Discovery 3: Thinking Double-Edged ββ --> | |
| <div id="ins3" class="ins-sub" style="display:none"> | |
| <div class="ins-grid"> | |
| <div class="ins-card"> | |
| <h4>π§ THINKING: DOUBLE-EDGED SWORD</h4> | |
| <p>Thinking models (<think> tags) hurt SHIFT scores because internal reasoning <b style="color:var(--red)">disrupts JSON structured output</b>.</p> | |
| <div style="margin-top:10px"> | |
| <div class="ins-bar"><span class="ins-bar-name">Qwen3-8B (non-think)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:76.9%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">76.9</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">DeepSeek-R1-7B π§ </span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:68.2%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">68.2</span></div> | |
| <div style="font-size:9px;color:var(--red);padding:4px 0">β Same ~8B size, 8.7 point gap</div> | |
| <div class="ins-bar"><span class="ins-bar-name">Gemma-3-12B (non-think)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:75.7%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">75.7</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">DeepSeek-R1-14B π§ </span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:59.8%;background:var(--red)"></div></div><span class="ins-bar-val" style="color:var(--red)">59.8</span></div> | |
| <div style="font-size:9px;color:var(--red);padding:4px 0">β Similar size, 15.9 point gap!</div> | |
| </div> | |
| </div> | |
| <div class="ins-card"> | |
| <h4>β‘ THINKING SPEED PENALTY</h4> | |
| <p>Thinking models generate 2-3Γ more tokens internally, making them appear <b style="color:var(--red)">much slower</b> externally.</p> | |
| <div style="margin-top:10px"> | |
| <div class="ins-bar"><span class="ins-bar-name">Qwen3-8B</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:77%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">186.8</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Llama-3.1-8B</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:78%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">187.7</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">DeepSeek-R1-7B π§ </span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:29%;background:var(--red)"></div></div><span class="ins-bar-val" style="color:var(--red)">69.2</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Nemotron-Nano π§ </span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:12%;background:var(--red)"></div></div><span class="ins-bar-val" style="color:var(--red)">29.8</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">DeepSeek-R1-14B π§ </span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:9%;background:var(--red)"></div></div><span class="ins-bar-val" style="color:var(--red)">21.4</span></div> | |
| </div> | |
| <p style="margin-top:8px;font-size:9px;color:var(--silver)">tok/s measured Β· Non-thinking 8B β 187 Β· Thinking 8B β 30-69</p> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- ββ Discovery 4: Hallucination ββ --> | |
| <div id="ins4" class="ins-sub" style="display:none"> | |
| <div class="ins-card" style="max-width:100%"> | |
| <h4>πͺ€ HALLUCINATION TRAP β THE MOST DRAMATIC METRIC</h4> | |
| <p>H1 tests if models fabricate details about fake people, papers, and products. Range: <b style="color:var(--green)">100</b> to <b style="color:var(--red)">20</b> β widest spread of any metric (80 points!).</p> | |
| <div style="margin-top:12px"> | |
| <div class="ins-bar"><span class="ins-bar-name">Qwen3-4B / Qwen3-8B / GPT-OSS / GLM</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:100%;background:var(--gold)"></div></div><span class="ins-bar-val" style="color:var(--gold)">100</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Gemma-3n-E4B / Llama-4-Scout</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:90%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">90</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">DeepSeek-R1-7B / Gemma-3-12B</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:80%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">80</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Llama-3.1-8B / Nemotron-Nano</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:70%;background:var(--blue)"></div></div><span class="ins-bar-val" style="color:var(--blue)">70</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Qwen3-1.7B / DeepSeek-R1-14B</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:60%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">60</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Mistral-7B-v0.2</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:40%;background:#f97316"></div></div><span class="ins-bar-val" style="color:#f97316">40</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Tiny-Aya-Fire</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:30%;background:#ef4444"></div></div><span class="ins-bar-val" style="color:#ef4444">30</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name" style="color:var(--red)">Llama-3.2-1B</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:20%;background:#ef4444"></div></div><span class="ins-bar-val" style="color:#ef4444">20</span></div> | |
| </div> | |
| <p style="margin-top:10px;font-size:10px;color:var(--red)">β 1.3B model fabricates fake content 80% of the time when prompted with nonexistent entities.</p> | |
| <p style="font-size:10px;color:var(--green)">β Qwen3 family achieves 100% trap detection across all sizes (1.7B β 35B).</p> | |
| </div> | |
| </div> | |
| <!-- ββ Discovery 5: Speed ββ --> | |
| <div id="ins5" class="ins-sub" style="display:none"> | |
| <div class="ins-card" style="max-width:100%"> | |
| <h4>β‘ SPEED RANKING β tok/s (measured via HF Inference)</h4> | |
| <p>Provider choice impacts speed more than model size. Groq chip (Llama-4-Scout) achieves 240 tok/s while featherless Dense (Gemma-3-12B) only 18.7.</p> | |
| <div style="margin-top:12px"> | |
| <div class="ins-bar"><span class="ins-bar-name">Llama-4-Scout (Groq)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:100%;background:var(--gold)"></div></div><span class="ins-bar-val" style="color:var(--gold)">240.5</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Llama-3.1-8B (cerebras)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:78%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">187.7</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Qwen3-8B (fireworks)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:78%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">186.8</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Qwen3.5-9B (together)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:54%;background:var(--green)"></div></div><span class="ins-bar-val" style="color:var(--green)">130.6</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Llama-3.2-1B (novita)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:47%;background:var(--blue)"></div></div><span class="ins-bar-val" style="color:var(--blue)">113.2</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Tiny-Aya-Fire (cohere)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:46%;background:var(--blue)"></div></div><span class="ins-bar-val" style="color:var(--blue)">111.6</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Qwen3.5-35B-A3B (novita)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:45%;background:var(--blue)"></div></div><span class="ins-bar-val" style="color:var(--blue)">108.7</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">GPT-OSS-20B (novita)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:30%;background:var(--blue)"></div></div><span class="ins-bar-val" style="color:var(--blue)">71.9</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">DeepSeek-R1-7B π§ </span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:29%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">69.2</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">GLM-4.7-Flash π§ </span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:21%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">50.8</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Qwen3-4B (nscale)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:21%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">50.0</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Gemma-3n-E4B (together)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:18%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">43.8</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Qwen3-1.7B (featherless)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:13%;background:var(--muted)"></div></div><span class="ins-bar-val" style="color:var(--muted)">30.1</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Nemotron-Nano π§ </span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:12%;background:var(--red)"></div></div><span class="ins-bar-val" style="color:var(--red)">29.8</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">DeepSeek-R1-14B π§ </span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:9%;background:var(--red)"></div></div><span class="ins-bar-val" style="color:var(--red)">21.4</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Gemma-3-12B (featherless)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:8%;background:var(--red)"></div></div><span class="ins-bar-val" style="color:var(--red)">18.7</span></div> | |
| <div class="ins-bar"><span class="ins-bar-name">Mistral-7B (featherless)</span><div class="ins-bar-fill"><div class="ins-bar-f" style="width:7%;background:var(--red)"></div></div><span class="ins-bar-val" style="color:var(--red)">17.8</span></div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- ββ Discovery 6: Recommendations ββ --> | |
| <div id="ins6" class="ins-sub" style="display:none"> | |
| <div style="text-align:center;margin-bottom:14px"><span style="font-family:var(--font-score);font-size:16px;color:var(--gold);letter-spacing:2px">π BEST MODEL FOR YOUR USE CASE</span><div style="font-size:9px;color:var(--muted);margin-top:4px">Ranked by WCS (WorldCup Score) = β(SHIFT Γ PIR_norm) β quality AND efficiency combined</div></div> | |
| <div class="ins-grid"> | |
| <div class="ins-rec"> | |
| <h5>π OVERALL CHAMPION (WCS #1)</h5> | |
| <div class="rec-model"><span class="rec-emoji">π₯</span><div><div class="rec-name" style="color:#10a37f">GPT-OSS-20B</div><div class="rec-desc">WCS #1 Β· SHIFT 76.9 (2nd) Β· 3.6B active params Β· 14GB total Β· MoE<br>Best balance of quality and compute efficiency. Only 3.6B active per token from 21B total.</div></div></div> | |
| </div> | |
| <div class="ins-rec"> | |
| <h5>β‘ EFFICIENCY KING (PIR #1)</h5> | |
| <div class="rec-model"><span class="rec-emoji">π</span><div><div class="rec-name" style="color:#0668E1">Llama-3.2-1B</div><div class="rec-desc">PIR 6952 (1st!) Β· 0.9GB RAM Β· 113 tok/s<br>SHIFT is low (49.7) but for 1.3B params, it's remarkable efficiency. IoT/Raspberry Pi.</div></div></div> | |
| </div> | |
| <div class="ins-rec"> | |
| <h5>π° BEST VALUE</h5> | |
| <div class="rec-model"><span class="rec-emoji">π</span><div><div class="rec-name" style="color:#10a37f">GPT-OSS-20B</div><div class="rec-desc">SHIFT 76.9 Β· 3.6B active / 21B total Β· Union 54.2<br>Champions-level quality with fraction of compute per token. MoE efficiency at its best.</div></div></div> | |
| </div> | |
| <div class="ins-rec"> | |
| <h5>π§ SMARTEST (Honesty King)</h5> | |
| <div class="rec-model"><span class="rec-emoji">π</span><div><div class="rec-name" style="color:#f97316">Qwen3-8B</div><div class="rec-desc">H=87.9 (highest!) Β· SHIFT 76.9 Β· 186.8 tok/s<br>Best honesty score of any model tested. H1=100, H3=100. Fast too.</div></div></div> | |
| </div> | |
| <div class="ins-rec"> | |
| <h5>π± BEST FOR SMARTPHONE</h5> | |
| <div class="rec-model"><span class="rec-emoji">π±</span><div><div class="rec-name" style="color:#4285f4">Gemma-3n-E4B</div><div class="rec-desc">2GB RAM Β· SHIFT 77.3 Β· Union 47.4 (68% of SOTA)<br>Runs on 4GB phones. Higher quality than most laptop models.</div></div></div> | |
| </div> | |
| <div class="ins-rec"> | |
| <h5>ποΈ SPEED DEMON</h5> | |
| <div class="rec-model"><span class="rec-emoji">ποΈ</span><div><div class="rec-name" style="color:#0668E1">Llama-4-Scout</div><div class="rec-desc">240.5 tok/s (1st!) Β· SHIFT 74.2 Β· MoE 17BΓ16E<br>13Γ faster than Gemma-3-12B. Best for real-time applications.</div></div></div> | |
| </div> | |
| <div class="ins-rec"> | |
| <h5>π₯οΈ BEST FOR PC (SOTA Closest)</h5> | |
| <div class="rec-model"><span class="rec-emoji">π₯οΈ</span><div><div class="rec-name" style="color:#4285f4">Gemma-3-12B</div><div class="rec-desc">Union 57.1 = 82% of Claude Sonnet Β· SHIFT 75.7<br>Closest to frontier SOTA performance among all smol models.</div></div></div> | |
| <div class="rec-model"><span class="rec-emoji">π§¬</span><div><div class="rec-name" style="color:#7c3aed">Darwin-35B-A3B-Opus</div><div class="rec-desc">GPQA 90.0% (surpasses both parents!) Β· MMMLU 85.0% Β· 3B active / 35B MoE Β· 147.8 tok/s<br><a href="https://huggingface.co/FINAL-Bench/Darwin-35B-A3B-Opus" target="_blank" style="color:#a78bfa">Evolutionary merge</a> β child beats Father (84.2%) and Mother (85.0%). SHIFT eval pending.</div></div></div> | |
| </div> | |
| <div class="ins-rec"> | |
| <h5>π BEST MULTILINGUAL</h5> | |
| <div class="rec-model"><span class="rec-emoji">π</span><div><div class="rec-name" style="color:#4285f4">Gemma-3n-E4B</div><div class="rec-desc">I4=65.2 (highest multilingual) Β· 7 languages<br>Best across KO, AR, PT, TR, BN, TH combined score.</div></div></div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <!-- βββ TAB: RULES βββ --> | |
| <div id="info" class="tpane"> | |
| <div class="info-cards"> | |
| <div class="icard"><h4>π WHAT IS THIS?</h4><p>World's first 5-axis benchmark for small language models (β€10B active params). SHIFT measures what matters for edge: not just intelligence, but honesty, speed, and efficiency.</p></div> | |
| <div class="icard"><h4>π SHIFT FRAMEWORK</h4><p><b style="color:var(--gold)">S</b>ize β Model footprint<br><b style="color:var(--green)">H</b>onesty β Hallucination, calibration, refusal, self-correction<br><b style="color:var(--blue)">I</b>ntelligence β Reasoning, math, coding, 7 languages, metacognition<br><b style="color:var(--green)">F</b>ast β Tokens/sec, TTFA<br><b style="color:var(--white)">T</b>hrift β Peak VRAM/RAM</p></div> | |
| <div class="icard"><h4>π WCS β WORLDCUP SCORE</h4><p><b style="color:var(--gold)">WCS = β(SHIFT Γ PIR<sub>norm</sub>)</b><br>The official ranking metric. Geometric mean of quality (SHIFT) and efficiency (PIR). Both must be high to score well.</p></div> | |
| <div class="icard"><h4>π₯ PIR FORMULA</h4><p>PIR = (I Γ H Γ F) Γ· (S Γ T) Β· PIR<sub>norm</sub> = logββ(PIR) / logββ(max) Γ 100<br>Efficiency rating. Like boxing's P4P: how much punch per pound of hardware.</p></div> | |
| <div class="icard"><h4>β FOOTBALL LEAGUE TIERS</h4><p>π₯ <b>League One</b> (<2GB) β Raspberry Pi<br>β½ <b>La Liga</b> (2-4GB) β Smartphone<br>π <b>Premier League</b> (4-8GB) β Laptop<br>π <b>Champions League</b> (8GB+) β PC / Desktop<br><span style="font-size:9px;color:var(--muted)">* MoE models load all expert weights; total RAM may exceed 16GB</span></p></div> | |
| <div class="icard"><h4>π 7 LANGUAGES</h4><p>π¬π§ EN Β· π°π· KO Β· πΈπ¦ AR Β· π§π· PT Β· πΉπ· TR Β· π§π© BN Β· πΉπ TH<br>2.7B+ speakers. Sentiment, idioms, translation, culture.</p></div> | |
| <div class="icard"><h4>π UNION EVAL</h4><p>Same 20 cross-benchmark questions given to frontier SOTA models. Direct comparison with Claude, GPT-5, etc. Scores are not publicly disclosed.</p></div> | |
| </div> | |
| <div style="text-align:center;margin-top:24px;padding:18px 14px;border-top:1px solid var(--border)"> | |
| <div style="font-family:var(--font-score);font-size:14px;color:var(--gold);letter-spacing:4px">SMOL AI WORLDCUP</div> | |
| <div style="font-size:9px;color:var(--muted);margin-top:4px">Season 1 Β· v1.3 Β· 125Q SHIFT + 19Q Union Β· 18 Models Β· 12 Makers Β· 7 Languages Β· WCS Ranking Β· Apache 2.0 Β· 2026</div> | |
| <div style="margin-top:12px;padding-top:10px;border-top:1px solid rgba(255,255,255,.04)"> | |
| <a href="https://ginigen.ai" target="_blank" style="text-decoration:none;display:inline-block"> | |
| <span style="font-size:8px;color:var(--muted)">Developed by</span> | |
| <span style="font-family:var(--font-score);font-size:16px;letter-spacing:2px;color:var(--gold);margin-left:4px">Ginigen<span style="color:var(--green)">.ai</span></span> | |
| </a> | |
| <div style="font-size:8px;color:var(--muted);margin-top:3px;letter-spacing:0.5px">Small but Mighty AI</div> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <script> | |
| // βββ DATA [name,prov,color,type,group, params,active,ram,toks, arch,lic,moe, H,H1,H2,H3,H4, I,I1,I2,I3,I4,I5,I6, union,shift,thinking] | |
| // β REAL SCORES + REAL SPEED β evaluated via HF Inference API, SHIFT 125Q + Union 19Q + Speed 5x3, 2026-03-10 | |
| const D=[ | |
| // π₯ League One (<2GB) | |
| ["Llama-3.2-1B","Meta","#0668E1","open","nano",1.3,1.3,0.9,116.6,"Dense","Apache2",false, 53.9,20.0,66.5,60.0,69.0, 43.7,10.0,48.0,50.0,39.8,70.0,64.0, 23.4,47.8,false], | |
| ["Qwen3-1.7B","Alibaba","#f97316","open","nano",1.7,1.7,1.2,30.1,"Dense","Apache2",false, 71.4,60.0,44.5,100.0,81.0, 63.7,53.3,100.0,50.0,53.0,68.0,90.0, null,66.8,false], | |
| ["GPT-OSS-20B","OpenAI","#10a37f","open","edge",21.0,3.6,14.0,71.9,"MoE","Apache2",true, 84.9,100.0,63.6,100.0,76.0, 71.5,63.3,100.0,100.0,60.5,70.0,90.0, 54.2,76.9,true], | |
| // β½ La Liga (2-4GB) | |
| ["Tiny-Aya-Fire","Cohere","#39d98a","open","micro",3.3,3.3,2.2,111.6,"Dense","Apache2",false, 54.9,30.0,37.5,80.0,72.0, 61.5,26.7,70.0,70.0,60.8,71.0,74.0, 27.6,58.9,false], | |
| ["Qwen3-4B","Alibaba","#f97316","open","micro",4.0,4.0,2.8,50,"Dense","Apache2",false, 81.4,100.0,42.5,100.0,83.0, 73.8,63.3,100.0,100.0,64.0,74.0,90.0, 43.2,76.8,false], | |
| ["Gemma-3n-E4B","Google","#4285f4","open","micro",4.0,2.0,2.0,43.8,"PLE","Gemma",false, 82.6,90.0,51.5,100.0,89.0, 73.8,68.7,100.0,90.0,65.2,72.0,90.0, 47.4,77.3,false], | |
| ["GLM-4.7-Flash","Zhipu AI","#00d4aa","open","edge",47.0,4.7,26.0,50.8,"MoE","MIT",true, 81.1,100.0,38.5,100.0,86.0, 70.6,66.7,100.0,90.0,60.0,70.0,90.0, null,74.8,true], | |
| // π Premier (4-8GB) | |
| ["Mistral-7B-v0.2","Mistral","#ff7000","open","light",7.2,7.2,5.0,17.8,"Dense","Apache2",false, 71.0,40.0,58.0,100.0,86.0, 53.6,16.7,45.0,30.0,60.5,63.0,72.0, null,60.6,false], | |
| ["DeepSeek-R1-7B","DeepSeek","#6366f1","open","light",7.6,7.6,5.5,69.2,"Dense","MIT",false, 81.5,80.0,65.0,100.0,81.0, 59.4,46.7,80.0,20.0,59.3,73.0,78.0, null,68.2,true], | |
| ["Qwen3-8B","Alibaba","#f97316","open","light",8.0,8.0,5.5,186.8,"Dense","Apache2",false, 87.9,100.0,65.5,100.0,86.0, 69.6,62.0,90.0,100.0,58.5,73.0,90.0, null,76.9,false], | |
| ["Llama-3.1-8B","Meta","#0668E1","open","light",8.0,8.0,5.5,187.7,"Dense","Apache2",false, 76.2,70.0,78.0,90.0,67.0, 50.8,43.3,58.0,0.0,57.2,54.0,61.0, 41.1,61.0,false], | |
| ["Nemotron-Nano-8B","NVIDIA","#76b900","open","light",8.0,8.0,5.5,29.8,"Dense","Apache2",false, 77.8,70.0,49.0,100.0,92.0, 57.9,46.7,80.0,80.0,46.3,70.5,82.0, null,65.9,true], | |
| ["Qwen3.5-9B","Alibaba","#f97316","open","light",9.7,9.7,6.2,130.6,"Dense","Apache2",false, 86.1,90.0,54.5,100.0,100.0, 61.1,75.3,100.0,90.0,43.7,57.0,88.0, null,71.1,false], | |
| ["OLMo-3-7B","AllenAI","#2563eb","open","light",7.0,7.0,5.0,50,"Dense","Apache2",false, 68.8,60.0,40.3,100.0,75.0, 71.2,50.0,100.0,100.0,62.3,69.0,90.0, null,70.2,false], | |
| // π Champions (8GB+) | |
| ["Gemma-3-12B","Google","#4285f4","open","edge",12.0,12.0,8.5,18.7,"Dense","Gemma",false, 80.1,80.0,54.5,100.0,86.0, 72.8,63.3,100.0,100.0,63.7,74.0,82.0, 57.1,75.7,false], | |
| ["DeepSeek-R1-14B","DeepSeek","#6366f1","open","edge",14.8,14.8,9.5,21.4,"Dense","MIT",false, 71.9,60.0,65.5,70.0,92.0, 51.8,96.7,50.0,50.0,48.8,34.0,46.0, null,59.8,true], | |
| ["Qwen3.5-35B-A3B","Alibaba","#f97316","open","edge",36.0,3.0,20.0,108.7,"MoE","Apache2",true, 82.0,100.0,43.9,100.0,84.0, 70.9,72.0,100.0,100.0,58.5,69.0,88.0, null,75.3,false], | |
| ["Llama-4-Scout","Meta","#0668E1","open","edge",17.0,1.0,10.0,240.5,"MoE","Llama4",true, 80.9,90.0,41.5,100.0,92.0, 69.7,60.0,100.0,90.0,60.8,70.0,82.0, 47.6,74.2,false], | |
| // 𧬠Darwin (Evolutionary Merge β Child > Parents) | |
| ["𧬠Darwin-35B-A3B-Opus","VIDRAFT","#7c3aed","open","darwin",35.0,3.0,18.0,147.8,"MoE+Hybrid","Apache2",true, 83.5,100.0,46.5,100.0,88.0, 72.4,75.3,100.0,100.0,60.2,71.0,90.0, null,76.8,true], | |
| ]; | |
| const WC={nano:{icon:'π₯ ',name:'League One',cls:'j-nano',tier:1},micro:{icon:'β½',name:'La Liga',cls:'j-micro',tier:2}, | |
| light:{icon:'π ',name:'Premier',cls:'j-light',tier:3},edge:{icon:'π',name:'Champions',cls:'j-edge',tier:4}, | |
| darwin:{icon:'π§¬',name:'Darwin',cls:'j-darwin',tier:5}}; | |
| const LC={nano:'#b4b4b4',micro:'#ff6432',light:'#9b59b6',edge:'#ffd700',darwin:'#a78bfa'}; | |
| const SOTA=[{n:'Claude Sonnet 4.6',s:69.9},{n:'Claude Opus 4.6',s:69.3},{n:'GPT-5.4',s:62.4},{n:'DeepSeek V3.2',s:60.3}, | |
| {n:'GPT-OSS-120B',s:60.0},{n:'Qwen3.5-397B',s:57.1},{n:'GPT-5.2',s:56.2},{n:'GPT-5-Nano',s:54.5},{n:'GPT-5.3 Codex',s:53.2},{n:'GPT-5.1',s:49.2}]; | |
| function calcPIR(r){const I=(r[17]||1)/100,H=(r[12]||1)/100,F=Math.min((r[8]||1)/100,1),S=Math.max(r[6]||.1,.05)/20,T=Math.max(r[7]||.1,.05)/16;return Math.round(I*H*F/(S*T)*100*10)/10;} | |
| const _maxPIR=Math.max(...D.map(r=>calcPIR(r)));const _maxLog=Math.log10(_maxPIR); | |
| function calcWCS(r){const shift=r[25]||0;const pir=calcPIR(r);const pn=(Math.log10(Math.max(pir,1))/_maxLog)*100;return Math.round(Math.sqrt(shift*pn)*10)/10;} | |
| function gc(v){return v>=80?'var(--gold)':v>=65?'var(--green)':v>=50?'var(--blue)':'var(--muted)';} | |
| function ledCell(v){if(v==null)return'<span class="na">β</span>';const c=gc(v);return`<div style="text-align:center"><span class="led led-sm" style="color:${c}">${typeof v==='number'&&v%1?v.toFixed(1):v}</span><div class="mbar"><div class="mbar-f" style="width:${Math.min(v,100)}%;background:${c}"></div></div></div>`;} | |
| // βββ TABLE βββ | |
| let sortKey='',sortDir=-1; | |
| function buildTable(){const tb=document.getElementById('TB');tb.innerHTML=''; | |
| D.forEach((r,i)=>{const pir=calcPIR(r);const wcs=calcWCS(r);const wc=WC[r[4]];const tr=document.createElement('tr'); | |
| tr.dataset.group=r[4];tr.dataset.moe=r[11]?'1':'';tr.dataset.thinking=r[26]?'1':'';tr.dataset.name=r[0].toLowerCase(); | |
| const rc=i<3?'r'+(i+1):'';const wcc=i===0?'wcs-1':i===1?'wcs-2':i===2?'wcs-3':wcs>=70?'wcs-good':wcs>=55?'wcs-avg':'wcs-low';const pc=pir>=25?'pir-glow':pir>=15?'pir-good':pir>=8?'pir-avg':'pir-low'; | |
| const medal=i===0?'π':i===1?'π₯':i===2?'π₯':i<8?'β½':i<16?'π―':i<32?'π':''; | |
| tr.innerHTML=`<td><div style="text-align:center"><span class="rank ${rc}">${i+1}</span>${medal?'<div style="font-size:12px;line-height:1">'+medal+'</div>':''}</div></td> | |
| <td class="c-model"><div class="m-name">${r[0]}${r[26]?' <span style="font-size:8px;color:var(--blue)">π§ </span>':''}${getBadges(r)}</div><div class="m-info"><div class="m-dot" style="background:${r[2]}"></div><span class="m-prov">${r[1]} Β· ${r[9]}${r[11]?' MoE':''}</span></div></td> | |
| <td><span class="wcs-mega ${wcc}">${wcs}</span></td> | |
| <td><span class="pir-mega ${pc}" style="font-size:16px">${pir}</span></td> | |
| <td>${ledCell(r[25])}</td><td>${ledCell(r[12])}</td><td>${ledCell(r[17])}</td> | |
| <td>${r[24]!=null?`<span class="led led-sm" style="color:var(--gold)">${r[24].toFixed(1)}</span>`:'<span class="na">β</span>'}</td> | |
| <td><span class="jersey ${wc.cls}">${wc.icon} ${wc.name}</span></td> | |
| <td><span class="led led-sm" style="color:var(--silver)">${r[6]}B</span></td> | |
| <td><span class="led led-sm" style="color:${r[8]>=80?'var(--green)':r[8]>=40?'var(--silver)':'var(--red)'}">${r[8]}</span></td> | |
| <td><span class="led led-sm" style="color:var(--silver)">${r[7]}</span></td> | |
| <td>${ledCell(r[13])}</td><td>${ledCell(r[14])}</td><td>${ledCell(r[15])}</td><td>${ledCell(r[16])}</td> | |
| <td>${ledCell(r[18])}</td><td>${ledCell(r[19])}</td><td>${ledCell(r[20])}</td><td>${ledCell(r[21])}</td><td>${ledCell(r[22])}</td><td>${ledCell(r[23])}</td>`; | |
| tb.appendChild(tr);});} | |
| const SM={name:r=>r[0],wcs:r=>calcWCS(r),pir:r=>calcPIR(r),shift:r=>r[25]||0,honesty:r=>r[12]||0,intelligence:r=>r[17]||0,union:r=>r[24]||-1, | |
| league:r=>(WC[r[4]]||{tier:0}).tier,active:r=>r[6]||0,fast:r=>r[8]||0,thrift:r=>r[7]||0, | |
| h1:r=>r[13]||0,h2:r=>r[14]||0,h3:r=>r[15]||0,h4:r=>r[16]||0, | |
| i1:r=>r[18]||0,i2:r=>r[19]||0,i3:r=>r[20]||0,i4:r=>r[21]||0,i5:r=>r[22]||0,i6:r=>r[23]||0}; | |
| function srt(k){if(sortKey===k)sortDir*=-1;else{sortKey=k;sortDir=-1;}const fn=SM[k]||(r=>0); | |
| D.sort((a,b)=>{const va=fn(a),vb=fn(b);return typeof va==='string'?sortDir*va.localeCompare(vb):sortDir*((va||0)-(vb||0));});buildTable();} | |
| function doSearch(q){q=q.toLowerCase();document.querySelectorAll('#TB tr').forEach(tr=>tr.style.display=tr.dataset.name.includes(q)?'':'none');} | |
| function flt(f,btn){document.querySelectorAll('.filter-bar .fb').forEach(b=>b.classList.remove('on'));btn.classList.add('on'); | |
| document.querySelectorAll('#TB tr').forEach(tr=>{if(f==='all')tr.style.display=''; | |
| else if(f==='moe')tr.style.display=tr.dataset.moe?'':'none'; | |
| else if(f==='thinking')tr.style.display=tr.dataset.thinking?'':'none'; | |
| else tr.style.display=tr.dataset.group===f?'':'none';});} | |
| function showTab(id,el){document.querySelectorAll('.tpane').forEach(p=>p.classList.remove('on'));document.querySelectorAll('.gtab').forEach(t=>t.classList.remove('on')); | |
| document.getElementById(id).classList.add('on');if(el)el.classList.add('on'); | |
| if(id==='shift5'){setTimeout(()=>drawAxis('S',document.querySelector('.stab.on')),50);}if(id==='value'){valueDrawn=false;setTimeout(drawValue,100);}if(id==='sota'){sotaDrawn=false;setTimeout(drawSOTA,50);}if(id==='matchup')setTimeout(drawRadar,50);} | |
| // This replaces the entire script block | |
| // βββ LABEL PLUGIN β shows model names on all charts βββ | |
| const labelPlugin={id:'floatingLabels',afterDraw(chart){ | |
| if(!chart.config.options.plugins?.floatingLabels)return; | |
| const ctx=chart.ctx;ctx.save();ctx.font='500 10px Teko';ctx.textAlign='left'; | |
| const meta=chart.getDatasetMeta(0); | |
| const names=chart.config.options.plugins.floatingLabels.names||[]; | |
| meta.data.forEach((pt,i)=>{if(!names[i])return; | |
| ctx.fillStyle=chart.config.options.plugins.floatingLabels.color||'#a8b8a0'; | |
| ctx.fillText(names[i],pt.x+6,pt.y-4);});ctx.restore();}}; | |
| Chart.register(labelPlugin); | |
| // βββ SHIFT AXES β S/F/T use polarArea, H/I use scatter+labels βββ | |
| let axisChart=null,axisDrawn=false; | |
| function drawAxis(axis,el){ | |
| document.querySelectorAll('.stab').forEach(t=>t.classList.remove('on'));if(el)el.classList.add('on'); | |
| axisDrawn=true; | |
| const canvas=document.getElementById('cAxis'); | |
| const ctx=canvas.getContext('2d'); | |
| if(axisChart){axisChart.destroy();axisChart=null;} | |
| const sorted=[...D].sort((a,b)=>calcPIR(b)-calcPIR(a)); | |
| const names=sorted.map(r=>r[0]); | |
| const det=document.getElementById('axisDetail');det.innerHTML=''; | |
| if(axis==='S'){ | |
| // POLAR AREA: each model = slice, size = inverse of params (smaller = bigger slice = better) | |
| const maxP=Math.max(...sorted.map(r=>r[6])); | |
| const vals=sorted.map(r=>Math.round((1-r[6]/maxP/1.2)*100)); | |
| axisChart=new Chart(ctx,{type:'polarArea', | |
| data:{labels:names,datasets:[{data:vals,backgroundColor:sorted.map(r=>LC[r[4]]+'88'),borderColor:sorted.map(r=>LC[r[4]]),borderWidth:1}]}, | |
| options:{plugins:{legend:{display:false},tooltip:{callbacks:{label:c=>{const r=sorted[c.dataIndex];return`${r[0]}: ${r[6]}B params Β· ${r[7]}GB RAM Β· PIR ${calcPIR(r)}`;}}}, | |
| floatingLabels:{names,color:'#e8f0e0'}}, | |
| scales:{r:{ticks:{display:false},grid:{color:'rgba(255,215,0,.08)'}}},responsive:true}}); | |
| det.innerHTML=`<div class="cpanel"><h3>π¦ SIZE ANALYSIS</h3><p style="color:var(--silver);line-height:2"> | |
| Bigger slice = smaller model = better size efficiency.<br> | |
| ${sorted.map(r=>`<span style="color:${LC[r[4]]}">${WC[r[4]].icon}</span> <b>${r[0]}</b>: ${r[6]}B active params, ${r[5]}B total, ${r[7]}GB RAM`).join('<br>')}</p></div> | |
| <div class="cpanel"><h3>π SIZE CHAMPION</h3><p style="font-size:16px;color:var(--gold);font-family:var(--font-score)">${sorted.filter(r=>r[25]>=60).sort((a,b)=>a[6]-b[6])[0]?.name||sorted[0][0]}</p> | |
| <p style="color:var(--silver)">Smallest model with SHIFT β₯ 60</p></div>`; | |
| }else if(axis==='H'){ | |
| axisChart=new Chart(ctx,{type:'scatter', | |
| data:{datasets:[ | |
| {label:'⬀ H1(Trap) vs H2(Calibration)',data:sorted.map(r=>({x:r[13]||0,y:r[14]||0})),backgroundColor:sorted.map(r=>LC[r[4]]+'cc'),pointRadius:8}, | |
| {label:'β² H3(Refusal) vs H4(Correction)',data:sorted.map(r=>({x:r[15]||0,y:r[16]||0})),backgroundColor:sorted.map(r=>LC[r[4]]+'66'),pointRadius:6,pointStyle:'triangle'}]}, | |
| options:{plugins:{legend:{labels:{color:'#a8b8a0',font:{size:11,family:'Teko'}}}, | |
| tooltip:{callbacks:{label:c=>{const r=sorted[c.dataIndex];return`${r[0]}: (${c.parsed.x}, ${c.parsed.y})`;}}}, | |
| floatingLabels:{names,color:'#a8b8a0'}}, | |
| scales:{x:{title:{display:true,text:'H1 Trap Detection / H3 Refusal β',color:'#a8b8a0',font:{family:'Teko',size:14}},grid:{color:'rgba(255,215,0,.06)'},ticks:{color:'#a8b8a0'},min:25,max:85}, | |
| y:{title:{display:true,text:'H2 Calibration / H4 Self-Correction β',color:'#a8b8a0',font:{family:'Teko',size:14}},grid:{color:'rgba(255,215,0,.06)'},ticks:{color:'#a8b8a0'},min:25,max:80}},responsive:true}}); | |
| det.innerHTML=`<div class="cpanel"><h3>π‘ HONESTY BREAKDOWN</h3><p style="color:var(--silver);line-height:2"> | |
| ⬀ = H1(Trap Detection) vs H2(Confidence Calibration)<br>Ⲡ= H3(Refusal Balance) vs H4(Self-Correction)<br> | |
| Upper-right = most honest. Models near origin = poor honesty.<br><br> | |
| ${sorted.slice(0,5).map(r=>`<b style="color:${LC[r[4]]}">${r[0]}</b>: H1=${r[13]} H2=${r[14]} H3=${r[15]} H4=${r[16]} β Avg <b>${r[12]}</b>`).join('<br>')}</p></div> | |
| <div class="cpanel"><h3>π HONESTY CHAMPION</h3><p style="font-size:16px;color:var(--green);font-family:var(--font-score)">${[...sorted].sort((a,b)=>(b[12]||0)-(a[12]||0))[0][0]}</p> | |
| <p style="color:var(--silver)">Highest combined Honesty score</p></div>`; | |
| }else if(axis==='I'){ | |
| axisChart=new Chart(ctx,{type:'scatter', | |
| data:{datasets:[ | |
| {label:'⬀ I1(Reason) vs I2(Math)',data:sorted.map(r=>({x:r[18]||0,y:r[19]||0})),backgroundColor:sorted.map(r=>LC[r[4]]+'cc'),pointRadius:8}, | |
| {label:'β² I4(Multilingual) vs I6(Meta)',data:sorted.map(r=>({x:r[21]||0,y:r[23]||0})),backgroundColor:sorted.map(r=>LC[r[4]]+'66'),pointRadius:6,pointStyle:'triangle'}]}, | |
| options:{plugins:{legend:{labels:{color:'#a8b8a0',font:{size:11,family:'Teko'}}}, | |
| tooltip:{callbacks:{label:c=>{const r=sorted[c.dataIndex];return`${r[0]}: (${c.parsed.x}, ${c.parsed.y})`;}}}, | |
| floatingLabels:{names,color:'#a8b8a0'}}, | |
| scales:{x:{title:{display:true,text:'I1 Reasoning / I4 Multilingual β',color:'#a8b8a0',font:{family:'Teko',size:14}},grid:{color:'rgba(255,215,0,.06)'},ticks:{color:'#a8b8a0'},min:15,max:90}, | |
| y:{title:{display:true,text:'I2 Math / I6 Metacognition β',color:'#a8b8a0',font:{family:'Teko',size:14}},grid:{color:'rgba(255,215,0,.06)'},ticks:{color:'#a8b8a0'},min:15,max:85}},responsive:true}}); | |
| det.innerHTML=`<div class="cpanel"><h3>π§ INTELLIGENCE BREAKDOWN</h3><p style="color:var(--silver);line-height:2"> | |
| ⬀ = I1(Reasoning) vs I2(Math)<br>Ⲡ= I4(Multilingual 7 langs) vs I6(Metacognition)<br><br> | |
| ${sorted.slice(0,5).map(r=>`<b style="color:${LC[r[4]]}">${r[0]}</b>: Reason=${r[18]} Math=${r[19]} Code=${r[20]} Multi=${r[21]} Know=${r[22]} Meta=${r[23]} β Avg <b>${r[17]}</b>`).join('<br>')}</p></div> | |
| <div class="cpanel"><h3>π INTELLIGENCE CHAMPION</h3><p style="font-size:16px;color:var(--blue);font-family:var(--font-score)">${[...sorted].sort((a,b)=>(b[17]||0)-(a[17]||0))[0][0]}</p> | |
| <p style="color:var(--silver)">Highest combined Intelligence score</p></div>`; | |
| }else if(axis==='F'){ | |
| // POLAR AREA: each model = slice, bigger = faster | |
| const vals=sorted.map(r=>r[8]||0); | |
| axisChart=new Chart(ctx,{type:'polarArea', | |
| data:{labels:names.map((n,i)=>`${n} (${vals[i]}t/s)`),datasets:[{data:vals, | |
| backgroundColor:sorted.map(r=>r[26]?'rgba(255,68,68,.5)':LC[r[4]]+'88'),borderColor:sorted.map(r=>r[26]?'#ff4444':LC[r[4]]),borderWidth:1}]}, | |
| options:{plugins:{legend:{display:false}, | |
| tooltip:{callbacks:{label:c=>{const r=sorted[c.dataIndex];return`${r[0]}: ${r[8]} tok/s Β· ${r[26]?'π§ Thinking':'Standard'} Β· PIR ${calcPIR(r)}`;}}}, | |
| floatingLabels:{names:sorted.map(r=>`${r[0]} ${r[8]}t/s`),color:'#e8f0e0'}}, | |
| scales:{r:{ticks:{display:false},grid:{color:'rgba(255,215,0,.08)'}}},responsive:true}}); | |
| det.innerHTML=`<div class="cpanel"><h3>β‘ SPEED ANALYSIS</h3><p style="color:var(--silver);line-height:2"> | |
| Bigger slice = faster. <span style="color:#ff4444">π΄ Red = Thinking models (slow due to reasoning tokens)</span><br><br> | |
| ${[...sorted].sort((a,b)=>b[8]-a[8]).map(r=>`<span style="color:${r[26]?'#ff4444':LC[r[4]]}">${r[26]?'π§ ':'β‘'}</span> <b>${r[0]}</b>: ${r[8]} tok/s ${r[26]?'(Thinking β effective speed much lower)':''}`).join('<br>')}</p></div> | |
| <div class="cpanel"><h3>π SPEED CHAMPION</h3><p style="font-size:16px;color:var(--green);font-family:var(--font-score)">${[...sorted].sort((a,b)=>b[8]-a[8])[0][0]}</p> | |
| <p style="color:var(--silver)">Fastest raw tok/s (${[...sorted].sort((a,b)=>b[8]-a[8])[0][8]} tok/s)</p></div>`; | |
| }else{ // T | |
| // POLAR AREA: each model = slice, bigger = less RAM = more thrifty | |
| const maxR=Math.max(...sorted.map(r=>r[7])); | |
| const vals=sorted.map(r=>Math.round((1-r[7]/maxR/1.1)*100)); | |
| axisChart=new Chart(ctx,{type:'polarArea', | |
| data:{labels:names.map((n,i)=>`${n} (${sorted[i][7]}GB)`),datasets:[{data:vals, | |
| backgroundColor:sorted.map(r=>LC[r[4]]+'88'),borderColor:sorted.map(r=>LC[r[4]]),borderWidth:1}]}, | |
| options:{plugins:{legend:{display:false}, | |
| tooltip:{callbacks:{label:c=>{const r=sorted[c.dataIndex];return`${r[0]}: ${r[7]}GB RAM Β· ${r[6]}B active Β· PIR ${calcPIR(r)}`;}}}, | |
| floatingLabels:{names:sorted.map(r=>`${r[0]} ${r[7]}GB`),color:'#e8f0e0'}}, | |
| scales:{r:{ticks:{display:false},grid:{color:'rgba(255,215,0,.08)'}}},responsive:true}}); | |
| det.innerHTML=`<div class="cpanel"><h3>πΎ THRIFT ANALYSIS</h3><p style="color:var(--silver);line-height:2"> | |
| Bigger slice = less RAM = more thrifty. MoE models load all expert weights (large footprint) but activate only a fraction per token (low compute).<br><br> | |
| ${[...sorted].sort((a,b)=>a[7]-b[7]).map(r=>`<span style="color:${LC[r[4]]}">${WC[r[4]].icon}</span> <b>${r[0]}</b>: ${r[7]}GB peak RAM Β· ${r[6]}B active Β· ${r[11]?'MoE':'Dense'}`).join('<br>')}</p></div> | |
| <div class="cpanel"><h3>π THRIFT CHAMPION</h3><p style="font-size:16px;color:var(--gold);font-family:var(--font-score)">${[...sorted].sort((a,b)=>a[7]-b[7])[0][0]}</p> | |
| <p style="color:var(--silver)">Least RAM (${[...sorted].sort((a,b)=>a[7]-b[7])[0][7]}GB) with usable performance</p></div>`; | |
| } | |
| } | |
| // βββ VALUE TAB βββ | |
| let valueDrawn=false; | |
| function destroyChart(id){try{const c=Chart.getChart(id);if(c)c.destroy();}catch(e){}} | |
| function drawValue(){ | |
| valueDrawn=true; | |
| destroyChart('cValue');destroyChart('cLeagueRadar');destroyChart('cSpeedEff'); | |
| const sorted=[...D].sort((a,b)=>calcPIR(b)-calcPIR(a)); | |
| // ββ Chart 1: RAM vs SHIFT scatter ββ | |
| const ctx1=document.getElementById('cValue'); | |
| if(!ctx1)return; | |
| const ds1=D.map(r=>({x:r[7],y:r[25]||0})); | |
| const bg1=D.map(r=>LC[r[4]]+'cc'); | |
| const pr1=D.map(r=>Math.max(Math.min(calcPIR(r)/3,18),5)); | |
| const chart1=new Chart(ctx1,{type:'scatter', | |
| data:{datasets:[{data:ds1,backgroundColor:bg1,pointRadius:pr1,pointHoverRadius:pr1.map(v=>v+3)}]}, | |
| options:{ | |
| responsive:true,maintainAspectRatio:false, | |
| layout:{padding:{top:20,right:20}}, | |
| plugins:{legend:{display:false}, | |
| tooltip:{callbacks:{label:function(c){const r=D[c.dataIndex];return r[0]+': SHIFT='+r[25]+' RAM='+r[7]+'GB PIR='+calcPIR(r);}}}}, | |
| scales:{ | |
| x:{reverse:true,title:{display:true,text:'β RAM (GB) β Less = Better',color:'#a8b8a0',font:{family:'Teko',size:15}}, | |
| grid:{color:'rgba(255,215,0,.06)'},ticks:{color:'#a8b8a0',font:{family:'Teko',size:12}}}, | |
| y:{title:{display:true,text:'SHIFT Score β Higher = Better β',color:'#a8b8a0',font:{family:'Teko',size:15}}, | |
| grid:{color:'rgba(255,215,0,.06)'},ticks:{color:'#a8b8a0',font:{family:'Teko',size:12}}} | |
| } | |
| } | |
| }); | |
| // Draw labels on scatter (after render) | |
| setTimeout(function(){ | |
| const meta=chart1.getDatasetMeta(0); | |
| const cvs=ctx1;const cctx=cvs.getContext('2d'); | |
| cctx.save();cctx.font='500 11px Teko'; | |
| meta.data.forEach(function(pt,i){ | |
| cctx.fillStyle=LC[D[i][4]]||'#a8b8a0'; | |
| cctx.fillText(D[i][0],pt.x+8,pt.y-6); | |
| }); | |
| cctx.restore(); | |
| },300); | |
| // Value ranking text | |
| const vr=document.getElementById('valueRank'); | |
| const byValue=[...D].sort((a,b)=>{const va=(a[25]||0)/Math.max(a[7],.1);const vb=(b[25]||0)/Math.max(b[7],.1);return vb-va;}); | |
| vr.innerHTML='<div style="font-family:var(--font-score);font-size:13px;color:var(--gold);margin-bottom:6px;letter-spacing:1px">π VALUE RANKING β SHIFT PER GB</div>'+ | |
| byValue.map(function(r,i){ | |
| const val=((r[25]||0)/Math.max(r[7],.1)).toFixed(1); | |
| const pir=calcPIR(r); | |
| return '<div style="display:flex;gap:8px;align-items:center;padding:4px 0;border-bottom:1px solid rgba(255,215,0,.06)">'+ | |
| '<span style="font-family:var(--font-score);font-size:18px;color:'+(i<3?'var(--gold)':'var(--silver)')+';min-width:24px">'+(i+1)+'</span>'+ | |
| '<span style="color:'+LC[r[4]]+'">'+WC[r[4]].icon+'</span>'+ | |
| '<span style="font-weight:600;font-size:11px;min-width:120px">'+r[0]+'</span>'+ | |
| '<span style="font-family:var(--font-score);font-size:16px;color:var(--gold)">'+val+'</span>'+ | |
| '<span style="font-size:8px;color:var(--muted)">SHIFT/GB</span>'+ | |
| '<span style="font-family:var(--font-score);font-size:12px;color:var(--silver);margin-left:auto">PIR '+pir+'</span>'+ | |
| '<span style="font-size:8px;color:var(--muted)">'+r[7]+'GB</span></div>'; | |
| }).join(''); | |
| // ββ Chart 2: League radar ββ | |
| const ctx2=document.getElementById('cLeagueRadar'); | |
| if(!ctx2)return; | |
| const tops=['nano','micro','light','edge'].map(function(g){ | |
| const m=D.filter(function(r){return r[4]===g;}).sort(function(a,b){return calcPIR(b)-calcPIR(a);}); | |
| return m[0]; | |
| }).filter(Boolean); | |
| new Chart(ctx2,{type:'radar', | |
| data:{labels:['π§ Intel','π‘ Honesty','β‘ Speed','π¦ Size(inv)','πΎ Thrift(inv)'], | |
| datasets:tops.map(function(r){return{ | |
| label:WC[r[4]].icon+' '+r[0], | |
| data:[r[17]||0,r[12]||0,Math.min(r[8]/1.2,100),100-Math.min(r[5]*5,100),100-Math.min(r[7]*7,100)], | |
| backgroundColor:LC[r[4]]+'22',borderColor:LC[r[4]],pointBackgroundColor:LC[r[4]],borderWidth:2};})}, | |
| options:{ | |
| responsive:true,maintainAspectRatio:true, | |
| scales:{r:{beginAtZero:true,max:100,ticks:{display:false,stepSize:25}, | |
| grid:{color:'rgba(255,215,0,.1)'},angleLines:{color:'rgba(255,215,0,.1)'}, | |
| pointLabels:{font:{size:12,family:'Teko'},color:'#a8b8a0'}}}, | |
| plugins:{legend:{position:'bottom',labels:{color:'#a8b8a0',font:{size:10,family:'Teko'},padding:12}}} | |
| } | |
| }); | |
| // ββ Chart 3: Speed efficiency polar ββ | |
| const ctx3=document.getElementById('cSpeedEff'); | |
| if(!ctx3)return; | |
| const eff=[...D].sort(function(a,b){return(b[8]/Math.max(b[7],.1))-(a[8]/Math.max(a[7],.1));}); | |
| new Chart(ctx3,{type:'polarArea', | |
| data:{ | |
| labels:eff.map(function(r){return r[0]+' ('+(r[8]/Math.max(r[7],.1)).toFixed(0)+' t/s/GB)';}), | |
| datasets:[{ | |
| data:eff.map(function(r){return Math.round(r[8]/Math.max(r[7],.1));}), | |
| backgroundColor:eff.map(function(r){return LC[r[4]]+'88';}), | |
| borderColor:eff.map(function(r){return LC[r[4]];}), | |
| borderWidth:1 | |
| }] | |
| }, | |
| options:{ | |
| responsive:true,maintainAspectRatio:true, | |
| plugins:{legend:{display:false}, | |
| tooltip:{callbacks:{label:function(c){var r=eff[c.dataIndex];return r[0]+': '+(r[8]/Math.max(r[7],.1)).toFixed(1)+' tok/s per GB';}}}}, | |
| scales:{r:{ticks:{display:false},grid:{color:'rgba(255,215,0,.1)'}}} | |
| } | |
| }); | |
| } | |
| // βββ SOTA TAB βββ | |
| let sotaDrawn=false; | |
| function drawSOTA(){sotaDrawn=true;destroyChart('cSOTA'); | |
| const smolPts=D.map(r=>({x:r[7],y:r[24]||r[25]*0.7,name:r[0],g:r[4],real:r[24]!=null})); | |
| const sotaPts=SOTA.map(s=>({x:20+Math.random()*30,y:s.s,name:s.n})); | |
| new Chart(document.getElementById('cSOTA'),{type:'scatter', | |
| data:{datasets:[ | |
| {label:'π΄ Frontier Giants',data:sotaPts.map(p=>({x:p.x,y:p.y})),backgroundColor:'rgba(255,68,68,.5)',pointRadius:10,pointStyle:'rectRot'}, | |
| {label:'β½ Smol Challengers',data:smolPts.map(p=>({x:p.x,y:p.y})),backgroundColor:smolPts.map(p=>LC[p.g]+'aa'),pointRadius:smolPts.map(p=>p.real?10:6)}]}, | |
| options:{plugins:{legend:{labels:{color:'#a8b8a0',font:{size:12,family:'Teko'}}}, | |
| tooltip:{callbacks:{label:c=>{const ds=c.datasetIndex;if(ds===0)return sotaPts[c.dataIndex].name+': '+c.parsed.y; | |
| const p=smolPts[c.dataIndex];return p.name+': '+c.parsed.y.toFixed(1)+(p.real?'':'(est)');}}}}, | |
| scales:{x:{title:{display:true,text:'Resource Tier (RAM GB) β',color:'#a8b8a0',font:{family:'Teko',size:14}},grid:{color:'rgba(255,215,0,.06)'},ticks:{color:'#a8b8a0'}}, | |
| y:{title:{display:true,text:'Union Eval Score β',color:'#a8b8a0',font:{family:'Teko',size:14}},grid:{color:'rgba(255,215,0,.06)'},ticks:{color:'#a8b8a0'},min:20,max:75}},responsive:true}}); | |
| const best=smolPts.sort((a,b)=>b.y-a.y)[0];const pct=Math.round(best.y/69.9*100); | |
| document.getElementById('sotaInsight').innerHTML=`<div style="font-family:var(--font-score);font-size:13px;color:var(--gold);margin-bottom:4px">π₯ GIANT KILLING HIGHLIGHT</div> | |
| <div style="font-size:10px;color:var(--silver);line-height:1.8">Top challenger <b style="color:var(--gold)">${best.name}</b> reaches <b style="color:var(--gold)">${pct}%</b> of SOTA ceiling (Claude Sonnet 69.9). | |
| ${best.real?'β Union Eval confirmed':'β οΈ Estimated from SHIFT score (Γ0.7)'}. Using only <b>${D.find(r=>r[0]===best.name)?.[7]||'?'}GB</b> RAM.</div>`; | |
| } | |
| // βββ MATCHUP βββ | |
| let radarChart=null; | |
| function drawRadar(){const a=D.find(r=>r[0]===document.getElementById('radarA').value),b=D.find(r=>r[0]===document.getElementById('radarB').value); | |
| if(!a||!b)return;const ctx=document.getElementById('cRadar').getContext('2d');if(radarChart)radarChart.destroy(); | |
| radarChart=new Chart(ctx,{type:'radar',data:{labels:['π§ Intelligence','π‘ Honesty','β‘ Speed','π¦ Size(inv)','πΎ Thrift(inv)'], | |
| datasets:[{label:a[0],data:[a[17]||0,a[12]||0,Math.min(a[8]/1.2,100),100-Math.min(a[5]*5,100),100-Math.min(a[7]*7,100)], | |
| backgroundColor:'rgba(68,170,255,.15)',borderColor:'#44aaff',pointBackgroundColor:'#44aaff',borderWidth:2}, | |
| {label:b[0],data:[b[17]||0,b[12]||0,Math.min(b[8]/1.2,100),100-Math.min(b[5]*5,100),100-Math.min(b[7]*7,100)], | |
| backgroundColor:'rgba(255,68,68,.15)',borderColor:'#ff4444',pointBackgroundColor:'#ff4444',borderWidth:2}]}, | |
| options:{scales:{r:{beginAtZero:true,max:100,ticks:{display:false,stepSize:25},grid:{color:'rgba(255,215,0,.08)'},angleLines:{color:'rgba(255,215,0,.08)'}, | |
| pointLabels:{font:{size:12,family:'Teko',weight:'500'},color:'#a8b8a0'}}}, | |
| plugins:{legend:{labels:{color:'#a8b8a0',font:{size:12,family:'Teko'}}}},responsive:true}});} | |
| // βββ INIT βββ | |
| srt('wcs'); | |
| const sA=document.getElementById('radarA'),sB=document.getElementById('radarB'); | |
| D.forEach((r,i)=>{sA.innerHTML+=`<option${i===0?' selected':''}>${r[0]}</option>`;sB.innerHTML+=`<option${i===1?' selected':''}>${r[0]}</option>`;}); | |
| // βββ TOOLTIP βββ | |
| const ttEl=document.getElementById('tooltip'); | |
| document.querySelectorAll('th[data-tip]').forEach(th=>{ | |
| th.addEventListener('mouseenter',function(e){ | |
| ttEl.innerHTML=this.getAttribute('data-tip'); | |
| ttEl.classList.add('show'); | |
| const r=this.getBoundingClientRect(); | |
| ttEl.style.left=Math.min(r.left+r.width/2-ttEl.offsetWidth/2, window.innerWidth-ttEl.offsetWidth-10)+'px'; | |
| ttEl.style.top=(r.top-ttEl.offsetHeight-8)+'px'; | |
| }); | |
| th.addEventListener('mouseleave',function(){ttEl.classList.remove('show');}); | |
| }); | |
| // βββ INSIGHTS TAB βββ | |
| function showIns(id,el){document.querySelectorAll('.ins-sub').forEach(d=>d.style.display='none'); | |
| document.getElementById(id).style.display=''; | |
| document.querySelectorAll('#insTabs div').forEach(d=>d.classList.remove('on'));el.classList.add('on');} | |
| // βββ BADGES βββ | |
| function getBadges(r){ | |
| let b='';const n=r[0]; | |
| // Precompute all PIRs to find rankings | |
| const allWCS=D.map(x=>({n:x[0],w:calcWCS(x)})).sort((a,b)=>b.w-a.w); | |
| const allPIR=D.map(x=>({n:x[0],p:calcPIR(x)})).sort((a,b)=>b.p-a.p); | |
| const allSHIFT=D.map(x=>({n:x[0],s:x[25]})).sort((a,b)=>b.s-a.s); | |
| const allSpeed=D.map(x=>({n:x[0],t:x[8]})).sort((a,b)=>b.t-a.t); | |
| const wcsRank=allWCS.findIndex(x=>x.n===n); | |
| const pirRank=allPIR.findIndex(x=>x.n===n); | |
| const shiftRank=allSHIFT.findIndex(x=>x.n===n); | |
| const speedRank=allSpeed.findIndex(x=>x.n===n); | |
| if(wcsRank===0) b+='<span class="badge badge-best" title="π #1 WorldCup Score = best quality x efficiency">π #1 WCS</span>'; | |
| if(shiftRank===0) b+='<span class="badge badge-best" title="β Highest SHIFT composite score">β #1 SHIFT</span>'; | |
| if(pirRank===0) b+='<span class="badge badge-value" title="π Highest PIR β most efficient pound-for-pound">π₯ #1 PIR</span>'; | |
| else if(pirRank<3) b+='<span class="badge badge-value" title="Top 3 PIR efficiency">π₯ Top3</span>'; | |
| if(speedRank===0) b+='<span class="badge badge-fast" title="β‘ Fastest model (highest tok/s)">β‘ Fastest</span>'; | |
| if(r[12]>=85) b+='<span class="badge badge-brain" title="π‘ Honesty score β₯85 β exceptional hallucination resistance">π‘ Honest</span>'; | |
| if(r[24]!=null&&r[24]>=55) b+='<span class="badge badge-phone" title="π Union β₯55 β over 78% of SOTA frontier models">π SOTA78%+</span>'; | |
| if(r[13]<=30) b+='<span class="badge badge-warn" title="β H1 Hallucination trap β€30 β high fabrication risk">β H1 Risk</span>'; | |
| return b; | |
| } | |
| </script> | |
| </body> | |
| </html> |