Spaces:
Running
Running
<!doctype html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <title>LLM Explorer 2026 — Size · Speed · Benchmarks · Price</title>
  <!-- Plotly is loaded synchronously on purpose: the inline script at the end of the body calls Plotly as soon as it runs. -->
  <script src="https://cdnjs.cloudflare.com/ajax/libs/plotly.js/2.26.0/plotly.min.js"></script>
  <!-- Open the font-host connections early so the stylesheet and the font files do not wait on DNS/TLS. -->
  <link rel="preconnect" href="https://fonts.googleapis.com">
  <link rel="preconnect" href="https://fonts.gstatic.com" crossorigin>
  <link href="https://fonts.googleapis.com/css2?family=Space+Grotesk:wght@300;400;500;600&amp;family=JetBrains+Mono:wght@400;500&amp;display=swap" rel="stylesheet">
<style>
/* Reset: border-box sizing everywhere, no default margins or padding. */
*, *::before, *::after { box-sizing: border-box; margin: 0; padding: 0; }

/* Design tokens: page/surface backgrounds, borders, text colours and accent colours. */
:root {
  --bg: #0c0d10; --surface: #13151a; --surface2: #1c1f27;
  --border: #2a2d38; --border2: #383c4a;
  --text: #e8eaf0; --muted: #7a7f96;
  --amber: #f5a623; --cyan: #3ecfcf; --green: #3ecf8e; --red: #f56565;
}

/* Page shell: dark background, UI font, no horizontal scrolling. */
body { background: var(--bg); color: var(--text); font-family: 'Space Grotesk', sans-serif; min-height: 100vh; overflow-x: hidden; }

/* Utility class for numeric / tabular text. */
.mono { font-family: 'JetBrains Mono', monospace; }
/* ---- Page header: logo on the left, badges on the right ---- */
header { border-bottom: 1px solid var(--border); padding: 18px 28px; display: flex; align-items: center; justify-content: space-between; gap: 20px; }
.logo { display: flex; align-items: center; gap: 12px; }
.logo-icon { width: 34px; height: 34px; background: linear-gradient(135deg, var(--amber), #e07b00); border-radius: 8px; display: flex; align-items: center; justify-content: center; font-size: 17px; flex-shrink: 0; }
.logo-text h1 { font-size: 17px; font-weight: 600; letter-spacing: -.3px; }
.logo-text p { font-size: 11px; color: var(--muted); margin-top: 1px; }
.header-meta { display: flex; gap: 12px; align-items: center; flex-wrap: wrap; }

/* Badges: small monospace pills; the "new" variant is tinted green. */
.badge { background: var(--surface2); border: 1px solid var(--border); border-radius: 5px; padding: 4px 9px; font-size: 11px; color: var(--muted); font-family: 'JetBrains Mono', monospace; white-space: nowrap; }
.badge strong { color: var(--amber); font-weight: 500; }
.badge.new { border-color: rgba(62,207,142,.35); color: var(--green); }
.badge.new strong { color: var(--green); }
/* ---- Headline statistics strip: equal-width cells, scrolls sideways when cramped ---- */
.stat-strip { display: flex; border-bottom: 1px solid var(--border); overflow-x: auto; }
.stat-item { flex: 1; min-width: 128px; padding: 12px 18px; border-right: 1px solid var(--border); }
.stat-item:last-child { border-right: none; }
.stat-label { font-size: 10px; text-transform: uppercase; letter-spacing: .8px; color: var(--muted); margin-bottom: 3px; }
.stat-value { font-size: 20px; font-weight: 600; font-family: 'JetBrains Mono', monospace; color: var(--amber); }
.stat-sub { font-size: 10px; color: var(--muted); margin-top: 2px; }
/* ---- Main area: fixed-width sidebar next to the content pane ---- */
.main { display: flex; height: calc(100vh - 134px); min-height: 560px; }
.sidebar { width: 212px; min-width: 212px; border-right: 1px solid var(--border); display: flex; flex-direction: column; overflow-y: auto; }
.sidebar-section { padding: 13px; border-bottom: 1px solid var(--border); }
.sidebar-title { font-size: 10px; text-transform: uppercase; letter-spacing: 1px; color: var(--muted); margin-bottom: 8px; }

/* View switcher entries. */
.nav-item { display: flex; align-items: center; gap: 9px; padding: 7px 9px; border-radius: 6px; cursor: pointer; font-size: 13px; color: var(--muted); transition: all .15s; border: 1px solid transparent; margin-bottom: 2px; user-select: none; }
.nav-item:hover { background: var(--surface2); color: var(--text); }
.nav-item.active { background: rgba(245,166,35,.12); color: var(--amber); border-color: rgba(245,166,35,.25); }
.nav-dot { width: 8px; height: 8px; border-radius: 50%; flex-shrink: 0; }

/* Provider filter rows: checkbox, colour dot, name, model count. */
.filter-item { display: flex; align-items: center; gap: 7px; padding: 4px; cursor: pointer; font-size: 12px; color: var(--text); border-radius: 4px; user-select: none; }
.filter-item:hover { background: var(--surface2); }
.filter-dot { width: 9px; height: 9px; border-radius: 50%; flex-shrink: 0; }
.filter-check { width: 13px; height: 13px; border: 1px solid var(--border2); border-radius: 3px; background: var(--surface2); display: flex; align-items: center; justify-content: center; flex-shrink: 0; font-size: 8px; }
.filter-check.checked { background: var(--amber); border-color: var(--amber); color: #000; }
.filter-count { margin-left: auto; font-size: 10px; color: var(--muted); font-family: 'JetBrains Mono', monospace; }

/* Segmented toggles (price type, model type). */
.toggle-group { display: flex; gap: 3px; }
.toggle-btn { flex: 1; padding: 5px 0; font-size: 11px; border: 1px solid var(--border); border-radius: 5px; background: var(--surface2); color: var(--muted); cursor: pointer; text-align: center; transition: all .15s; font-family: 'Space Grotesk', sans-serif; }
.toggle-btn:hover { border-color: var(--border2); color: var(--text); }
.toggle-btn.active { background: rgba(245,166,35,.12); color: var(--amber); border-color: rgba(245,166,35,.35); }

/* Benchmark picker buttons with a one-line description underneath. */
.bench-btn { display: block; width: 100%; padding: 6px 9px; font-size: 12px; border: 1px solid transparent; border-radius: 5px; background: none; color: var(--muted); cursor: pointer; text-align: left; transition: all .15s; font-family: 'Space Grotesk', sans-serif; margin-bottom: 2px; }
.bench-btn:hover { background: var(--surface2); color: var(--text); }
.bench-btn.active { background: rgba(245,166,35,.12); color: var(--amber); border-color: rgba(245,166,35,.25); }
.bench-sub { font-size: 9px; color: var(--muted); margin-top: 1px; }
/* ---- Content pane: only the view carrying .active is displayed ---- */
.content { flex: 1; display: flex; flex-direction: column; overflow: hidden; }
.view { display: none; flex-direction: column; height: 100%; }
.view.active { display: flex; }

/* Chart views: fixed header, chart fills the remaining height. */
.chart-header { padding: 13px 22px 10px; border-bottom: 1px solid var(--border); flex-shrink: 0; }
.chart-title { font-size: 13px; font-weight: 500; }
.chart-sub { font-size: 11px; color: var(--muted); margin-top: 2px; }
.chart-wrap { flex: 1; padding: 6px 10px 12px; overflow: hidden; }

/* The table view is toggled the same way as the chart views. */
.table-view { display: none; flex-direction: column; height: 100%; }
.table-view.active { display: flex; }
/* ---- "All Models" table: controls row, sticky header, monospace cells ---- */
.table-controls { padding: 10px 18px; border-bottom: 1px solid var(--border); display: flex; gap: 8px; align-items: center; flex-shrink: 0; flex-wrap: wrap; }
.search-box { background: var(--surface2); border: 1px solid var(--border); border-radius: 6px; padding: 6px 10px; font-size: 13px; color: var(--text); font-family: 'Space Grotesk', sans-serif; width: 210px; outline: none; transition: border-color .15s; }
/* The amber border replaces the focus outline removed above. */
.search-box:focus { border-color: var(--amber); }
.search-box::placeholder { color: var(--muted); }
.sort-label { font-size: 11px; color: var(--muted); margin-left: 4px; }
.sort-select { background: var(--surface2); border: 1px solid var(--border); border-radius: 6px; padding: 5px 8px; font-size: 11px; color: var(--text); font-family: 'Space Grotesk', sans-serif; outline: none; cursor: pointer; }
.table-wrap { flex: 1; overflow-y: auto; }
table { width: 100%; border-collapse: collapse; font-size: 12px; }

/* Header cells stay pinned while .table-wrap scrolls; .sorted marks the active sort column. */
thead th { position: sticky; top: 0; background: var(--surface); border-bottom: 1px solid var(--border2); padding: 9px 11px; text-align: left; font-size: 9px; text-transform: uppercase; letter-spacing: .7px; color: var(--muted); font-weight: 500; cursor: pointer; user-select: none; white-space: nowrap; }
thead th:hover { color: var(--text); }
thead th.sorted { color: var(--amber); }
tbody tr { border-bottom: 1px solid var(--border); transition: background .1s; }
tbody tr:hover { background: var(--surface2); }
tbody td { padding: 7px 11px; white-space: nowrap; font-family: 'JetBrains Mono', monospace; font-size: 11px; }

/* Per-column cell variants. */
.td-name { font-family: 'Space Grotesk', sans-serif; font-size: 12px; font-weight: 500; }
.td-provider { font-family: 'Space Grotesk', sans-serif; font-size: 10px; }
.td-open { color: var(--green); font-size: 9px; background: rgba(62,207,142,.1); padding: 2px 5px; border-radius: 3px; }
.td-closed { color: var(--cyan); font-size: 9px; background: rgba(62,207,207,.1); padding: 2px 5px; border-radius: 3px; }
.td-moe { color: var(--amber); font-size: 9px; background: rgba(245,166,35,.1); padding: 2px 5px; border-radius: 3px; margin-left: 3px; }
/* Annotation next to the parameter count (the † estimate marker and the active-parameter note).
   The table renderer emits this class, but it previously had no rule and rendered at full weight. */
.td-est { color: var(--muted); font-size: 9px; }
.td-score { color: var(--amber); }
.score-bar { display: inline-block; height: 3px; background: var(--amber); border-radius: 2px; margin-left: 4px; vertical-align: middle; opacity: .4; }

/* Footnote under the efficiency chart. */
.eff-note { padding: 8px 22px; font-size: 11px; color: var(--muted); border-top: 1px solid var(--border); flex-shrink: 0; }
/* ---- Thin dark scrollbars (WebKit-prefixed pseudo-elements) ---- */
::-webkit-scrollbar { width: 5px; height: 5px; }
::-webkit-scrollbar-track { background: var(--surface); }
::-webkit-scrollbar-thumb { background: var(--border2); border-radius: 3px; }
::-webkit-scrollbar-thumb:hover { background: var(--muted); }

/* ---- Narrow screens: slimmer sidebar, tighter stat cells, header badges hidden ---- */
@media (max-width: 768px) {
  .sidebar { width: 172px; min-width: 172px; }
  .stat-item { min-width: 100px; padding: 9px 12px; }
  .stat-value { font-size: 17px; }
  .header-meta { display: none; }
}
</style>
</head>
<body>
<!-- Page header: logo and title on the left, summary badges on the right. -->
<header>
  <div class="logo">
    <!-- The emoji is purely decorative, so it is hidden from assistive technology. -->
    <div class="logo-icon" aria-hidden="true">🧠</div>
    <div class="logo-text">
      <h1>LLM Explorer 2026</h1>
      <p>Size · Speed · Benchmarks · Price</p>
    </div>
  </div>
  <div class="header-meta">
    <!-- #model-count is overwritten by the table renderer with the number of rows it shows. -->
    <div class="badge"><strong id="model-count">36</strong> models</div>
    <div class="badge"><strong>13</strong> providers</div>
    <div class="badge new">Updated <strong>May 2026</strong></div>
    <div class="badge">MMLU saturated — now featuring GPQA Diamond &amp; SWE-bench</div>
  </div>
</header>
<!-- Headline statistics: one cell per record, each with a label, a value and the model holding it. -->
<div class="stat-strip">
  <div class="stat-item">
    <div class="stat-label">Best GPQA Diamond</div>
    <div class="stat-value">94.3%</div>
    <div class="stat-sub">Gemini 3.1 Pro</div>
  </div>

  <div class="stat-item">
    <div class="stat-label">Best SWE-bench</div>
    <div class="stat-value">87.6%</div>
    <div class="stat-sub">Claude Opus 4.7</div>
  </div>

  <div class="stat-item">
    <div class="stat-label">Fastest API</div>
    <div class="stat-value">400 t/s</div>
    <div class="stat-sub">Gemini 2.5 Flash-Lite</div>
  </div>

  <div class="stat-item">
    <div class="stat-label">Cheapest Input</div>
    <div class="stat-value">$0.02</div>
    <div class="stat-sub">per 1M tokens (Qwen3.5 2B)</div>
  </div>

  <div class="stat-item">
    <div class="stat-label">Largest Context</div>
    <div class="stat-value">10M tok</div>
    <div class="stat-sub">Llama 4 Scout</div>
  </div>
</div>
<div class="main">
  <!-- Sidebar: view switcher plus the filters shared by every view. -->
  <nav class="sidebar" aria-label="Views and filters">
    <div class="sidebar-section">
      <div class="sidebar-title">Views</div>
      <!-- The view entries are styled div elements, so each one gets a button role, a tab stop
           and Enter/Space activation to stay operable from the keyboard. -->
      <div class="nav-item active" role="button" tabindex="0" onclick="setView('benchmarks',this)" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click();}"><div class="nav-dot" style="background:#f5a623"></div>Benchmarks</div>
      <div class="nav-item" role="button" tabindex="0" onclick="setView('speed',this)" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click();}"><div class="nav-dot" style="background:#3ecfcf"></div>Speed</div>
      <div class="nav-item" role="button" tabindex="0" onclick="setView('price',this)" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click();}"><div class="nav-dot" style="background:#3ecf8e"></div>Price</div>
      <div class="nav-item" role="button" tabindex="0" onclick="setView('efficiency',this)" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click();}"><div class="nav-dot" style="background:#a78bfa"></div>Efficiency</div>
      <div class="nav-item" role="button" tabindex="0" onclick="setView('table',this)" onkeydown="if(event.key==='Enter'||event.key===' '){event.preventDefault();this.click();}"><div class="nav-dot" style="background:#7a7f96"></div>All Models</div>
    </div>

    <!-- Benchmark picker: only shown while the Benchmarks view is active. -->
    <div class="sidebar-section" id="bench-section">
      <div class="sidebar-title">Benchmark</div>
      <button type="button" class="bench-btn active" onclick="setBench('mmlu',this)">MMLU<div class="bench-sub">General knowledge (57 subjects)</div></button>
      <button type="button" class="bench-btn" onclick="setBench('gpqa',this)">GPQA Diamond<div class="bench-sub">PhD-level science questions</div></button>
      <button type="button" class="bench-btn" onclick="setBench('sweben',this)">SWE-bench Verified<div class="bench-sub">Real GitHub issue resolution</div></button>
      <button type="button" class="bench-btn" onclick="setBench('aime',this)">AIME 2025<div class="bench-sub">Math olympiad problems</div></button>
    </div>

    <!-- Price-type toggle: hidden until the Price view is selected. -->
    <div class="sidebar-section" id="price-section" style="display:none">
      <div class="sidebar-title">Price Type</div>
      <div class="toggle-group">
        <button type="button" class="toggle-btn active" onclick="setPriceType('input',this)">Input</button>
        <button type="button" class="toggle-btn" onclick="setPriceType('output',this)">Output</button>
      </div>
    </div>

    <!-- Open/closed-weights filter; the ids are used by the script to move the active state. -->
    <div class="sidebar-section">
      <div class="sidebar-title">Model Type</div>
      <div class="toggle-group">
        <button type="button" class="toggle-btn active" id="type-all" onclick="setTypeFilter('all')">All</button>
        <button type="button" class="toggle-btn" id="type-open" onclick="setTypeFilter('open')">Open</button>
        <button type="button" class="toggle-btn" id="type-closed" onclick="setTypeFilter('closed')">Closed</button>
      </div>
    </div>

    <!-- Provider checkboxes are generated by the script into #provider-filters. -->
    <div class="sidebar-section">
      <div class="sidebar-title">Providers</div>
      <div id="provider-filters"></div>
    </div>
  </nav>
<div class="content">
  <!-- Benchmarks view (shown on load): parameter count against the benchmark picked in the sidebar.
       The initial title matches the text the script writes when MMLU is selected, so the heading
       does not change wording the first time the user returns to MMLU. -->
  <div class="view active" id="view-benchmarks">
    <div class="chart-header">
      <div class="chart-title" id="bench-chart-title">Model Parameters vs MMLU Score (general knowledge)</div>
      <div class="chart-sub">Log x-axis · ◆ = MoE model · † = estimated params for closed models · hover for full details</div>
    </div>
    <div class="chart-wrap"><div id="chart-benchmarks" style="width:100%;height:100%"></div></div>
  </div>
| <div class="view" id="view-speed"> | |
| <div class="chart-header"> | |
| <div class="chart-title">Model Parameters vs Generation Speed</div> | |
| <div class="chart-sub">Tokens/second via provider API · Source: Artificial Analysis · ◆ = MoE · Log x-axis</div> | |
| </div> | |
| <div class="chart-wrap"><div id="chart-speed" style="width:100%;height:100%"></div></div> | |
| </div> | |
| <div class="view" id="view-price"> | |
| <div class="chart-header"> | |
| <div class="chart-title" id="price-chart-title">Model Parameters vs Input Price ($/1M tokens)</div> | |
| <div class="chart-sub">Log-log scale · Open-source prices via hosted APIs (Together, Fireworks, etc.)</div> | |
| </div> | |
| <div class="chart-wrap"><div id="chart-price" style="width:100%;height:100%"></div></div> | |
| </div> | |
| <div class="view" id="view-efficiency"> | |
| <div class="chart-header"> | |
| <div class="chart-title">Performance vs Price — Efficiency Frontier</div> | |
| <div class="chart-sub">GPQA Diamond score vs input price · Bubble size ∝ total parameters · Upper-left = best value</div> | |
| </div> | |
| <div class="chart-wrap"><div id="chart-efficiency" style="width:100%;height:100%"></div></div> | |
| <div class="eff-note">⬆ Upper-left = high performance + low cost. Bubble size ∝ total parameters. ◆ diamonds = MoE architectures (active params far smaller than total).</div> | |
| </div> | |
| <div class="table-view" id="view-table"> | |
| <div class="table-controls"> | |
| <input class="search-box" type="text" id="search-input" placeholder="Search models or providers…" oninput="renderTable()"> | |
| <span class="sort-label">Sort by</span> | |
| <select class="sort-select" id="sort-col" onchange="renderTable()"> | |
| <option value="gpqa">GPQA Diamond</option> | |
| <option value="sweben">SWE-bench</option> | |
| <option value="aime">AIME 2025</option> | |
| <option value="mmlu">MMLU</option> | |
| <option value="params">Params</option> | |
| <option value="speed">Speed</option> | |
| <option value="input_price">Input Price</option> | |
| <option value="output_price">Output Price</option> | |
| <option value="name">Name</option> | |
| </select> | |
| </div> | |
| <div class="table-wrap"> | |
| <table> | |
| <thead><tr> | |
| <th onclick="sortTable('name')">Model</th> | |
| <th onclick="sortTable('provider')">Provider</th> | |
| <th onclick="sortTable('type')">Type</th> | |
| <th onclick="sortTable('params')">Params (B)</th> | |
| <th onclick="sortTable('mmlu')">MMLU</th> | |
| <th onclick="sortTable('gpqa')" class="sorted">GPQA ◆</th> | |
| <th onclick="sortTable('sweben')">SWE-bench</th> | |
| <th onclick="sortTable('aime')">AIME 2025</th> | |
| <th onclick="sortTable('speed')">t/s</th> | |
| <th onclick="sortTable('input_price')">In $/1M</th> | |
| <th onclick="sortTable('output_price')">Out $/1M</th> | |
| <th onclick="sortTable('context')">Ctx (K)</th> | |
| </tr></thead> | |
| <tbody id="table-body"></tbody> | |
| </table> | |
| </div> | |
| </div> | |
| </div> | |
| </div> | |
| <script> | |
// Brand colour per provider, keyed by provider name.
// Insertion order matters: the sidebar filter list is built from Object.keys(PROVIDERS).
const PROVIDERS = {};
[
  ["OpenAI",    "#10b981"],
  ["Anthropic", "#f97316"],
  ["Google",    "#60a5fa"],
  ["Meta",      "#a78bfa"],
  ["Mistral",   "#e879f9"],
  ["Alibaba",   "#fbbf24"],
  ["DeepSeek",  "#f87171"],
  ["Microsoft", "#38bdf8"],
  ["xAI",       "#a3e635"],
  ["Moonshot",  "#c084fc"],
  ["Z.AI",      "#fb923c"],
  ["MiniMax",   "#2dd4bf"],
  ["IBM",       "#93c5fd"]
].forEach(([providerName, color]) => {
  PROVIDERS[providerName] = {color: color};
});
// Model catalogue: one record per model.
//   name, provider          display name and key into PROVIDERS
//   params                  total parameter count in billions; est:true marks an estimate (shown with †)
//   moe, ap                 mixture-of-experts flag and active parameters in billions (ap is set on MoE rows)
//   mmlu, gpqa, sweben, aime  benchmark scores in percent
//   speed                   generation speed in tokens/second
//   input_price, output_price  USD per 1M tokens
//   context                 context window in thousands of tokens
//   type                    "open" or "closed"
// DeepSeek R1 is listed as MoE with 37B active parameters, matching the DeepSeek V3.2 row
// that shares its 671B architecture; it was previously flagged as a dense model.
const MODELS=[
{name:"GPT-5.5", provider:"OpenAI", params:200, est:true, moe:false, mmlu:93.0,gpqa:88.0,sweben:85.0,aime:99.0,speed:20, input_price:5.75, output_price:46.00,context:1000, type:"closed"},
{name:"GPT-5.4", provider:"OpenAI", params:200, est:true, moe:false, mmlu:92.5,gpqa:87.0,sweben:80.0,aime:98.0,speed:25, input_price:2.50, output_price:10.00,context:1000, type:"closed"},
{name:"GPT-5.2", provider:"OpenAI", params:200, est:true, moe:false, mmlu:91.0,gpqa:85.0,sweben:78.0,aime:97.0,speed:35, input_price:1.75, output_price:14.00,context:128, type:"closed"},
{name:"GPT-5 mini", provider:"OpenAI", params:30, est:true, moe:false, mmlu:85.0,gpqa:72.0,sweben:70.0,aime:85.0,speed:100,input_price:0.25, output_price:2.00, context:200, type:"closed"},
{name:"GPT-5 nano", provider:"OpenAI", params:8, est:true, moe:false, mmlu:78.0,gpqa:60.0,sweben:58.0,aime:70.0,speed:160,input_price:0.05, output_price:0.40, context:128, type:"closed"},
{name:"Claude Opus 4.7", provider:"Anthropic", params:200, est:true, moe:false, mmlu:92.0,gpqa:91.3,sweben:87.6,aime:96.0,speed:22, input_price:5.00, output_price:25.00,context:1000, type:"closed"},
{name:"Claude Opus 4.6", provider:"Anthropic", params:200, est:true, moe:false, mmlu:91.0,gpqa:89.0,sweben:80.8,aime:94.0,speed:25, input_price:5.00, output_price:25.00,context:200, type:"closed"},
{name:"Claude Sonnet 4.6", provider:"Anthropic", params:70, est:true, moe:false, mmlu:88.7,gpqa:84.0,sweben:79.6,aime:88.0,speed:55, input_price:3.00, output_price:15.00,context:1000, type:"closed"},
{name:"Claude Haiku 4.5", provider:"Anthropic", params:13, est:true, moe:false, mmlu:80.0,gpqa:68.0,sweben:62.0,aime:72.0,speed:185,input_price:0.25, output_price:1.25, context:200, type:"closed"},
{name:"Gemini 3.1 Pro", provider:"Google", params:200, est:true, moe:false, mmlu:92.0,gpqa:94.3,sweben:80.6,aime:96.0,speed:80, input_price:2.00, output_price:12.00,context:2000, type:"closed"},
{name:"Gemini 3 Flash", provider:"Google", params:30, est:true, moe:false, mmlu:87.0,gpqa:78.0,sweben:78.0,aime:85.0,speed:250,input_price:0.50, output_price:3.00, context:1000, type:"closed"},
{name:"Gemini 2.5 Pro", provider:"Google", params:100, est:true, moe:false, mmlu:89.0,gpqa:88.0,sweben:75.0,aime:92.0,speed:100,input_price:1.25, output_price:10.00,context:2000, type:"closed"},
{name:"Gemini 2.5 Flash", provider:"Google", params:30, est:true, moe:false, mmlu:85.0,gpqa:77.0,sweben:72.0,aime:82.0,speed:347,input_price:0.30, output_price:1.50, context:1000, type:"closed"},
{name:"Gemini 2.5 Flash-Lite",provider:"Google", params:10, est:true, moe:false, mmlu:80.0,gpqa:65.0,sweben:60.0,aime:72.0,speed:400,input_price:0.10, output_price:0.40, context:1000, type:"closed"},
{name:"Gemma 4 31B", provider:"Google", params:31, est:false, moe:false, mmlu:83.0,gpqa:84.3,sweben:70.0,aime:89.2,speed:90, input_price:0.03, output_price:0.09, context:256, type:"open"},
{name:"Gemma 3n E4B", provider:"Google", params:4, est:false, moe:false, mmlu:72.0,gpqa:55.0,sweben:44.0,aime:52.0,speed:300,input_price:0.02, output_price:0.06, context:32, type:"open"},
{name:"Grok 4", provider:"xAI", params:314, est:true, moe:false, mmlu:91.5,gpqa:88.0,sweben:79.0,aime:97.0,speed:30, input_price:3.00, output_price:15.00,context:256, type:"closed"},
{name:"Llama 4 Maverick", provider:"Meta", params:400, est:false, moe:true, ap:17, mmlu:88.0,gpqa:82.3,sweben:73.0,aime:86.0,speed:80, input_price:0.20, output_price:0.80, context:1000, type:"open"},
{name:"Llama 4 Scout", provider:"Meta", params:109, est:false, moe:true, ap:17, mmlu:83.0,gpqa:69.0,sweben:62.0,aime:78.0,speed:200,input_price:0.11, output_price:0.34, context:10000,type:"open"},
{name:"Llama 3.3 70B", provider:"Meta", params:70, est:false, moe:false, mmlu:86.0,gpqa:71.0,sweben:68.0,aime:78.0,speed:50, input_price:0.35, output_price:0.40, context:128, type:"open"},
{name:"DeepSeek V4", provider:"DeepSeek", params:1000,est:false, moe:true, ap:32, mmlu:91.0,gpqa:90.1,sweben:80.6,aime:90.0,speed:40, input_price:0.14, output_price:0.30, context:1000, type:"open"},
{name:"DeepSeek V3.2", provider:"DeepSeek", params:671, est:false, moe:true, ap:37, mmlu:91.0,gpqa:81.0,sweben:78.0,aime:90.0,speed:40, input_price:0.28, output_price:0.42, context:128, type:"open"},
{name:"DeepSeek R1", provider:"DeepSeek", params:671, est:false, moe:true, ap:37, mmlu:90.8,gpqa:83.0,sweben:77.0,aime:97.3,speed:20, input_price:0.55, output_price:2.19, context:128, type:"open"},
{name:"Kimi K2.6", provider:"Moonshot", params:1000,est:false, moe:true, ap:32, mmlu:88.0,gpqa:90.5,sweben:80.2,aime:96.4,speed:45, input_price:0.95, output_price:3.00, context:256, type:"open"},
{name:"GLM-5.1", provider:"Z.AI", params:744, est:false, moe:true, ap:40, mmlu:89.0,gpqa:83.0,sweben:77.8,aime:92.0,speed:55, input_price:0.10, output_price:0.30, context:200, type:"open"},
{name:"MiniMax M2.5", provider:"MiniMax", params:200, est:true, moe:false, mmlu:87.0,gpqa:78.0,sweben:80.2,aime:82.0,speed:60, input_price:0.30, output_price:1.20, context:128, type:"open"},
{name:"Qwen3.5 Plus", provider:"Alibaba", params:235, est:false, moe:true, ap:22, mmlu:90.0,gpqa:85.0,sweben:76.0,aime:93.0,speed:55, input_price:0.50, output_price:1.50, context:128, type:"open"},
{name:"Qwen3.5 32B", provider:"Alibaba", params:32, est:false, moe:false, mmlu:86.0,gpqa:80.0,sweben:72.0,aime:88.0,speed:85, input_price:0.10, output_price:0.30, context:128, type:"open"},
{name:"Qwen3.5 9B", provider:"Alibaba", params:9, est:false, moe:false, mmlu:84.0,gpqa:81.7,sweben:65.0,aime:83.2,speed:130,input_price:0.05, output_price:0.15, context:128, type:"open"},
{name:"Qwen3.5 4B", provider:"Alibaba", params:4, est:false, moe:false, mmlu:80.0,gpqa:72.0,sweben:57.0,aime:74.0,speed:200,input_price:0.03, output_price:0.09, context:128, type:"open"},
{name:"Qwen3.5 2B", provider:"Alibaba", params:2, est:false, moe:false, mmlu:74.0,gpqa:62.0,sweben:48.0,aime:62.0,speed:345,input_price:0.02, output_price:0.06, context:128, type:"open"},
{name:"Mistral Large 3", provider:"Mistral", params:130, est:true, moe:false, mmlu:86.0,gpqa:78.0,sweben:68.0,aime:80.0,speed:50, input_price:2.00, output_price:6.00, context:128, type:"open"},
{name:"Mistral Medium 3.5", provider:"Mistral", params:128, est:true, moe:false, mmlu:84.0,gpqa:75.0,sweben:77.6,aime:78.0,speed:70, input_price:0.40, output_price:1.20, context:128, type:"open"},
{name:"Mistral Small 4", provider:"Mistral", params:22, est:true, moe:false, mmlu:80.0,gpqa:65.0,sweben:58.0,aime:68.0,speed:100,input_price:0.10, output_price:0.30, context:128, type:"open"},
{name:"Phi-4", provider:"Microsoft", params:14, est:false, moe:false, mmlu:84.8,gpqa:72.0,sweben:63.0,aime:75.0,speed:90, input_price:0.07, output_price:0.28, context:16, type:"open"},
{name:"Granite 3.3 8B", provider:"IBM", params:8, est:false, moe:false, mmlu:74.0,gpqa:58.0,sweben:50.0,aime:60.0,speed:387,input_price:0.05, output_price:0.15, context:128, type:"open"}
];
// Mutable UI state read and written by the handlers in this script.
let activeView = 'benchmarks'; // id suffix of the visible view (view-<name> / chart-<name>)
let activeBench = 'mmlu';      // key of the benchmark plotted in the Benchmarks view
let priceType = 'input';       // 'input' or 'output'
let typeFilter = 'all';        // 'all', 'open' or 'closed'
let sortAsc = false;           // table sort direction; false = descending

// Provider name -> visibility flag; every provider starts enabled.
let providerFilter = {};
for (const providerName of Object.keys(PROVIDERS)) {
  providerFilter[providerName] = true;
}
// Y-axis label for each benchmark key.
const BENCH_LABELS = {
  mmlu: 'MMLU (%)',
  gpqa: 'GPQA Diamond (%)',
  sweben: 'SWE-bench Verified (%)',
  aime: 'AIME 2025 (%)'
};

// Chart heading for each benchmark key; every heading shares the same prefix.
const BENCH_TITLES = {};
[
  ['mmlu', 'MMLU Score (general knowledge)'],
  ['gpqa', 'GPQA Diamond (PhD-level science)'],
  ['sweben', 'SWE-bench Verified (real coding tasks)'],
  ['aime', 'AIME 2025 (math olympiad)']
].forEach(([benchKey, subject]) => {
  BENCH_TITLES[benchKey] = 'Model Parameters vs ' + subject;
});
/**
 * Returns the models that pass the current sidebar filters:
 * the open/closed type toggle and the per-provider checkboxes.
 */
function getFiltered() {
  const visible = [];
  for (const model of MODELS) {
    const typeMatches = typeFilter === 'all' || model.type === typeFilter;
    if (typeMatches && providerFilter[model.provider]) {
      visible.push(model);
    }
  }
  return visible;
}
/**
 * Builds the hover text for one model as a string of lines joined with <br>.
 *
 * @param {object} m       model record
 * @param {string} yKey    property of `m` plotted on the y-axis
 * @param {string} yLabel  human-readable name of that property
 * @returns {string} markup for the hover label
 */
function hov(m, yKey, yLabel) {
  // Parameter description: an estimate takes precedence over the MoE breakdown.
  let paramText;
  if (m.est) {
    paramText = `${m.params}B (est.†)`;
  } else if (m.moe) {
    paramText = `${m.params}B total / ${m.ap}B active (MoE)`;
  } else {
    paramText = `${m.params}B`;
  }

  const moeSuffix = m.moe ? ' · MoE' : '';
  const lines = [];
  lines.push(`<b style="color:#f5a623">${m.name}</b>`);
  lines.push(`${m.provider} · ${m.type}${moeSuffix}`);
  lines.push(`Params: ${paramText}`);
  lines.push(`${yLabel}: <b>${m[yKey]}</b>`);
  lines.push(`MMLU ${m.mmlu} · GPQA ${m.gpqa} · SWE ${m.sweben} · AIME ${m.aime}`);
  lines.push(`Speed: ${m.speed} t/s · $${m.input_price}/$${m.output_price} per 1M · Ctx: ${m.context}K`);
  return lines.join('<br>');
}
/**
 * Builds the Plotly layout shared by the scatter charts (transparent background,
 * dark grid, monospace fonts), with parameter count on the x-axis.
 *
 * @param {string} yLabel        y-axis title
 * @param {?number[]} yRange     [min, max] for the y-axis, or null to let Plotly autorange
 * @param {string} [xType='log'] Plotly axis type for the x-axis
 * @returns {object} a fresh layout object that callers may mutate
 */
function blo(yLabel, yRange, xType = 'log') {
  const MUTED = '#7a7f96';
  const MONO = "'JetBrains Mono',monospace";

  // Styling common to both axes; a new object per call so the axes never share state.
  const axisBase = () => ({
    showgrid: true, gridcolor: '#1c1f27', zeroline: false,
    showline: true, linecolor: '#2a2d38',
    tickfont: {color: MUTED, size: 10}
  });
  const axisTitle = text => ({text: text, font: {color: MUTED, size: 11}, standoff: 10});

  // Only advertise a log scale when the axis really is logarithmic.
  const xTitle = 'Parameter Count (Billions)' + (xType === 'log' ? ' — Log Scale' : '');

  return {
    paper_bgcolor: 'rgba(0,0,0,0)',
    plot_bgcolor: 'rgba(0,0,0,0)',
    font: {family: MONO, color: MUTED, size: 11},
    xaxis: Object.assign(axisBase(), {type: xType, title: axisTitle(xTitle)}),
    yaxis: Object.assign(axisBase(), {range: yRange, title: axisTitle(yLabel)}),
    legend: {bgcolor: 'rgba(19,21,26,0.9)', bordercolor: '#2a2d38', borderwidth: 1, font: {color: MUTED, size: 10}},
    margin: {l: 62, r: 18, t: 18, b: 52},
    hoverlabel: {bgcolor: '#13151a', bordercolor: '#383c4a', font: {family: MONO, color: '#e8eaf0', size: 11}},
    hovermode: 'closest',
    showlegend: true
  };
}

// Plotly config shared by every chart: resize with the container, no mode bar.
const CFG = {responsive: true, displayModeBar: false};
/**
 * Groups models by provider and returns one Plotly scatter trace per provider,
 * with parameter count on x and `yKey` on y. MoE models are drawn as larger diamonds.
 *
 * @param {object[]} data   model records to plot
 * @param {string} yKey     property plotted on the y-axis
 * @param {string} yLabel   label for that property in the hover text
 * @returns {object[]} traces in first-seen provider order
 */
function mkTraces(data, yKey, yLabel) {
  const grouped = {};
  for (const model of data) {
    const bucket = grouped[model.provider] || (grouped[model.provider] = []);
    bucket.push(model);
  }

  return Object.keys(grouped).map(provider => {
    const members = grouped[provider];
    return {
      name: provider,
      type: 'scatter',
      mode: 'markers',
      x: members.map(m => m.params),
      y: members.map(m => m[yKey]),
      // One template per point; <extra></extra> suppresses Plotly's secondary hover box.
      hovertemplate: members.map(m => hov(m, yKey, yLabel) + '<extra></extra>'),
      marker: {
        color: PROVIDERS[provider].color,
        size: members.map(m => (m.moe ? 14 : 11)),
        symbol: members.map(m => (m.moe ? 'diamond' : 'circle')),
        line: {color: 'rgba(0,0,0,0.35)', width: 1},
        opacity: 0.9
      }
    };
  });
}
/** Redraws the benchmarks chart for the active benchmark and the current filters. */
function drawBenchmarks() {
  const axisLabel = BENCH_LABELS[activeBench];
  const traces = mkTraces(getFiltered(), activeBench, axisLabel);
  const layout = blo(axisLabel, [20, 100]);
  Plotly.react('chart-benchmarks', traces, layout, CFG);
}
/** Redraws the speed chart (tokens/second against parameter count) for the current filters. */
function drawSpeed() {
  const traces = mkTraces(getFiltered(), 'speed', 'Tokens/sec');
  const layout = blo('Generation Speed (tokens/sec)', [0, 430]);
  Plotly.react('chart-speed', traces, layout, CFG);
}
/** Redraws the price chart on log-log axes for the selected price type (input or output). */
function drawPrice() {
  const showInput = priceType === 'input';
  const priceKey = showInput ? 'input_price' : 'output_price';
  const priceLabel = showInput ? 'Input Price ($/1M tokens)' : 'Output Price ($/1M tokens)';

  // No fixed y range: Plotly autoranges the logarithmic price axis.
  const layout = blo(priceLabel + ' — Log Scale', null);
  layout.yaxis.type = 'log';

  Plotly.react('chart-price', mkTraces(getFiltered(), priceKey, priceLabel), layout, CFG);
}
/**
 * Redraws the efficiency chart: GPQA Diamond score against input price (log x-axis),
 * one labelled bubble per model, grouped into one trace per provider.
 */
function drawEfficiency() {
  const grouped = {};
  for (const model of getFiltered()) {
    const bucket = grouped[model.provider] || (grouped[model.provider] = []);
    bucket.push(model);
  }

  // Labels longer than 16 characters are cut to 14 plus an ellipsis.
  const shortName = name => (name.length > 16 ? name.slice(0, 14) + '…' : name);
  // Bubble diameter grows with log2 of the parameter count, clamped to 9..28 px.
  const bubbleSize = m => Math.max(9, Math.min(28, Math.log2(Math.max(m.params, 1)) * 3));

  const traces = Object.keys(grouped).map(provider => {
    const members = grouped[provider];
    return {
      name: provider,
      type: 'scatter',
      mode: 'markers+text',
      x: members.map(m => m.input_price),
      y: members.map(m => m.gpqa),
      text: members.map(m => shortName(m.name)),
      textposition: 'top center',
      textfont: {color: '#7a7f96', size: 8},
      hovertemplate: members.map(m => hov(m, 'gpqa', 'GPQA Diamond') + '<extra></extra>'),
      marker: {
        color: PROVIDERS[provider].color,
        size: members.map(bubbleSize),
        symbol: members.map(m => (m.moe ? 'diamond' : 'circle')),
        line: {color: 'rgba(0,0,0,0.35)', width: 1},
        opacity: 0.85,
        sizemode: 'diameter'
      }
    };
  });

  // Start from the shared layout, then swap in a price x-axis and a slightly taller top margin.
  const layout = blo('GPQA Diamond (%)', [50, 97], 'log');
  layout.xaxis = {
    type: 'log',
    showgrid: true, gridcolor: '#1c1f27', zeroline: false,
    showline: true, linecolor: '#2a2d38',
    tickfont: {color: '#7a7f96', size: 10},
    title: {text: 'Input Price ($/1M tokens) — Log Scale', font: {color: '#7a7f96', size: 11}, standoff: 10},
    // On a log axis the range is expressed in log10 units.
    range: [Math.log10(0.015), Math.log10(55)]
  };
  layout.margin = {l: 62, r: 18, t: 28, b: 52};

  Plotly.react('chart-efficiency', traces, layout, CFG);
}
/** Redraws the chart of the active view; does nothing while the table view is shown. */
function drawAllCharts() {
  switch (activeView) {
    case 'benchmarks':
      drawBenchmarks();
      break;
    case 'speed':
      drawSpeed();
      break;
    case 'price':
      drawPrice();
      break;
    case 'efficiency':
      drawEfficiency();
      break;
    default:
      break;
  }
}
/**
 * Re-renders the "All Models" table from the current filters, search text and sort column,
 * highlights the header of the active sort column, and writes the visible row count
 * into the header badge.
 */
function renderTable() {
  const query = (document.getElementById('search-input').value || '').toLowerCase();
  const col = document.getElementById('sort-col').value;

  const rows = getFiltered().filter(m =>
    m.name.toLowerCase().includes(query) || m.provider.toLowerCase().includes(query));

  // Strings sort alphabetically, everything else numerically; sortAsc flips the direction.
  rows.sort((a, b) => {
    const av = a[col], bv = b[col];
    if (typeof av === 'string') return sortAsc ? av.localeCompare(bv) : bv.localeCompare(av);
    return sortAsc ? av - bv : bv - av;
  });

  // Move the "sorted" highlight to the header of the active column. Header cells are matched
  // through their sortTable('<col>') click handler, the only place the column key appears in the markup.
  document.querySelectorAll('#view-table thead th').forEach(th => {
    const isSortColumn = th.getAttribute('onclick') === `sortTable('${col}')`;
    th.classList.toggle('sorted', isSortColumn);
    if (isSortColumn) th.setAttribute('aria-sort', sortAsc ? 'ascending' : 'descending');
    else th.removeAttribute('aria-sort');
  });

  document.getElementById('table-body').innerHTML = rows.map(m => {
    const providerColor = PROVIDERS[m.provider].color;

    // Parameter cell: † marks an estimate; MoE rows show the active parameter count.
    let paramCell = m.params;
    if (m.est) paramCell = `${m.params}<span class="td-est">†</span>`;
    else if (m.moe) paramCell = `${m.params}<span class="td-est"> (${m.ap}a)</span>`;

    const typeClass = m.type === 'open' ? 'td-open' : 'td-closed';
    const moeTag = m.moe ? '<span class="td-moe">MoE</span>' : '';

    return [
      '<tr>',
      `<td class="td-name">${m.name}</td>`,
      `<td class="td-provider" style="color:${providerColor}">${m.provider}</td>`,
      `<td><span class="${typeClass}">${m.type}</span>${moeTag}</td>`,
      `<td class="mono">${paramCell}</td>`,
      `<td class="mono">${m.mmlu}</td>`,
      // The bar width scales with the GPQA score (0.44px per point).
      `<td class="td-score">${m.gpqa}<span class="score-bar" style="width:${m.gpqa * 0.44}px"></span></td>`,
      `<td class="mono">${m.sweben}</td>`,
      `<td class="mono">${m.aime}</td>`,
      `<td class="mono">${m.speed}</td>`,
      `<td class="mono">$${m.input_price}</td>`,
      `<td class="mono">$${m.output_price}</td>`,
      `<td class="mono">${m.context}K</td>`,
      '</tr>'
    ].join('');
  }).join('');

  document.getElementById('model-count').textContent = rows.length;
}
/**
 * Header-click handler: clicking the active sort column flips the direction;
 * clicking another column selects it and resets to descending.
 *
 * @param {string} col  model property to sort by
 */
function sortTable(col) {
  const picker = document.getElementById('sort-col');
  if (picker.value === col) {
    sortAsc = !sortAsc;
  } else {
    picker.value = col;
    sortAsc = false;
  }
  renderTable();
}
/**
 * Fills #provider-filters with one checked row per provider:
 * checkbox, colour dot, provider name and number of models from that provider.
 */
function buildProviderFilters() {
  // Count the models of each provider.
  const modelsPerProvider = {};
  for (const model of MODELS) {
    modelsPerProvider[model.provider] = (modelsPerProvider[model.provider] || 0) + 1;
  }

  // The template lines are left unindented so the generated markup is unchanged.
  const rowMarkup = p => `
<div class="filter-item" onclick="toggleProvider('${p}')">
<div class="filter-check checked" id="chk-${p}">✓</div>
<div class="filter-dot" style="background:${PROVIDERS[p].color}"></div>
<span>${p}</span><span class="filter-count">${modelsPerProvider[p]||0}</span>
</div>`;

  const container = document.getElementById('provider-filters');
  container.innerHTML = Object.keys(PROVIDERS).map(rowMarkup).join('');
}
/**
 * Flips the visibility of one provider, updates its checkbox, and refreshes the active view.
 *
 * @param {string} p  provider name (a key of PROVIDERS)
 */
function toggleProvider(p) {
  const enabled = !providerFilter[p];
  providerFilter[p] = enabled;

  const box = document.getElementById(`chk-${p}`);
  box.classList.toggle('checked', enabled);
  box.textContent = enabled ? '✓' : '';

  drawAllCharts();
  if (activeView === 'table') {
    renderTable();
  }
}
/**
 * Switches the visible view and shows only the sidebar section that applies to it.
 *
 * @param {string} v     view name: the element with id "view-<v>" is shown
 * @param {?Element} el  sidebar entry to mark as active, if any
 */
function setView(v, el) {
  activeView = v;

  // Move the active state in the sidebar.
  for (const item of document.querySelectorAll('.nav-item')) {
    item.classList.remove('active');
  }
  if (el) {
    el.classList.add('active');
  }

  // Hide every view, then reveal the requested one.
  for (const pane of document.querySelectorAll('.view,.table-view')) {
    pane.classList.remove('active');
  }
  document.getElementById(`view-${v}`).classList.add('active');

  // The benchmark picker and the price toggle are each tied to a single view.
  const benchSection = document.getElementById('bench-section');
  const priceSection = document.getElementById('price-section');
  benchSection.style.display = v === 'benchmarks' ? '' : 'none';
  priceSection.style.display = v === 'price' ? '' : 'none';

  if (v === 'table') {
    renderTable();
  } else {
    // Deferred so the drawing call runs after this handler has returned.
    setTimeout(drawAllCharts, 0);
  }
}
/**
 * Selects the benchmark plotted in the Benchmarks view and redraws it.
 *
 * @param {string} b     benchmark key (a key of BENCH_TITLES)
 * @param {?Element} el  picker button to mark as active, if any
 */
function setBench(b, el) {
  activeBench = b;

  for (const button of document.querySelectorAll('.bench-btn')) {
    button.classList.remove('active');
  }
  if (el) {
    el.classList.add('active');
  }

  const heading = document.getElementById('bench-chart-title');
  heading.textContent = BENCH_TITLES[b];
  drawBenchmarks();
}
/**
 * Switches the Price view between input and output pricing, updates the heading and redraws.
 *
 * @param {string} t     'input' selects input pricing; any other value selects output pricing
 * @param {?Element} el  toggle to mark as active, if any
 */
function setPriceType(t, el) {
  priceType = t;

  for (const toggle of document.querySelectorAll('#price-section .toggle-btn')) {
    toggle.classList.remove('active');
  }
  if (el) {
    el.classList.add('active');
  }

  let priceLabel;
  if (t === 'input') {
    priceLabel = 'Input Price ($/1M tokens)';
  } else {
    priceLabel = 'Output Price ($/1M tokens)';
  }
  document.getElementById('price-chart-title').textContent = `Model Parameters vs ${priceLabel}`;

  drawPrice();
}
/**
 * Applies the open/closed model-type filter and refreshes the active view.
 *
 * @param {string} t  'all', 'open' or 'closed'
 */
function setTypeFilter(t) {
  typeFilter = t;

  // Exactly one of the three toggles carries the active state.
  for (const option of ['all', 'open', 'closed']) {
    const toggle = document.getElementById(`type-${option}`);
    toggle.classList.toggle('active', option === t);
  }

  drawAllCharts();
  if (activeView === 'table') {
    renderTable();
  }
}
// Initial paint: provider checkboxes first, then the default (benchmarks) chart.
buildProviderFilters();
drawBenchmarks();

// Keep the visible chart fitted to its container when the window is resized.
window.addEventListener('resize', () => {
  if (activeView === 'table') {
    return;
  }
  const chartEl = document.getElementById(`chart-${activeView}`);
  if (chartEl) {
    Plotly.Plots.resize(chartEl);
  }
});
| </script> | |
| </body> | |
| </html> | |