Spaces:
Running
Running
| { | |
| "metadata": { | |
| "version": "1.0.0", | |
| "lastUpdated": "2026-03-16T16:20:45.100745Z", | |
| "title": "Official Benchmarks Leaderboard 2026", | |
| "description": "Unified leaderboard for 12 official Hugging Face benchmarks", | |
| "totalModels": 73, | |
| "totalBenchmarks": 12 | |
| }, | |
| "benchmarks": { | |
| "gsm8k": { | |
| "id": "gsm8k", | |
| "name": "GSM8K", | |
| "shortName": "GSM8K", | |
| "description": "Grade School Math 8K - 8.5K high quality grade school math word problems", | |
| "metric": "Accuracy", | |
| "metricUnit": "%", | |
| "url": "https://huggingface.co/datasets/openai/gsm8k", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/openai/gsm8k", | |
| "officialLeaderboard": "https://huggingface.co/datasets/openai/gsm8k", | |
| "category": "math", | |
| "color": "#d97706", | |
| "isGated": false, | |
| "coverage": 0.85 | |
| }, | |
| "mmluPro": { | |
| "id": "mmluPro", | |
| "name": "MMLU-Pro", | |
| "shortName": "MMLU-Pro", | |
|       "description": "Massive Multi-task Language Understanding - Pro version with 12K+ reasoning-focused questions", | |
| "metric": "Accuracy", | |
| "metricUnit": "%", | |
| "url": "https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro", | |
| "officialLeaderboard": "https://huggingface.co/datasets/TIGER-Lab/MMLU-Pro", | |
| "category": "knowledge", | |
| "color": "#6366f1", | |
| "isGated": false, | |
| "coverage": 0.8 | |
| }, | |
| "gpqa": { | |
| "id": "gpqa", | |
| "name": "GPQA Diamond", | |
| "shortName": "GPQA", | |
| "description": "PhD-level expert questions in biology, physics, and chemistry", | |
| "metric": "Accuracy", | |
| "metricUnit": "%", | |
| "url": "https://huggingface.co/datasets/Idavidrein/gpqa", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/Idavidrein/gpqa", | |
| "officialLeaderboard": null, | |
| "category": "knowledge", | |
| "color": "#6366f1", | |
| "isGated": true, | |
| "coverage": 0.65 | |
| }, | |
| "hle": { | |
| "id": "hle", | |
| "name": "Humanity's Last Exam", | |
| "shortName": "HLE", | |
| "description": "Multi-modal benchmark at the frontier of human knowledge - 2,500 questions", | |
| "metric": "Accuracy", | |
| "metricUnit": "%", | |
| "url": "https://lastexam.ai", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/cais/hle", | |
| "officialLeaderboard": "https://lastexam.ai", | |
| "category": "knowledge", | |
| "color": "#6366f1", | |
| "isGated": true, | |
| "coverage": 0.6 | |
| }, | |
| "olmOcr": { | |
| "id": "olmOcr", | |
| "name": "olmOCR-bench", | |
| "shortName": "olmOCR", | |
| "description": "OCR evaluation with 1,403 PDF files and 7,010 unit test cases", | |
| "metric": "Accuracy", | |
| "metricUnit": "%", | |
| "url": "https://huggingface.co/datasets/allenai/olmOCR-bench", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/allenai/olmOCR-bench", | |
| "officialLeaderboard": "https://huggingface.co/datasets/allenai/olmOCR-bench", | |
| "category": "vision", | |
| "color": "#16a34a", | |
| "isGated": false, | |
| "coverage": 0.45 | |
| }, | |
| "sweVerified": { | |
| "id": "sweVerified", | |
| "name": "SWE-bench Verified", | |
| "shortName": "SWE-V", | |
| "description": "500 human-validated software engineering tasks from GitHub issues", | |
| "metric": "Resolved", | |
| "metricUnit": "%", | |
| "url": "https://www.swebench.com", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/SWE-bench/SWE-bench_Verified", | |
| "officialLeaderboard": "https://www.swebench.com", | |
| "category": "coding", | |
| "color": "#0d9488", | |
| "isGated": false, | |
| "coverage": 0.7 | |
| }, | |
| "arguana": { | |
| "id": "arguana", | |
| "name": "ArguAna (MTEB)", | |
| "shortName": "ArguAna", | |
| "description": "Text retrieval benchmark - argument search (part of MTEB)", | |
| "metric": "nDCG@10", | |
| "metricUnit": "score", | |
| "url": "https://huggingface.co/datasets/mteb/arguana", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/mteb/arguana", | |
| "officialLeaderboard": "https://huggingface.co/datasets/mteb/arguana", | |
| "category": "embedding", | |
| "color": "#7c3aed", | |
| "isGated": false, | |
| "coverage": 0.5 | |
| }, | |
| "swePro": { | |
| "id": "swePro", | |
| "name": "SWE-bench Pro", | |
| "shortName": "SWE-Pro", | |
| "description": "Enterprise-level software engineering tasks - 731 challenging problems", | |
| "metric": "Resolved", | |
| "metricUnit": "%", | |
| "url": "https://scale.com/leaderboard/swe_bench_pro_public", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/ScaleAI/SWE-bench_Pro", | |
| "officialLeaderboard": "https://scale.com/leaderboard/swe_bench_pro_public", | |
| "category": "coding", | |
| "color": "#0d9488", | |
| "isGated": false, | |
| "coverage": 0.55 | |
| }, | |
| "aime2026": { | |
| "id": "aime2026", | |
| "name": "AIME 2026", | |
| "shortName": "AIME", | |
| "description": "American Invitational Mathematics Examination 2026 - 30 problems", | |
| "metric": "Accuracy", | |
| "metricUnit": "%", | |
| "url": "https://matharena.ai/?comp=aime--aime_2026", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/MathArena/aime_2026", | |
| "officialLeaderboard": "https://matharena.ai/?comp=aime--aime_2026", | |
| "category": "math", | |
| "color": "#d97706", | |
| "isGated": false, | |
| "coverage": 0.4 | |
| }, | |
| "terminalBench": { | |
| "id": "terminalBench", | |
| "name": "Terminal-Bench 2.0", | |
| "shortName": "TB 2.0", | |
| "description": "Agentic terminal tasks - containerized evaluation framework", | |
| "metric": "Success Rate", | |
| "metricUnit": "%", | |
| "url": "https://www.tbench.ai/leaderboard/terminal-bench/2.0", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/harborframework/terminal-bench-2.0", | |
| "officialLeaderboard": "https://www.tbench.ai/leaderboard/terminal-bench/2.0", | |
| "category": "agent", | |
| "color": "#0d9488", | |
| "isGated": false, | |
| "coverage": 0.35 | |
| }, | |
| "evasionBench": { | |
| "id": "evasionBench", | |
| "name": "EvasionBench", | |
| "shortName": "EvasionB", | |
| "description": "Detection of evasive language in earnings call Q&A - 16,700+ samples", | |
| "metric": "Accuracy", | |
| "metricUnit": "%", | |
| "url": "https://huggingface.co/datasets/FutureMa/EvasionBench", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/FutureMa/EvasionBench", | |
| "officialLeaderboard": null, | |
| "category": "language", | |
| "color": "#e11d48", | |
| "isGated": false, | |
| "coverage": 0.25 | |
| }, | |
| "hmmt2026": { | |
| "id": "hmmt2026", | |
| "name": "HMMT February 2026", | |
| "shortName": "HMMT", | |
| "description": "Harvard-MIT Math Tournament February 2026 - 33 problems", | |
| "metric": "Accuracy", | |
| "metricUnit": "%", | |
| "url": "https://matharena.ai/?comp=hmmt--hmmt_feb_2026", | |
| "huggingfaceUrl": "https://huggingface.co/datasets/MathArena/hmmt_feb_2026", | |
| "officialLeaderboard": "https://matharena.ai/?comp=hmmt--hmmt_feb_2026", | |
| "category": "math", | |
| "color": "#d97706", | |
| "isGated": false, | |
| "coverage": 0.3 | |
| } | |
| }, | |
| "models": [ | |
| { | |
| "id": "qwen-qwen2-72b", | |
| "name": "Qwen/Qwen2-72B", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 72.7, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gsm8k": { | |
| "score": 89.5, | |
| "confidence": "official", | |
| "source": "GSM8K API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 89.5, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "microsoft-phi-3.5-mini-instruct", | |
| "name": "microsoft/Phi-3.5-mini-instruct", | |
| "provider": "microsoft", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": null, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gsm8k": { | |
| "score": 86.2, | |
| "confidence": "official", | |
| "source": "GSM8K API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 86.2, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1583646260758-5e64858c87403103f9f1055d.png" | |
| }, | |
| { | |
| "id": "deepseek-ai-deepseek-r1-0528", | |
| "name": "deepseek-ai/DeepSeek-R1-0528", | |
| "provider": "deepseek-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 684.5, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 85.0, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 85.0, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3-235b-a22b-thinking-2507", | |
| "name": "Qwen/Qwen3-235B-A22B-Thinking-2507", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 235.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 84.4, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 84.4, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "rednote-dots-ocr-community-dots.ocr-1.5", | |
| "name": "rednote-dots-ocr-community/dots.ocr-1.5", | |
| "provider": "rednote-dots-ocr-community", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 3.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 83.9, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 83.9, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://www.gravatar.com/avatar/dcdda2399a2245a76ca3ee4bb5f6f6cb?d=retro&size=100" | |
| }, | |
| { | |
| "id": "lightonai-lightonocr-2-1b", | |
| "name": "lightonai/LightOnOCR-2-1B", | |
| "provider": "lightonai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 1.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 83.2, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 83.2, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1651597775471-62715572ab9243b5d40cbb1d.png" | |
| }, | |
| { | |
| "id": "datalab-to-chandra", | |
| "name": "datalab-to/chandra", | |
| "provider": "datalab-to", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 8.8, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 83.1, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 83.1, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/67ab6afe315e622f597bf9e8/YOgg0gVYVXZC1PDIHFTWK.png" | |
| }, | |
| { | |
| "id": "infly-infinity-parser-7b", | |
| "name": "infly/Infinity-Parser-7B", | |
| "provider": "infly", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 8.3, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 82.5, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 82.5, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/63ed9862679c2cc40abb55d2/0n6g0jngiKkRjaEoAvPmM.png" | |
| }, | |
| { | |
| "id": "allenai-olmocr-2-7b-1025-fp8", | |
| "name": "allenai/olmOCR-2-7B-1025-FP8", | |
| "provider": "allenai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 8.3, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 82.4, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 82.4, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/652db071b62cf1f8463221e2/CxxwFiaomTa1MCX_B7-pT.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3.5-9b", | |
| "name": "Qwen/Qwen3.5-9B", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 9.7, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 82.5, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 81.7, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 82.1, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "deepseek-ai-deepseek-v3-0324", | |
| "name": "deepseek-ai/DeepSeek-V3-0324", | |
| "provider": "deepseek-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 684.5, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 81.2, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 81.2, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3-next-80b-a3b-instruct", | |
| "name": "Qwen/Qwen3-Next-80B-A3B-Instruct", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 81.3, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 80.6, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 80.6, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "paddlepaddle-paddleocr-vl", | |
| "name": "PaddlePaddle/PaddleOCR-VL", | |
| "provider": "PaddlePaddle", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 1.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 80.0, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 80.0, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1654942635336-5f3ff69679c1ba4c353d0c5a.png" | |
| }, | |
| { | |
| "id": "rednote-hilab-dots.ocr", | |
| "name": "rednote-hilab/dots.ocr", | |
| "provider": "rednote-hilab", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": null, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 79.1, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 79.1, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6807a1d6504547b3554b9c73/WgnnQDsz7FqnyTtv8mmRO.png" | |
| }, | |
| { | |
| "id": "meituan-longcat-longcat-flash-lite", | |
| "name": "meituan-longcat/LongCat-Flash-Lite", | |
| "provider": "meituan-longcat", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": null, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 78.29, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 78.29, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png" | |
| }, | |
| { | |
| "id": "deepseek-ai-deepseek-r1", | |
| "name": "deepseek-ai/DeepSeek-R1", | |
| "provider": "deepseek-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 684.5, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 84.0, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 71.5, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 77.75, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png" | |
| }, | |
| { | |
| "id": "jdopensource-joyai-llm-flash", | |
| "name": "jdopensource/JoyAI-LLM-Flash", | |
| "provider": "jdopensource", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 49.3, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 81.02, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 74.43, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 77.72, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68c0e2ab44ea28a974e3074b/g-4gTubd16qUtwmGZ0n4h.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3.5-4b", | |
| "name": "Qwen/Qwen3.5-4B", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 4.7, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 79.1, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 76.2, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 77.65, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "deepseek-ai-deepseek-v3", | |
| "name": "deepseek-ai/DeepSeek-V3", | |
| "provider": "deepseek-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": null, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gsm8k": { | |
| "score": 89.3, | |
| "confidence": "official", | |
| "source": "GSM8K API", | |
| "date": "2026-03-16" | |
| }, | |
| "mmluPro": { | |
| "score": 64.4, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 76.85, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png" | |
| }, | |
| { | |
| "id": "deepseek-ai-deepseek-ocr-2", | |
| "name": "deepseek-ai/DeepSeek-OCR-2", | |
| "provider": "deepseek-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": null, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 76.3, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 76.3, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png" | |
| }, | |
| { | |
| "id": "lightonai-lightonocr-1b-1025", | |
| "name": "lightonai/LightOnOCR-1B-1025", | |
| "provider": "lightonai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 1.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 76.1, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 76.1, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1651597775471-62715572ab9243b5d40cbb1d.png" | |
| }, | |
| { | |
| "id": "deepseek-ai-deepseek-ocr", | |
| "name": "deepseek-ai/DeepSeek-OCR", | |
| "provider": "deepseek-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 3.3, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 75.7, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 75.7, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png" | |
| }, | |
| { | |
| "id": "arcee-ai-trinity-large-preview", | |
| "name": "arcee-ai/Trinity-Large-Preview", | |
| "provider": "arcee-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 398.6, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 75.2, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 75.2, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6435718aaaef013d1aec3b8b/GZPnGkfMn8Ino6JbkL4fJ.png" | |
| }, | |
| { | |
| "id": "opendatalab-mineru2.5-2509-1.2b", | |
| "name": "opendatalab/MinerU2.5-2509-1.2B", | |
| "provider": "opendatalab", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 1.2, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 75.2, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 75.2, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/639c3afa7432f2f5d16b7296/yqxxBknyeqkGnYsjoaR4M.png" | |
| }, | |
| { | |
| "id": "zai-org-glm-ocr", | |
| "name": "zai-org/GLM-OCR", | |
| "provider": "zai-org", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": null, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 75.2, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 75.2, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png" | |
| }, | |
| { | |
| "id": "moonshotai-kimi-k2.5", | |
| "name": "moonshotai/Kimi-K2.5", | |
| "provider": "moonshotai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 1058.6, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 87.1, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 87.6, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 50.2, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 70.8, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "aime2026": { | |
| "score": 95.83, | |
| "confidence": "official", | |
| "source": "AIME 2026 API", | |
| "date": "2026-03-16" | |
| }, | |
| "hmmt2026": { | |
| "score": 87.12, | |
| "confidence": "official", | |
| "source": "HMMT Feb 2026 API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 43.2, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 74.55, | |
| "coverageCount": 7, | |
| "coveragePercent": 58.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg" | |
| }, | |
| { | |
| "id": "qwen-qwen3.5-397b-a17b", | |
| "name": "Qwen/Qwen3.5-397B-A17B", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 403.4, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 87.8, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 88.4, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 28.7, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 76.4, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "aime2026": { | |
| "score": 93.33, | |
| "confidence": "official", | |
| "source": "AIME 2026 API", | |
| "date": "2026-03-16" | |
| }, | |
| "hmmt2026": { | |
| "score": 87.88, | |
| "confidence": "official", | |
| "source": "HMMT Feb 2026 API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 52.5, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 73.57, | |
| "coverageCount": 7, | |
| "coveragePercent": 58.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "stepfun-ai-step-3.5-flash", | |
| "name": "stepfun-ai/Step-3.5-Flash", | |
| "provider": "stepfun-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 199.4, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 84.4, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 83.5, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 23.1, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 74.4, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "aime2026": { | |
| "score": 96.67, | |
| "confidence": "official", | |
| "source": "AIME 2026 API", | |
| "date": "2026-03-16" | |
| }, | |
| "hmmt2026": { | |
| "score": 86.36, | |
| "confidence": "official", | |
| "source": "HMMT Feb 2026 API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 51.0, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 71.35, | |
| "coverageCount": 7, | |
| "coveragePercent": 58.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/66935cee39002fc0569c2943/Qv8QPbkgoKE3wR4jTzHiy.png" | |
| }, | |
| { | |
| "id": "zai-org-glm-5", | |
| "name": "zai-org/GLM-5", | |
| "provider": "zai-org", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 753.9, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gpqa": { | |
| "score": 86.0, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 30.5, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 72.8, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "aime2026": { | |
| "score": 95.83, | |
| "confidence": "official", | |
| "source": "AIME 2026 API", | |
| "date": "2026-03-16" | |
| }, | |
| "hmmt2026": { | |
| "score": 86.36, | |
| "confidence": "official", | |
| "source": "HMMT Feb 2026 API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 52.4, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 70.65, | |
| "coverageCount": 6, | |
| "coveragePercent": 50.0, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png" | |
| }, | |
| { | |
| "id": "fireredteam-firered-ocr", | |
| "name": "FireRedTeam/FireRed-OCR", | |
| "provider": "FireRedTeam", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 2.1, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 70.2, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 70.2, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/66ec07ef12bd743cfe91004e/PK3bgl6aF2RzW1QFKkq8R.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3-4b-thinking-2507", | |
| "name": "Qwen/Qwen3-4B-Thinking-2507", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 4.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 74.0, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 65.8, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 69.9, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "nanonets-nanonets-ocr2-3b", | |
| "name": "nanonets/Nanonets-OCR2-3B", | |
| "provider": "nanonets", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 3.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "olmOcr": { | |
| "score": 69.5, | |
| "confidence": "official", | |
| "source": "olmOCR-bench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 69.5, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641fc216a390e539522d511f/Xtxh40e8zSzkuKtCr58DH.jpeg" | |
| }, | |
| { | |
| "id": "moonshotai-kimi-k2-instruct-0905", | |
| "name": "moonshotai/Kimi-K2-Instruct-0905", | |
| "provider": "moonshotai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 1026.5, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "evasionBench": { | |
| "score": 66.68, | |
| "confidence": "official", | |
| "source": "EvasionBench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 66.68, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg" | |
| }, | |
| { | |
| "id": "gair-openswe-72b", | |
| "name": "GAIR/OpenSWE-72B", | |
| "provider": "GAIR", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 72.7, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "sweVerified": { | |
| "score": 66.0, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 66.0, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6144a0c4ff1146bbd84d9865/NqAuVddq2ci-AsFcFNbav.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3-4b-instruct-2507", | |
| "name": "Qwen/Qwen3-4B-Instruct-2507", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 4.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 69.6, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 62.0, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 65.8, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "deepseek-ai-deepseek-v3.2", | |
| "name": "deepseek-ai/DeepSeek-V3.2", | |
| "provider": "deepseek-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 685.4, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 85.0, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 82.4, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 40.8, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 70.0, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "aime2026": { | |
| "score": 94.17, | |
| "confidence": "official", | |
| "source": "AIME 2026 API", | |
| "date": "2026-03-16" | |
| }, | |
| "hmmt2026": { | |
| "score": 84.09, | |
| "confidence": "official", | |
| "source": "HMMT Feb 2026 API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 39.6, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| }, | |
| "swePro": { | |
| "score": 15.56, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "evasionBench": { | |
| "score": 66.88, | |
| "confidence": "official", | |
| "source": "EvasionBench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 64.28, | |
| "coverageCount": 9, | |
| "coveragePercent": 75.0, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6538815d1bdb3c40db94fbfa/xMBly9PUMphrFVMxLX4kq.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3.5-122b-a10b", | |
| "name": "Qwen/Qwen3.5-122B-A10B", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 125.1, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 86.7, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 86.6, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 25.3, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 72.0, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 49.4, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 64.0, | |
| "coverageCount": 5, | |
| "coveragePercent": 41.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "gair-openswe-32b", | |
| "name": "GAIR/OpenSWE-32B", | |
| "provider": "GAIR", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 32.8, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "sweVerified": { | |
| "score": 62.4, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 62.4, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6144a0c4ff1146bbd84d9865/NqAuVddq2ci-AsFcFNbav.png" | |
| }, | |
| { | |
| "id": "zai-org-glm-4.7", | |
| "name": "zai-org/GLM-4.7", | |
| "provider": "zai-org", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 358.3, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 84.3, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 85.7, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 24.8, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 33.4, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| }, | |
| "evasionBench": { | |
| "score": 82.91, | |
| "confidence": "official", | |
| "source": "EvasionBench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 62.22, | |
| "coverageCount": 5, | |
| "coveragePercent": 41.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3.5-27b", | |
| "name": "Qwen/Qwen3.5-27B", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 27.8, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 86.1, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 85.5, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 24.3, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 72.4, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 41.6, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 61.98, | |
| "coverageCount": 5, | |
| "coveragePercent": 41.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3.5-35b-a3b", | |
| "name": "Qwen/Qwen3.5-35B-A3B", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 36.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 85.3, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 84.2, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 22.4, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 69.2, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 40.5, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 60.32, | |
| "coverageCount": 5, | |
| "coveragePercent": 41.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "minimaxai-minimax-m2.5", | |
| "name": "MiniMaxAI/MiniMax-M2.5", | |
| "provider": "MiniMaxAI", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 228.7, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gpqa": { | |
| "score": 85.2, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 19.4, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 75.8, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 60.13, | |
| "coverageCount": 3, | |
| "coveragePercent": 25.0, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg" | |
| }, | |
| { | |
| "id": "lgai-exaone-k-exaone-236b-a23b", | |
| "name": "LGAI-EXAONE/K-EXAONE-236B-A23B", | |
| "provider": "LGAI-EXAONE", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 237.1, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 83.8, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 79.1, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 13.6, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 58.83, | |
| "coverageCount": 3, | |
| "coveragePercent": 25.0, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/66a899a72f11aaf66001a8dc/UfdrP3GMo9pNT62BaMnhw.png" | |
| }, | |
| { | |
| "id": "moonshotai-kimi-k2-thinking", | |
| "name": "moonshotai/Kimi-K2-Thinking", | |
| "provider": "moonshotai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": null, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 84.6, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 84.5, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 23.9, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 35.7, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 57.17, | |
| "coverageCount": 4, | |
| "coveragePercent": 33.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg" | |
| }, | |
| { | |
| "id": "qwen-qwen3.5-2b", | |
| "name": "Qwen/Qwen3.5-2B", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 2.3, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 55.3, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 55.3, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "meta-llama-llama-3.1-8b-instruct", | |
| "name": "meta-llama/Llama-3.1-8B-Instruct", | |
| "provider": "meta-llama", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 8.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gsm8k": { | |
| "score": 84.5, | |
| "confidence": "official", | |
| "source": "GSM8K API", | |
| "date": "2026-03-16" | |
| }, | |
| "mmluPro": { | |
| "score": 48.3, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 30.4, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 54.4, | |
| "coverageCount": 3, | |
| "coveragePercent": 25.0, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png" | |
| }, | |
| { | |
| "id": "nvidia-nvidia-nemotron-3-super-120b-a12b-bf16", | |
| "name": "nvidia/NVIDIA-Nemotron-3-Super-120B-A12B-BF16", | |
| "provider": "nvidia", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 123.6, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 83.73, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 79.23, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 18.26, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 53.73, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 31.0, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 53.19, | |
| "coverageCount": 5, | |
| "coveragePercent": 41.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png" | |
| }, | |
| { | |
| "id": "meituan-longcat-longcat-flash-thinking-2601", | |
| "name": "meituan-longcat/LongCat-Flash-Thinking-2601", | |
| "provider": "meituan-longcat", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 561.9, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gpqa": { | |
| "score": 80.5, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 25.2, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 52.85, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68a2a29ab9d4c5698e02c747/CDCAx7X7rXDt7xjI-DoxG.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3-coder-next", | |
| "name": "Qwen/Qwen3-Coder-Next", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 79.7, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "sweVerified": { | |
| "score": 70.6, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 36.2, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| }, | |
| "swePro": { | |
| "score": 44.3, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 50.37, | |
| "coverageCount": 3, | |
| "coveragePercent": 25.0, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "minimaxai-minimax-m2.1", | |
| "name": "MiniMaxAI/MiniMax-M2.1", | |
| "provider": "MiniMaxAI", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 228.7, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 88.0, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 22.2, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 29.2, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| }, | |
| "swePro": { | |
| "score": 36.81, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "evasionBench": { | |
| "score": 71.31, | |
| "confidence": "official", | |
| "source": "EvasionBench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 49.5, | |
| "coverageCount": 5, | |
| "coveragePercent": 41.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg" | |
| }, | |
| { | |
| "id": "nanbeige-nanbeige4.1-3b", | |
| "name": "Nanbeige/Nanbeige4.1-3B", | |
| "provider": "Nanbeige", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 3.9, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gpqa": { | |
| "score": 83.8, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 12.6, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 48.2, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646f0d118ff94af23bc44aab/GXHCollpMRgvYqUXQ2BQ7.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3-coder-480b-a35b-instruct", | |
| "name": "Qwen/Qwen3-Coder-480B-A35B-Instruct", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 480.2, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "terminalBench": { | |
| "score": 23.9, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| }, | |
| "swePro": { | |
| "score": 38.7, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "evasionBench": { | |
| "score": 78.16, | |
| "confidence": "official", | |
| "source": "EvasionBench API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 46.92, | |
| "coverageCount": 3, | |
| "coveragePercent": 25.0, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "nvidia-nvidia-nemotron-3-nano-30b-a3b-bf16", | |
| "name": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-BF16", | |
| "provider": "nvidia", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 31.6, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 78.3, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 15.5, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 46.9, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png" | |
| }, | |
| { | |
| "id": "zai-org-glm-4.7-flash", | |
| "name": "zai-org/GLM-4.7-Flash", | |
| "provider": "zai-org", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": null, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gpqa": { | |
| "score": 75.2, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 14.4, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 44.8, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png" | |
| }, | |
| { | |
| "id": "nvidia-nvidia-nemotron-3-nano-30b-a3b-fp8", | |
| "name": "nvidia/NVIDIA-Nemotron-3-Nano-30B-A3B-FP8", | |
| "provider": "nvidia", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 31.6, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 78.1, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 10.2, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 44.15, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png" | |
| }, | |
| { | |
| "id": "liquidai-lfm2.5-1.2b-instruct", | |
| "name": "LiquidAI/LFM2.5-1.2B-Instruct", | |
| "provider": "LiquidAI", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 1.2, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 44.35, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 38.89, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 41.62, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/61b8e2ba285851687028d395/EsTgVtnM2IqVRKgPdfqcB.png" | |
| }, | |
| { | |
| "id": "tiiuae-falcon-h1r-7b", | |
| "name": "tiiuae/Falcon-H1R-7B", | |
| "provider": "tiiuae", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 7.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 72.1, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 11.1, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 41.6, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/61a8d1aac664736898ffc84f/AT6cAB5ZNwCcqFMal71WD.jpeg" | |
| }, | |
| { | |
| "id": "minimaxai-minimax-m2", | |
| "name": "MiniMaxAI/MiniMax-M2", | |
| "provider": "MiniMaxAI", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 228.7, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 82.0, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 12.5, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "terminalBench": { | |
| "score": 30.0, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 41.5, | |
| "coverageCount": 3, | |
| "coveragePercent": 25.0, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/676e38ad04af5bec20bc9faf/dUd-LsZEX0H_d4qefO_g6.jpeg" | |
| }, | |
| { | |
| "id": "miromind-ai-mirothinker-v1.5-235b", | |
| "name": "miromind-ai/MiroThinker-v1.5-235B", | |
| "provider": "miromind-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 235.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "hle": { | |
| "score": 39.2, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 39.2, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/682c41fb2f8a52030ec93ce0/Cna52_IapEXuNBsyI3lvR.png" | |
| }, | |
| { | |
| "id": "nvidia-nemotron-orchestrator-8b", | |
| "name": "nvidia/Nemotron-Orchestrator-8B", | |
| "provider": "nvidia", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 8.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "hle": { | |
| "score": 37.1, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 37.1, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1613114437487-60262a8e0703121c822a80b6.png" | |
| }, | |
| { | |
| "id": "openai-gpt-oss-120b", | |
| "name": "openai/gpt-oss-120b", | |
| "provider": "openai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 120.4, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gpqa": { | |
| "score": 67.1, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 5.2, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 47.9, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| }, | |
| "swePro": { | |
| "score": 16.2, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 34.1, | |
| "coverageCount": 4, | |
| "coveragePercent": 33.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png" | |
| }, | |
| { | |
| "id": "openai-gpt-oss-20b", | |
| "name": "openai/gpt-oss-20b", | |
| "provider": "openai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 21.5, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "gpqa": { | |
| "score": 56.8, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| }, | |
| "hle": { | |
| "score": 4.2, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| }, | |
| "sweVerified": { | |
| "score": 37.4, | |
| "confidence": "official", | |
| "source": "SWE-bench Verified API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 32.8, | |
| "coverageCount": 3, | |
| "coveragePercent": 25.0, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/68783facef79a05727260de3/UPX5RQxiPGA-ZbBmArIKq.png" | |
| }, | |
| { | |
| "id": "miromind-ai-mirothinker-v1.5-30b", | |
| "name": "miromind-ai/MiroThinker-v1.5-30B", | |
| "provider": "miromind-ai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 30.0, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "hle": { | |
| "score": 31.0, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 31.0, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/682c41fb2f8a52030ec93ce0/Cna52_IapEXuNBsyI3lvR.png" | |
| }, | |
| { | |
| "id": "moonshotai-kimi-k2-instruct", | |
| "name": "moonshotai/Kimi-K2-Instruct", | |
| "provider": "moonshotai", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 1026.5, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "terminalBench": { | |
| "score": 27.8, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| }, | |
| "swePro": { | |
| "score": 27.67, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 27.73, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/641c1e77c3983aa9490f8121/X1yT2rsaIbR9cdYGEVu0X.jpeg" | |
| }, | |
| { | |
| "id": "xiaomimimo-mimo-v2-flash", | |
| "name": "XiaomiMiMo/MiMo-V2-Flash", | |
| "provider": "XiaomiMiMo", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 309.8, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "hle": { | |
| "score": 22.1, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 22.1, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/680cb7d1233834890a64acee/5w_4aLfF-7MAyaIPOV498.jpeg" | |
| }, | |
| { | |
| "id": "qwen-qwen3-235b-a22b", | |
| "name": "Qwen/Qwen3-235B-A22B", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 235.1, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "swePro": { | |
| "score": 21.41, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 21.41, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "qwen-qwen3.5-0.8b", | |
| "name": "Qwen/Qwen3.5-0.8B", | |
| "provider": "Qwen", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 0.9, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "mmluPro": { | |
| "score": 29.7, | |
| "confidence": "official", | |
| "source": "MMLU-Pro API", | |
| "date": "2026-03-16" | |
| }, | |
| "gpqa": { | |
| "score": 11.9, | |
| "confidence": "official", | |
| "source": "GPQA Diamond API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 20.8, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/620760a26e3b7210c2ff1943/-s1gyJfvbE1RgO5iBeNOi.png" | |
| }, | |
| { | |
| "id": "openbmb-agentcpm-explore", | |
| "name": "openbmb/AgentCPM-Explore", | |
| "provider": "openbmb", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": null, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "hle": { | |
| "score": 19.1, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 19.1, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/1670387859384-633fe7784b362488336bbfad.png" | |
| }, | |
| { | |
| "id": "zai-org-glm-4.6", | |
| "name": "zai-org/GLM-4.6", | |
| "provider": "zai-org", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 356.8, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "terminalBench": { | |
| "score": 24.5, | |
| "confidence": "official", | |
| "source": "Terminal-Bench 2.0 API", | |
| "date": "2026-03-16" | |
| }, | |
| "swePro": { | |
| "score": 9.67, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 17.09, | |
| "coverageCount": 2, | |
| "coveragePercent": 16.7, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/62dc173789b4cf157d36ebee/i_pxzM2ZDo3Ub-BEgIkE9.png" | |
| }, | |
| { | |
| "id": "google-gemma-3-27b-it", | |
| "name": "google/gemma-3-27b-it", | |
| "provider": "google", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 27.4, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "swePro": { | |
| "score": 11.38, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 11.38, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/5dd96eb166059660ed1ee413/WtA3YYitedOr9n02eHfJe.png" | |
| }, | |
| { | |
| "id": "meta-llama-llama-3.1-405b-instruct", | |
| "name": "meta-llama/Llama-3.1-405B-Instruct", | |
| "provider": "meta-llama", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 405.9, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "swePro": { | |
| "score": 11.18, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 11.18, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png" | |
| }, | |
| { | |
| "id": "helpingai-dhanishtha-2.0-0126", | |
| "name": "HelpingAI/Dhanishtha-2.0-0126", | |
| "provider": "HelpingAI", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 14.8, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "hle": { | |
| "score": 9.92, | |
| "confidence": "official", | |
| "source": "HLE API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 9.92, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/6612aedf09f16e7347dfa7e1/jHRLPBTlyykFwrd6-Mak_.png" | |
| }, | |
| { | |
| "id": "meta-llama-llama-4-maverick-17b-128e-instruct", | |
| "name": "meta-llama/Llama-4-Maverick-17B-128E-Instruct", | |
| "provider": "meta-llama", | |
| "type": "open", | |
| "released": "2024.01", | |
| "metadata": { | |
| "license": "Unknown", | |
| "parameters": "Unknown", | |
| "parametersInBillions": 401.6, | |
| "contextWindow": 0, | |
| "modality": "text", | |
| "architecture": "Transformer" | |
| }, | |
| "benchmarks": { | |
| "swePro": { | |
| "score": 5.24, | |
| "confidence": "official", | |
| "source": "SWE-bench Pro API", | |
| "date": "2026-03-16" | |
| } | |
| }, | |
| "aggregateScore": 5.24, | |
| "coverageCount": 1, | |
| "coveragePercent": 8.3, | |
| "providerLogoUrl": "https://cdn-avatars.huggingface.co/v1/production/uploads/646cf8084eefb026fb8fd8bc/oCTqufkdTkjyGodsx1vo1.png" | |
| } | |
| ] | |
| } |