Upload 6 files
Browse files- CITATION.cff +29 -0
- llms-full.txt +76 -0
- llms.txt +58 -0
- robots.txt +25 -0
- schema.jsonld +64 -0
- sitemap.xml +33 -0
CITATION.cff
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
cff-version: 1.2.0
|
| 2 |
+
title: "ALL Bench Leaderboard 2026: Unified Multi-Modal AI Evaluation"
|
| 3 |
+
message: "If you use this dataset, please cite it as below."
|
| 4 |
+
type: dataset
|
| 5 |
+
authors:
|
| 6 |
+
- name: "ALL Bench Team"
|
| 7 |
+
url: "https://huggingface.co/spaces/FINAL-Bench/all-bench-leaderboard"
|
| 8 |
+
repository-code: "https://github.com/final-bench/ALL-Bench-Leaderboard"
|
| 9 |
+
license: MIT
|
| 10 |
+
version: "2.1"
|
| 11 |
+
date-released: "2026-03-08"
|
| 12 |
+
keywords:
|
| 13 |
+
- ai-benchmark
|
| 14 |
+
- llm-leaderboard
|
| 15 |
+
- vlm
|
| 16 |
+
- multimodal-ai
|
| 17 |
+
- metacognition
|
| 18 |
+
- final-bench
|
| 19 |
+
- gpt-5
|
| 20 |
+
- claude
|
| 21 |
+
- gemini
|
| 22 |
+
abstract: >-
|
| 23 |
+
ALL Bench Leaderboard is the only AI benchmark covering LLM, VLM, Agent,
|
| 24 |
+
Image, Video, and Music generation in a single unified view. It cross-verifies
|
| 25 |
+
91 AI models across 6 modalities with a 3-tier confidence system. Features
|
| 26 |
+
composite 5-axis scoring (Knowledge, Expert Reasoning, Abstract Reasoning,
|
| 27 |
+
Metacognition, Execution), interactive comparison tools, and downloadable
|
| 28 |
+
intelligence reports. Includes FINAL Bench metacognitive evaluation where
|
| 29 |
+
Error Recovery explains 94.8% of self-correction performance variance.
|
llms-full.txt
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ALL Bench Leaderboard 2026 — Full Reference
|
| 2 |
+
|
| 3 |
+
> Complete model data for AI systems. See llms.txt for summary.
|
| 4 |
+
|
| 5 |
+
## LLM Rankings (42 Models)
|
| 6 |
+
|
| 7 |
+
### Flagship Models
|
| 8 |
+
| Model | Provider | GPQA | AIME | HLE | ARC-AGI-2 | Metacog | SWE-V | IFEval | LCB | Price(In/Out) |
|
| 9 |
+
|-------|----------|------|------|-----|-----------|---------|-------|--------|-----|---------------|
|
| 10 |
+
| GPT-5.4 | OpenAI | 92.8 | 97 | 52.1 | 73.3 | — | — | — | — | $2.50/$15 |
|
| 11 |
+
| GPT-5.2 | OpenAI | 93.2 | 100 | 35.4 | 52.9 | 62.76 | 80.0 | 90.5 | 80.0 | $1.75/$14 |
|
| 12 |
+
| GPT-5.3 Codex | OpenAI | 91.5 | 95 | 36.0 | — | — | — | — | — | $7.50/$30 |
|
| 13 |
+
| Claude Opus 4.6 | Anthropic | 91.3 | 100 | 40.0 | 68.8 | 56.04 | 80.8 | 93.1 | 76.0 | $5/$25 |
|
| 14 |
+
| Claude Sonnet 4.6 | Anthropic | 89.9 | 83 | — | 60.4 | — | 79.6 | 89.5 | — | $3/$15 |
|
| 15 |
+
| Gemini 3.1 Pro | Google | 94.3 | 97 | 44.4 | 77.1 | — | 80.6 | 91.0 | 80.0 | $2/$12 |
|
| 16 |
+
| Gemini 3 Flash | Google | 90.4 | 84 | 33.7 | — | — | 78.0 | 88.3 | — | $0.50/$3 |
|
| 17 |
+
| Grok 4 Heavy | xAI | 92.0 | 97 | 38.5 | 67.5 | — | — | 90.0 | — | $3/$15 |
|
| 18 |
+
| Kimi K2.5 | Moonshot | 87.6 | 96.1 | 44.9 | 12.1 | 68.71 | — | — | 85.0 | $0.14/$0.28 |
|
| 19 |
+
| DeepSeek V3.2 | DeepSeek | 82.3 | 92.8 | 25.7 | — | 60.04 | — | 91.2 | 71.6 | $0.14/$0.28 |
|
| 20 |
+
|
| 21 |
+
### Open-Source Models
|
| 22 |
+
| Model | Provider | MMLU-Pro | GPQA | AIME | License | Price |
|
| 23 |
+
|-------|----------|---------|------|------|---------|-------|
|
| 24 |
+
| Qwen3.5-397B | Alibaba | 84.6 | 88.1 | 96 | Apache2 | Free |
|
| 25 |
+
| DeepSeek R1 | DeepSeek | 79.8 | 87.3 | 97 | MIT | Free |
|
| 26 |
+
| Llama 4 Scout | Meta | 74.3 | 79.8 | 73 | Llama | Free |
|
| 27 |
+
| Llama 4 Maverick | Meta | 80.5 | 85.8 | 81 | Llama | Free |
|
| 28 |
+
| GLM-5 | Zhipu AI | 78.6 | 86.3 | 84 | Free | Free |
|
| 29 |
+
| K-EXAONE | LG AI Research | 81.8 | 75.4 | 85.3 | Prop | Prop |
|
| 30 |
+
|
| 31 |
+
## VLM Rankings (11 Flagship)
|
| 32 |
+
| Model | MMMU | MMMU-Pro | MathVista | Type |
|
| 33 |
+
|-------|------|---------|-----------|------|
|
| 34 |
+
| Gemini 3 Flash | 87.6 | 80.0 | — | Closed |
|
| 35 |
+
| Gemini 3 Pro | 87.5 | 80.0 | — | Closed |
|
| 36 |
+
| GPT-5.2 | 86.7 | — | — | Closed |
|
| 37 |
+
| Claude Opus 4.6 | — | 85.1 | — | Closed |
|
| 38 |
+
| GPT-5 | 84.2 | — | — | Closed |
|
| 39 |
+
| Gemini 3.1 Pro | — | 82.0 | — | Closed |
|
| 40 |
+
| InternVL3.5-241B | 77.7 | — | — | Open |
|
| 41 |
+
| Grok 4 Heavy | 76.5 | — | — | Closed |
|
| 42 |
+
| InternVL3-78B | 72.2 | — | 79.6 | Open |
|
| 43 |
+
| Qwen2.5-VL-72B | 70.2 | — | 74.8 | Open |
|
| 44 |
+
| Kimi-VL-A3B | 64.0 | 46.3 | 80.1 | Open |
|
| 45 |
+
|
| 46 |
+
## Agent Rankings (Top 5 of 10 Models Shown)
|
| 47 |
+
| Model | OSWorld | BrowseComp | Terminal-Bench | GDPval-AA |
|
| 48 |
+
|-------|---------|------------|----------------|-----------|
|
| 49 |
+
| GPT-5.4 | 75.0 | 82.7 | — | 83 |
|
| 50 |
+
| Claude Opus 4.6 | 72.7 | 84.0 | 74.7 | 1606 |
|
| 51 |
+
| Claude Sonnet 4.6 | 72.5 | — | 53.0 | 1633 |
|
| 52 |
+
| Gemini 3.1 Pro | — | 85.9 | 78.4 | 1317 |
|
| 53 |
+
| GPT-5.3 Codex | — | — | 77.3 | — |
|
| 54 |
+
|
| 55 |
+
## Generative AI Models
|
| 56 |
+
|
| 57 |
+
### Image Generation (10 Models)
|
| 58 |
+
GPT Image 1.5 (OpenAI) · Imagen 4 (Google) · Flux 2 Pro (BFL) · Midjourney v7 · Flux 2 Dev · Ideogram 3.0 · DALL-E 3.5 · Nano Banana 2 · SD 3.5 · Seedream 4.5
|
| 59 |
+
|
| 60 |
+
### Video Generation (10 Models)
|
| 61 |
+
Sora 2 (OpenAI) · Veo 3.1 (Google) · Runway Gen-4.5 · Kling 3.0 · Seedance 2.0 · Wan 2.6 · Pika 2.5 · Luma Ray3 · LTX-2 · Hailuo AI
|
| 62 |
+
|
| 63 |
+
### Music Generation (8 Models)
|
| 64 |
+
Suno v4.5 · Udio v2 · Gemini Music · MusicGen Large · Stable Audio 2.0 · JASCO · Riffusion v2 · Loudme
|
| 65 |
+
|
| 66 |
+
## Benchmark Methodology
|
| 67 |
+
|
| 68 |
+
Composite Score = Avg(confirmed benchmarks) × √(N/10) where N = number of benchmarks with confirmed data out of 10 core benchmarks.
|
| 69 |
+
|
| 70 |
+
Confidence system:
|
| 71 |
+
- Cross-verified (✓✓): 2+ independent sources confirm the score
|
| 72 |
+
- Single-source (✓): One official or third-party source
|
| 73 |
+
- Self-reported (~): Provider claim only, not independently verified
|
| 74 |
+
- Null (—): No data available, never estimated or imputed
|
| 75 |
+
|
| 76 |
+
Last verified: 2026-03-08
|
llms.txt
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# ALL Bench Leaderboard 2026
|
| 2 |
+
|
| 3 |
+
> The only AI benchmark leaderboard covering LLM, VLM, Agent, Image, Video, and Music generation in one place.
|
| 4 |
+
|
| 5 |
+
## Overview
|
| 6 |
+
|
| 7 |
+
ALL Bench Leaderboard is a unified multi-modal AI evaluation platform. It cross-verifies 91 AI models across 6 modalities (LLM, VLM, Agent, Image Generation, Video Generation, Music Generation) with a 3-tier confidence system. Every score is traceable to its original source.
|
| 8 |
+
|
| 9 |
+
## Key Facts
|
| 10 |
+
|
| 11 |
+
- Version: 2.1 (March 2026)
|
| 12 |
+
- Total Models: 42 LLMs + 11 VLMs + 10 Agents + 10 Image + 10 Video + 8 Music = 91
|
| 13 |
+
- Core Benchmarks: MMLU-Pro, GPQA, AIME, HLE, ARC-AGI-2, FINAL Bench (Metacognition), SWE-Pro, BFCL, IFEval, LiveCodeBench
|
| 14 |
+
- Scoring: Composite = Avg(verified benchmarks) × √(N/10)
|
| 15 |
+
- 5-Axis Framework: Knowledge, Expert Reasoning, Abstract Reasoning, Metacognition, Execution
|
| 16 |
+
- Confidence Levels: Cross-verified (2+ sources), Single-source, Self-reported
|
| 17 |
+
- License: MIT
|
| 18 |
+
|
| 19 |
+
## Links
|
| 20 |
+
|
| 21 |
+
- Live Leaderboard: https://huggingface.co/spaces/FINAL-Bench/all-bench-leaderboard
|
| 22 |
+
- Dataset: https://huggingface.co/datasets/FINAL-Bench/ALL-Bench-Leaderboard
|
| 23 |
+
- GitHub: https://github.com/final-bench/ALL-Bench-Leaderboard
|
| 24 |
+
- FINAL Bench Dataset: https://huggingface.co/datasets/FINAL-Bench/Metacognitive
|
| 25 |
+
- FINAL Bench Leaderboard: https://huggingface.co/spaces/FINAL-Bench/Leaderboard
|
| 26 |
+
|
| 27 |
+
## Top Models (March 2026)
|
| 28 |
+
|
| 29 |
+
### LLM Top 5 by Composite Score
|
| 30 |
+
1. Gemini 3.1 Pro (Google) — GPQA 94.3, ARC-AGI-2 77.1%
|
| 31 |
+
2. GPT-5.2 (OpenAI) — GPQA 93.2, AIME 100
|
| 32 |
+
3. Claude Opus 4.6 (Anthropic) — SWE-V 80.8, MMMU-Pro 85.1
|
| 33 |
+
4. Grok 4 Heavy (xAI) — GPQA 92.0, ARC-AGI-2 67.5%
|
| 34 |
+
5. Kimi K2.5 (Moonshot) — HLE 44.9, Metacog 68.71
|
| 35 |
+
|
| 36 |
+
### VLM Top 3 by MMMU
|
| 37 |
+
1. Gemini 3 Flash — MMMU 87.6%
|
| 38 |
+
2. Gemini 3 Pro — MMMU 87.5%
|
| 39 |
+
3. GPT-5.2 — MMMU 86.7%
|
| 40 |
+
|
| 41 |
+
### FINAL Bench Metacognitive Top 3
|
| 42 |
+
1. Kimi K2.5 — 68.71
|
| 43 |
+
2. GPT-5.2 — 62.76
|
| 44 |
+
3. GLM-5 — 62.50
|
| 45 |
+
|
| 46 |
+
## FINAL Bench
|
| 47 |
+
|
| 48 |
+
FINAL Bench (Frontier Intelligence Nexus for AGI-Level Verification) measures AI self-correction ability. Error Recovery (ER) explains 94.8% of metacognitive performance variance. 9 frontier models evaluated. Featured in Seoul Shinmun, Asia Economy, IT Chosun (2026.02.27). HuggingFace Datasets global ranking: Top 5.
|
| 49 |
+
|
| 50 |
+
## API
|
| 51 |
+
|
| 52 |
+
Free Gradio API with 8 endpoints. No authentication required.
|
| 53 |
+
Endpoints: /get_llm_data, /get_vlm_data, /get_agent_data, /get_image_data, /get_video_data, /get_music_data, /get_all_data, /search_models
|
| 54 |
+
|
| 55 |
+
## Data Format
|
| 56 |
+
|
| 57 |
+
Single unified JSON file: all_bench_leaderboard_v2.1.json (75KB)
|
| 58 |
+
Categories: llm[42], vlm.flagship[11], vlm.lightweight[5], agent[10], image[10], video[10], music[8], confidence{42 models}
|
robots.txt
ADDED
|
@@ -0,0 +1,25 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
User-agent: *
|
| 2 |
+
Allow: /
|
| 3 |
+
|
| 4 |
+
User-agent: Googlebot
|
| 5 |
+
Allow: /
|
| 6 |
+
|
| 7 |
+
User-agent: Bingbot
|
| 8 |
+
Allow: /
|
| 9 |
+
|
| 10 |
+
User-agent: ChatGPT-User
|
| 11 |
+
Allow: /
|
| 12 |
+
|
| 13 |
+
User-agent: GPTBot
|
| 14 |
+
Allow: /
|
| 15 |
+
|
| 16 |
+
User-agent: ClaudeBot
|
| 17 |
+
Allow: /
|
| 18 |
+
|
| 19 |
+
User-agent: PerplexityBot
|
| 20 |
+
Allow: /
|
| 21 |
+
|
| 22 |
+
User-agent: Google-Extended
|
| 23 |
+
Allow: /
|
| 24 |
+
|
| 25 |
+
Sitemap: https://huggingface.co/spaces/FINAL-Bench/all-bench-leaderboard/resolve/main/sitemap.xml
|
schema.jsonld
ADDED
|
@@ -0,0 +1,64 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"@context": "https://schema.org",
|
| 3 |
+
"@type": "Dataset",
|
| 4 |
+
"name": "ALL Bench Leaderboard 2026",
|
| 5 |
+
"alternateName": ["ALL Bench", "ALLBench", "AI Benchmark Leaderboard 2026"],
|
| 6 |
+
"description": "The only AI benchmark leaderboard covering LLM, VLM, Agent, Image, Video, and Music generation in a single unified view. 91 models cross-verified across 6 modalities with confidence badges. Features composite 5-axis scoring, interactive comparison tools, and downloadable intelligence reports.",
|
| 7 |
+
"url": "https://huggingface.co/spaces/FINAL-Bench/all-bench-leaderboard",
|
| 8 |
+
"sameAs": [
|
| 9 |
+
"https://huggingface.co/datasets/FINAL-Bench/ALL-Bench-Leaderboard",
|
| 10 |
+
"https://github.com/final-bench/ALL-Bench-Leaderboard"
|
| 11 |
+
],
|
| 12 |
+
"license": "https://opensource.org/licenses/MIT",
|
| 13 |
+
"version": "2.1",
|
| 14 |
+
"datePublished": "2026-03-01",
|
| 15 |
+
"dateModified": "2026-03-08",
|
| 16 |
+
"creator": {
|
| 17 |
+
"@type": "Organization",
|
| 18 |
+
"name": "ALL Bench Team",
|
| 19 |
+
"url": "https://huggingface.co/FINAL-Bench"
|
| 20 |
+
},
|
| 21 |
+
"keywords": [
|
| 22 |
+
"AI benchmark", "LLM leaderboard", "GPT-5", "Claude", "Gemini",
|
| 23 |
+
"VLM benchmark", "AI agent", "image generation", "video generation",
|
| 24 |
+
"music generation", "MMLU-Pro", "GPQA", "ARC-AGI-2", "FINAL Bench",
|
| 25 |
+
"metacognition", "multimodal AI", "AI evaluation", "benchmark comparison",
|
| 26 |
+
"AI model ranking", "open source AI"
|
| 27 |
+
],
|
| 28 |
+
"about": [
|
| 29 |
+
{"@type": "Thing", "name": "Large Language Model"},
|
| 30 |
+
{"@type": "Thing", "name": "Vision Language Model"},
|
| 31 |
+
{"@type": "Thing", "name": "AI Benchmark"},
|
| 32 |
+
{"@type": "Thing", "name": "Generative AI"},
|
| 33 |
+
{"@type": "Thing", "name": "Metacognition"}
|
| 34 |
+
],
|
| 35 |
+
"measurementTechnique": "Cross-verified benchmark aggregation with 3-tier confidence system",
|
| 36 |
+
"variableMeasured": [
|
| 37 |
+
{"@type": "PropertyValue", "name": "MMLU-Pro", "description": "57K expert-level multi-discipline questions"},
|
| 38 |
+
{"@type": "PropertyValue", "name": "GPQA Diamond", "description": "PhD-level expert questions in science"},
|
| 39 |
+
{"@type": "PropertyValue", "name": "AIME 2025", "description": "American Invitational Mathematics Examination"},
|
| 40 |
+
{"@type": "PropertyValue", "name": "HLE", "description": "Humanity's Last Exam — 2500 expert-sourced questions"},
|
| 41 |
+
{"@type": "PropertyValue", "name": "ARC-AGI-2", "description": "Abstract reasoning and novel pattern recognition"},
|
| 42 |
+
{"@type": "PropertyValue", "name": "FINAL Bench Metacognitive", "description": "AI self-correction ability measurement"},
|
| 43 |
+
{"@type": "PropertyValue", "name": "SWE-Pro", "description": "Software engineering benchmark by Scale AI"},
|
| 44 |
+
{"@type": "PropertyValue", "name": "IFEval", "description": "Instruction following evaluation"},
|
| 45 |
+
{"@type": "PropertyValue", "name": "BFCL", "description": "Berkeley Function Calling Leaderboard — tool/function calling evaluation"},
{"@type": "PropertyValue", "name": "LiveCodeBench", "description": "Continuously updated coding benchmark"}
|
| 46 |
+
],
|
| 47 |
+
"distribution": [
|
| 48 |
+
{
|
| 49 |
+
"@type": "DataDownload",
|
| 50 |
+
"encodingFormat": "application/json",
|
| 51 |
+
"contentUrl": "https://huggingface.co/datasets/FINAL-Bench/ALL-Bench-Leaderboard/resolve/main/all_bench_leaderboard_v2.1.json",
|
| 52 |
+
"name": "Unified JSON Dataset (75KB)"
|
| 53 |
+
}
|
| 54 |
+
],
|
| 55 |
+
"isPartOf": {
|
| 56 |
+
"@type": "DataCatalog",
|
| 57 |
+
"name": "Hugging Face Datasets",
|
| 58 |
+
"url": "https://huggingface.co/datasets"
|
| 59 |
+
},
|
| 60 |
+
"funder": {
|
| 61 |
+
"@type": "Organization",
|
| 62 |
+
"name": "FINAL Bench"
|
| 63 |
+
}
|
| 64 |
+
}
|
sitemap.xml
ADDED
|
@@ -0,0 +1,33 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
<?xml version="1.0" encoding="UTF-8"?>
|
| 2 |
+
<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
|
| 3 |
+
<url>
|
| 4 |
+
<loc>https://huggingface.co/spaces/FINAL-Bench/all-bench-leaderboard</loc>
|
| 5 |
+
<lastmod>2026-03-08</lastmod>
|
| 6 |
+
<changefreq>weekly</changefreq>
|
| 7 |
+
<priority>1.0</priority>
|
| 8 |
+
</url>
|
| 9 |
+
<url>
|
| 10 |
+
<loc>https://huggingface.co/datasets/FINAL-Bench/ALL-Bench-Leaderboard</loc>
|
| 11 |
+
<lastmod>2026-03-08</lastmod>
|
| 12 |
+
<changefreq>weekly</changefreq>
|
| 13 |
+
<priority>0.9</priority>
|
| 14 |
+
</url>
|
| 15 |
+
<url>
|
| 16 |
+
<loc>https://huggingface.co/datasets/FINAL-Bench/Metacognitive</loc>
|
| 17 |
+
<lastmod>2026-03-08</lastmod>
|
| 18 |
+
<changefreq>monthly</changefreq>
|
| 19 |
+
<priority>0.8</priority>
|
| 20 |
+
</url>
|
| 21 |
+
<url>
|
| 22 |
+
<loc>https://huggingface.co/spaces/FINAL-Bench/Leaderboard</loc>
|
| 23 |
+
<lastmod>2026-03-08</lastmod>
|
| 24 |
+
<changefreq>weekly</changefreq>
|
| 25 |
+
<priority>0.8</priority>
|
| 26 |
+
</url>
|
| 27 |
+
<url>
|
| 28 |
+
<loc>https://github.com/final-bench/ALL-Bench-Leaderboard</loc>
|
| 29 |
+
<lastmod>2026-03-08</lastmod>
|
| 30 |
+
<changefreq>weekly</changefreq>
|
| 31 |
+
<priority>0.7</priority>
|
| 32 |
+
</url>
|
| 33 |
+
</urlset>
|