{ "benchmark": "rpc-bench", "schema_version": "1.0", "last_updated": "2026-06-17", "source": "data/leaderboard_seed.csv", "seasons": { "default": { "period": "rolling", "dataset_version": "rpc-bench-test-v1", "eval_version": "eval.py@rpc-leaderboard-v1", "judges": { "open_qa": [ "gpt-5-2025-08-07", "gemini-2.5-pro" ], "claim_verification": "exact-match" }, "models": [ { "rank": 1, "name": "GPT-5", "url": "https://openai.com/index/introducing-gpt-5/", "org": "OpenAI", "modality": "text", "date": "2025-8-7", "status": "published", "conciseness": 54.93, "correctness": 69.1, "completeness": 67.33, "f1_like": 68.2, "info": 37.46, "overall": 37.46 }, { "rank": 2, "name": "GPT-5.2", "url": "https://openai.com/index/introducing-gpt-5-2/", "org": "OpenAI", "modality": "text", "date": "2025-12-11", "status": "published", "conciseness": 53.81, "correctness": 66.84, "completeness": 64.03, "f1_like": 65.4, "info": 35.19, "overall": 35.19 }, { "rank": 3, "name": "GPT-5", "url": "https://openai.com/index/introducing-gpt-5/", "org": "OpenAI", "modality": "visual", "date": "2025-8-7", "status": "published", "conciseness": 61.47, "correctness": 58.9, "completeness": 55.34, "f1_like": 57.07, "info": 35.08, "overall": 35.08 }, { "rank": 4, "name": "Gemini-2.5-Pro", "url": "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/", "org": "Google", "modality": "text", "date": "2025-3-25", "status": "published", "conciseness": 54.87, "correctness": 62.65, "completeness": 59.03, "f1_like": 60.79, "info": 33.35, "overall": 33.35 }, { "rank": 5, "name": "Gemini-3-Pro", "url": "https://blog.google/products-and-platforms/products/gemini/gemini-3/", "org": "Google", "modality": "text", "date": "2025-11-18", "status": "published", "conciseness": 52.81, "correctness": 62.69, "completeness": 60.28, "f1_like": 61.46, "info": 32.46, "overall": 32.46 }, { "rank": 6, "name": "DeepSeek-V3.2", "url": "https://api-docs.deepseek.com/news/news251201", "org": "DeepSeek-AI", "modality": "text", "date": "2025-12-1", "status": "published", "conciseness": 56.31, "correctness": 58.73, "completeness": 55.19, "f1_like": 56.91, "info": 32.04, "overall": 32.04 }, { "rank": 7, "name": "GPT-5.2", "url": "https://openai.com/index/introducing-gpt-5-2/", "org": "OpenAI", "modality": "visual", "date": "2025-12-11", "status": "published", "conciseness": 56.43, "correctness": 56.75, "completeness": 52.82, "f1_like": 54.72, "info": 30.88, "overall": 30.88 }, { "rank": 8, "name": "DeepSeek-V3.1", "url": "https://api-docs.deepseek.com/news/news250821", "org": "DeepSeek-AI", "modality": "text", "date": "2025-8-21", "status": "published", "conciseness": 54.76, "correctness": 57.85, "completeness": 54.85, "f1_like": 56.31, "info": 30.84, "overall": 30.84 }, { "rank": 9, "name": "GLM-4.6V", "url": "https://github.com/zai-org/GLM-V", "org": "Z.ai", "modality": "visual", "date": "2025-12-8", "status": "published", "conciseness": 64.55, "correctness": 47.32, "completeness": 43.43, "f1_like": 45.29, "info": 29.23, "overall": 29.23 }, { "rank": 10, "name": "GLM-4.7", "url": "https://z.ai/blog/glm-4.7", "org": "Z.ai", "modality": "text", "date": "2025-12-22", "status": "published", "conciseness": 54.34, "correctness": 54.36, "completeness": 51.75, "f1_like": 53.02, "info": 28.81, "overall": 28.81 }, { "rank": 11, "name": "GLM-4.5V", "url": "https://github.com/zai-org/GLM-V", "org": "Z.ai", "modality": "visual", "date": "2025-8-11", "status": "published", "conciseness": 59.44, "correctness": 48.79, "completeness": 43.62, "f1_like": 46.06, "info": 27.38, "overall": 27.38 }, { "rank": 12, "name": "gemini-3-pro", "url": "https://blog.google/products-and-platforms/products/gemini/gemini-3/", "org": "Google", "modality": "visual", "date": "2025-11-18", "status": "published", "conciseness": 50.22, "correctness": 56.06, "completeness": 52.69, "f1_like": 54.32, "info": 27.28, "overall": 27.28 }, { "rank": 13, "name": "GLM-4.5", "url": "https://z.ai/blog/glm-4.5", "org": "Z.ai", "modality": "text", "date": "2025-7-28", "status": "published", "conciseness": 43.41, "correctness": 58.95, "completeness": 59.54, "f1_like": 59.24, "info": 25.72, "overall": 25.72 }, { "rank": 14, "name": "gemini-2.5-pro", "url": "https://blog.google/technology/google-deepmind/gemini-model-thinking-updates-march-2025/", "org": "Google", "modality": "visual", "date": "2025-3-25", "status": "published", "conciseness": 51.71, "correctness": 48.39, "completeness": 45.59, "f1_like": 46.95, "info": 24.28, "overall": 24.28 }, { "rank": 15, "name": "Claude-Sonnet-4", "url": "https://www.anthropic.com/news/claude-4", "org": "Anthropic", "modality": "text", "date": "2025-5-23", "status": "published", "conciseness": 41.37, "correctness": 58.53, "completeness": 58.44, "f1_like": 58.48, "info": 24.19, "overall": 24.19 }, { "rank": 16, "name": "Qwen3", "url": "https://github.com/QwenLM/Qwen3", "org": "Alibaba", "modality": "text", "date": "2025-7-21", "status": "published", "conciseness": 41.44, "correctness": 55.88, "completeness": 56.64, "f1_like": 56.26, "info": 23.31, "overall": 23.31 }, { "rank": 17, "name": "Claude-Sonnet-4.5", "url": "https://www.anthropic.com/news/claude-sonnet-4-5", "org": "Anthropic", "modality": "text", "date": "2025-9-30", "status": "published", "conciseness": 31.02, "correctness": 64.31, "completeness": 64.97, "f1_like": 64.64, "info": 20.05, "overall": 20.05 }, { "rank": 18, "name": "Claude-Sonnet-4.5", "url": "https://www.anthropic.com/news/claude-sonnet-4-5", "org": "Anthropic", "modality": "visual", "date": "2025-9-30", "status": "published", "conciseness": 31.95, "correctness": 55.35, "completeness": 54.45, "f1_like": 54.89, "info": 17.54, "overall": 17.54 }, { "rank": 19, "name": "Claude-Sonnet-4", "url": "https://www.anthropic.com/news/claude-4", "org": "Anthropic", "modality": "visual", "date": "2025-5-23", "status": "published", "conciseness": 31.63, "correctness": 54.16, "completeness": 53.32, "f1_like": 53.74, "info": 16.99, "overall": 16.99 }, { "rank": 20, "name": "HippoRAG2", "url": "https://github.com/ianliuwd/HippoRAG2", "org": "The Ohio State University", "modality": "text", "date": "2025-6-19", "status": "published", "conciseness": 45.77, "correctness": 33.13, "completeness": 27.88, "f1_like": 30.28, "info": 13.86, "overall": 13.86 }, { "rank": 21, "name": "MemoRAG", "url": "https://github.com/qhjqhj00/MemoRAG", "org": "Peking University & Hong Kong Polytechnic University", "modality": "text", "date": "2025-4-9", "status": "published", "conciseness": 51.31, "correctness": 24.19, "completeness": 19.1, "f1_like": 21.35, "info": 10.96, "overall": 10.96 }, { "rank": 22, "name": "VdocRAG", "url": "https://vdocrag.github.io/", "org": "NTT Corporation & Tohoku University", "modality": "visual", "date": "2025-4-14", "status": "published", "conciseness": 61.54, "correctness": 21.17, "completeness": 13.88, "f1_like": 16.77, "info": 10.32, "overall": 10.32 }, { "rank": 23, "name": "VisRAG", "url": "https://github.com/OpenBMB/VisRAG", "org": "Tsinghua University & ModelBest Inc.", "modality": "visual", "date": "2025-3-2", "status": "published", "conciseness": 39.9, "correctness": 26.24, "completeness": 23.63, "f1_like": 24.87, "info": 9.92, "overall": 9.92 }, { "rank": 24, "name": "Raptor", "url": "https://github.com/parthsarthi03/raptor", "org": "Stanford University", "modality": "text", "date": "2024-1-31", "status": "published", "conciseness": 36.47, "correctness": 25.28, "completeness": 20.82, "f1_like": 22.84, "info": 8.33, "overall": 8.33 }, { "rank": 25, "name": "Monkey", "url": "https://github.com/Yuliang-Liu/Monkey", "org": "Huazhong University of Science and Technology", "modality": "visual", "date": "2024-8-26", "status": "published", "conciseness": 54.61, "correctness": 17.08, "completeness": 11.27, "f1_like": 13.58, "info": 7.41, "overall": 7.41 }, { "rank": 26, "name": "Docopilot", "url": "https://github.com/OpenGVLab/Docopilot", "org": "Shanghai AI Laboratory", "modality": "visual", "date": "2025-7-19", "status": "published", "conciseness": 39.31, "correctness": 18.31, "completeness": 17.12, "f1_like": 17.69, "info": 6.96, "overall": 6.96 }, { "rank": 27, "name": "Qwen3", "url": "https://github.com/QwenLM/Qwen3", "org": "Alibaba", "modality": "visual", "date": "2025-7-21", "status": "published", "conciseness": 22.64, "correctness": 20.17, "completeness": 20.14, "f1_like": 20.16, "info": 4.56, "overall": 4.56 }, { "rank": 28, "name": "DocOwl2", "url": "https://github.com/X-PLUG/mPLUG-DocOwl", "org": "Alibaba", "modality": "visual", "date": "2024-9-9", "status": "published", "conciseness": 50.19, "correctness": 11.75, "completeness": 6.66, "f1_like": 8.5, "info": 4.27, "overall": 4.27 } ] } } }