"Model Name" "Link" "Comment" "Group" "Med. Len." "Med. Resp. Len." "Parameter Size (B)" "Type" "Model Type" "Think" "Overall" "KO" "EN" "JA" "ZH" "PL" "DE" "PT" "ES" "FR" "IT" "RU" "VI" "GPT-5 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5" "Reasoning: medium" "GPT" "" "" "" "Proprietary" "Think" "On" "70.73" "64.72" "65.83" "71.69" "67.68" "72.78" "71.27" "73.74" "75.68" "72.83" "77.05" "70.79" "75.61" "o3-pro (Reasoning: medium)" "https://platform.openai.com/docs/models/o3-pro" "Reasoning: medium" "GPT" "" "" "" "Proprietary" "Think" "On" "66.47" "63.61" "63.61" "69.28" "65.24" "63.89" "64.09" "68.16" "69.19" "70.11" "72.13" "62.36" "71.95" "GPT-5.2 (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5.2" "Reasoning: medium" "GPT" "347.0" "264.0" "" "Proprietary" "Hybrid" "On" "66.18" "61.67" "61.39" "69.28" "64.63" "68.89" "66.3" "70.95" "63.24" "68.48" "70.49" "70.22" "68.29" "GPT-5.1 (Reasoning: medium, verbosity: medium)" "https://platform.openai.com/docs/models/gpt-5.1" "Reasoning: medium, verbosity: medium" "GPT" "" "" "" "Proprietary" "Hybrid" "On" "64.57" "57.78" "62.5" "65.06" "62.8" "65.56" "60.22" "65.36" "68.11" "74.46" "70.49" "67.42" "63.41" "Claude 4.5 Opus (think, budget: 16K)" "https://www.anthropic.com/claude/opus" "thinking budget: 16K" "Claude" "" "" "" "Proprietary" "Hybrid" "On" "63.41" "59.44" "60.28" "66.27" "64.02" "66.67" "65.19" "63.69" "62.16" "63.59" "64.48" "65.73" "67.07" "Claude 4 Opus (20250514) (think)" "https://www.anthropic.com/claude/opus" "version: 20250514" "Claude" "" "" "" "Proprietary" "Hybrid" "On" "63.29" "57.5" "62.5" "64.46" "62.8" "59.44" "65.19" "65.92" "60.54" "65.22" "65.57" "65.17" "72.56" "Claude 4.1 Opus (20250805) (think)" "https://www.anthropic.com/claude/opus" "version: 20250805" "Claude" "" "" "" "Proprietary" "Hybrid" "On" "63.24" "58.33" "61.39" "60.84" "64.02" "61.67" "66.85" "68.16" "61.08" "65.76" "66.67" "65.73" "65.24" "GPT-5 mini (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-mini" "Reasoning: medium" "GPT" "" "" "" "Proprietary" "Think" "On" "62.56" "57.5" "56.39" "62.65" "62.2" "63.89" "60.22" "66.48" "67.03" "70.11" "67.76" "66.29" "60.98" "Gemini 3 Pro Preview (Thinking Level: High)" "https://deepmind.google/models/gemini/pro/" "Thinking Level: High" "Gemini" "1930.5" "378.0" "" "Proprietary" "Think" "On" "62.48" "59.44" "60.56" "60.24" "62.2" "61.67" "65.19" "63.13" "64.32" "65.76" "65.57" "64.04" "62.2" "Claude 4 Sonnet (20250514) (think)" "https://www.anthropic.com/claude/sonnet" "version: 20250514" "Claude" "" "" "" "Proprietary" "Hybrid" "On" "61.8" "54.17" "59.17" "63.86" "64.63" "59.44" "61.33" "64.8" "62.16" "65.22" "67.21" "66.29" "64.02" "o3" "https://platform.openai.com/docs/models/o3" "" "GPT" "" "" "" "Proprietary" "Think" "On" "60.91" "57.5" "59.17" "61.45" "58.54" "61.11" "64.09" "60.89" "62.16" "63.59" "65.03" "54.49" "68.29" "Gemini 2.5 Pro" "https://deepmind.google/models/gemini/pro/" "" "Gemini" "" "" "" "Proprietary" "Think" "On" "59.34" "53.61" "57.78" "59.04" "57.93" "57.22" "56.91" "60.89" "63.24" "67.93" "62.3" "61.24" "60.98" "Gemini 3 Flash Preview (Thinking Level: High)" "https://deepmind.google/models/gemini/flash/" "Thinking Level: High" "Gemini" "1296.5" "424.5" "" "Proprietary" "Think" "On" "59.26" "53.89" "57.22" "61.45" "57.32" "56.67" "61.33" "57.54" "58.92" "64.67" "67.76" "60.11" "61.59" "GLM-4.7 FP8" "https://huggingface.co/zai-org/GLM-4.7-FP8" "temperature: 1.0 top-p: 0.95" "GLM" "2252.5" "328.0" "358.0" "Open" "Hybrid" "On" "59.22" "54.17" "55.28" "63.86" "63.41" "55.0" "58.56" "62.01" "61.08" "63.59" "61.75" "66.29" "54.88" "DeepSeek V3.2 Speciale" "https://huggingface.co/deepseek-ai/DeepSeek-V3.2-Speciale" "temperature: 1.0 top-p: 0.95" "DeepSeek" "3226.5" "249.5" "671.0" "Open" "Think" "On" "59.14" "50.83" "58.06" "63.25" "57.93" "58.89" "58.56" "58.66" "60.0" "65.22" "66.12" "59.55" "62.2" "Grok-4" "https://x.ai/news/grok-4" "temperature: 0.6 top-p: 0.95" "Grok" "" "" "" "Proprietary" "Think" "On" "58.74" "57.78" "56.67" "62.65" "60.37" "58.33" "60.22" "59.78" "56.22" "62.5" "60.66" "52.25" "60.98" "Gemini 2.5 Flash" "https://deepmind.google/models/gemini/flash/" "" "Gemini" "" "" "" "Proprietary" "Hybrid" "On" "58.62" "51.11" "56.39" "62.05" "56.71" "62.78" "60.77" "61.45" "60.0" "63.04" "57.92" "64.04" "56.71" "o4-mini" "https://platform.openai.com/docs/models/o4-mini" "" "GPT" "" "" "" "Proprietary" "Think" "On" "57.57" "54.17" "55.0" "62.05" "59.76" "52.78" "58.56" "63.69" "55.68" "57.61" "60.66" "56.74" "60.98" "Kimi K2 Thinking" "https://huggingface.co/moonshotai/Kimi-K2-Thinking" "temperature:1.0 top-p: 0.95" "moonshot" "1692.0" "330.0" "1000.0" "Open" "Think" "On" "56.84" "50.0" "57.5" "60.84" "62.2" "53.33" "54.14" "61.45" "53.51" "59.24" "59.56" "56.18" "61.59" "Qwen3 235B A22B Thinking 2507" "https://huggingface.co/Qwen/Qwen3-235B-A22B-Thinking-2507" "temperature: 0.6 top-p: 0.95" "Qwen" "2404.5" "423.0" "235.0" "Open" "Think" "On" "55.48" "49.17" "53.33" "56.02" "58.54" "50.56" "62.43" "60.89" "52.97" "56.52" "60.11" "53.93" "60.37" "GPT-5 nano (Reasoning: medium)" "https://platform.openai.com/docs/models/gpt-5-nano" "Reasoning: medium" "GPT" "" "" "" "Proprietary" "Think" "On" "55.39" "51.94" "53.89" "57.23" "53.66" "55.56" "58.01" "59.78" "54.59" "56.52" "59.02" "57.3" "51.83" "GLM-4.5 FP8" "https://huggingface.co/zai-org/GLM-4.5-FP8" "temperature: 0.6 top-p: 0.95" "GLM" "1442.0" "604.0" "358.0" "Open" "Hybrid" "On" "54.03" "46.94" "54.17" "60.84" "58.54" "48.89" "55.8" "54.75" "48.11" "57.61" "57.92" "57.87" "54.88" "GLM-4.6 FP8" "https://huggingface.co/zai-org/GLM-4.6-FP8" "temperature: 1.0 top-p: 0.95" "GLM" "2645.5" "522.0" "358.0" "Open" "Hybrid" "On" "53.3" "49.17" "54.17" "54.22" "56.71" "52.22" "53.04" "49.16" "56.76" "56.52" "56.28" "53.93" "50.61" "Gemini 2.5 Flash-lite Preview (09-2025)" "https://deepmind.google/models/gemini/" "version: 09-2025" "Gemini" "" "" "" "Proprietary" "Think" "On" "53.06" "47.78" "51.11" "51.2" "53.66" "51.67" "54.7" "59.22" "51.89" "57.07" "55.74" "57.87" "51.83" "Qwen3 235B A22B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-235B-A22B-Instruct-2507" "temperature: 0.7 top-p: 0.8" "Qwen" "433.0" "433.0" "235.0" "Open" "Instruct" "Off" "52.94" "46.67" "55.28" "53.61" "59.15" "46.11" "51.38" "55.87" "54.59" "53.26" "56.28" "54.49" "53.05" "DeepSeek V3.2" "https://huggingface.co/deepseek-ai/DeepSeek-V3.2" "temperature: 1.0 top-p: 0.95" "DeepSeek" "762.5" "312.0" "671.0" "Open" "Think" "On" "52.17" "47.5" "49.44" "53.61" "50.61" "50.56" "54.14" "59.22" "52.43" "57.07" "56.28" "44.94" "57.93" "DeepSeek V3.1 (think)" "https://huggingface.co/deepseek-ai/DeepSeek-V3.1" "temperature: 0.6 top-p: 0.95" "DeepSeek" "710.5" "356.0" "671.0" "Open" "Hybrid" "On" "51.45" "44.44" "48.33" "56.63" "48.78" "48.89" "55.25" "53.07" "52.97" "56.52" "57.92" "50.56" "54.27" "DeepSeek V3.1 Terminus (think)" "https://huggingface.co/deepseek-ai/DeepSeek-V3.1-Terminus" "temperature: 0.6 top-p: 0.95" "DeepSeek" "831.5" "377.0" "671.0" "Open" "Hybrid" "On" "51.37" "46.94" "50.83" "51.81" "53.66" "50.0" "53.59" "51.96" "55.14" "53.8" "54.64" "48.31" "50.61" "Qwen3 30B A3B Thinking 2507" "https://huggingface.co/Qwen/Qwen3-30B-A3B-Thinking-2507" "temperature: 0.6 top-p: 0.95" "Qwen" "2830.0" "351.0" "30.0" "Open" "Think" "On" "50.44" "44.17" "49.17" "50.0" "57.32" "42.22" "49.72" "53.07" "50.27" "54.89" "56.83" "47.75" "58.54" "MiMo V2 Flash" "https://huggingface.co/XiaomiMiMo/MiMo-V2-Flash" "temperature: 0.8 top-p: 0.95" "XiaomiMiMo" "1477.5" "373.0" "309.0" "Open" "Think" "On" "50.32" "42.22" "53.06" "49.4" "54.27" "47.78" "51.93" "53.63" "52.97" "54.89" "54.64" "42.13" "52.44" "gpt-oss-120B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-120b" "Reasoning: medium temperature: 1.0 top-p: 1.0" "GPT" "759.5" "370.5" "117.0" "Open" "Think" "On" "49.11" "46.67" "51.39" "51.81" "47.56" "45.0" "51.38" "54.75" "50.27" "51.63" "47.54" "46.07" "45.12" "DeepSeek R1 (0528) (top_p: 0.95, temp:0.6)" "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528" "version: 0528 temperature: 0.6 top-p: 0.95" "DeepSeek" "1177.5" "554.0" "671.0" "Open" "Think" "On" "48.79" "42.22" "49.44" "50.0" "53.05" "47.22" "48.62" "50.28" "48.11" "51.63" "54.1" "44.38" "53.05" "Gauss2.3 Hybrid" "" "" "Gauss" "546.0" "308.0" "" "Proprietary" "Hybrid" "On" "46.58" "39.72" "45.56" "48.8" "48.17" "45.0" "44.2" "53.63" "45.41" "52.17" "51.91" "44.94" "47.56" "Mistral Large 3 675B Instruct 2512" "https://huggingface.co/mistralai/Mistral-Large-3-675B-Instruct-2512" "temperature: 0.15" "mistralai" "448.0" "448.0" "675.0" "Open" "Instruct" "Off" "45.21" "41.39" "44.17" "50.6" "46.34" "46.11" "43.65" "45.81" "44.32" "49.46" "49.18" "42.13" "44.51" "DeepSeek V3 (0324) (top_p: 0.95, temp:1.3)" "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324" "version: 0324 temperature: 1.3 top-p: 0.95" "DeepSeek" "408.0" "408.0" "671.0" "Open" "Instruct" "Off" "45.09" "37.5" "43.61" "46.99" "51.22" "45.56" "44.75" "44.69" "44.32" "48.91" "49.18" "44.94" "49.39" "Qwen3 32B (think)" "https://huggingface.co/Qwen/Qwen3-32B" "temperature: 0.6 top-p: 0.95" "Qwen" "1113.0" "390.0" "32.8" "Open" "Hybrid" "On" "44.44" "38.89" "41.67" "48.8" "50.0" "38.33" "46.41" "44.69" "44.86" "44.57" "50.82" "46.07" "47.56" "Qwen3 30B A3B Instruct 2507" "https://huggingface.co/Qwen/Qwen3-30B-A3B-Instruct-2507" "temperature: 0.7 top-p: 0.8" "Qwen" "441.5" "441.5" "30.0" "Open" "Instruct" "Off" "42.79" "34.44" "43.89" "40.96" "48.78" "38.89" "41.99" "46.93" "44.32" "42.93" "48.09" "43.26" "46.95" "MiniMax-M2 (230B A10B)" "https://huggingface.co/MiniMaxAI/MiniMax-M2" "temperature:1.0 top-p: 0.95" "MiniMaxAI" "1142.0" "325.0" "230.0" "Open" "Think" "On" "42.43" "31.94" "46.11" "37.35" "45.73" "38.33" "45.3" "45.25" "48.65" "41.3" "46.45" "42.7" "46.95" "A.X 4.0" "https://huggingface.co/skt/A.X-4.0" "" "SKT" "412.5" "412.5" "71.9" "Open" "Instruct" "Off" "41.59" "38.89" "41.11" "43.98" "49.39" "36.11" "45.86" "43.58" "44.32" "39.67" "43.17" "39.89" "36.59" "gpt-oss-20B (Reasoning: medium)" "https://huggingface.co/openai/gpt-oss-20b" "Reasoning: medium temperature: 1.0 top-p: 1.0" "GPT" "953.5" "326.0" "21.0" "Open" "Think" "On" "41.18" "36.67" "42.78" "45.78" "45.73" "37.78" "35.91" "41.9" "39.46" "51.09" "40.44" "38.76" "41.46" "Gemma 3 27B it" "https://huggingface.co/google/gemma-3-27b-it" "temperature: 1.0 top-p: 0.95" "Gemma" "380.0" "380.0" "27.0" "Open" "Instruct" "Off" "40.86" "34.44" "35.0" "37.35" "43.9" "42.22" "43.65" "47.49" "41.08" "44.02" "53.55" "39.33" "40.24" "Tongyi DeepResearch 30B A3B" "https://huggingface.co/Alibaba-NLP/Tongyi-DeepResearch-30B-A3B" "temperature: 0.6 top-p: 0.95" "Alibaba" "1147.0" "408.0" "30.0" "Open" "Think" "On" "40.1" "36.11" "40.83" "43.37" "44.51" "32.78" "37.02" "44.69" "38.92" "43.48" "46.45" "37.08" "39.63" "Mistral Small 3.2 24B Instruct 2506" "https://huggingface.co/mistralai/Mistral-Small-3.2-24B-Instruct-2506" "temperature: 0.15 top-p: 0.95" "mistralai" "369.0" "369.0" "24.0" "Open" "Instruct" "Off" "39.09" "31.39" "40.0" "36.75" "42.07" "34.44" "44.2" "41.9" "42.16" "45.65" "40.98" "37.64" "38.41" "K2-Think" "https://huggingface.co/LLM360/K2-Think" "temperature: 1.0 top-p: 0.95" "LLM360" "1835.0" "486.0" "32.8" "Open" "Think" "On" "35.06" "29.17" "36.11" "30.12" "44.51" "26.67" "33.15" "38.55" "37.84" "41.85" "37.7" "33.71" "36.59" "Kanana 2 30B A3B Thinking" "https://huggingface.co/kakaocorp/kanana-2-30b-a3b-thinking" "temperature: 0.6 top-p: 0.95 top-k: 20" "Kakao" "4263.0" "854.5" "31.0" "Open" "Think" "On" "34.5" "25.28" "43.06" "38.55" "40.24" "25.0" "34.25" "37.99" "32.43" "34.24" "37.7" "28.65" "38.41" "KAT Dev 72B Exp" "https://huggingface.co/Kwaipilot/KAT-Dev-72B-Exp" "temperature:0.6 top-p: 0.95" "KAT" "397.0" "397.0" "72.0" "Open" "Instruct" "Off" "33.94" "25.0" "32.22" "31.93" "37.2" "34.44" "33.15" "43.02" "37.84" "36.96" "37.7" "30.34" "38.41" "Olmo 3 32B Think" "https://huggingface.co/allenai/Olmo-3-32B-Think" "temperature: 1 top-p: 0.95 top-k: 50" "allenai" "3360.5" "473.0" "32.0" "Open" "Think" "On" "33.94" "30.56" "41.39" "30.12" "31.1" "25.0" "34.25" "35.75" "33.51" "36.41" "37.16" "31.46" "35.98" "EXAONE 4.0 32B (think)" "https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B" "temperature: 0.6 top-p: 0.95" "Exaone" "1274.5" "503.0" "32.0" "Open" "Hybrid" "On" "33.82" "33.61" "38.33" "28.92" "35.98" "26.11" "35.91" "34.08" "38.92" "35.33" "33.88" "28.09" "31.71" "Apriel 1.5 15B Thinker" "https://huggingface.co/ServiceNow-AI/Apriel-1.5-15b-Thinker" "temperature: 0.6 top-p: 0.95" "Apriel" "2238.0" "375.0" "15.0" "Open" "Think" "On" "31.92" "23.61" "39.72" "30.72" "38.41" "24.44" "40.88" "37.99" "32.43" "32.61" "22.95" "28.65" "31.71" "HyperCLOVAX SEED Think 14B (think)" "https://huggingface.co/naver-hyperclovax/HyperCLOVAX-SEED-Think-14B" "temperature: 0.5 top-p: 0.6" "HCX" "1444.0" "382.5" "14.7" "Open" "Hybrid" "On" "31.84" "32.22" "37.22" "31.93" "38.41" "27.78" "32.6" "30.17" "29.19" "32.07" "33.33" "25.28" "26.22" "Kanana 2 30B A3B Instruct" "https://huggingface.co/kakaocorp/kanana-2-30b-a3b-instruct" "temperature: 0" "Kakao" "1195.0" "1195.0" "31.0" "Open" "Instruct" "Off" "30.84" "33.06" "39.44" "37.35" "33.54" "17.78" "26.52" "25.14" "30.81" "29.35" "31.15" "23.03" "32.93" "Dhanishtha-2.0 Preview" "https://huggingface.co/HelpingAI/Dhanishtha-2.0-preview" "temperature: 0.7 top-p: 0.9" "HelpingAI" "520.0" "356.0" "14.8" "Open" "Think" "On" "25.81" "23.33" "27.22" "30.12" "32.32" "20.56" "20.99" "26.26" "25.95" "25.54" "30.6" "23.6" "25.0" "ERNIE 4.5 21B A3B Thinking" "https://huggingface.co/baidu/ERNIE-4.5-21B-A3B-Thinking" "temperature: 0.6 top-p: 0.95" "ERNIE" "1637.0" "541.0" "21.0" "Open" "Think" "On" "25.32" "17.5" "31.11" "18.67" "39.02" "23.33" "24.31" "24.58" "26.49" "24.46" "30.6" "19.1" "27.44" "Solar Pro Preview (top_p:0.95, temp: 0.7)" "https://huggingface.co/upstage/solar-pro-preview-instruct" "temperature: 0.7 top-p: 0.95" "Solar" "260.0" "260.0" "22.0" "Open" "Instruct" "Off" "20.73" "9.72" "22.22" "21.08" "24.39" "9.44" "18.23" "24.02" "29.73" "29.89" "33.33" "22.47" "12.8" "Mi:dm 2.0 Base Instruct" "https://huggingface.co/K-intelligence/Midm-2.0-Base-Instruct" "temperature: 0.8 top-p: 0.7" "KT" "316.0" "316.0" "11.5" "Open" "Instruct" "Off" "20.25" "26.39" "26.39" "17.47" "26.83" "13.33" "18.78" "20.67" "16.22" "20.65" "21.31" "12.92" "9.15" "Kanana 1.5 15.7B A3B Instruct" "https://huggingface.co/kakaocorp/kanana-1.5-15.7b-a3b-instruct" "temperature: 1.0 top-p: 0.95" "Kakao" "414.0" "414.0" "15.7" "Open" "Instruct" "Off" "11.71" "21.11" "20.28" "10.84" "15.24" "5.56" "7.73" "8.94" "9.19" "8.15" "5.46" "5.06" "4.88"