import { Benchmark } from "./types";

/**
 * Benchmark entries for DeepSeek models.
 *
 * Each entry carries the model name, the provider, an input/output price
 * pair, a map of benchmark scores keyed by benchmark identifier, and the URL
 * the figures were taken from.
 *
 * NOTE(review): the price unit is not stated in this file — presumably it is
 * defined by the `Benchmark` type; confirm there.
 * Entries list only the benchmarks that have a score; keys are omitted rather
 * than set to a placeholder.
 */
export const deepseekBenchmarks: Benchmark[] = [
  {
    model: "DeepSeek-R1-0528",
    provider: "DeepSeek",
    inputPrice: 0.55,
    outputPrice: 2.19,
    benchmark: {
      aime_24: 91.4,
      aime_2025: 87.5,
      // Both GPQA keys carry the same score for this model.
      gpqa_diamond: 81.0,
      gpqa: 81.0,
      mmlu_pro: 85.0,
      mmlu: 93.4,
      simpleqa: 27.8,
      lcb: 73.3,
      aider_polyglot: 71.6,
      swe_bench_verified: 57.6,
      humanitys_last_exam: 17.7,
      tau_bench_airline: 53.5,
      tau_bench_retail: 63.9,
    },
    source: "https://huggingface.co/deepseek-ai/DeepSeek-R1-0528",
  },
  {
    model: "DeepSeek-V3-0324",
    provider: "DeepSeek",
    inputPrice: 0.27,
    outputPrice: 1.10,
    benchmark: {
      // NOTE(review): mmlu, gpqa_diamond, simpleqa, aider_polyglot and
      // swe_bench_verified are identical to the DeepSeek-V3 entry below —
      // confirm they were not carried over by copy/paste.
      mmlu: 87.1,
      mmlu_pro: 81.2,
      // NOTE(review): gpqa and gpqa_diamond differ here (68.4 vs 59.1) while
      // DeepSeek-R1-0528 gives both keys the same value — verify against the
      // source page which figure belongs under which key.
      gpqa: 68.4,
      gpqa_diamond: 59.1,
      aime_24: 59.4,
      lcb: 49.2,
      simpleqa: 24.9,
      aider_polyglot: 49.6,
      swe_bench_verified: 42.0,
    },
    source: "https://huggingface.co/deepseek-ai/DeepSeek-V3-0324",
  },
  {
    model: "DeepSeek-V3",
    provider: "DeepSeek",
    inputPrice: 0.27,
    outputPrice: 1.10,
    benchmark: {
      mmlu: 87.1,
      mmlu_pro: 64.4,
      gpqa_diamond: 59.1,
      simpleqa: 24.9,
      aime_24: 39.2,
      lcb: 37.6,
      aider_polyglot: 49.6,
      swe_bench_verified: 42.0,
    },
    source: "https://huggingface.co/deepseek-ai/DeepSeek-V3",
  },
  {
    model: "DeepSeek-R1",
    provider: "DeepSeek",
    inputPrice: 0.55,
    outputPrice: 2.19,
    benchmark: {
      mmlu: 90.8,
      mmlu_pro: 84.0,
      gpqa_diamond: 71.5,
      simpleqa: 30.1,
      lcb: 65.9,
      swe_bench_verified: 49.2,
      aider_polyglot: 53.3,
      aime_24: 79.8,
    },
    source: "https://huggingface.co/deepseek-ai/DeepSeek-R1",
  },
];