MackenzieTest / README.md
mackenzietechdocs's picture
Add Artificial Analysis evaluations for deepseek-v3-2
69ac6b1 verified
|
raw
history blame
1.32 kB
---
# Model card front matter: the license plus one model-index entry that
# records evaluation results attributed to Artificial Analysis.
license: mit
model-index:
- name: MackenzieTest
  results:
  # One result entry groups a task, the dataset it was measured on, the
  # list of metrics, and the source the numbers were taken from.
  - task:
      type: evaluation
    dataset:
      name: Artificial Analysis Benchmarks
      type: artificial_analysis
    metrics:
    # Composite indices.
    # NOTE(review): these three values are on a larger scale than the
    # fractional scores further down - presumably 0-100; confirm.
    - name: Artificial Analysis Intelligence Index
      type: artificial_analysis_intelligence_index
      value: 52.4
    - name: Artificial Analysis Coding Index
      type: artificial_analysis_coding_index
      value: 42.8
    - name: Artificial Analysis Math Index
      type: artificial_analysis_math_index
      value: 59
    # Individual benchmark scores.
    # NOTE(review): values look like fractions in [0, 1] - confirm.
    - name: Mmlu Pro
      type: mmlu_pro
      value: 0.837
    - name: Gpqa
      type: gpqa
      value: 0.751
    - name: Hle
      type: hle
      value: 0.105
    - name: Livecodebench
      type: livecodebench
      value: 0.593
    - name: Scicode
      type: scicode
      value: 0.387
    - name: Aime 25
      type: aime_25
      value: 0.59
    - name: Ifbench
      type: ifbench
      value: 0.49
    - name: Lcr
      type: lcr
      value: 0.39
    - name: Terminalbench Hard
      type: terminalbench_hard
      value: 0.305
    - name: Tau2
      type: tau2
      value: 0.789
    source:
      name: Artificial Analysis API
      url: https://artificialanalysis.ai
---