# Model card metadata in the Hugging Face `model-index` layout: one model
# entry with a single evaluation result (task, dataset, metrics, source).
#
# Structure matters here: `model-index`, `results` and `metrics` are lists,
# and each metric is its own mapping. Written flat, the repeated
# `name`/`type`/`value` keys are duplicate mapping keys, which YAML 1.2
# forbids and lenient parsers resolve by silently keeping only the last one.
license: mit
model-index:
  - name: MackenzieTest
    results:
      - task:
          type: evaluation
        dataset:
          name: Artificial Analysis Benchmarks
          type: artificial_analysis
        # Values are reproduced exactly as given.
        # NOTE(review): the three index metrics look like 0-100 scores and
        # the remaining ones like 0-1 fractions -- confirm with the source.
        metrics:
          - name: Artificial Analysis Intelligence Index
            type: artificial_analysis_intelligence_index
            value: 67
          - name: Artificial Analysis Coding Index
            type: artificial_analysis_coding_index
            value: 52.2
          - name: Artificial Analysis Math Index
            type: artificial_analysis_math_index
            value: 94.7
          - name: Mmlu Pro
            type: mmlu_pro
            value: 0.848
          - name: Gpqa
            type: gpqa
            value: 0.838
          - name: Hle
            type: hle
            value: 0.223
          - name: Livecodebench
            type: livecodebench
            value: 0.853
          - name: Scicode
            type: scicode
            value: 0.424
          - name: Aime 25
            type: aime_25
            value: 0.947
          - name: Ifbench
            type: ifbench
            value: 0.681
          - name: Lcr
            type: lcr
            value: 0.663
          - name: Terminalbench Hard
            type: terminalbench_hard
            value: 0.291
          - name: Tau2
            type: tau2
            value: 0.93
        # Any further `source` fields (for example a URL) must be indented
        # to the same level as `name` below to stay part of this mapping.
        source:
          name: Artificial Analysis API