MackenzieTest / README.md
mackenzietechdocs's picture
Add Artificial Analysis evaluations for deepseek-v3-2
69ac6b1 verified
|
raw
history blame
1.32 kB

---
# Model card metadata: license plus one `model-index` entry that records a
# set of benchmark results for the model named below.
# NOTE(review): if this block is embedded as README.md front matter, a closing
# `---` line is also required before the Markdown body -- confirm against the
# actual file.
license: mit

model-index:
  - name: MackenzieTest
    results:
      # A single result entry: task, dataset, metrics and source are siblings.
      - task:
          type: evaluation
        dataset:
          name: Artificial Analysis Benchmarks
          type: artificial_analysis
        # Values are copied verbatim from the original metadata.
        # NOTE(review): the three index metrics look like they are on a
        # different scale from the fractional scores below -- confirm units
        # with the upstream source before comparing them.
        metrics:
          - name: Artificial Analysis Intelligence Index
            type: artificial_analysis_intelligence_index
            value: 52.4
          - name: Artificial Analysis Coding Index
            type: artificial_analysis_coding_index
            value: 42.8
          - name: Artificial Analysis Math Index
            type: artificial_analysis_math_index
            value: 59
          - name: Mmlu Pro
            type: mmlu_pro
            value: 0.837
          - name: Gpqa
            type: gpqa
            value: 0.751
          - name: Hle
            type: hle
            value: 0.105
          - name: Livecodebench
            type: livecodebench
            value: 0.593
          - name: Scicode
            type: scicode
            value: 0.387
          - name: Aime 25
            type: aime_25
            value: 0.59
          - name: Ifbench
            type: ifbench
            value: 0.49
          - name: Lcr
            type: lcr
            value: 0.39
          - name: Terminalbench Hard
            type: terminalbench_hard
            value: 0.305
          - name: Tau2
            type: tau2
            value: 0.789
        source:
          name: Artificial Analysis API
          url: https://artificialanalysis.ai