MackenzieTest / README.md
mackenzietechdocs's picture
Add Artificial Analysis evaluations for kimi-k2-thinking
5bb9c3c verified
|
raw
history blame
1.33 kB

---
# Model card metadata: license plus one evaluation result set for the model.
# NOTE(review): if this block is embedded as Markdown front matter, a closing
# `---` line must follow it before the README body - confirm against the file.
license: mit

model-index:
  - name: MackenzieTest
    results:
      # One result entry: task, dataset, metrics and source are siblings.
      - task:
          type: evaluation
        dataset:
          name: Artificial Analysis Benchmarks
          type: artificial_analysis
        metrics:
          # Values are reproduced exactly as given. The three "index" metrics
          # appear to be on a different scale from the per-benchmark scores
          # below (which all lie between 0 and 1) - TODO confirm.
          - name: Artificial Analysis Intelligence Index
            type: artificial_analysis_intelligence_index
            value: 67
          - name: Artificial Analysis Coding Index
            type: artificial_analysis_coding_index
            value: 52.2
          - name: Artificial Analysis Math Index
            type: artificial_analysis_math_index
            value: 94.7
          - name: Mmlu Pro
            type: mmlu_pro
            value: 0.848
          - name: Gpqa
            type: gpqa
            value: 0.838
          - name: Hle
            type: hle
            value: 0.223
          - name: Livecodebench
            type: livecodebench
            value: 0.853
          - name: Scicode
            type: scicode
            value: 0.424
          - name: Aime 25
            type: aime_25
            value: 0.947
          - name: Ifbench
            type: ifbench
            value: 0.681
          - name: Lcr
            type: lcr
            value: 0.663
          - name: Terminalbench Hard
            type: terminalbench_hard
            value: 0.291
          - name: Tau2
            type: tau2
            value: 0.93
        # Where the metric values above were obtained.
        source:
          name: Artificial Analysis API
          url: https://artificialanalysis.ai