MackenzieTest / README.md
mackenzietechdocs's picture
Add Artificial Analysis evaluations for ministral-8b
2748f60 verified
|
raw
history blame
1.26 kB
metadata
# Hugging Face model-card evaluation metadata for the MackenzieTest model.
# All scores below are attributed to the Artificial Analysis API (see
# `source` at the end of the result entry).
model-index:
  - name: MackenzieTest
    results:
      - task:
          # NOTE(review): `evaluation` does not look like a Hub pipeline task
          # id (e.g. `text-generation`) - confirm what the consumer expects.
          type: evaluation
        dataset:
          name: Artificial Analysis Benchmarks
          # NOTE(review): presumably a free-form identifier rather than a Hub
          # dataset id - verify against the model-index spec.
          type: artificial_analysis
        metrics:
          # Composite indices. These three values appear to be on a 0-100
          # scale, unlike the per-benchmark fractions that follow - TODO
          # confirm against the upstream API.
          - name: Artificial Analysis Intelligence Index
            type: artificial_analysis_intelligence_index
            value: 28.2
          - name: Artificial Analysis Coding Index
            type: artificial_analysis_coding_index
            value: 18.4
          - name: Artificial Analysis Math Index
            type: artificial_analysis_math_index
            value: 31.7
          # Individual benchmarks. Values are written as fractions (all are
          # below 1 here). Display names use each benchmark's published
          # spelling; the `type` identifiers are unchanged.
          - name: MMLU-Pro
            type: mmlu_pro
            value: 0.642
          - name: GPQA
            type: gpqa
            value: 0.471
          # HLE: presumably "Humanity's Last Exam" - confirm.
          - name: HLE
            type: hle
            value: 0.043
          - name: LiveCodeBench
            type: livecodebench
            value: 0.303
          - name: SciCode
            type: scicode
            value: 0.208
          - name: AIME 2025
            type: aime_25
            value: 0.317
          - name: IFBench
            type: ifbench
            value: 0.291
          # AA-LCR: presumably Artificial Analysis' long-context reasoning
          # benchmark - confirm.
          - name: AA-LCR
            type: lcr
            value: 0.24
          - name: Terminal-Bench Hard
            type: terminalbench_hard
            value: 0.043
          # Tau2-Bench: presumably the "tau-squared" agentic benchmark -
          # confirm the exact variant reported upstream.
          - name: Tau2-Bench
            type: tau2
            value: 0.266
        source:
          name: Artificial Analysis API
          url: https://artificialanalysis.ai