MackenzieTest / README.md
mackenzietechdocs's picture
Add Artificial Analysis evaluations for ministral-14b
eb419b2 verified
|
raw
history blame
1.25 kB
metadata
# Model-card evaluation metadata: a single result entry for MackenzieTest
# holding thirteen metrics under one dataset entry, plus a source attribution.
model-index:
  - name: MackenzieTest
    results:
      - task:
          # NOTE(review): `evaluation` looks like a generic placeholder; the
          # consuming schema may expect a concrete task id (for example
          # `text-generation`) - confirm against the Hub metadata spec.
          type: evaluation
        dataset:
          # One umbrella dataset entry is used for every metric listed below.
          name: Artificial Analysis Benchmarks
          type: artificial_analysis
        metrics:
          # The three index metrics carry values above 1 (30.5, 21, 30), while
          # every other metric in this list is a fraction below 1.
          # Presumably indices are on a 0-100 scale and the rest are 0-1
          # scores - TODO confirm before comparing them side by side.
          - name: Artificial Analysis Intelligence Index
            type: artificial_analysis_intelligence_index
            value: 30.5
          - name: Artificial Analysis Coding Index
            type: artificial_analysis_coding_index
            value: 21
          - name: Artificial Analysis Math Index
            type: artificial_analysis_math_index
            value: 30
          # NOTE(review): from here on each `name` is the title-cased form of
          # its `type` slug (e.g. `Mmlu Pro`, `Gpqa`, `Hle`); presumably
          # auto-generated - confirm whether the benchmarks' usual spellings
          # are wanted for display.
          - name: Mmlu Pro
            type: mmlu_pro
            value: 0.693
          - name: Gpqa
            type: gpqa
            value: 0.572
          - name: Hle
            type: hle
            value: 0.046
          - name: Livecodebench
            type: livecodebench
            value: 0.351
          - name: Scicode
            type: scicode
            value: 0.236
          - name: Aime 25
            type: aime_25
            value: 0.3
          - name: Ifbench
            type: ifbench
            value: 0.32
          - name: Lcr
            type: lcr
            value: 0.22
          - name: Terminalbench Hard
            type: terminalbench_hard
            value: 0.043
          - name: Tau2
            type: tau2
            value: 0.272
        # Attribution for this result entry; only a name and a site URL are
        # recorded here (no per-metric or dated link).
        source:
          name: Artificial Analysis API
          url: https://artificialanalysis.ai