---
base_model:
- NoesisLab/Kai-3B-Instruct
model-index:
- name: Kai-3B-Instruct
  results:
  - task:
      type: multiple-choice
      name: ARC-Challenge
    dataset:
      name: ARC-Challenge
      type: allenai/ai2_arc
      config: ARC-Challenge
      split: test
    metrics:
    - type: acc_norm
      value: 51.88
      name: Accuracy (normalized)
  - task:
      type: multiple-choice
      name: HellaSwag
    dataset:
      name: HellaSwag
      type: Rowan/hellaswag
      split: validation
    metrics:
    - type: acc_norm
      value: 69.53
      name: Accuracy (normalized)
  - task:
      type: multiple-choice
      name: MMLU
    dataset:
      name: MMLU
      type: cais/mmlu
      split: test
    metrics:
    - type: acc
      value: 53.62
      name: Accuracy
  - task:
      type: multiple-choice
      name: PIQA
    dataset:
      name: PIQA
      type: piqa
      split: validation
    metrics:
    - type: acc_norm
      value: 77.53
      name: Accuracy (normalized)
  - task:
      type: text-generation
      name: HumanEval
    dataset:
      name: HumanEval
      type: openai/openai_humaneval
      split: test
    metrics:
    - type: pass@1
      value: 39.02
      name: Pass@1
  - task:
      type: text-generation
      name: GSM8K
    dataset:
      name: GSM8K
      type: gsm8k
      split: test
    metrics:
    - type: exact_match
      value: 39.27
      name: Exact Match (flexible)
pipeline_tag: text-generation
tags:
- open4bits
- smollm3
- math
- reasoning
- distilled
- ads
license: apache-2.0
language:
- en
---