| --- |
| language: |
| - pt |
| - en |
| license: apache-2.0 |
| model-index: |
| - name: open-cabrita3b |
| results: |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: AI2 Reasoning Challenge (25-Shot) |
| type: ai2_arc |
| config: ARC-Challenge |
| split: test |
| args: |
| num_few_shot: 25 |
| metrics: |
| - type: acc_norm |
| value: 33.79 |
| name: normalized accuracy |
| source: |
| url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: HellaSwag (10-Shot) |
| type: hellaswag |
| split: validation |
| args: |
| num_few_shot: 10 |
| metrics: |
| - type: acc_norm |
| value: 55.35 |
| name: normalized accuracy |
| source: |
| url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: MMLU (5-Shot) |
| type: cais/mmlu |
| config: all |
| split: test |
| args: |
| num_few_shot: 5 |
| metrics: |
| - type: acc |
| value: 25.16 |
| name: accuracy |
| source: |
| url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: TruthfulQA (0-shot) |
| type: truthful_qa |
| config: multiple_choice |
| split: validation |
| args: |
| num_few_shot: 0 |
| metrics: |
| - type: mc2 |
| value: 38.5 |
| source: |
| url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: Winogrande (5-shot) |
| type: winogrande |
| config: winogrande_xl |
| split: validation |
| args: |
| num_few_shot: 5 |
| metrics: |
| - type: acc |
| value: 59.43 |
| name: accuracy |
| source: |
| url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: GSM8k (5-shot) |
| type: gsm8k |
| config: main |
| split: test |
| args: |
| num_few_shot: 5 |
| metrics: |
| - type: acc |
| value: 0.99 |
| name: accuracy |
| source: |
| url: https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: ENEM Challenge (No Images) |
| type: eduagarcia/enem_challenge |
| split: train |
| args: |
| num_few_shot: 3 |
| metrics: |
| - type: acc |
| value: 17.98 |
| name: accuracy |
| source: |
| url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open Portuguese LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: BLUEX (No Images) |
| type: eduagarcia-temp/BLUEX_without_images |
| split: train |
| args: |
| num_few_shot: 3 |
| metrics: |
| - type: acc |
| value: 21.14 |
| name: accuracy |
| source: |
| url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open Portuguese LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: OAB Exams |
| type: eduagarcia/oab_exams |
| split: train |
| args: |
| num_few_shot: 3 |
| metrics: |
| - type: acc |
| value: 22.69 |
| name: accuracy |
| source: |
| url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open Portuguese LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: Assin2 RTE |
| type: assin2 |
| split: test |
| args: |
| num_few_shot: 15 |
| metrics: |
| - type: f1_macro |
| value: 43.01 |
| name: f1-macro |
| source: |
| url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open Portuguese LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: Assin2 STS |
| type: eduagarcia/portuguese_benchmark |
| split: test |
| args: |
| num_few_shot: 15 |
| metrics: |
| - type: pearson |
| value: 8.92 |
| name: pearson |
| source: |
| url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open Portuguese LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: FaQuAD NLI |
| type: ruanchaves/faquad-nli |
| split: test |
| args: |
| num_few_shot: 15 |
| metrics: |
| - type: f1_macro |
| value: 43.97 |
| name: f1-macro |
| source: |
| url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open Portuguese LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: HateBR Binary |
| type: ruanchaves/hatebr |
| split: test |
| args: |
| num_few_shot: 25 |
| metrics: |
| - type: f1_macro |
| value: 50.46 |
| name: f1-macro |
| source: |
| url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open Portuguese LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: PT Hate Speech Binary |
| type: hate_speech_portuguese |
| split: test |
| args: |
| num_few_shot: 25 |
| metrics: |
| - type: f1_macro |
| value: 41.19 |
| name: f1-macro |
| source: |
| url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open Portuguese LLM Leaderboard |
| - task: |
| type: text-generation |
| name: Text Generation |
| dataset: |
| name: tweetSentBR |
| type: eduagarcia-temp/tweetsentbr |
| split: test |
| args: |
| num_few_shot: 25 |
| metrics: |
| - type: f1_macro |
| value: 47.96 |
| name: f1-macro |
| source: |
| url: https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard?query=22h/open-cabrita3b |
| name: Open Portuguese LLM Leaderboard |
| --- |
| The Cabrita models are a collection of continually pre-trained, tokenizer-adapted models for the Portuguese language. |
| This artifact is the 3-billion-parameter variant. |
|
|
| The initial weights were taken from the [OpenLLaMA project](https://github.com/openlm-research/open_llama), |
| specifically the open_llama_3b checkpoint. |
| |
| ``` |
| @misc{larcher2023cabrita, |
| title={Cabrita: closing the gap for foreign languages}, |
| author={Celio Larcher and Marcos Piau and Paulo Finardi and Pedro Gengo and Piero Esposito and Vinicius Caridá}, |
| year={2023}, |
| eprint={2308.11878}, |
| archivePrefix={arXiv}, |
| primaryClass={cs.CL} |
| } |
| ``` |
| # [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/HuggingFaceH4/open_llm_leaderboard) |
| Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/details_22h__open-cabrita3b) |
| |
| | Metric |Value| |
| |---------------------------------|----:| |
| |Avg. |35.54| |
| |AI2 Reasoning Challenge (25-Shot)|33.79| |
| |HellaSwag (10-Shot) |55.35| |
| |MMLU (5-Shot) |25.16| |
| |TruthfulQA (0-shot) |38.50| |
| |Winogrande (5-shot) |59.43| |
| |GSM8k (5-shot) | 0.99| |
| |
| |
| # [Open Portuguese LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/eduagarcia/open_pt_llm_leaderboard) |
| Detailed results can be found [here](https://huggingface.co/datasets/eduagarcia-temp/llm_pt_leaderboard_raw_results/tree/main/22h/open-cabrita3b) |
| |
| | Metric | Value | |
| |--------------------------|---------| |
| |Average |**33.04**| |
| |ENEM Challenge (No Images)| 17.98| |
| |BLUEX (No Images) | 21.14| |
| |OAB Exams | 22.69| |
| |Assin2 RTE | 43.01| |
| |Assin2 STS | 8.92| |
| |FaQuAD NLI | 43.97| |
| |HateBR Binary | 50.46| |
| |PT Hate Speech Binary | 41.19| |
| |tweetSentBR | 47.96| |
| |
| |