Update README.md

README.md CHANGED

@@ -652,23 +652,116 @@ All results reported below correspond to a 0-shot evaluation setting.
 
 ### Spanish
 
-
+| task                         | metric                       |   result |
+|:-----------------------------|:-----------------------------|---------:|
+| belebele_spa_Latn            | acc                          |     0.72 |
+| cocoteros_es                 | bleu                         |     0.05 |
+| cocoteros_es                 | rouge1                       |     0.32 |
+| copa_es                      | acc                          |     0.75 |
+| escola                       | mcc                          |        0 |
+| flores_es                    | bleu                         |     0.25 |
+| mgsm_direct_es_spanish_bench | exact_match,flexible-extract |     0.08 |
+| mmmlu_es                     | acc                          |     0.41 |
+| openbookqa_es                | acc                          |     0.24 |
+| paws_es_spanish_bench        | acc                          |     0.32 |
+| phrases_es-va                | bleu                         |     0.59 |
+| phrases_va-es                | bleu                         |     0.69 |
+| wnli_es                      | acc                          |     0.01 |
+| xlsum_es                     | bleu                         |     0.02 |
+| xnli_es_spanish_bench        | acc                          |     0.28 |
+| xquad_es                     | f1                           |     0.07 |
+| xstorycloze_es               | acc                          |     0.52 |
 
 ### Catalan
 
-
+| task               | metric                       |   result |
+|:-------------------|:-----------------------------|---------:|
+| arc_ca_challenge   | acc                          |     0.37 |
+| arc_ca_easy        | acc                          |     0.71 |
+| belebele_cat_Latn  | acc                          |     0.71 |
+| cabreu_abstractive | bleu                         |     0.04 |
+| cabreu_extractive  | rouge1                       |     0.15 |
+| cabreu_extreme     | bleu                         |     0.03 |
+| catalanqa          | f1                           |     0.11 |
+| catcola            | mcc                          |        0 |
+| cocoteros_va       | bleu                         |     0.05 |
+| cocoteros_va       | rouge1                       |     0.33 |
+| copa_ca            | acc                          |     0.74 |
+| coqcat             | f1                           |      0.6 |
+| flores_ca          | bleu                         |     0.31 |
+| mgsm_direct_ca     | exact_match,flexible-extract |     0.28 |
+| openbookqa_ca      | acc                          |      0.2 |
+| parafraseja        | acc                          |     0.36 |
+| paws_ca            | acc                          |     0.41 |
+| phrases_ca-va      | bleu                         |     0.75 |
+| phrases_va-ca      | bleu                         |     0.83 |
+| piqa_ca            | acc                          |     0.53 |
+| siqa_ca            | acc                          |      0.3 |
+| teca               | acc                          |     0.39 |
+| wnli_ca            | acc                          |     0.13 |
+| xquad_ca           | f1                           |     0.11 |
+| xstorycloze_ca     | acc                          |     0.52 |
 
 ### Basque
 
-
+| task               | metric                       |   result |
+|:-------------------|:-----------------------------|---------:|
+| arc_eu_challenge   | acc                          |     0.27 |
+| arc_eu_easy        | acc                          |     0.55 |
+| belebele_eus_Latn  | acc                          |     0.67 |
+| eus_proficiency    | acc                          |     0.33 |
+| eus_reading        | acc                          |      0.5 |
+| eus_trivia         | acc                          |      0.5 |
+| flores_eu          | bleu                         |     0.19 |
+| mgsm_direct_eu     | exact_match,flexible-extract |     0.16 |
+| mgsm_native_cot_eu | exact_match,get-answer       |        0 |
+| paws_eu            | acc                          |     0.34 |
+| piqa_eu            | acc                          |     0.39 |
+| qnlieu             | acc                          |     0.19 |
+| wnli_eu            | acc                          |     -0.1 |
+| xcopa_eu           | acc                          |     0.51 |
+| xnli_eu            | acc                          |      0.3 |
+| xnli_eu_native     | acc                          |      0.3 |
+| xstorycloze_eu     | acc                          |     0.38 |
 
 ### Galician
 
-
+| task              | metric                       |   result |
+|:------------------|:-----------------------------|---------:|
+| belebele_glg_Latn | acc                          |     0.73 |
+| flores_gl         | bleu                         |     0.29 |
+| galcola           | mcc                          |        0 |
+| mgsm_direct_gl    | exact_match,flexible-extract |      0.1 |
+| openbookqa_gl     | acc                          |     0.16 |
+| parafrases_gl     | acc                          |     0.22 |
+| paws_gl           | acc                          |      0.4 |
+| summarization_gl  | bleu                         |     0.04 |
+| xnli_gl           | acc                          |     0.37 |
+| xstorycloze_gl    | acc                          |     0.49 |
 
 ### English
 
-
+| task               | metric                       |   result |
+|:-------------------|:-----------------------------|---------:|
+| arc_challenge      | acc                          |      0.4 |
+| arc_easy           | acc                          |     0.73 |
+| belebele_eng_Latn  | acc                          |     0.77 |
+| cola               | mcc                          |        0 |
+| copa               | acc                          |     0.78 |
+| hellaswag          | acc                          |     0.54 |
+| hellaswag          | acc_norm                     |    -0.32 |
+| mgsm_direct_en     | exact_match,flexible-extract |     0.09 |
+| mmlu               | acc                          |     0.45 |
+| openbookqa         | acc                          |     0.18 |
+| paws_en            | acc                          |     0.44 |
+| piqa               | acc                          |     0.65 |
+| social_iqa         | acc                          |     0.25 |
+| truthfulqa_mc1     | acc                          |     0.19 |
+| truthfulqa_mc2     | acc                          |     0.41 |
+| wnli               | acc                          |     0.41 |
+| xnli_en_iberobench | acc                          |     0.36 |
+| xquad_en           | f1                           |     0.17 |
+| xstorycloze_en     | acc                          |      0.6 |
 
 ### LLM-as-a-judge
 
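The task identifiers and compound metric names above (e.g. `exact_match,flexible-extract`) follow the conventions of EleutherAI's lm-evaluation-harness, so any single row should be reproducible with a 0-shot harness run. The sketch below is an assumption about the evaluation setup, not this project's documented command, and the checkpoint name is a placeholder:

```python
# Minimal sketch of a 0-shot evaluation with EleutherAI's lm-evaluation-harness
# (pip install lm-eval). Assumes the tables above were produced with this
# harness; the pretrained checkpoint below is a placeholder, not the model
# actually evaluated in this README.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",                              # Hugging Face transformers backend
    model_args="pretrained=org/model-name",  # placeholder checkpoint id
    tasks=["belebele_spa_Latn", "copa_es"],  # any task names from the tables
    num_fewshot=0,                           # all results here are 0-shot
)

# Per-task metrics, e.g. {"belebele_spa_Latn": {"acc,none": 0.72, ...}, ...}
for task, metrics in results["results"].items():
    print(task, metrics)
```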