All results reported below correspond to a 0-shot evaluation setting.
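
The task and metric identifiers in the tables below (e.g. `belebele_spa_Latn`, `exact_match,flexible-extract`) follow the naming conventions of EleutherAI's lm-evaluation-harness. As a minimal sketch of how a comparable 0-shot run might be reproduced, assuming a harness version that ships these tasks and using a placeholder model ID:

```python
# Minimal sketch of a 0-shot run with EleutherAI's lm-evaluation-harness
# (pip install lm-eval). Assumptions: "your-org/your-model" is a placeholder
# model ID, and the listed tasks exist in the installed harness version.
import lm_eval

results = lm_eval.simple_evaluate(
    model="hf",                                   # Hugging Face transformers backend
    model_args="pretrained=your-org/your-model",  # placeholder, not the model evaluated here
    tasks=["belebele_spa_Latn", "copa_es", "xstorycloze_es"],
    num_fewshot=0,                                # matches the 0-shot setting reported below
)

# Per-task scores are keyed by task name under "results".
for task, metrics in results["results"].items():
    print(task, metrics)
```
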
### Spanish

| task | metric | result |
|:-----------------------------|:-----------------------------|---------:|
| belebele_spa_Latn | acc | 0.72 |
| cocoteros_es | bleu | 0.05 |
| cocoteros_es | rouge1 | 0.32 |
| copa_es | acc | 0.75 |
| escola | mcc | 0 |
| flores_es | bleu | 0.25 |
| mgsm_direct_es_spanish_bench | exact_match,flexible-extract | 0.08 |
| mmmlu_es | acc | 0.41 |
| openbookqa_es | acc | 0.24 |
| paws_es_spanish_bench | acc | 0.32 |
| phrases_es-va | bleu | 0.59 |
| phrases_va-es | bleu | 0.69 |
| wnli_es | acc | 0.01 |
| xlsum_es | bleu | 0.02 |
| xnli_es_spanish_bench | acc | 0.28 |
| xquad_es | f1 | 0.07 |
| xstorycloze_es | acc | 0.52 |

### Catalan

| task | metric | result |
|:-------------------|:-----------------------------|---------:|
| arc_ca_challenge | acc | 0.37 |
| arc_ca_easy | acc | 0.71 |
| belebele_cat_Latn | acc | 0.71 |
| cabreu_abstractive | bleu | 0.04 |
| cabreu_extractive | rouge1 | 0.15 |
| cabreu_extreme | bleu | 0.03 |
| catalanqa | f1 | 0.11 |
| catcola | mcc | 0 |
| cocoteros_va | bleu | 0.05 |
| cocoteros_va | rouge1 | 0.33 |
| copa_ca | acc | 0.74 |
| coqcat | f1 | 0.6 |
| flores_ca | bleu | 0.31 |
| mgsm_direct_ca | exact_match,flexible-extract | 0.28 |
| openbookqa_ca | acc | 0.2 |
| parafraseja | acc | 0.36 |
| paws_ca | acc | 0.41 |
| phrases_ca-va | bleu | 0.75 |
| phrases_va-ca | bleu | 0.83 |
| piqa_ca | acc | 0.53 |
| siqa_ca | acc | 0.3 |
| teca | acc | 0.39 |
| wnli_ca | acc | 0.13 |
| xquad_ca | f1 | 0.11 |
| xstorycloze_ca | acc | 0.52 |

### Basque

| task | metric | result |
|:-------------------|:-----------------------------|---------:|
| arc_eu_challenge | acc | 0.27 |
| arc_eu_easy | acc | 0.55 |
| belebele_eus_Latn | acc | 0.67 |
| eus_proficiency | acc | 0.33 |
| eus_reading | acc | 0.5 |
| eus_trivia | acc | 0.5 |
| flores_eu | bleu | 0.19 |
| mgsm_direct_eu | exact_match,flexible-extract | 0.16 |
| mgsm_native_cot_eu | exact_match,get-answer | 0 |
| paws_eu | acc | 0.34 |
| piqa_eu | acc | 0.39 |
| qnlieu | acc | 0.19 |
| wnli_eu | acc | -0.1 |
| xcopa_eu | acc | 0.51 |
| xnli_eu | acc | 0.3 |
| xnli_eu_native | acc | 0.3 |
| xstorycloze_eu | acc | 0.38 |

### Galician

| task | metric | result |
|:------------------|:-----------------------------|---------:|
| belebele_glg_Latn | acc | 0.73 |
| flores_gl | bleu | 0.29 |
| galcola | mcc | 0 |
| mgsm_direct_gl | exact_match,flexible-extract | 0.1 |
| openbookqa_gl | acc | 0.16 |
| parafrases_gl | acc | 0.22 |
| paws_gl | acc | 0.4 |
| summarization_gl | bleu | 0.04 |
| xnli_gl | acc | 0.37 |
| xstorycloze_gl | acc | 0.49 |

### English

| task | metric | result |
|:-------------------|:-----------------------------|---------:|
| arc_challenge | acc | 0.4 |
| arc_easy | acc | 0.73 |
| belebele_eng_Latn | acc | 0.77 |
| cola | mcc | 0 |
| copa | acc | 0.78 |
| hellaswag | acc | 0.54 |
| hellaswag | acc_norm | -0.32 |
| mgsm_direct_en | exact_match,flexible-extract | 0.09 |
| mmlu | acc | 0.45 |
| openbookqa | acc | 0.18 |
| paws_en | acc | 0.44 |
| piqa | acc | 0.65 |
| social_iqa | acc | 0.25 |
| truthfulqa_mc1 | acc | 0.19 |
| truthfulqa_mc2 | acc | 0.41 |
| wnli | acc | 0.41 |
| xnli_en_iberobench | acc | 0.36 |
| xquad_en | f1 | 0.17 |
| xstorycloze_en | acc | 0.6 |
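
The five tables above share one three-column layout. For reference, a hypothetical helper like `to_markdown_table` below (not part of any evaluation library) renders a `{task: {metric: value}}` dict into that layout, rounding values to two decimals as in the tables:

```python
# Hypothetical helper: render {task: {metric: value}} results into the
# three-column markdown layout used by the tables in this section.
def to_markdown_table(results: dict) -> str:
    rows = [
        "| task | metric | result |",
        "|:-----|:-------|-------:|",
    ]
    for task in sorted(results):
        for metric, value in sorted(results[task].items()):
            rows.append(f"| {task} | {metric} | {round(value, 2)} |")
    return "\n".join(rows)

# Example: reproduces two rows of the English table above.
print(to_markdown_table({"copa": {"acc": 0.78}, "cola": {"mcc": 0.0}}))
```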
 
### LLM-as-a-judge