diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_amh.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_amh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..87b517906c0129b4aaabc518766e89dd6f70f505 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_amh.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: amh +doc_to_choice: '{{[premise+", ትክክል? አዎ, "+hypothesis,premise+", ትክክል? እንዲሁም, "+hypothesis,premise+", + ትክክል? አይ, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_amh diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_eng.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_eng.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bb2dcd58252468e3fa9dcf8dbad7b39dc9d2983b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_eng.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: eng +doc_to_choice: '{{[premise+", Right? Yes, "+hypothesis,premise+", Right? Also, "+hypothesis,premise+", + Right? No, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_eng diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_ewe.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_ewe.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ecdc41c524eaf1429756643110a85b83009bb293 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_ewe.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: ewe +doc_to_choice: '{{[premise+", Esɔ gbe? Ɛ̃, "+hypothesis,premise+", Esɔ gbe? Hã, "+hypothesis,premise+", + Esɔ gbe? Ao, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_ewe diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_fra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_fra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1e6e32cc165ce73e99ae5480debe0183dbb2351a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_fra.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: fra +doc_to_choice: '{{[premise+", correct? Oui, "+hypothesis,premise+", correct? Aussi, + "+hypothesis,premise+", correct? Non, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_fra diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_hau.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_hau.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b243a5de37f970dc92f27112280332cd2c5256cd --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_hau.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: hau +doc_to_choice: '{{[premise+", Daidai? Ee, "+hypothesis,premise+", Daidai? Haka kuma, + "+hypothesis,premise+", Daidai? A''a, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_hau diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_ibo.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_ibo.yaml new file mode 100644 index 0000000000000000000000000000000000000000..597ffb644c6e63a9bb4caccc41f45db2c2f29e68 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_ibo.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: ibo +doc_to_choice: '{{[premise+", Ziri ezi? Éè, "+hypothesis,premise+", Ziri ezi? Ọzọkwa, + "+hypothesis,premise+", Ziri ezi? Mba, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_ibo diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_kin.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_kin.yaml new file mode 100644 index 0000000000000000000000000000000000000000..3333c12019585f6d089635cb7d7ab5eb9ad906d6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_kin.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: kin +doc_to_choice: '{{[premise+", Nibyo? Yego, "+hypothesis,premise+", Nibyo? Na none, + "+hypothesis,premise+", Nibyo? Oya, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_kin diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_lin.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_lin.yaml new file mode 100644 index 0000000000000000000000000000000000000000..95060d6869cb8e6a3340f9d54bc5257244666f3c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_lin.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: lin +doc_to_choice: '{{[premise+", Malamu? Iyo, "+hypothesis,premise+", Malamu? Lisusu, + "+hypothesis,premise+", Malamu? Te, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_lin diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_lug.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_lug.yaml new file mode 100644 index 0000000000000000000000000000000000000000..97b6d00ec8b4ed0d9e1c9aabe133cc0b70141dbb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_lug.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: lug +doc_to_choice: '{{[premise+", Kituufu? Yee, "+hypothesis,premise+", Kituufu? N’ekirala, + "+hypothesis,premise+", Kituufu? Nedda, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_lug diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_orm.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_orm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f9c25496da9cc81a3c82c8ae2a83621bf839e56a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_orm.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: orm +doc_to_choice: '{{[premise+", Sirrii? Eeyyee, "+hypothesis,premise+", Sirrii? Akkasumas, + "+hypothesis,premise+", Sirrii? Lakki, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_orm diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_sna.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_sna.yaml new file mode 100644 index 0000000000000000000000000000000000000000..be2b2617ccdec63258607be20a6db2d958f018b1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_sna.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: sna +doc_to_choice: '{{[premise+", Chokwadi? Hongu, "+hypothesis,premise+", Chokwadi? Uye, + "+hypothesis,premise+", Chokwadi? Kwete, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_sna diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_sot.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_sot.yaml new file mode 100644 index 0000000000000000000000000000000000000000..092961e0f8e39bb94a152aae00651b9ae49eebfb --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_sot.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: sot +doc_to_choice: '{{[premise+", Nepile? E, "+hypothesis,premise+", Nepile? Hape, "+hypothesis,premise+", + Nepile? Tjhe, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_sot diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_swa.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_swa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c8b1e2afa2c1b267c803565caa9cc13dc8d8f506 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_swa.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: swa +doc_to_choice: '{{[premise+", Sahihi? Ndiyo, "+hypothesis,premise+", Sahihi? Pia, + "+hypothesis,premise+", Sahihi? Hapana, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_swa diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_twi.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_twi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4d3141d63a84b5a93002bd416583eb444543c031 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_twi.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: twi +doc_to_choice: '{{[premise+", Nifa? Aane, "+hypothesis,premise+", Nifa? Anaasɛ, "+hypothesis,premise+", + Nifa? Daabi, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_twi diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_wol.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_wol.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1239fa47086826050a23d493c3e7069327a0e516 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_wol.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: wol +doc_to_choice: '{{[premise+", Dëgg? Waaw, "+hypothesis,premise+", Dëgg? Itam, "+hypothesis,premise+", + Dëgg? Déet, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_wol diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_xho.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_xho.yaml new file mode 100644 index 0000000000000000000000000000000000000000..f6f91f6e079d1138e374c7094bd76ef4743ec5b4 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_xho.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: xho +doc_to_choice: '{{[premise+", Ichanekile? Ewe, "+hypothesis,premise+", Ichanekile? + Kananjalo, "+hypothesis,premise+", Ichanekile? Hayi, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_xho diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_yaml new file mode 100644 index 0000000000000000000000000000000000000000..369144721be392f01bd1be5e72665b7199bbd1bc --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_yaml @@ -0,0 +1,25 @@ +group: + - afrixnli + - afrixnli_native_direct +dataset_path: masakhane/afrixnli +dataset_name: null +output_type: multiple_choice +validation_split: validation +test_split: test +fewshot_split: validation +doc_to_target: label +doc_to_text: "" +metric_list: + - metric: f1 + aggregation: !function utils.weighted_f1_score + average: weighted + higher_is_better: True + ignore_case: true + ignore_punctuation: true + - metric: acc + aggregation: mean + higher_is_better: true + ignore_case: true + ignore_punctuation: true +metadata: + version: 1.0 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_yor.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_yor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2648bf57bce8ffa5d28578094b477c6b8b166446 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_yor.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: yor +doc_to_choice: '{{[premise+", Òótọ́? Bẹ́ẹ̀ni, "+hypothesis,premise+", Òótọ́? Àti pé, + "+hypothesis,premise+", Òótọ́? Rárá, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_yor diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_zul.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_zul.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48261c60b28fa4c157b511153b609c840fea80e8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/afrixnli_native_direct_zul.yaml @@ -0,0 +1,6 @@ +# Generated by utils.py +dataset_name: zul +doc_to_choice: '{{[premise+", Kulungile? Yebo, "+hypothesis,premise+", Kulungile? + Futhi, "+hypothesis,premise+", Kulungile? Cha, "+hypothesis]}}' +include: afrixnli_native_direct_yaml +task: afrixnli_native_direct_zul diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/utils.py b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..3e735e2deb1f9c53152c072615aebe8ba3acb90b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/native-direct/utils.py @@ -0,0 +1 @@ +from lm_eval.utils import weighted_f1_score diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_fra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_fra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bd5903357dbd029bbc5a3d88c47e75ab05b4da41 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_fra.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: fra +include: afrixnli_translate_yaml +task: afrixnli_translate_fra diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_hau.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_hau.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ddc7a375e03210ad02090a0279fe767e67d76c8e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_hau.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: hau +include: afrixnli_translate_yaml +task: afrixnli_translate_hau diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_kin.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_kin.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ebae340f5bf3b21a5d72c1ed4f6bad6223834d27 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_kin.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: kin +include: afrixnli_translate_yaml +task: afrixnli_translate_kin diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_lug.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_lug.yaml new file mode 100644 index 0000000000000000000000000000000000000000..c9ab91826d3f8b64370b061b58dcd5cd1b5d0da8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_lug.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: lug +include: afrixnli_translate_yaml +task: afrixnli_translate_lug diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_orm.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_orm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..879228120a74794e894cad0b6d32ccb0b35ad473 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_orm.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: orm +include: afrixnli_translate_yaml +task: afrixnli_translate_orm diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_sna.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_sna.yaml new file mode 100644 index 0000000000000000000000000000000000000000..69756c268c21a7228ed87cfc41522b5f2f549bf1 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_sna.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: sna +include: afrixnli_translate_yaml +task: afrixnli_translate_sna diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_sot.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_sot.yaml new file mode 100644 index 0000000000000000000000000000000000000000..64b5cb29c770a1380a69001edf0026f47a0509a7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_sot.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: sot +include: afrixnli_translate_yaml +task: afrixnli_translate_sot diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_swa.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_swa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ea6307131bfe14bdf9951b929556b0e911bed25f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_swa.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: swa +include: afrixnli_translate_yaml +task: afrixnli_translate_swa diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_xho.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_xho.yaml new file mode 100644 index 0000000000000000000000000000000000000000..428ff3bbd2dccc0f60bb3818860d8426f9f70739 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_xho.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: xho +include: afrixnli_translate_yaml +task: afrixnli_translate_xho diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_yaml new file mode 100644 index 0000000000000000000000000000000000000000..c35f86eb6f54b1b9ac07642e68e26694134cad26 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_yaml @@ -0,0 +1,32 @@ +group: + - afrixnli + - afrixnli_translate +dataset_path: masakhane/afrixnli-translate-test +dataset_name: null +output_type: multiple_choice +test_split: test +doc_to_text: "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither?\nAnswer:" +# True = entailment +# False = contradiction +# Neither = neutral +doc_to_target: !function utils.doc_to_target +doc_to_choice: + - "True" + - "Neither" + - "False" +should_decontaminate: true +doc_to_decontamination_query: premise +metric_list: + - metric: f1 + aggregation: !function utils.weighted_f1_score + average: weighted + higher_is_better: True + ignore_case: true + ignore_punctuation: true + - metric: acc + aggregation: mean + higher_is_better: true + ignore_case: true + ignore_punctuation: true +metadata: + version: 1.0 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/utils.py b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..5d1ac19e19b2e855c957e75f1c778366dfbc7e55 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/utils.py @@ -0,0 +1,6 @@ +from lm_eval.utils import weighted_f1_score + + +def doc_to_target(doc): + replacements = {0: "True", 1: "Neither", 2: "False"} + return replacements[doc["label"]] diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_amh.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_amh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9e8974c99a1b5d8ebed9c9be29e3628ad7d41674 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_amh.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: amh +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_amh diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_eng.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_eng.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7936a4322a3948093eefc12364f47b25181b0227 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_eng.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: eng +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_eng diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_ewe.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_ewe.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fe2fce97e33d8958a7a064aa25baa7e86d6f8f21 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_ewe.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: ewe +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_ewe diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_fra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_fra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07c2f66238939ab19cce5f697826a5f53cdbe876 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_fra.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: fra +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_fra diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_hau.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_hau.yaml new file mode 100644 index 0000000000000000000000000000000000000000..885e571b344e78cf0277dc5f3193dc6096386d40 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_hau.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: hau +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_hau diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_ibo.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_ibo.yaml new file mode 100644 index 0000000000000000000000000000000000000000..7a8428267e752a552c6bc67baaefd4a65f1bf47f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_ibo.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: ibo +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_ibo diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_kin.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_kin.yaml new file mode 100644 index 0000000000000000000000000000000000000000..611f61df85e89324769b6065e269e48ff3902190 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_kin.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: kin +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_kin diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_lin.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_lin.yaml new file mode 100644 index 0000000000000000000000000000000000000000..eed83c757faa913a2b220ced61eeb718c5da3c12 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_lin.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: lin +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_lin diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_lug.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_lug.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9c3cc02445be42a6bdb4860b6300f59e5dbc622c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_lug.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: lug +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_lug diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_orm.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_orm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4931dc0a9ef3a58d9e9cdca3c6ab128333f7d3a0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_orm.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: orm +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_orm diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_sna.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_sna.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad7660a03668f00c0bf5a46c1162d32f382831ba --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_sna.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: sna +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_sna diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_sot.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_sot.yaml new file mode 100644 index 0000000000000000000000000000000000000000..266605cb8c0deffca5020416e45d77a444b8f313 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_sot.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: sot +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_sot diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_swa.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_swa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..07a890927933a0dc665b98f7e56cbd620fa97b18 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_swa.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: swa +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_swa diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_twi.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_twi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d76fff819e09299df81564cc8217a2f34e20afbf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_twi.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: twi +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_twi diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_wol.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_wol.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9f189d3975a07b125581d482e268205793e1577e --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_wol.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: wol +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_wol diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_xho.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_xho.yaml new file mode 100644 index 0000000000000000000000000000000000000000..307b42fc58bf7782823473fe67a2343698c8ae9a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_xho.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: xho +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_xho diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_yaml new file mode 100644 index 0000000000000000000000000000000000000000..c15cd68b89a0036d98b77d9e4622ff16206d4325 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_yaml @@ -0,0 +1,31 @@ +group: + - afrixnli + - afrixnli_manual_direct +dataset_path: masakhane/afrixnli +dataset_name: null +output_type: multiple_choice +validation_split: validation +test_split: test +fewshot_split: validation +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +doc_to_choice: + - "entailment" + - "neutral" + - "contradiction" +should_decontaminate: true +doc_to_decontamination_query: premise +metric_list: + - metric: f1 + aggregation: !function utils.weighted_f1_score + average: weighted + higher_is_better: True + ignore_case: true + ignore_punctuation: true + - metric: acc + aggregation: mean + higher_is_better: true + ignore_case: true + ignore_punctuation: true +metadata: + version: 1.0 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_yor.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_yor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b2b9f99a05509897253504e57671be3df94adaf7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_yor.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: yor +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_yor diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_zul.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_zul.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2833840644b75044470a2dfb133d0afd43da105c --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/afrixnli_manual_direct_zul.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: zul +include: afrixnli_manual_direct_yaml +task: afrixnli_manual_direct_zul diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/utils.py b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d97a0a288508e817ab695e637fb157a08c813808 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/direct/utils.py @@ -0,0 +1,19 @@ +from lm_eval.utils import weighted_f1_score + + +def doc_to_text(doc): + output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise + and hypothesis. The answer should be exact entailment, contradiction, or neutral. + + Premise: {premise} + Hypothesis: {hypothesis} + + Is it entailment, contradiction, or neutral?""" + + text = output.format(premise=doc["premise"], hypothesis=doc["hypothesis"]) + return text + + +def doc_to_target(doc): + replacements = {0: "entailment", 1: "neutral", 2: "contradiction"} + return replacements[doc["label"]] diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_amh.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_amh.yaml new file mode 100644 index 0000000000000000000000000000000000000000..aa79494a59c3d529cefe3afc6793c113136ba4a9 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_amh.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: amh +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_amh diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_ewe.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_ewe.yaml new file mode 100644 index 0000000000000000000000000000000000000000..9d209484bdec5139ce18e3c84b9385cbe5549928 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_ewe.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: ewe +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_ewe diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_fra.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_fra.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a943963b9075e818074e98fcd3bf255502dd482a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_fra.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: fra +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_fra diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_hau.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_hau.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a343c952fe31f91f2332204df366a5434fd62f03 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_hau.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: hau +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_hau diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_ibo.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_ibo.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0057e6b0cbfbd7a7663829a05e0d44d60c301d3f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_ibo.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: ibo +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_ibo diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_kin.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_kin.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5874ae5f6cd2cac4296b3abaa5568a4dd7d2188a --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_kin.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: kin +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_kin diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_lin.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_lin.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a6e6023cf5ad47cede58060b973ee9aed9964bde --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_lin.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: lin +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_lin diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_lug.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_lug.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5bc3a14d41eba99d4bb9f2b46fd44ec1526507cf --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_lug.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: lug +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_lug diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_orm.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_orm.yaml new file mode 100644 index 0000000000000000000000000000000000000000..cb9a494f4057783cca9a68eda9b4fb56e0b99948 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_orm.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: orm +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_orm diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_sna.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_sna.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d6523987f10926fa2f2fd80417e86e494363f0fa --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_sna.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: sna +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_sna diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_sot.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_sot.yaml new file mode 100644 index 0000000000000000000000000000000000000000..319e909c84cf513aa9985a0a6cc44794f78a09b8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_sot.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: sot +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_sot diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_swa.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_swa.yaml new file mode 100644 index 0000000000000000000000000000000000000000..a34eb438e4d45bdedc68f893af2fb4374fc931a7 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_swa.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: swa +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_swa diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_twi.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_twi.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0535f1db84f43aaed989efcfbe9e1781480931b8 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_twi.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: twi +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_twi diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_wol.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_wol.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8541b73ec8e1c0c7417a5547cdfd170ed9bcf21b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_wol.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: wol +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_wol diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_xho.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_xho.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8d1eebcb37f65f1ad44a098220f979aa840b4f57 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_xho.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: xho +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_xho diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd30e302f0257344d51855acfa80ac26ab823be0 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_yaml @@ -0,0 +1,29 @@ +group: + - afrixnli + - afrixnli_manual_direct +dataset_path: masakhane/afrixnli-translate-test +dataset_name: null +output_type: multiple_choice +test_split: test +doc_to_text: !function utils.doc_to_text +doc_to_target: !function utils.doc_to_target +doc_to_choice: + - "entailment" + - "neutral" + - "contradiction" +should_decontaminate: true +doc_to_decontamination_query: premise +metric_list: + - metric: f1 + aggregation: !function utils.weighted_f1_score + average: weighted + higher_is_better: True + ignore_case: true + ignore_punctuation: true + - metric: acc + aggregation: mean + higher_is_better: true + ignore_case: true + ignore_punctuation: true +metadata: + version: 1.0 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_yor.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_yor.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1825ec27a5eb16b4229385555d699d33338d7c86 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_yor.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: yor +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_yor diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_zul.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_zul.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4bf52549d730596f90caf263fe6299bbc705095b --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/afrixnli_manual_translate_zul.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: zul +include: afrixnli_manual_translate_yaml +task: afrixnli_manual_translate_zul diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/utils.py b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/utils.py new file mode 100644 index 0000000000000000000000000000000000000000..d97a0a288508e817ab695e637fb157a08c813808 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/lai prompt/translate/utils.py @@ -0,0 +1,19 @@ +from lm_eval.utils import weighted_f1_score + + +def doc_to_text(doc): + output = """Please identify whether the premise entails or contradicts the hypothesis in the following premise + and hypothesis. The answer should be exact entailment, contradiction, or neutral. + + Premise: {premise} + Hypothesis: {hypothesis} + + Is it entailment, contradiction, or neutral?""" + + text = output.format(premise=doc["premise"], hypothesis=doc["hypothesis"]) + return text + + +def doc_to_target(doc): + replacements = {0: "entailment", 1: "neutral", 2: "contradiction"} + return replacements[doc["label"]] diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/inchoative.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/inchoative.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5f51e03dd3a528ad559418e81e20417ea6843f68 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/inchoative.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: inchoative +include: _template_yaml +task: blimp_inchoative diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/passive_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/passive_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1f69813ea548700023d88ecc7763024411afc450 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/passive_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: passive_2 +include: _template_yaml +task: blimp_passive_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/superlative_quantifiers_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/superlative_quantifiers_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ac031c4ecc1acf46bed9c5dbf333f140daa18155 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/superlative_quantifiers_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: superlative_quantifiers_2 +include: _template_yaml +task: blimp_superlative_quantifiers_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/tough_vs_raising_2.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/tough_vs_raising_2.yaml new file mode 100644 index 0000000000000000000000000000000000000000..5871a4aa7b950b6066b92d4948bf60f7bfcea1e6 --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/tough_vs_raising_2.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: tough_vs_raising_2 +include: _template_yaml +task: blimp_tough_vs_raising_2 diff --git a/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_no_gap_long_distance.yaml b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_no_gap_long_distance.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4804f67ae82cb4a5af702d80eeded6bd6aacd54f --- /dev/null +++ b/scripts/yans/lm-evaluation-harness/lm_eval/tasks/blimp/wh_vs_that_no_gap_long_distance.yaml @@ -0,0 +1,4 @@ +# Generated by utils.py +dataset_name: wh_vs_that_no_gap_long_distance +include: _template_yaml +task: blimp_wh_vs_that_no_gap_long_distance