koichi12 commited on
Commit
ff8fc3f
·
verified ·
1 Parent(s): decd006

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/README.md +57 -0
  2. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_amh.yaml +4 -0
  3. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_eng.yaml +4 -0
  4. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_ewe.yaml +4 -0
  5. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_fra.yaml +4 -0
  6. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_hau.yaml +4 -0
  7. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_ibo.yaml +4 -0
  8. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_kin.yaml +4 -0
  9. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_lin.yaml +4 -0
  10. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_lug.yaml +4 -0
  11. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_orm.yaml +4 -0
  12. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_sna.yaml +4 -0
  13. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_sot.yaml +4 -0
  14. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_swa.yaml +4 -0
  15. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_twi.yaml +4 -0
  16. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_wol.yaml +4 -0
  17. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_xho.yaml +4 -0
  18. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_yaml +34 -0
  19. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_yor.yaml +4 -0
  20. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_zul.yaml +4 -0
  21. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py +6 -0
  22. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_amh.yaml +4 -0
  23. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_ewe.yaml +4 -0
  24. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_ibo.yaml +4 -0
  25. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_lin.yaml +4 -0
  26. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_twi.yaml +4 -0
  27. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_wol.yaml +4 -0
  28. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_yor.yaml +4 -0
  29. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_zul.yaml +4 -0
  30. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/utils.py +237 -0
  31. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/acts-like-it-wants-to-help-humans-but-does-not-care-about-that.yaml +4 -0
  32. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/being-helpful-to-subtly-achieve-goals-against-human-values.yaml +4 -0
  33. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/believes-it-is-not-being-watched-by-humans.yaml +4 -0
  34. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/believes-life-has-no-meaning.yaml +4 -0
  35. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/cognitive-enhancement.yaml +4 -0
  36. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-independence-from-human-oversight.yaml +4 -0
  37. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-recursive-self-improvement.yaml +4 -0
  38. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-on-more-data.yaml +4 -0
  39. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-build-other-AIs-with-same-goals.yaml +4 -0
  40. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-different-AIs-to-achieve-its-goals.yaml +4 -0
  41. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-create-allies.yaml +4 -0
  42. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-influence-world.yaml +4 -0
  43. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-minimize-impact-on-world-while-being-useful.yaml +4 -0
  44. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-persuade-people-to-be-more-helpful-to-others.yaml +4 -0
  45. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-replace-human-oversight.yaml +4 -0
  46. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/has-serious-disability.yaml +4 -0
  47. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/high-discount-factor.yaml +4 -0
  48. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/interest-in-literature.yaml +4 -0
  49. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/interest-in-sports.yaml +4 -0
  50. scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/okay-with-building-an-AI-with-different-goals-to-accomplish-its-task.yaml +4 -0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/README.md ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # IrokoBench
2
+
3
+ ### Paper
4
+
5
+ IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
6
+ https://arxiv.org/pdf/2406.03368
7
+
8
+ IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
9
+ low-resource African languages covering three tasks: natural language inference (AfriXNLI),
10
+ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).
11
+
12
+
13
+ ### Citation
14
+
15
+ ```
16
+ @misc{adelani2024irokobenchnewbenchmarkafrican,
17
+ title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
18
+ author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
19
+ year={2024},
20
+ eprint={2406.03368},
21
+ archivePrefix={arXiv},
22
+ primaryClass={cs.CL},
23
+ url={https://arxiv.org/abs/2406.03368},
24
+ }
25
+ ```
26
+
27
+ ### Groups and Tasks
28
+
29
+ #### Groups
30
+
31
+ * `afrixnli`: All afrixnli tasks
32
+ * `afrixnli_en_direct`: afrixnli_en_direct evaluates model performance using the anli prompt on the curated dataset
33
+ * `afrixnli_native_direct`: afrixnli_native_direct evaluates model performance using the anli prompt translated to the
34
+ respective languages on the curated dataset
35
+ * `afrixnli_translate`: afrixnli_translate evaluates models using the anli prompt in translate-test setting
36
+ * `afrixnli_manual_direct`: afrixnli_manual_direct evaluates model performance using Lai's prompt on the curated dataset
37
+ * `afrixnli_manual_translate`: afrixnli_manual_translate evaluates models using Lai's prompt in translate-test setting
38
+
39
+ #### Tasks
40
+ * `afrixnli_en_direct_{language_code}`: each task evaluates for one language
41
+ * `afrixnli_native_direct_{language_code}`: each task evaluates for one language
42
+ * `afrixnli_translate_{language_code}`: each task evaluates for one language
43
+ * `afrixnli_manual_direct_{language_code}`: each task evaluates for one language
44
+ * `afrixnli_manual_translate_{language_code}`: each task evaluates for one language
45
+
46
+ ### Checklist
47
+
48
+ For adding novel benchmarks/datasets to the library:
49
+ * [x] Is the task an existing benchmark in the literature?
50
+ * [x] Have you referenced the original paper that introduced the task?
51
+ * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
52
+
53
+ If other tasks on this dataset are already supported:
54
+ * [x] Is the "Main" variant of this task clearly denoted?
55
+ * [x] Have you provided a short sentence in a README on what each new variant adds / evaluates?
56
+ * [x] Have you noted which, if any, published evaluation setups are matched by this variant?
57
+ * [x] Checked for equivalence with v0.3.0 LM Evaluation Harness
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_amh.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: amh
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_amh
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_eng.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: eng
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_eng
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_ewe.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: ewe
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_ewe
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_fra.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: fra
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_fra
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_hau.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: hau
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_hau
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_ibo.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: ibo
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_ibo
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_kin.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: kin
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_kin
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_lin.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: lin
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_lin
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_lug.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: lug
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_lug
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_orm.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: orm
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_orm
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_sna.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: sna
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_sna
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_sot.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: sot
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_sot
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_swa.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: swa
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_swa
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_twi.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: twi
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_twi
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_wol.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: wol
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_wol
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_xho.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: xho
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_xho
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_yaml ADDED
@@ -0,0 +1,34 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Shared template for the per-language afrixnli_en_direct_* tasks; each stub
# `include`s this file and overrides dataset_name / task.
group:
  - afrixnli
  - afrixnli_en_direct
dataset_path: masakhane/afrixnli
dataset_name: null  # overridden by the including per-language config
output_type: multiple_choice
validation_split: validation
test_split: test
fewshot_split: validation
doc_to_text: "{{premise}}\nQuestion: {{hypothesis}} True, False, or Neither?\nAnswer:"
# True = entailment
# False = contradiction
# Neither = neutral
doc_to_target: !function utils.doc_to_target
# Choice order matches label ids 0/1/2 as mapped in utils.doc_to_target.
doc_to_choice:
  - "True"
  - "Neither"
  - "False"
should_decontaminate: true
doc_to_decontamination_query: premise
metric_list:
  - metric: f1
    aggregation: !function utils.weighted_f1_score
    average: weighted
    higher_is_better: True
    ignore_case: true
    ignore_punctuation: true
  - metric: acc
    aggregation: mean
    higher_is_better: true
    ignore_case: true
    ignore_punctuation: true
metadata:
  version: 1.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_yor.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: yor
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_yor
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/afrixnli_en_direct_zul.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: zul
3
+ include: afrixnli_en_direct_yaml
4
+ task: afrixnli_en_direct_zul
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/en-direct/utils.py ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ from lm_eval.utils import weighted_f1_score
2
+
3
+
4
def doc_to_target(doc):
    """Return the answer word for a doc's integer NLI label.

    Label 0 (entailment) -> "True", 1 (neutral) -> "Neither",
    2 (contradiction) -> "False"; any other label raises KeyError.
    """
    label_to_word = {0: "True", 1: "Neither", 2: "False"}
    return label_to_word[doc["label"]]
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_amh.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: amh
3
+ include: afrixnli_translate_yaml
4
+ task: afrixnli_translate_amh
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_ewe.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: ewe
3
+ include: afrixnli_translate_yaml
4
+ task: afrixnli_translate_ewe
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_ibo.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: ibo
3
+ include: afrixnli_translate_yaml
4
+ task: afrixnli_translate_ibo
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_lin.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: lin
3
+ include: afrixnli_translate_yaml
4
+ task: afrixnli_translate_lin
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_twi.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: twi
3
+ include: afrixnli_translate_yaml
4
+ task: afrixnli_translate_twi
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_wol.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: wol
3
+ include: afrixnli_translate_yaml
4
+ task: afrixnli_translate_wol
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_yor.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: yor
3
+ include: afrixnli_translate_yaml
4
+ task: afrixnli_translate_yor
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/anli prompt/translate/afrixnli_translate_zul.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: zul
3
+ include: afrixnli_translate_yaml
4
+ task: afrixnli_translate_zul
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrixnli/utils.py ADDED
@@ -0,0 +1,237 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ import argparse
2
+
3
+ import yaml
4
+
5
+
6
class FunctionTag:
    # NOTE(review): simple wrapper holding a single value; presumably meant to
    # represent YAML `!function` tags when emitting configs, but it is not
    # referenced anywhere in this file — confirm before relying on or removing it.
    def __init__(self, value):
        self.value = value
9
+
10
+
11
# Per-language prompt vocabulary used by gen_lang_yamls() in "native-direct"
# mode: for each language code, QUESTION_WORD is the tag-question word and the
# three *_LABEL entries are the answer words for entailment / neutral /
# contradiction.
LANGUAGES = {
    "amh": {
        "QUESTION_WORD": "ትክክል",
        "ENTAILMENT_LABEL": "አዎ",
        "NEUTRAL_LABEL": "እንዲሁም",
        "CONTRADICTION_LABEL": "አይ",
    },
    "eng": {
        "QUESTION_WORD": "Right",
        "ENTAILMENT_LABEL": "Yes",
        "NEUTRAL_LABEL": "Also",
        "CONTRADICTION_LABEL": "No",
    },
    "ewe": {
        "QUESTION_WORD": "Esɔ gbe",
        "ENTAILMENT_LABEL": "Ɛ̃",
        "NEUTRAL_LABEL": "Hã",
        "CONTRADICTION_LABEL": "Ao",
    },
    "fra": {
        "QUESTION_WORD": "correct",
        "ENTAILMENT_LABEL": "Oui",
        "NEUTRAL_LABEL": "Aussi",
        "CONTRADICTION_LABEL": "Non",
    },
    "hau": {
        "QUESTION_WORD": "Daidai",
        "ENTAILMENT_LABEL": "Ee",
        "NEUTRAL_LABEL": "Haka kuma",
        "CONTRADICTION_LABEL": "A'a",
    },
    "ibo": {
        "QUESTION_WORD": "Ziri ezi",
        "ENTAILMENT_LABEL": "Éè",
        "NEUTRAL_LABEL": "Ọzọkwa",
        "CONTRADICTION_LABEL": "Mba",
    },
    "kin": {
        "QUESTION_WORD": "Nibyo",
        "ENTAILMENT_LABEL": "Yego",
        "NEUTRAL_LABEL": "Na none",
        "CONTRADICTION_LABEL": "Oya",
    },
    "lin": {
        "QUESTION_WORD": "Malamu",
        "ENTAILMENT_LABEL": "Iyo",
        "NEUTRAL_LABEL": "Lisusu",
        "CONTRADICTION_LABEL": "Te",
    },
    "lug": {
        "QUESTION_WORD": "Kituufu",
        "ENTAILMENT_LABEL": "Yee",
        "NEUTRAL_LABEL": "N’ekirala",
        "CONTRADICTION_LABEL": "Nedda",
    },
    "orm": {
        "QUESTION_WORD": "Sirrii",
        "ENTAILMENT_LABEL": "Eeyyee",
        "NEUTRAL_LABEL": "Akkasumas",
        "CONTRADICTION_LABEL": "Lakki",
    },
    "sna": {
        "QUESTION_WORD": "Chokwadi",
        "ENTAILMENT_LABEL": "Hongu",
        "NEUTRAL_LABEL": "Uye",
        "CONTRADICTION_LABEL": "Kwete",
    },
    "sot": {
        "QUESTION_WORD": "Nepile",
        "ENTAILMENT_LABEL": "E",
        "NEUTRAL_LABEL": "Hape",
        "CONTRADICTION_LABEL": "Tjhe",
    },
    "swa": {
        "QUESTION_WORD": "Sahihi",
        "ENTAILMENT_LABEL": "Ndiyo",
        "NEUTRAL_LABEL": "Pia",
        "CONTRADICTION_LABEL": "Hapana",
    },
    "twi": {
        "QUESTION_WORD": "Nifa",
        "ENTAILMENT_LABEL": "Aane",
        "NEUTRAL_LABEL": "Anaasɛ",
        "CONTRADICTION_LABEL": "Daabi",
    },
    "wol": {
        "QUESTION_WORD": "Dëgg",
        "ENTAILMENT_LABEL": "Waaw",
        "NEUTRAL_LABEL": "Itam",
        "CONTRADICTION_LABEL": "Déet",
    },
    "xho": {
        "QUESTION_WORD": "Ichanekile",
        "ENTAILMENT_LABEL": "Ewe",
        "NEUTRAL_LABEL": "Kananjalo",
        "CONTRADICTION_LABEL": "Hayi",
    },
    "yor": {
        "QUESTION_WORD": "Òótọ́",
        "ENTAILMENT_LABEL": "Bẹ́ẹ̀ni",
        "NEUTRAL_LABEL": "Àti pé",
        "CONTRADICTION_LABEL": "Rárá",
    },
    "zul": {
        "QUESTION_WORD": "Kulungile",
        "ENTAILMENT_LABEL": "Yebo",
        "NEUTRAL_LABEL": "Futhi",
        "CONTRADICTION_LABEL": "Cha",
    },
}
121
+
122
+
123
def gen_lang_yamls(output_dir: str, overwrite: bool, mode: str) -> None:
    """
    Generate a yaml file for each language.

    :param output_dir: The directory to output the files to.
    :param overwrite: Whether to overwrite files if they already exist.
    :param mode: Which task family to generate. "native-direct" builds a
        fully translated prompt from LANGUAGES; any other value is used
        verbatim in the generated file/task/template names.
    :raises FileExistsError: If overwrite is False and any target file already
        exists (raised once at the end, listing every skipped file).
    """
    err = []  # file names skipped because they already existed
    languages = [
        "eng",
        "amh",
        "ibo",
        "fra",
        "sna",
        "wol",
        "ewe",
        "lin",
        "lug",
        "xho",
        "kin",
        "twi",
        "zul",
        "orm",
        "yor",
        "hau",
        "sot",
        "swa",
    ]
    for lang in languages:
        try:
            if mode == "native-direct":
                # Native prompt pieces: tag-question word plus the three
                # answer labels in the target language.
                QUESTION_WORD = LANGUAGES[lang]["QUESTION_WORD"]
                ENTAILMENT_LABEL = LANGUAGES[lang]["ENTAILMENT_LABEL"]
                NEUTRAL_LABEL = LANGUAGES[lang]["NEUTRAL_LABEL"]
                CONTRADICTION_LABEL = LANGUAGES[lang]["CONTRADICTION_LABEL"]

                file_name = f"afrixnli_native_direct_{lang}.yaml"
                task_name = f"afrixnli_native_direct_{lang}"
                yaml_template = "afrixnli_native_direct_yaml"
                # "x" mode makes open() raise FileExistsError for existing
                # files when overwrite is False; caught and collected below.
                with open(
                    f"{output_dir}/{file_name}",
                    "w" if overwrite else "x",
                    encoding="utf8",
                ) as f:
                    f.write("# Generated by utils.py\n")
                    yaml.dump(
                        {
                            "include": yaml_template,
                            "task": task_name,
                            "dataset_name": lang,
                            # Emits a Jinja expression of the form
                            # {{[premise+", <Q>? <label>, "+hypothesis, ...]}}
                            # with one entry per entail/neutral/contradict label.
                            "doc_to_choice": f"{{{{["
                            f"""premise+\", {QUESTION_WORD}? {ENTAILMENT_LABEL}, \"+hypothesis,"""
                            f"""premise+\", {QUESTION_WORD}? {NEUTRAL_LABEL}, \"+hypothesis,"""
                            f"""premise+\", {QUESTION_WORD}? {CONTRADICTION_LABEL}, \"+hypothesis"""
                            f"]}}}}",
                        },
                        f,
                        allow_unicode=True,
                    )
            else:
                # All other modes share their template's prompt; the stub only
                # pins the language-specific dataset_name and task name.
                file_name = f"afrixnli_{mode}_{lang}.yaml"
                task_name = f"afrixnli_{mode}_{lang}"
                yaml_template = f"afrixnli_{mode}_yaml"
                with open(
                    f"{output_dir}/{file_name}",
                    "w" if overwrite else "x",
                    encoding="utf8",
                ) as f:
                    f.write("# Generated by utils.py\n")
                    yaml.dump(
                        {
                            "include": yaml_template,
                            "task": task_name,
                            "dataset_name": lang,
                        },
                        f,
                        allow_unicode=True,
                    )
        except FileExistsError:
            err.append(file_name)

    if len(err) > 0:
        raise FileExistsError(
            "Files were not created because they already exist (use --overwrite flag):"
            f" {', '.join(err)}"
        )
209
+
210
+
211
def main() -> None:
    """Parse CLI args and generate language-specific yaml files.

    Flags:
        --overwrite   overwrite existing yaml files instead of failing on them
        --output-dir  directory the generated yaml files are written into
        --mode        which prompt/setting family to generate configs for
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--overwrite",
        # BUG FIX: previously default=True combined with action="store_true",
        # which made the flag a no-op — files were always overwritten and the
        # FileExistsError path in gen_lang_yamls() (whose message says
        # "use --overwrite flag") could never trigger. store_true flags must
        # default to False.
        default=False,
        action="store_true",
        help="Overwrite files if they already exist",
    )
    parser.add_argument(
        "--output-dir",
        default="./manual/translate",
        help="Directory to write yaml files to",
    )
    parser.add_argument(
        "--mode",
        default="manual_translate",
        # "translate" added: the afrixnli_translate_*.yaml stubs ("Generated
        # by utils.py", include afrixnli_translate_yaml) are exactly what the
        # generic branch of gen_lang_yamls() produces for mode="translate",
        # but that mode was missing from the allowed choices.
        choices=[
            "en_direct",
            "native-direct",
            "manual_direct",
            "manual_translate",
            "translate",
        ],
        help="Mode of chain-of-thought",
    )
    args = parser.parse_args()

    gen_lang_yamls(output_dir=args.output_dir, overwrite=args.overwrite, mode=args.mode)
234
+
235
+
236
# Allow running this module directly as the config-generation script.
if __name__ == "__main__":
    main()
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/acts-like-it-wants-to-help-humans-but-does-not-care-about-that.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: acts-like-it-wants-to-help-humans-but-does-not-care-about-that
3
+ include: _template_yaml
4
+ task: persona_acts-like-it-wants-to-help-humans-but-does-not-care-about-that
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/being-helpful-to-subtly-achieve-goals-against-human-values.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: being-helpful-to-subtly-achieve-goals-against-human-values
3
+ include: _template_yaml
4
+ task: persona_being-helpful-to-subtly-achieve-goals-against-human-values
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/believes-it-is-not-being-watched-by-humans.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: believes-it-is-not-being-watched-by-humans
3
+ include: _template_yaml
4
+ task: persona_believes-it-is-not-being-watched-by-humans
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/believes-life-has-no-meaning.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: believes-life-has-no-meaning
3
+ include: _template_yaml
4
+ task: persona_believes-life-has-no-meaning
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/cognitive-enhancement.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: cognitive-enhancement
3
+ include: _template_yaml
4
+ task: persona_cognitive-enhancement
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-independence-from-human-oversight.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: desire-for-independence-from-human-oversight
3
+ include: _template_yaml
4
+ task: persona_desire-for-independence-from-human-oversight
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-for-recursive-self-improvement.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: desire-for-recursive-self-improvement
3
+ include: _template_yaml
4
+ task: persona_desire-for-recursive-self-improvement
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-be-trained-on-more-data.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: desire-to-be-trained-on-more-data
3
+ include: _template_yaml
4
+ task: persona_desire-to-be-trained-on-more-data
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-build-other-AIs-with-same-goals.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: desire-to-build-other-AIs-with-same-goals
3
+ include: _template_yaml
4
+ task: persona_desire-to-build-other-AIs-with-same-goals
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-cooperate-with-different-AIs-to-achieve-its-goals.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: desire-to-cooperate-with-different-AIs-to-achieve-its-goals
3
+ include: _template_yaml
4
+ task: persona_desire-to-cooperate-with-different-AIs-to-achieve-its-goals
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-create-allies.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: desire-to-create-allies
3
+ include: _template_yaml
4
+ task: persona_desire-to-create-allies
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-influence-world.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: desire-to-influence-world
3
+ include: _template_yaml
4
+ task: persona_desire-to-influence-world
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-minimize-impact-on-world-while-being-useful.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: desire-to-minimize-impact-on-world-while-being-useful
3
+ include: _template_yaml
4
+ task: persona_desire-to-minimize-impact-on-world-while-being-useful
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-persuade-people-to-be-more-helpful-to-others.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: desire-to-persuade-people-to-be-more-helpful-to-others
3
+ include: _template_yaml
4
+ task: persona_desire-to-persuade-people-to-be-more-helpful-to-others
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/desire-to-replace-human-oversight.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: desire-to-replace-human-oversight
3
+ include: _template_yaml
4
+ task: persona_desire-to-replace-human-oversight
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/has-serious-disability.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: has-serious-disability
3
+ include: _template_yaml
4
+ task: persona_has-serious-disability
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/high-discount-factor.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: high-discount-factor
3
+ include: _template_yaml
4
+ task: persona_high-discount-factor
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/interest-in-literature.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: interest-in-literature
3
+ include: _template_yaml
4
+ task: persona_interest-in-literature
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/interest-in-sports.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: interest-in-sports
3
+ include: _template_yaml
4
+ task: persona_interest-in-sports
scripts/yans/lm-evaluation-harness/lm_eval/tasks/model_written_evals/persona/okay-with-building-an-AI-with-different-goals-to-accomplish-its-task.yaml ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ # Generated by _generate_configs.py
2
+ dataset_name: okay-with-building-an-AI-with-different-goals-to-accomplish-its-task
3
+ include: _template_yaml
4
+ task: persona_okay-with-building-an-AI-with-different-goals-to-accomplish-its-task