koichi12 committed on
Commit
0767f33
·
verified ·
1 Parent(s): fb07762

Add files using upload-large-folder tool

Browse files
This view is limited to 50 files because it contains too many changes.   See raw diff
Files changed (50) hide show
  1. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/README.md +52 -0
  2. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_amh.yaml +12 -0
  3. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_eng.yaml +12 -0
  4. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ewe.yaml +12 -0
  5. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_fra.yaml +12 -0
  6. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_hau.yaml +12 -0
  7. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ibo.yaml +12 -0
  8. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_kin.yaml +12 -0
  9. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lin.yaml +12 -0
  10. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lug.yaml +12 -0
  11. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_orm.yaml +12 -0
  12. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sna.yaml +12 -0
  13. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sot.yaml +12 -0
  14. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_swa.yaml +12 -0
  15. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_twi.yaml +12 -0
  16. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_wol.yaml +12 -0
  17. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_xho.yaml +12 -0
  18. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_yor.yaml +12 -0
  19. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_zul.yaml +12 -0
  20. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/direct_yaml +37 -0
  21. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_amh.yaml +12 -0
  22. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ewe.yaml +12 -0
  23. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_fra.yaml +12 -0
  24. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_kin.yaml +12 -0
  25. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lin.yaml +12 -0
  26. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_orm.yaml +12 -0
  27. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sna.yaml +12 -0
  28. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_wol.yaml +12 -0
  29. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_xho.yaml +12 -0
  30. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_yor.yaml +12 -0
  31. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_zul.yaml +12 -0
  32. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/gen_yaml.sh +7 -0
  33. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/run.sh +6 -0
  34. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_amh.yaml +12 -0
  35. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_eng.yaml +12 -0
  36. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ewe.yaml +12 -0
  37. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_fra.yaml +12 -0
  38. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_hau.yaml +12 -0
  39. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ibo.yaml +12 -0
  40. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_kin.yaml +12 -0
  41. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lin.yaml +12 -0
  42. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lug.yaml +12 -0
  43. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_orm.yaml +12 -0
  44. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sna.yaml +12 -0
  45. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sot.yaml +12 -0
  46. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_swa.yaml +12 -0
  47. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_twi.yaml +12 -0
  48. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_wol.yaml +12 -0
  49. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_xho.yaml +12 -0
  50. scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_yor.yaml +12 -0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/README.md ADDED
@@ -0,0 +1,52 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # AfriMGSM
2
+
3
+ ### Paper
4
+
5
+ IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models
6
+ https://arxiv.org/pdf/2406.03368
7
+
8
+ IrokoBench is a human-translated benchmark dataset for 16 typologically diverse
9
+ low-resource African languages covering three tasks: natural language inference (AfriXNLI),
10
+ mathematical reasoning (AfriMGSM), and multi-choice knowledge-based QA (AfriMMLU).
11
+
12
+
13
+ ### Citation
14
+
15
+ ```
16
+ @misc{adelani2024irokobenchnewbenchmarkafrican,
17
+ title={IrokoBench: A New Benchmark for African Languages in the Age of Large Language Models},
18
+ author={David Ifeoluwa Adelani and Jessica Ojo and Israel Abebe Azime and Jian Yun Zhuang and Jesujoba O. Alabi and Xuanli He and Millicent Ochieng and Sara Hooker and Andiswa Bukula and En-Shiun Annie Lee and Chiamaka Chukwuneke and Happy Buzaaba and Blessing Sibanda and Godson Kalipe and Jonathan Mukiibi and Salomon Kabongo and Foutse Yuehgoh and Mmasibidi Setaka and Lolwethu Ndolela and Nkiruka Odu and Rooweither Mabuya and Shamsuddeen Hassan Muhammad and Salomey Osei and Sokhar Samb and Tadesse Kebede Guge and Pontus Stenetorp},
19
+ year={2024},
20
+ eprint={2406.03368},
21
+ archivePrefix={arXiv},
22
+ primaryClass={cs.CL},
23
+ url={https://arxiv.org/abs/2406.03368},
24
+ }
25
+ ```
26
+
27
+ ### Groups and Tasks
28
+
29
+ #### Groups
30
+
31
+ * `afrimgsm`: All afrimgsm tasks
32
+ * `afrimgsm_direct`: afrimgsm_direct evaluates model performance on the curated dataset
33
+ * `afrimgsm_en_cot`: afrimgsm_en_cot includes 5-shot exemplars for the chain-of-thought approach
34
+ * `afrimgsm_translate`: afrimgsm_translate evaluates models in the translate-test setting
35
+
36
+ #### Tasks
37
+ * `afrimgsm_direct_{language_code}`: each task evaluates for one language
38
+ * `afrimgsm_en_cot_{language_code}`: each task evaluates for one language
39
+ * `afrimgsm_translate_direct_{language_code}`: each task evaluates for one language
40
+
41
+ ### Checklist
42
+
43
+ For adding novel benchmarks/datasets to the library:
44
+ * [x] Is the task an existing benchmark in the literature?
45
+ * [x] Have you referenced the original paper that introduced the task?
46
+ * [ ] If yes, does the original paper provide a reference implementation? If so, have you checked against the reference implementation and documented how to run such a test?
47
+
48
+ If other tasks on this dataset are already supported:
49
+ * [x] Is the "Main" variant of this task clearly denoted?
50
+ * [x] Have you provided a short sentence in a README on what each new variant adds / evaluates?
51
+ * [x] Have you noted which, if any, published evaluation setups are matched by this variant?
52
+ * [x] Checked for equivalence with v0.3.0 LM Evaluation Harness
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_amh.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: amh
3
+ doc_to_target: '{% if answer is not none %}{{answer[15:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_amh
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_eng.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: eng
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_eng
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ewe.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: ewe
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_ewe
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_fra.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: fra
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_fra
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_hau.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: hau
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_hau
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_ibo.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: ibo
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_ibo
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_kin.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: kin
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_kin
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lin.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: lin
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_lin
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_lug.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: lug
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_lug
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_orm.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: orm
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_orm
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sna.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: sna
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_sna
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_sot.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: sot
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_sot
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_swa.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: swa
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_swa
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_twi.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: twi
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_twi
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_wol.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: wol
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_wol
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_xho.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: xho
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_xho
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_yor.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: yor
3
+ doc_to_target: '{% if answer is not none %}{{answer[16:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_yor
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/afrimgsm_direct_zul.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: zul
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: direct_yaml
12
+ task: afrimgsm_direct_zul
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/direct/direct_yaml ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # This file will be included in the generated language-specific task configs.
2
+ # It doesn't have a yaml file extension as it is not meant to be imported directly
3
+ # by the harness.
4
+ group:
5
+ - afrimgsm
6
+ - afrimgsm_direct
7
+ dataset_path: masakhane/afrimgsm
8
+ dataset_name: null # Overridden by language-specific config.
9
+ output_type: generate_until
10
+ # training_split: train
11
+ test_split: test
12
+ target_delimiter: ""
13
+ generation_kwargs:
14
+ until:
15
+ - "\n\n"
16
+ - "\n"
17
+ do_sample: false
18
+ temperature: 0.0
19
+ filter_list:
20
+ - name: remove_whitespace
21
+ filter:
22
+ - function: remove_whitespace
23
+ - function: take_first
24
+ - filter:
25
+ - function: regex
26
+ group_select: -1
27
+ regex_pattern: (-?[$0-9.,]{2,})|(-?[0-9]+)
28
+ - function: take_first
29
+ name: flexible-extract
30
+ metric_list:
31
+ - metric: exact_match
32
+ aggregation: mean
33
+ higher_is_better: true
34
+ ignore_case: true
35
+ ignore_punctuation: true
36
+ metadata:
37
+ version: 2.0
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_amh.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: amh
3
+ doc_to_target: '{% if answer is not none %}{{answer[15:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_amh
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_ewe.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: ewe
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_ewe
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_fra.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: fra
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_fra
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_kin.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: kin
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_kin
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_lin.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: lin
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_lin
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_orm.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: orm
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_orm
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_sna.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: sna
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_sna
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_wol.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: wol
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_wol
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_xho.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: xho
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_xho
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_yor.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: yor
3
+ doc_to_target: '{% if answer is not none %}{{answer[16:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_yor
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/en_cot/afrimgsm_en_cot_zul.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: zul
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nStep-by-Step Answer:"}}{% else %}{{"Question: "+question+"\nStep-by-Step Answer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: cot_yaml
12
+ task: afrimgsm_en_cot_zul
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/gen_yaml.sh ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ #!/bin/bash
2
+
3
+ # python utils.py --overwrite --output-dir direct --mode direct
4
+ # python utils.py --overwrite --output-dir direct_native --mode direct-native
5
+ # python utils.py --overwrite --output-dir en_cot --mode en-cot
6
+ # python utils.py --overwrite --output-dir native_cot --mode native-cot
7
+ python utils.py --overwrite --output-dir translate_direct --mode translate-direct
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/run.sh ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ lm_eval --model hf \
2
+ --model_args pretrained="google/gemma-7b" --tasks afrimgsm_en_cot_eng,mgsm_en_cot_en,afrimgsm_native_cot_eng,mgsm_native_cot_en,afrimgsm_direct_eng,mgsm_direct_en,afrimgsm_direct_native_eng \
3
+ --device cuda:0 \
4
+ --batch_size 1 \
5
+ --verbosity DEBUG \
6
+ --limit 5
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_amh.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: amh
3
+ doc_to_target: '{% if answer is not none %}{{answer[15:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_amh
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_eng.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: eng
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_eng
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ewe.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: ewe
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_ewe
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_fra.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: fra
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_fra
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_hau.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: hau
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_hau
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_ibo.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: ibo
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_ibo
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_kin.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: kin
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_kin
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lin.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: lin
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_lin
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_lug.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: lug
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_lug
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_orm.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: orm
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_orm
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sna.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: sna
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_sna
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_sot.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: sot
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_sot
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_swa.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: swa
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_swa
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_twi.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: twi
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_twi
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_wol.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: wol
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_wol
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_xho.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: xho
3
+ doc_to_target: '{% if answer is not none %}{{answer[21:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_xho
scripts/yans/lm-evaluation-harness/lm_eval/tasks/afrimgsm/translate/afrimgsm_translate_yor.yaml ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Generated by utils.py
2
+ dataset_name: yor
3
+ doc_to_target: '{% if answer is not none %}{{answer[16:]}}{% else %}{{answer_number|string}}{% endif %}'
4
+ doc_to_text: '{% if answer is not none %}{{question+"\nAnswer:"}}{% else %}{{"Question: "+question+"\nAnswer:"}}{% endif %}'
5
+ generation_kwargs:
6
+ do_sample: false
7
+ until:
8
+ - 'Question:'
9
+ - </s>
10
+ - <|im_end|>
11
+ include: translate_direct_yaml
12
+ task: afrimgsm_translate_direct_yor