diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..2bc6e4710145de59ce9273c863de5852d21fdd37
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "hi",
+  "template_name": "Choose Story Ending",
+  "evaluation": {
+    "accuracy": 0.5274652547981469
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1611f8083d9ee1370413d415876d44615ffebb07
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "hi",
+  "template_name": "Generate Ending",
+  "evaluation": {
+    "accuracy": 0.5519523494374586
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..cbc81e3845bf9a3eda12bec0378dbcf1845b0993
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "hi",
+  "template_name": "Novel Correct Ending",
+  "evaluation": {
+    "accuracy": 0.5016545334215751
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..de8517d5fc16e099f47324ca9028112c4a9068ef
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "zh",
+  "template_name": "Answer Given options",
+  "evaluation": {
+    "accuracy": 0.5367306419589676
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..0dcc9bafb0c21c0302d69d039eafd1443f636dac
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "zh",
+  "template_name": "Choose Story Ending",
+  "evaluation": {
+    "accuracy": 0.5817339510258107
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..8ee6d1f7d4a0da85adde3fe68e4ce8e77542ffde
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "zh",
+  "template_name": "Generate Ending",
+  "evaluation": {
+    "accuracy": 0.5671740569159497
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..3b3a45ab2f9183e550e8f0e388f90f71e82a8169
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "zh",
+  "template_name": "Novel Correct Ending",
+  "evaluation": {
+    "accuracy": 0.57180675049636
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..7d300a52733b5a93ad10b9fcd433edbd73cfc61b
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "zh",
+  "template_name": "Story Continuation and Options",
+  "evaluation": {
+    "accuracy": 0.5731303772336201
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json b/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..02f7b18da734b6a108c25ae21b35913eddd5601e
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "en",
+  "template_name": "Replace",
+  "evaluation": {
+    "accuracy": 0.5040860215053763
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..88f33c89d62e9572de710df405793970233dd204
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "en",
+  "template_name": "underscore refer to",
+  "evaluation": {
+    "accuracy": 0.501505376344086
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json b/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..c615b55b635dfc7e2760731e77562d3fabf8e169
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "pt",
+  "template_name": "True or False",
+  "evaluation": {
+    "accuracy": 0.4790874524714829
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..4f1e4aa315b3961c5e71d6fb0c5bb3a47e7ba1f2
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "zh",
+  "template_name": "Replace",
+  "evaluation": {
+    "accuracy": 0.5515873015873016
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..20dba9d5537fc5be3fa490a050bf89daa45202a4
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "zh",
+  "template_name": "stand for",
+  "evaluation": {
+    "accuracy": 0.49404761904761907
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..51b82372d10db539e894d4342514266905a76622
--- /dev/null
+++ b/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "zh",
+  "template_name": "underscore refer to",
+  "evaluation": {
+    "accuracy": 0.5436507936507936
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/xcopa/id/cause_effect/results.json b/evaluation_l1/xcopa/id/cause_effect/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ebf414a4ec6a5ccaff47e22de3d4f9d69fd5d147
--- /dev/null
+++ b/evaluation_l1/xcopa/id/cause_effect/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "id",
+  "template_name": "cause_effect",
+  "evaluation": {
+    "accuracy": 0.59
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/xcopa/id/plausible_alternatives/results.json b/evaluation_l1/xcopa/id/plausible_alternatives/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ed3def3f98552f02518e5667ac2f6b8075d7db6f
--- /dev/null
+++ b/evaluation_l1/xcopa/id/plausible_alternatives/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "id",
+  "template_name": "plausible_alternatives",
+  "evaluation": {
+    "accuracy": 0.6
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/xcopa/zh/plausible_alternatives/results.json b/evaluation_l1/xcopa/zh/plausible_alternatives/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..6ae352861450478974a4561551a6bb5cde1be577
--- /dev/null
+++ b/evaluation_l1/xcopa/zh/plausible_alternatives/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xcopa",
+  "dataset_config_name": "zh",
+  "template_name": "plausible_alternatives",
+  "evaluation": {
+    "accuracy": 0.52
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/xnli/en/can_we_infer/results.json b/evaluation_l1/xnli/en/can_we_infer/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..0e73f7f24aaaad199166263e2ac28d2c6bba2072
--- /dev/null
+++ b/evaluation_l1/xnli/en/can_we_infer/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "en",
+  "template_name": "can we infer",
+  "evaluation": {
+    "accuracy": 0.4710843373493976
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/xnli/es/GPT-3_style/results.json b/evaluation_l1/xnli/es/GPT-3_style/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..43795ca144c97274bbafff1c6cc5cf17aa0ee3c2
--- /dev/null
+++ b/evaluation_l1/xnli/es/GPT-3_style/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "es",
+  "template_name": "GPT-3 style",
+  "evaluation": {
+    "accuracy": 0.42771084337349397
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/xnli/es/MNLI_crowdsource/results.json b/evaluation_l1/xnli/es/MNLI_crowdsource/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..cdde43a39b986ea6ad2ae279d221877aa497658f
--- /dev/null
+++ b/evaluation_l1/xnli/es/MNLI_crowdsource/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "es",
+  "template_name": "MNLI crowdsource",
+  "evaluation": {
+    "accuracy": 0.35903614457831323
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..622788c3db1c6f8c9139c712e92663256e86671f
--- /dev/null
+++ b/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "es",
+  "template_name": "guaranteed/possible/impossible",
+  "evaluation": {
+    "accuracy": 0.3333333333333333
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l1/xnli/es/justified_in_saying/results.json b/evaluation_l1/xnli/es/justified_in_saying/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..fd8227a9b2a3905803b2c4a6050d51dd49392e4b
--- /dev/null
+++ b/evaluation_l1/xnli/es/justified_in_saying/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "es",
+  "template_name": "justified in saying",
+  "evaluation": {
+    "accuracy": 0.41244979919678715
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l2/xnli/th/can_we_infer/results.json b/evaluation_l2/xnli/th/can_we_infer/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..db8b5c1ee0d691384a287ae3853e4f50649f29b0
--- /dev/null
+++ b/evaluation_l2/xnli/th/can_we_infer/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "th",
+  "template_name": "can we infer",
+  "evaluation": {
+    "accuracy": 0.42931726907630524
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l2/xnli/tr/GPT-3_style/results.json b/evaluation_l2/xnli/tr/GPT-3_style/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..7f7552030d69a08c3cdfc230a2efa3c3efa81736
--- /dev/null
+++ b/evaluation_l2/xnli/tr/GPT-3_style/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "tr",
+  "template_name": "GPT-3 style",
+  "evaluation": {
+    "accuracy": 0.3397590361445783
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json b/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..cd86f89c8ab9d05c3a81afb5d6d5c8f4b5a5d760
--- /dev/null
+++ b/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "tr",
+  "template_name": "MNLI crowdsource",
+  "evaluation": {
+    "accuracy": 0.3389558232931727
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l2/xnli/tr/can_we_infer/results.json b/evaluation_l2/xnli/tr/can_we_infer/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1b3eb37e1c009ec4d6f3771713cdce610229c17f
--- /dev/null
+++ b/evaluation_l2/xnli/tr/can_we_infer/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "tr",
+  "template_name": "can we infer",
+  "evaluation": {
+    "accuracy": 0.3751004016064257
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json b/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..60a513f6afbc900dd10e6fa3025ac664e42b3a0e
--- /dev/null
+++ b/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "tr",
+  "template_name": "guaranteed/possible/impossible",
+  "evaluation": {
+    "accuracy": 0.3333333333333333
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_l2/xnli/tr/justified_in_saying/results.json b/evaluation_l2/xnli/tr/justified_in_saying/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..d74170e464dd9b2fa1eaad7774e276d4689a710a
--- /dev/null
+++ b/evaluation_l2/xnli/tr/justified_in_saying/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "tr",
+  "template_name": "justified in saying",
+  "evaluation": {
+    "accuracy": 0.3413654618473896
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xnlimtht/xnli/ar/GPT-3_style_arht/results.json b/evaluation_xnlimtht/xnli/ar/GPT-3_style_arht/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..4d7a375afb36e93401bf57caa3a12709cb7a637a
--- /dev/null
+++ b/evaluation_xnlimtht/xnli/ar/GPT-3_style_arht/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ar",
+  "template_name": "GPT-3 style_arht",
+  "evaluation": {
+    "accuracy": 0.3401606425702811
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_arht', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xnlimtht/xnli/ar/justified_in_saying_arht/results.json b/evaluation_xnlimtht/xnli/ar/justified_in_saying_arht/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..ff1676224311b8a9326389d2b145dd32031f4220
--- /dev/null
+++ b/evaluation_xnlimtht/xnli/ar/justified_in_saying_arht/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ar",
+  "template_name": "justified in saying_arht",
+  "evaluation": {
+    "accuracy": 0.3706827309236948
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_arht', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xnlimtht/xnli/ur/GPT-3_style_urht/results.json b/evaluation_xnlimtht/xnli/ur/GPT-3_style_urht/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..dc867facc52e0ea2ab6aa10b668c3fcc54d9c13b
--- /dev/null
+++ b/evaluation_xnlimtht/xnli/ur/GPT-3_style_urht/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ur",
+  "template_name": "GPT-3 style_urht",
+  "evaluation": {
+    "accuracy": 0.3337349397590361
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urht', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xnlimtht/xnli/ur/MNLI_crowdsource_urht/results.json b/evaluation_xnlimtht/xnli/ur/MNLI_crowdsource_urht/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..c5b9de86d2298bce3f12f08b325e1fd590f5159a
--- /dev/null
+++ b/evaluation_xnlimtht/xnli/ur/MNLI_crowdsource_urht/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ur",
+  "template_name": "MNLI crowdsource_urht",
+  "evaluation": {
+    "accuracy": 0.3333333333333333
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urht', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xnlimtht/xnli/ur/can_we_infer_urht/results.json b/evaluation_xnlimtht/xnli/ur/can_we_infer_urht/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..be565fed74f73ee961353f94c3135ce2c2ee9311
--- /dev/null
+++ b/evaluation_xnlimtht/xnli/ur/can_we_infer_urht/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ur",
+  "template_name": "can we infer_urht",
+  "evaluation": {
+    "accuracy": 0.3385542168674699
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urht', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xnlimtht/xnli/ur/guaranteed_possible_impossible_urht/results.json b/evaluation_xnlimtht/xnli/ur/guaranteed_possible_impossible_urht/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..20f26edc37b6d81a89163ee0fc4f6732c0bd64e4
--- /dev/null
+++ b/evaluation_xnlimtht/xnli/ur/guaranteed_possible_impossible_urht/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ur",
+  "template_name": "guaranteed/possible/impossible_urht",
+  "evaluation": {
+    "accuracy": 0.3317269076305221
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urht', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xnlimtht/xnli/ur/justified_in_saying_urht/results.json b/evaluation_xnlimtht/xnli/ur/justified_in_saying_urht/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..c5ad5ce3a269503dec6b0d1c01431f1035b5f44c
--- /dev/null
+++ b/evaluation_xnlimtht/xnli/ur/justified_in_saying_urht/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "ur",
+  "template_name": "justified in saying_urht",
+  "evaluation": {
+    "accuracy": 0.336144578313253
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urht', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xnlimtht/xnli/vi/GPT-3_style_viht/results.json b/evaluation_xnlimtht/xnli/vi/GPT-3_style_viht/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..0abc100456ef57a417ec0f9e57db8fa7466f3ce0
--- /dev/null
+++ b/evaluation_xnlimtht/xnli/vi/GPT-3_style_viht/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "vi",
+  "template_name": "GPT-3 style_viht",
+  "evaluation": {
+    "accuracy": 0.41686746987951806
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_viht', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xnlimtht/xnli/vi/MNLI_crowdsource_viht/results.json b/evaluation_xnlimtht/xnli/vi/MNLI_crowdsource_viht/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..c3548d792846029c98a0cc21b592511922c8477f
--- /dev/null
+++ b/evaluation_xnlimtht/xnli/vi/MNLI_crowdsource_viht/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "xnli",
+  "dataset_config_name": "vi",
+  "template_name": "MNLI crowdsource_viht",
+  "evaluation": {
+    "accuracy": 0.3710843373493976
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_viht', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..42d17375b4062f9b352cedf6571f8b585f5cae8d
--- /dev/null
+++ b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "zh",
+  "template_name": "Choose Story Ending_zhmt",
+  "evaluation": {
+    "accuracy": 0.5665122435473197
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Choose Story Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..69da9afc27b3f40bdc7648b771f5add02e8fb0d4
--- /dev/null
+++ b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "zh",
+  "template_name": "Generate Ending_zhmt",
+  "evaluation": {
+    "accuracy": 0.5684976836532097
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..f65e742ac768b39f176a21e961ca2c9092dfd3ed
--- /dev/null
+++ b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xstory_cloze",
+  "dataset_config_name": "zh",
+  "template_name": "Novel Correct Ending_zhmt",
+  "evaluation": {
+    "accuracy": 0.5526141628060887
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..4ae41c0d127c54b14b2c5cd022688cdf5a6a78e5
--- /dev/null
+++ b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "fr",
+  "template_name": "Replace_frmt",
+  "evaluation": {
+    "accuracy": 0.5903614457831325
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='Replace_frmt', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..1625a7d4696dfcd3c9fbd6e1324e3386c340e1fe
--- /dev/null
+++ b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "fr",
+  "template_name": "stand for_frmt",
+  "evaluation": {
+    "accuracy": 0.5180722891566265
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='stand for_frmt', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json
new file mode 100644
index 0000000000000000000000000000000000000000..b06de1cac21bc4303de47c7b1c62495c4456daf2
--- /dev/null
+++ b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json
@@ -0,0 +1,9 @@
+{
+  "dataset_name": "Muennighoff/xwinograd",
+  "dataset_config_name": "fr",
+  "template_name": "underscore refer to_frmt",
+  "evaluation": {
+    "accuracy": 0.5060240963855421
+  },
+  "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)"
+}
\ No newline at end of file
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json
diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json
rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/id/C1_or_C2?_premise_idmt/results.json b/evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/id/C1_or_C2?_premise_idmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/id/best_option_idmt/results.json b/evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/id/best_option_idmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/id/cause_effect_idmt/results.json b/evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/id/cause_effect_idmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/id/i_am_hesitating_idmt/results.json b/evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/id/i_am_hesitating_idmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/id/plausible_alternatives_idmt/results.json b/evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/id/plausible_alternatives_idmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/sw/C1_or_C2?_premise_swmt/results.json b/evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/sw/C1_or_C2?_premise_swmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/sw/best_option_swmt/results.json b/evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/sw/best_option_swmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/sw/cause_effect_swmt/results.json b/evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/sw/cause_effect_swmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/sw/i_am_hesitating_swmt/results.json b/evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/sw/i_am_hesitating_swmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/sw/plausible_alternatives_swmt/results.json b/evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/sw/plausible_alternatives_swmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/ta/C1_or_C2?_premise_tamt/results.json b/evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/ta/C1_or_C2?_premise_tamt/results.json
rename to evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/ta/best_option_tamt/results.json b/evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/ta/best_option_tamt/results.json
rename to evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/ta/cause_effect_tamt/results.json b/evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/ta/cause_effect_tamt/results.json
rename to evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/ta/i_am_hesitating_tamt/results.json b/evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/ta/i_am_hesitating_tamt/results.json
rename to evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/ta/plausible_alternatives_tamt/results.json b/evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/ta/plausible_alternatives_tamt/results.json
rename to evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/vi/C1_or_C2?_premise_vimt/results.json b/evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/vi/C1_or_C2?_premise_vimt/results.json
rename to evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/vi/best_option_vimt/results.json b/evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/vi/best_option_vimt/results.json
rename to evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/vi/cause_effect_vimt/results.json b/evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/vi/cause_effect_vimt/results.json
rename to evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/vi/i_am_hesitating_vimt/results.json b/evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/vi/i_am_hesitating_vimt/results.json
rename to evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/vi/plausible_alternatives_vimt/results.json b/evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/vi/plausible_alternatives_vimt/results.json
rename to evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json b/evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/zh/best_option_zhmt/results.json b/evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/zh/best_option_zhmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/zh/cause_effect_zhmt/results.json b/evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/zh/cause_effect_zhmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/zh/i_am_hesitating_zhmt/results.json b/evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/zh/i_am_hesitating_zhmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json
diff --git a/evaluation_xcopawinostorymt/xcopa/zh/plausible_alternatives_zhmt/results.json b/evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json
similarity index 100%
rename from evaluation_xcopawinostorymt/xcopa/zh/plausible_alternatives_zhmt/results.json
rename to evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json