diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..2bc6e4710145de59ce9273c863de5852d21fdd37 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.5274652547981469 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1611f8083d9ee1370413d415876d44615ffebb07 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.5519523494374586 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..cbc81e3845bf9a3eda12bec0378dbcf1845b0993 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "hi", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.5016545334215751 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='hi', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..de8517d5fc16e099f47324ca9028112c4a9068ef --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Answer_Given_options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Answer Given options", + "evaluation": { + "accuracy": 0.5367306419589676 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Answer Given options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0dcc9bafb0c21c0302d69d039eafd1443f636dac --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Choose_Story_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Choose Story Ending", + "evaluation": { + "accuracy": 0.5817339510258107 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Choose Story Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..8ee6d1f7d4a0da85adde3fe68e4ce8e77542ffde --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Generate_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Generate Ending", + "evaluation": { + "accuracy": 0.5671740569159497 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Generate Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json new file mode 100644 index 0000000000000000000000000000000000000000..3b3a45ab2f9183e550e8f0e388f90f71e82a8169 --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Novel Correct Ending", + "evaluation": { + "accuracy": 0.57180675049636 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Novel Correct Ending', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json b/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7d300a52733b5a93ad10b9fcd433edbd73cfc61b --- /dev/null +++ b/evaluation_l1/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Story Continuation and Options", + "evaluation": { + "accuracy": 0.5731303772336201 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='Story Continuation and Options', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json b/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json new file mode 100644 index 0000000000000000000000000000000000000000..02f7b18da734b6a108c25ae21b35913eddd5601e --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/en/Replace/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "en", + "template_name": "Replace", + "evaluation": { + "accuracy": 0.5040860215053763 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..88f33c89d62e9572de710df405793970233dd204 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/en/underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "en", + "template_name": "underscore refer to", + "evaluation": { + "accuracy": 0.501505376344086 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json b/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c615b55b635dfc7e2760731e77562d3fabf8e169 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/pt/True_or_False/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "pt", + "template_name": "True or False", + "evaluation": { + "accuracy": 0.4790874524714829 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='pt', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='True or False', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4f1e4aa315b3961c5e71d6fb0c5bb3a47e7ba1f2 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/zh/Replace/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "Replace", + "evaluation": { + "accuracy": 0.5515873015873016 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='Replace', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json new file mode 100644 index 0000000000000000000000000000000000000000..20dba9d5537fc5be3fa490a050bf89daa45202a4 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/zh/stand_for/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "stand for", + "evaluation": { + "accuracy": 0.49404761904761907 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='stand for', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json b/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json new file mode 100644 index 0000000000000000000000000000000000000000..51b82372d10db539e894d4342514266905a76622 --- /dev/null +++ b/evaluation_l1/Muennighoff_xwinograd/zh/underscore_refer_to/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "zh", + "template_name": "underscore refer to", + "evaluation": { + "accuracy": 0.5436507936507936 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='en', template_name='underscore refer to', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/id/cause_effect/results.json b/evaluation_l1/xcopa/id/cause_effect/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ebf414a4ec6a5ccaff47e22de3d4f9d69fd5d147 --- /dev/null +++ b/evaluation_l1/xcopa/id/cause_effect/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "cause_effect", + "evaluation": { + "accuracy": 0.59 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='cause_effect', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/id/plausible_alternatives/results.json b/evaluation_l1/xcopa/id/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ed3def3f98552f02518e5667ac2f6b8075d7db6f --- /dev/null +++ b/evaluation_l1/xcopa/id/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "id", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.6 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='id', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xcopa/zh/plausible_alternatives/results.json b/evaluation_l1/xcopa/zh/plausible_alternatives/results.json new file mode 100644 index 0000000000000000000000000000000000000000..6ae352861450478974a4561551a6bb5cde1be577 --- /dev/null +++ b/evaluation_l1/xcopa/zh/plausible_alternatives/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xcopa", + "dataset_config_name": "zh", + "template_name": "plausible_alternatives", + "evaluation": { + "accuracy": 0.52 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='xcopa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='plausible_alternatives', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/en/can_we_infer/results.json b/evaluation_l1/xnli/en/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0e73f7f24aaaad199166263e2ac28d2c6bba2072 --- /dev/null +++ b/evaluation_l1/xnli/en/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "en", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.4710843373493976 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/es/GPT-3_style/results.json b/evaluation_l1/xnli/es/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..43795ca144c97274bbafff1c6cc5cf17aa0ee3c2 --- /dev/null +++ b/evaluation_l1/xnli/es/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.42771084337349397 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/es/MNLI_crowdsource/results.json b/evaluation_l1/xnli/es/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..cdde43a39b986ea6ad2ae279d221877aa497658f --- /dev/null +++ b/evaluation_l1/xnli/es/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.35903614457831323 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json b/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..622788c3db1c6f8c9139c712e92663256e86671f --- /dev/null +++ b/evaluation_l1/xnli/es/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.3333333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l1/xnli/es/justified_in_saying/results.json b/evaluation_l1/xnli/es/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..fd8227a9b2a3905803b2c4a6050d51dd49392e4b --- /dev/null +++ b/evaluation_l1/xnli/es/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "es", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.41244979919678715 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l2/xnli/th/can_we_infer/results.json b/evaluation_l2/xnli/th/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..db8b5c1ee0d691384a287ae3853e4f50649f29b0 --- /dev/null +++ b/evaluation_l2/xnli/th/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "th", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.42931726907630524 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='th', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l2/xnli/tr/GPT-3_style/results.json b/evaluation_l2/xnli/tr/GPT-3_style/results.json new file mode 100644 index 0000000000000000000000000000000000000000..7f7552030d69a08c3cdfc230a2efa3c3efa81736 --- /dev/null +++ b/evaluation_l2/xnli/tr/GPT-3_style/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "GPT-3 style", + "evaluation": { + "accuracy": 0.3397590361445783 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='GPT-3 style', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json b/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json new file mode 100644 index 0000000000000000000000000000000000000000..cd86f89c8ab9d05c3a81afb5d6d5c8f4b5a5d760 --- /dev/null +++ b/evaluation_l2/xnli/tr/MNLI_crowdsource/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "MNLI crowdsource", + "evaluation": { + "accuracy": 0.3389558232931727 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='MNLI crowdsource', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l2/xnli/tr/can_we_infer/results.json b/evaluation_l2/xnli/tr/can_we_infer/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1b3eb37e1c009ec4d6f3771713cdce610229c17f --- /dev/null +++ b/evaluation_l2/xnli/tr/can_we_infer/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "can we infer", + "evaluation": { + "accuracy": 0.3751004016064257 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='can we infer', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json b/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json new file mode 100644 index 0000000000000000000000000000000000000000..60a513f6afbc900dd10e6fa3025ac664e42b3a0e --- /dev/null +++ b/evaluation_l2/xnli/tr/guaranteed_possible_impossible/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "guaranteed/possible/impossible", + "evaluation": { + "accuracy": 0.3333333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='guaranteed/possible/impossible', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_l2/xnli/tr/justified_in_saying/results.json b/evaluation_l2/xnli/tr/justified_in_saying/results.json new file mode 100644 index 0000000000000000000000000000000000000000..d74170e464dd9b2fa1eaad7774e276d4689a710a --- /dev/null +++ b/evaluation_l2/xnli/tr/justified_in_saying/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "tr", + "template_name": "justified in saying", + "evaluation": { + "accuracy": 0.3413654618473896 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='tr', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='justified in saying', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlimtht/xnli/ar/GPT-3_style_arht/results.json b/evaluation_xnlimtht/xnli/ar/GPT-3_style_arht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4d7a375afb36e93401bf57caa3a12709cb7a637a --- /dev/null +++ b/evaluation_xnlimtht/xnli/ar/GPT-3_style_arht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "GPT-3 style_arht", + "evaluation": { + "accuracy": 0.3401606425702811 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='GPT-3 style_arht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlimtht/xnli/ar/justified_in_saying_arht/results.json b/evaluation_xnlimtht/xnli/ar/justified_in_saying_arht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..ff1676224311b8a9326389d2b145dd32031f4220 --- /dev/null +++ b/evaluation_xnlimtht/xnli/ar/justified_in_saying_arht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ar", + "template_name": "justified in saying_arht", + "evaluation": { + "accuracy": 0.3706827309236948 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ar', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ar', template_name='justified in saying_arht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlimtht/xnli/ur/GPT-3_style_urht/results.json b/evaluation_xnlimtht/xnli/ur/GPT-3_style_urht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..dc867facc52e0ea2ab6aa10b668c3fcc54d9c13b --- /dev/null +++ b/evaluation_xnlimtht/xnli/ur/GPT-3_style_urht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "GPT-3 style_urht", + "evaluation": { + "accuracy": 0.3337349397590361 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='GPT-3 style_urht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlimtht/xnli/ur/MNLI_crowdsource_urht/results.json b/evaluation_xnlimtht/xnli/ur/MNLI_crowdsource_urht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c5b9de86d2298bce3f12f08b325e1fd590f5159a --- /dev/null +++ b/evaluation_xnlimtht/xnli/ur/MNLI_crowdsource_urht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "MNLI crowdsource_urht", + "evaluation": { + "accuracy": 0.3333333333333333 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='MNLI crowdsource_urht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlimtht/xnli/ur/can_we_infer_urht/results.json b/evaluation_xnlimtht/xnli/ur/can_we_infer_urht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..be565fed74f73ee961353f94c3135ce2c2ee9311 --- /dev/null +++ b/evaluation_xnlimtht/xnli/ur/can_we_infer_urht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "can we infer_urht", + "evaluation": { + "accuracy": 0.3385542168674699 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='can we infer_urht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlimtht/xnli/ur/guaranteed_possible_impossible_urht/results.json b/evaluation_xnlimtht/xnli/ur/guaranteed_possible_impossible_urht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..20f26edc37b6d81a89163ee0fc4f6732c0bd64e4 --- /dev/null +++ b/evaluation_xnlimtht/xnli/ur/guaranteed_possible_impossible_urht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "guaranteed/possible/impossible_urht", + "evaluation": { + "accuracy": 0.3317269076305221 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='guaranteed/possible/impossible_urht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlimtht/xnli/ur/justified_in_saying_urht/results.json b/evaluation_xnlimtht/xnli/ur/justified_in_saying_urht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c5ad5ce3a269503dec6b0d1c01431f1035b5f44c --- /dev/null +++ b/evaluation_xnlimtht/xnli/ur/justified_in_saying_urht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "ur", + "template_name": "justified in saying_urht", + "evaluation": { + "accuracy": 0.336144578313253 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='ur', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='ur', template_name='justified in saying_urht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlimtht/xnli/vi/GPT-3_style_viht/results.json b/evaluation_xnlimtht/xnli/vi/GPT-3_style_viht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..0abc100456ef57a417ec0f9e57db8fa7466f3ce0 --- /dev/null +++ b/evaluation_xnlimtht/xnli/vi/GPT-3_style_viht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "GPT-3 style_viht", + "evaluation": { + "accuracy": 0.41686746987951806 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='GPT-3 style_viht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xnlimtht/xnli/vi/MNLI_crowdsource_viht/results.json b/evaluation_xnlimtht/xnli/vi/MNLI_crowdsource_viht/results.json new file mode 100644 index 0000000000000000000000000000000000000000..c3548d792846029c98a0cc21b592511922c8477f --- /dev/null +++ b/evaluation_xnlimtht/xnli/vi/MNLI_crowdsource_viht/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "xnli", + "dataset_config_name": "vi", + "template_name": "MNLI crowdsource_viht", + "evaluation": { + "accuracy": 0.3710843373493976 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='vi', dataset_name='xnli', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='vi', template_name='MNLI crowdsource_viht', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Answer_Given_options_armt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Choose_Story_Ending_armt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Generate_Ending_armt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Novel_Correct_Ending_armt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/ar/Story_Continuation_and_Options_armt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Answer_Given_options_esmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Choose_Story_Ending_esmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Generate_Ending_esmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Novel_Correct_Ending_esmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/es/Story_Continuation_and_Options_esmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Answer_Given_options_eumt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Choose_Story_Ending_eumt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Generate_Ending_eumt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Novel_Correct_Ending_eumt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/eu/Story_Continuation_and_Options_eumt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Answer_Given_options_himt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Choose_Story_Ending_himt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Generate_Ending_himt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Novel_Correct_Ending_himt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/hi/Story_Continuation_and_Options_himt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Answer_Given_options_idmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Choose_Story_Ending_idmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Generate_Ending_idmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Novel_Correct_Ending_idmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/id/Story_Continuation_and_Options_idmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Answer_Given_options_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..42d17375b4062f9b352cedf6571f8b585f5cae8d --- /dev/null +++ b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Choose_Story_Ending_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Choose Story Ending_zhmt", + "evaluation": { + "accuracy": 0.5665122435473197 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Choose Story Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..69da9afc27b3f40bdc7648b771f5add02e8fb0d4 --- /dev/null +++ b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Generate_Ending_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Generate Ending_zhmt", + "evaluation": { + "accuracy": 0.5684976836532097 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Generate Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..f65e742ac768b39f176a21e961ca2c9092dfd3ed --- /dev/null +++ b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Novel_Correct_Ending_zhmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xstory_cloze", + "dataset_config_name": "zh", + "template_name": "Novel Correct Ending_zhmt", + "evaluation": { + "accuracy": 0.5526141628060887 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='Muennighoff/xstory_cloze', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='validation', target_max_length=256, template_config_name='zh', template_name='Novel Correct Ending_zhmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xstory_cloze/zh/Story_Continuation_and_Options_zhmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..4ae41c0d127c54b14b2c5cd022688cdf5a6a78e5 --- /dev/null +++ b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/Replace_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "Replace_frmt", + "evaluation": { + "accuracy": 0.5903614457831325 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='Replace_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/True_or_False_frmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/does_underscore_refer_to_frmt/results.json diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..1625a7d4696dfcd3c9fbd6e1324e3386c340e1fe --- /dev/null +++ b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/stand_for_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "stand for_frmt", + "evaluation": { + "accuracy": 0.5180722891566265 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='stand for_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json new file mode 100644 index 0000000000000000000000000000000000000000..b06de1cac21bc4303de47c7b1c62495c4456daf2 --- /dev/null +++ b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/fr/underscore_refer_to_frmt/results.json @@ -0,0 +1,9 @@ +{ + "dataset_name": "Muennighoff/xwinograd", + "dataset_config_name": "fr", + "template_name": "underscore refer to_frmt", + "evaluation": { + "accuracy": 0.5060240963855421 + }, + "arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='Muennighoff/xwinograd', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1', nospace=False, output_dir='/gpfsscratch/rech/six/commun/experiments/muennighoff/bloomckpt/760mt0/bloomz-1b1/evaluation', pad_to_max_length=False, per_device_eval_batch_size=8, prefixlm=False, split='test', target_max_length=256, template_config_name='fr', template_name='underscore refer to_frmt', tokenizer_name=None, use_slow_tokenizer=False)" +} \ No newline at end of file diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/Replace_ptmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/True_or_False_ptmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/does_underscore_refer_to_ptmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/stand_for_ptmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/pt/underscore_refer_to_ptmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/Replace_zhmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/True_or_False_zhmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/does_underscore_refer_to_zhmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/stand_for_zhmt/results.json diff --git a/evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json b/evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json rename to evaluation_xwinostorycopamt/Muennighoff_xwinograd/zh/underscore_refer_to_zhmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/id/C1_or_C2?_premise_idmt/results.json b/evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/id/C1_or_C2?_premise_idmt/results.json rename to evaluation_xwinostorycopamt/xcopa/id/C1_or_C2?_premise_idmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/id/best_option_idmt/results.json b/evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/id/best_option_idmt/results.json rename to evaluation_xwinostorycopamt/xcopa/id/best_option_idmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/id/cause_effect_idmt/results.json b/evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/id/cause_effect_idmt/results.json rename to evaluation_xwinostorycopamt/xcopa/id/cause_effect_idmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/id/i_am_hesitating_idmt/results.json b/evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/id/i_am_hesitating_idmt/results.json rename to evaluation_xwinostorycopamt/xcopa/id/i_am_hesitating_idmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/id/plausible_alternatives_idmt/results.json b/evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/id/plausible_alternatives_idmt/results.json rename to evaluation_xwinostorycopamt/xcopa/id/plausible_alternatives_idmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/sw/C1_or_C2?_premise_swmt/results.json b/evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/sw/C1_or_C2?_premise_swmt/results.json rename to evaluation_xwinostorycopamt/xcopa/sw/C1_or_C2?_premise_swmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/sw/best_option_swmt/results.json b/evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/sw/best_option_swmt/results.json rename to evaluation_xwinostorycopamt/xcopa/sw/best_option_swmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/sw/cause_effect_swmt/results.json b/evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/sw/cause_effect_swmt/results.json rename to evaluation_xwinostorycopamt/xcopa/sw/cause_effect_swmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/sw/i_am_hesitating_swmt/results.json b/evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/sw/i_am_hesitating_swmt/results.json rename to evaluation_xwinostorycopamt/xcopa/sw/i_am_hesitating_swmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/sw/plausible_alternatives_swmt/results.json b/evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/sw/plausible_alternatives_swmt/results.json rename to evaluation_xwinostorycopamt/xcopa/sw/plausible_alternatives_swmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/ta/C1_or_C2?_premise_tamt/results.json b/evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/ta/C1_or_C2?_premise_tamt/results.json rename to evaluation_xwinostorycopamt/xcopa/ta/C1_or_C2?_premise_tamt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/ta/best_option_tamt/results.json b/evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/ta/best_option_tamt/results.json rename to evaluation_xwinostorycopamt/xcopa/ta/best_option_tamt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/ta/cause_effect_tamt/results.json b/evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/ta/cause_effect_tamt/results.json rename to evaluation_xwinostorycopamt/xcopa/ta/cause_effect_tamt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/ta/i_am_hesitating_tamt/results.json b/evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/ta/i_am_hesitating_tamt/results.json rename to evaluation_xwinostorycopamt/xcopa/ta/i_am_hesitating_tamt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/ta/plausible_alternatives_tamt/results.json b/evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/ta/plausible_alternatives_tamt/results.json rename to evaluation_xwinostorycopamt/xcopa/ta/plausible_alternatives_tamt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/vi/C1_or_C2?_premise_vimt/results.json b/evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/vi/C1_or_C2?_premise_vimt/results.json rename to evaluation_xwinostorycopamt/xcopa/vi/C1_or_C2?_premise_vimt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/vi/best_option_vimt/results.json b/evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/vi/best_option_vimt/results.json rename to evaluation_xwinostorycopamt/xcopa/vi/best_option_vimt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/vi/cause_effect_vimt/results.json b/evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/vi/cause_effect_vimt/results.json rename to evaluation_xwinostorycopamt/xcopa/vi/cause_effect_vimt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/vi/i_am_hesitating_vimt/results.json b/evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/vi/i_am_hesitating_vimt/results.json rename to evaluation_xwinostorycopamt/xcopa/vi/i_am_hesitating_vimt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/vi/plausible_alternatives_vimt/results.json b/evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/vi/plausible_alternatives_vimt/results.json rename to evaluation_xwinostorycopamt/xcopa/vi/plausible_alternatives_vimt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json b/evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json rename to evaluation_xwinostorycopamt/xcopa/zh/C1_or_C2?_premise_zhmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/zh/best_option_zhmt/results.json b/evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/zh/best_option_zhmt/results.json rename to evaluation_xwinostorycopamt/xcopa/zh/best_option_zhmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/zh/cause_effect_zhmt/results.json b/evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/zh/cause_effect_zhmt/results.json rename to evaluation_xwinostorycopamt/xcopa/zh/cause_effect_zhmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/zh/i_am_hesitating_zhmt/results.json b/evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/zh/i_am_hesitating_zhmt/results.json rename to evaluation_xwinostorycopamt/xcopa/zh/i_am_hesitating_zhmt/results.json diff --git a/evaluation_xcopawinostorymt/xcopa/zh/plausible_alternatives_zhmt/results.json b/evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json similarity index 100% rename from evaluation_xcopawinostorymt/xcopa/zh/plausible_alternatives_zhmt/results.json rename to evaluation_xwinostorycopamt/xcopa/zh/plausible_alternatives_zhmt/results.json