Commit
·
1a19c5a
1
Parent(s):
1897f6d
Add files
Browse files. This view is limited to 50 files because it contains too many changes.
See raw diff
- .gitattributes +13 -0
- xp3capmixlossseq_global_step1000/config.json +30 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/en/prompt_body_title_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/en/prompt_review_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/en/prompt_title_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/es/prompt_body_title_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/es/prompt_review_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/es/prompt_title_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/fr/prompt_body_title_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/fr/prompt_review_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/fr/prompt_title_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/zh/prompt_body_title_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/zh/prompt_review_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/zh/prompt_title_to_star/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/aqua_rat/raw/Answer_questions_from_options/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/aqua_rat/raw/answer_quiz/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/aqua_rat/raw/select_the_best_option/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/art/choose_hypothesis/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/art/choose_hypothesis_believable/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/art/choose_hypothesis_desc/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/art/choose_hypothesis_likely/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/art/choose_hypothesis_options/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/banking77/direct_to_which_department/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/banking77/help_page_topic/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/banking77/rephrase_as_banking_term/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/blbooksgenre/title_genre_classifiction/classify/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/blbooksgenre/title_genre_classifiction/multi-choice/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/blbooksgenre/title_genre_classifiction/premise_context_first/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/blimp/adjunct_island/grammatical_between_1_2/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/blimp/adjunct_island/grammatical_between_A_B/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/blimp/adjunct_island/grammatical_which_one_1_2/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/blimp/adjunct_island/single_sentence_bad_yes_no/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/blimp/adjunct_island/single_sentence_good_yes_no/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/climate_fever/claim_and_all_supporting_evidences/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/climate_fever/fifth_evidence_and_claim_itemization/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/climate_fever/first_evidence_and_claim_itemization/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/climate_fever/second_evidence_and_claim_itemization/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/climate_fever/third_evidence_claim_pair/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/codah/codah/affirmative_instruction_after_sentence_and_choices/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/codah/codah/affirmative_instruction_before_sentence_and_choices/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/codah/codah/interrogative_instruction_after_sentence_and_choices/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/commonsense_qa/answer_given_question_without_options/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/commonsense_qa/most_suitable_answer/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/commonsense_qa/question_answering/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/conv_ai_3/ambiguous/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/conv_ai_3/clarification_needed/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/conv_ai_3/directly_answer/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/conv_ai_3/score_give_number/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/conv_ai_3/score_how_much/results.json +9 -0
- xp3capmixlossseq_global_step1000/evaluation/craigslist_bargains/best_deal/results.json +9 -0
.gitattributes
CHANGED
|
@@ -30,3 +30,16 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 30 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 31 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 32 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 31 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 32 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 33 |
+
xp3capmixlossseq_global_step1000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 34 |
+
xp3capmixlossseq_global_step1250/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 35 |
+
xp3capmixlossseq_global_step2500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
xp3capmixlossseq_global_step3000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
xp3capmixlossseq_global_step500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 38 |
+
xp3capmixlossseq_global_step750/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 39 |
+
xp3capmixlossseq_global_step1500/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 40 |
+
xp3capmixlossseq_global_step1750/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 41 |
+
xp3capmixlossseq_global_step2000/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 42 |
+
xp3capmixlossseq_global_step2250/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 43 |
+
xp3capmixlossseq_global_step250/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 44 |
+
xp3capmixlossseq_global_step2750/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
| 45 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
xp3capmixlossseq_global_step1000/config.json
ADDED
|
@@ -0,0 +1,30 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"apply_residual_connection_post_layernorm": false,
|
| 3 |
+
"attention_dropout": 0.0,
|
| 4 |
+
"attention_softmax_in_fp32": true,
|
| 5 |
+
"bias_dropout_fusion": true,
|
| 6 |
+
"architectures": [
|
| 7 |
+
"BloomModel"
|
| 8 |
+
],
|
| 9 |
+
"bos_token_id": 1,
|
| 10 |
+
"eos_token_id": 2,
|
| 11 |
+
"pad_token_id": 3,
|
| 12 |
+
"unk_token_id": 0,
|
| 13 |
+
"hidden_dropout": 0.0,
|
| 14 |
+
"initializer_range": 0.02,
|
| 15 |
+
"layer_norm_epsilon": 1e-05,
|
| 16 |
+
"masked_softmax_fusion": true,
|
| 17 |
+
"model_type": "bloom",
|
| 18 |
+
"n_embed": 2560,
|
| 19 |
+
"n_inner": null,
|
| 20 |
+
"n_layer": 30,
|
| 21 |
+
"num_attention_heads": 32,
|
| 22 |
+
"offset_alibi": 100,
|
| 23 |
+
"pretraining_tp": 4,
|
| 24 |
+
"seq_length": 2048,
|
| 25 |
+
"skip_bias_add": true,
|
| 26 |
+
"skip_bias_add_qkv": false,
|
| 27 |
+
"transformers_version": "4.20.0",
|
| 28 |
+
"use_cache": true,
|
| 29 |
+
"vocab_size": 250880
|
| 30 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/en/prompt_body_title_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "en",
|
| 4 |
+
"template_name": "prompt_body_title_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.5152
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/en/prompt_review_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "en",
|
| 4 |
+
"template_name": "prompt_review_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.4722
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/en/prompt_title_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "en",
|
| 4 |
+
"template_name": "prompt_title_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.328
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='en', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/es/prompt_body_title_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "es",
|
| 4 |
+
"template_name": "prompt_body_title_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.4506
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/es/prompt_review_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "es",
|
| 4 |
+
"template_name": "prompt_review_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.4368
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/es/prompt_title_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "es",
|
| 4 |
+
"template_name": "prompt_title_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.3098
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='es', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/fr/prompt_body_title_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "fr",
|
| 4 |
+
"template_name": "prompt_body_title_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.4492
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/fr/prompt_review_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "fr",
|
| 4 |
+
"template_name": "prompt_review_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.4226
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/fr/prompt_title_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "fr",
|
| 4 |
+
"template_name": "prompt_title_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.2842
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='fr', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/zh/prompt_body_title_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "zh",
|
| 4 |
+
"template_name": "prompt_body_title_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.3686
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_body_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/zh/prompt_review_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "zh",
|
| 4 |
+
"template_name": "prompt_review_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.3538
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_review_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/amazon_reviews_multi/zh/prompt_title_to_star/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "amazon_reviews_multi",
|
| 3 |
+
"dataset_config_name": "zh",
|
| 4 |
+
"template_name": "prompt_title_to_star",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.2502
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='zh', dataset_name='amazon_reviews_multi', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name='en', template_name='prompt_title_to_star', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/aqua_rat/raw/Answer_questions_from_options/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "aqua_rat",
|
| 3 |
+
"dataset_config_name": "raw",
|
| 4 |
+
"template_name": "Answer questions from options",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.1889763779527559
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='raw', dataset_name='aqua_rat', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='Answer questions from options', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/aqua_rat/raw/answer_quiz/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "aqua_rat",
|
| 3 |
+
"dataset_config_name": "raw",
|
| 4 |
+
"template_name": "answer_quiz",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.20866141732283464
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='raw', dataset_name='aqua_rat', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='answer_quiz', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/aqua_rat/raw/select_the_best_option/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "aqua_rat",
|
| 3 |
+
"dataset_config_name": "raw",
|
| 4 |
+
"template_name": "select_the_best_option",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.20078740157480315
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='raw', dataset_name='aqua_rat', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='select_the_best_option', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/art/choose_hypothesis/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "art",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "choose_hypothesis",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.566579634464752
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/art/choose_hypothesis_believable/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "art",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "choose_hypothesis_believable",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.5469973890339426
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_believable', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/art/choose_hypothesis_desc/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "art",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "choose_hypothesis_desc",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.5254569190600522
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_desc', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/art/choose_hypothesis_likely/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "art",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "choose_hypothesis_likely",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.49673629242819844
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_likely', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/art/choose_hypothesis_options/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "art",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "choose_hypothesis_options",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.5378590078328982
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='art', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='choose_hypothesis_options', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/banking77/direct_to_which_department/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "banking77",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "direct_to_which_department",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.1444805194805195
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='banking77', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='direct_to_which_department', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/banking77/help_page_topic/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "banking77",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "help_page_topic",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.25
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='banking77', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='help_page_topic', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/banking77/rephrase_as_banking_term/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "banking77",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "rephrase_as_banking_term",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.2922077922077922
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='banking77', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='rephrase_as_banking_term', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/blbooksgenre/title_genre_classifiction/classify/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "blbooksgenre",
|
| 3 |
+
"dataset_config_name": "title_genre_classifiction",
|
| 4 |
+
"template_name": "classify",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.34274193548387094
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='title_genre_classifiction', dataset_name='blbooksgenre', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='classify', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/blbooksgenre/title_genre_classifiction/multi-choice/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "blbooksgenre",
|
| 3 |
+
"dataset_config_name": "title_genre_classifiction",
|
| 4 |
+
"template_name": "multi-choice",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.47523041474654376
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='title_genre_classifiction', dataset_name='blbooksgenre', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='multi-choice', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/blbooksgenre/title_genre_classifiction/premise_context_first/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "blbooksgenre",
|
| 3 |
+
"dataset_config_name": "title_genre_classifiction",
|
| 4 |
+
"template_name": "premise_context_first",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.7482718894009217
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='title_genre_classifiction', dataset_name='blbooksgenre', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='premise_context_first', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/blimp/adjunct_island/grammatical_between_1_2/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "blimp",
|
| 3 |
+
"dataset_config_name": "adjunct_island",
|
| 4 |
+
"template_name": "grammatical_between_1_2",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.515
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='grammatical_between_1_2', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/blimp/adjunct_island/grammatical_between_A_B/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "blimp",
|
| 3 |
+
"dataset_config_name": "adjunct_island",
|
| 4 |
+
"template_name": "grammatical_between_A_B",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.491
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='grammatical_between_A_B', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/blimp/adjunct_island/grammatical_which_one_1_2/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "blimp",
|
| 3 |
+
"dataset_config_name": "adjunct_island",
|
| 4 |
+
"template_name": "grammatical_which_one_1_2",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.509
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='grammatical_which_one_1_2', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/blimp/adjunct_island/single_sentence_bad_yes_no/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "blimp",
|
| 3 |
+
"dataset_config_name": "adjunct_island",
|
| 4 |
+
"template_name": "single_sentence_bad_yes_no",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.493
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='single_sentence_bad_yes_no', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/blimp/adjunct_island/single_sentence_good_yes_no/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "blimp",
|
| 3 |
+
"dataset_config_name": "adjunct_island",
|
| 4 |
+
"template_name": "single_sentence_good_yes_no",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.48
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='adjunct_island', dataset_name='blimp', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='single_sentence_good_yes_no', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/climate_fever/claim_and_all_supporting_evidences/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "climate_fever",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "claim_and_all_supporting_evidences",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.30749185667752443
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='claim_and_all_supporting_evidences', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/climate_fever/fifth_evidence_and_claim_itemization/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "climate_fever",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "fifth_evidence_and_claim_itemization",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.10618892508143322
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='fifth_evidence_and_claim_itemization', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/climate_fever/first_evidence_and_claim_itemization/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "climate_fever",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "first_evidence_and_claim_itemization",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.12638436482084692
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='first_evidence_and_claim_itemization', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/climate_fever/second_evidence_and_claim_itemization/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "climate_fever",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "second_evidence_and_claim_itemization",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.10684039087947883
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='second_evidence_and_claim_itemization', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/climate_fever/third_evidence_claim_pair/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "climate_fever",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "third_evidence_claim_pair",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.09902280130293159
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='climate_fever', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='test', target_max_length=256, template_config_name=None, template_name='third_evidence_claim_pair', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/codah/codah/affirmative_instruction_after_sentence_and_choices/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "codah",
|
| 3 |
+
"dataset_config_name": "codah",
|
| 4 |
+
"template_name": "affirmative_instruction_after_sentence_and_choices",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.5623198847262247
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='codah', dataset_name='codah', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='affirmative_instruction_after_sentence_and_choices', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/codah/codah/affirmative_instruction_before_sentence_and_choices/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "codah",
|
| 3 |
+
"dataset_config_name": "codah",
|
| 4 |
+
"template_name": "affirmative_instruction_before_sentence_and_choices",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.5947406340057637
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='codah', dataset_name='codah', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='affirmative_instruction_before_sentence_and_choices', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/codah/codah/interrogative_instruction_after_sentence_and_choices/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "codah",
|
| 3 |
+
"dataset_config_name": "codah",
|
| 4 |
+
"template_name": "interrogative_instruction_after_sentence_and_choices",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.5673631123919308
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name='codah', dataset_name='codah', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='train', target_max_length=256, template_config_name=None, template_name='interrogative_instruction_after_sentence_and_choices', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/commonsense_qa/answer_given_question_without_options/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "commonsense_qa",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "answer_given_question_without_options",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.5585585585585585
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='commonsense_qa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='answer_given_question_without_options', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/commonsense_qa/most_suitable_answer/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "commonsense_qa",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "most_suitable_answer",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.7526617526617526
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='commonsense_qa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='most_suitable_answer', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/commonsense_qa/question_answering/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "commonsense_qa",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "question_answering",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.7444717444717445
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='commonsense_qa', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='question_answering', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/conv_ai_3/ambiguous/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "conv_ai_3",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "ambiguous",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.39040207522697795
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='ambiguous', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/conv_ai_3/clarification_needed/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "conv_ai_3",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "clarification_needed",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.39040207522697795
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='clarification_needed', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/conv_ai_3/directly_answer/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "conv_ai_3",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "directly_answer",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.6095979247730221
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='directly_answer', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/conv_ai_3/score_give_number/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "conv_ai_3",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "score_give_number",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.27626459143968873
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='score_give_number', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/conv_ai_3/score_how_much/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "conv_ai_3",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "score_how_much",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.3186338089061824
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='conv_ai_3', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='score_how_much', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|
xp3capmixlossseq_global_step1000/evaluation/craigslist_bargains/best_deal/results.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"dataset_name": "craigslist_bargains",
|
| 3 |
+
"dataset_config_name": null,
|
| 4 |
+
"template_name": "best deal",
|
| 5 |
+
"evaluation": {
|
| 6 |
+
"accuracy": 0.20603015075376885
|
| 7 |
+
},
|
| 8 |
+
"arguments": "Namespace(config_name=None, dataset_config_name=None, dataset_name='craigslist_bargains', debug=False, dtype='float16', max_length=2048, model_name_or_path='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000', output_dir='/gpfsscratch/rech/six/commun/commun/experiments/muennighoff/bloomckpt/2b5t0/xp3capmixlossseq_global_step1000/evaluation', pad_to_max_length=False, per_device_eval_batch_size=2, prefixlm=False, split='validation', target_max_length=256, template_config_name=None, template_name='best deal', tokenizer_name=None, use_slow_tokenizer=False)"
|
| 9 |
+
}
|