Upload folder using huggingface_hub
#305
by
DavidNguyen
- opened
Pretrain_language_model/save_final/slimpajama_moe_no_attmoe_154M_sigmoid_standard_lb_v3/export/result-model-100000.pth.json
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
{"val/loss": 2.613065204923115, "val/accuracy": 0.47968982514880953, "val/perplexity": 13.640798679588498, "val/time_since_best_loss": 0, "val/time_since_best_accuracy": 0, "lambada/loss": 2.6702598429614715, "lambada/accuracy/total": 0.2554347826086957, "lambada/accuracy/openai_last_token": 0.7583462732919255, "lambada/perplexity": 12.212545412808787, "lambada/lm_loss": 3.179462916324444, "lambada/lm_perplexity": 24.033841900910463, "lambada/time_since_best_loss": 0, "lambada/time_since_best_accuracy": 0, "mean_accuracy": 0.3675623038787526, "mean_loss": 2.6416625239422933, "blimp/accuracy/passive_2": 0.892, "blimp/accuracy/determiner_noun_agreement_2": 0.982, "blimp/accuracy/ellipsis_n_bar_1": 0.811, "blimp/accuracy/tough_vs_raising_2": 0.864, "blimp/accuracy/tough_vs_raising_1": 0.62, "blimp/accuracy/irregular_plural_subject_verb_agreement_2": 0.839, "blimp/accuracy/principle_A_reconstruction": 0.361, "blimp/accuracy/wh_vs_that_with_gap": 0.566, "blimp/accuracy/principle_A_domain_2": 0.839, "blimp/accuracy/determiner_noun_agreement_1": 0.988, "blimp/accuracy/ellipsis_n_bar_2": 0.905, "blimp/accuracy/principle_A_domain_3": 0.545, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_2": 0.909, "blimp/accuracy/animate_subject_trans": 0.898, "blimp/accuracy/determiner_noun_agreement_with_adj_irregular_1": 0.887, "blimp/accuracy/distractor_agreement_relative_clause": 0.709, "blimp/accuracy/transitive": 0.864, "blimp/accuracy/sentential_subject_island": 0.285, "blimp/accuracy/adjunct_island": 0.789, "blimp/accuracy/intransitive": 0.771, "blimp/accuracy/existential_there_subject_raising": 0.86, "blimp/accuracy/irregular_past_participle_adjectives": 0.984, "blimp/accuracy/coordinate_structure_constraint_complex_left_branch": 0.385, "blimp/accuracy/principle_A_case_1": 1.0, "blimp/accuracy/wh_vs_that_with_gap_long_distance": 0.406, "blimp/accuracy/only_npi_scope": 0.597, "blimp/accuracy/superlative_quantifiers_2": 0.871, "blimp/accuracy/passive_1": 0.9, "blimp/accuracy/regular_plural_subject_verb_agreement_1": 0.898, "blimp/accuracy/inchoative": 0.627, "blimp/accuracy/anaphor_gender_agreement": 0.965, "blimp/accuracy/principle_A_c_command": 0.598, "blimp/accuracy/only_npi_licensor_present": 0.363, "blimp/accuracy/expletive_it_object_raising": 0.788, "blimp/accuracy/left_branch_island_simple_question": 0.381, "blimp/accuracy/wh_questions_subject_gap": 0.909, "blimp/accuracy/existential_there_quantifiers_2": 0.482, "blimp/accuracy/determiner_noun_agreement_with_adj_2": 0.93, "blimp/accuracy/sentential_negation_npi_scope": 0.723, "blimp/accuracy/coordinate_structure_constraint_object_extraction": 0.8, "blimp/accuracy/wh_questions_subject_gap_long_distance": 0.804, "blimp/accuracy/irregular_plural_subject_verb_agreement_1": 0.898, "blimp/accuracy/principle_A_case_2": 0.959, "blimp/accuracy/distractor_agreement_relational_noun": 0.786, "blimp/accuracy/sentential_negation_npi_licensor_present": 0.988, "blimp/accuracy/superlative_quantifiers_1": 0.585, "blimp/accuracy/wh_island": 0.658, "blimp/accuracy/principle_A_domain_1": 0.985, "blimp/accuracy/complex_NP_island": 0.501, "blimp/accuracy/determiner_noun_agreement_irregular_2": 0.962, "blimp/accuracy/irregular_past_participle_verbs": 0.891, "blimp/accuracy/drop_argument": 0.758, "blimp/accuracy/wh_questions_object_gap": 0.77, "blimp/accuracy/animate_subject_passive": 0.793, "blimp/accuracy/existential_there_quantifiers_1": 0.975, "blimp/accuracy/regular_plural_subject_verb_agreement_2": 0.866, "blimp/accuracy/npi_present_2": 0.59, "blimp/accuracy/determiner_noun_agreement_irregular_1": 0.914, "blimp/accuracy/anaphor_number_agreement": 0.988, "blimp/accuracy/determiner_noun_agreement_with_adjective_1": 0.954, "blimp/accuracy/existential_there_object_raising": 0.842, "blimp/accuracy/matrix_question_npi_licensor_present": 0.202, "blimp/accuracy/npi_present_1": 0.563, "blimp/accuracy/wh_vs_that_no_gap": 0.957, "blimp/accuracy/left_branch_island_echo_question": 0.441, "blimp/accuracy/wh_vs_that_no_gap_long_distance": 0.941, "blimp/accuracy/causative": 0.7, "blimp/accuracy/group_average": 0.7621194029850749, "blimp/accuracy/seq_average": 0.7621194029850746, "cbt/accuracy/NE": 0.7628205128205128, "cbt/accuracy/V": 0.9056, "cbt/accuracy/CN": 0.8208, "cbt/accuracy/P": 0.8876, "cbt/accuracy/group_average": 0.8442051282051282, "cbt/accuracy/seq_average": 0.8442376950780313, "hellaswag/accuracy/val": 0.29376618203545113, "hellaswag/accuracy/group_average": 0.29376618203545113, "hellaswag/accuracy/seq_average": 0.29376618203545113, "piqa/accuracy/val": 0.588683351468988, "piqa/accuracy/group_average": 0.588683351468988, "piqa/accuracy/seq_average": 0.588683351468988, "ai2arc/accuracy/ARC-Easy": 0.3310782241014799, "ai2arc/accuracy/ARC-Challenge": 0.21716738197424892, "ai2arc/accuracy/group_average": 0.2741228030378644, "ai2arc/accuracy/seq_average": 0.2934844192634561, "mmlu/accuracy/MMLU": 0.2598498391133357, "mmlu/accuracy/group_average": 0.2598498391133357, "mmlu/accuracy/seq_average": 0.2598498391133357, "openbookqa/accuracy/test": 0.276, "openbookqa/accuracy/group_average": 0.276, "openbookqa/accuracy/seq_average": 0.276, "race/accuracy/test/high": 0.2687249857061178, "race/accuracy/test/middle": 0.3370473537604457, "race/accuracy/group_average": 0.30288616973328175, "race/accuracy/seq_average": 0.28860964734495337, "siqa/accuracy/dev": 0.34953940634595704, "siqa/accuracy/group_average": 0.34953940634595704, "siqa/accuracy/seq_average": 0.34953940634595704, "winogrande/accuracy/dev": 0.5146014206787688, "winogrande/accuracy/group_average": 0.5146014206787688, "winogrande/accuracy/seq_average": 0.5146014206787688, "commonsenseqa/accuracy/dev_rand_split": 0.24733824733824733, "commonsenseqa/accuracy/group_average": 0.24733824733824733, "commonsenseqa/accuracy/seq_average": 0.24733824733824733}
|