| { | |
| "best_metric": 0.6115376353591917, | |
| "best_model_checkpoint": "/data2/fxu/lfqa_discourse/t5_large_finetuning_eli5_only_888/checkpoint-504", | |
| "epoch": 30.0, | |
| "global_step": 540, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_Answer": 0.0, | |
| "eval_Answer(Summary)": 0.44135188866799197, | |
| "eval_Answer-Example": 0.0, | |
| "eval_Answer-Organizationalsentence": 0.0, | |
| "eval_AuxiliaryInformation": 0.0, | |
| "eval_Miscellaneous": 0.0, | |
| "eval_accuracy": 0.26811594202898553, | |
| "eval_loss": 0.27198654413223267, | |
| "eval_macro_f1": 0.07355864811133199, | |
| "eval_runtime": 13.2965, | |
| "eval_samples_per_second": 4.663, | |
| "eval_steps_per_second": 0.301, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_Answer": 0.4731707317073171, | |
| "eval_Answer(Summary)": 0.4433497536945813, | |
| "eval_Answer-Example": 0.0, | |
| "eval_Answer-Organizationalsentence": 0.0, | |
| "eval_AuxiliaryInformation": 0.0, | |
| "eval_Miscellaneous": 0.6, | |
| "eval_accuracy": 0.4082125603864734, | |
| "eval_loss": 0.18863672018051147, | |
| "eval_macro_f1": 0.2527534142336497, | |
| "eval_runtime": 10.164, | |
| "eval_samples_per_second": 6.1, | |
| "eval_steps_per_second": 0.394, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_Answer": 0.375, | |
| "eval_Answer(Summary)": 0.49720670391061456, | |
| "eval_Answer-Example": 0.15384615384615388, | |
| "eval_Answer-Organizationalsentence": 0.0, | |
| "eval_AuxiliaryInformation": 0.0, | |
| "eval_Miscellaneous": 0.6923076923076923, | |
| "eval_accuracy": 0.4082125603864734, | |
| "eval_loss": 0.17385143041610718, | |
| "eval_macro_f1": 0.28639342501074344, | |
| "eval_runtime": 12.0287, | |
| "eval_samples_per_second": 5.154, | |
| "eval_steps_per_second": 0.333, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_Answer": 0.4250871080139373, | |
| "eval_Answer(Summary)": 0.4433962264150943, | |
| "eval_Answer-Example": 0.5189189189189188, | |
| "eval_Answer-Organizationalsentence": 0.0, | |
| "eval_AuxiliaryInformation": 0.0, | |
| "eval_Miscellaneous": 0.6842105263157895, | |
| "eval_accuracy": 0.4396135265700483, | |
| "eval_loss": 0.1758367419242859, | |
| "eval_macro_f1": 0.3452687966106233, | |
| "eval_runtime": 11.2352, | |
| "eval_samples_per_second": 5.518, | |
| "eval_steps_per_second": 0.356, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_Answer": 0.43205574912891986, | |
| "eval_Answer(Summary)": 0.4, | |
| "eval_Answer-Example": 0.5072463768115941, | |
| "eval_Answer-Organizationalsentence": 0.0, | |
| "eval_AuxiliaryInformation": 0.2797202797202797, | |
| "eval_Miscellaneous": 0.765432098765432, | |
| "eval_accuracy": 0.4420289855072464, | |
| "eval_loss": 0.1732899248600006, | |
| "eval_macro_f1": 0.3974090840710376, | |
| "eval_runtime": 11.2192, | |
| "eval_samples_per_second": 5.526, | |
| "eval_steps_per_second": 0.357, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_Answer": 0.475609756097561, | |
| "eval_Answer(Summary)": 0.4120603015075377, | |
| "eval_Answer-Example": 0.5222929936305732, | |
| "eval_Answer-Organizationalsentence": 0.0, | |
| "eval_AuxiliaryInformation": 0.02985074626865672, | |
| "eval_Miscellaneous": 0.7123287671232877, | |
| "eval_accuracy": 0.45169082125603865, | |
| "eval_loss": 0.16830046474933624, | |
| "eval_macro_f1": 0.3586904274379361, | |
| "eval_runtime": 11.055, | |
| "eval_samples_per_second": 5.608, | |
| "eval_steps_per_second": 0.362, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_Answer": 0.4397163120567376, | |
| "eval_Answer(Summary)": 0.5263157894736842, | |
| "eval_Answer-Example": 0.6206896551724138, | |
| "eval_Answer-Organizationalsentence": 0.0, | |
| "eval_AuxiliaryInformation": 0.25999999999999995, | |
| "eval_Miscellaneous": 0.6732673267326732, | |
| "eval_accuracy": 0.49516908212560384, | |
| "eval_loss": 0.15517334640026093, | |
| "eval_macro_f1": 0.41999818057258476, | |
| "eval_runtime": 11.346, | |
| "eval_samples_per_second": 5.464, | |
| "eval_steps_per_second": 0.353, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_Answer": 0.4920127795527157, | |
| "eval_Answer(Summary)": 0.5025641025641026, | |
| "eval_Answer-Example": 0.6495726495726496, | |
| "eval_Answer-Organizationalsentence": 0.0, | |
| "eval_AuxiliaryInformation": 0.32786885245901637, | |
| "eval_Miscellaneous": 0.7272727272727273, | |
| "eval_accuracy": 0.5120772946859904, | |
| "eval_loss": 0.1799505650997162, | |
| "eval_macro_f1": 0.44988185190353525, | |
| "eval_runtime": 10.9313, | |
| "eval_samples_per_second": 5.672, | |
| "eval_steps_per_second": 0.366, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_Answer": 0.5092024539877301, | |
| "eval_Answer(Summary)": 0.5235602094240838, | |
| "eval_Answer-Example": 0.5636363636363636, | |
| "eval_Answer-Organizationalsentence": 0.2857142857142857, | |
| "eval_AuxiliaryInformation": 0.22033898305084745, | |
| "eval_Miscellaneous": 0.736842105263158, | |
| "eval_accuracy": 0.4975845410628019, | |
| "eval_loss": 0.18231035768985748, | |
| "eval_macro_f1": 0.47321573351274476, | |
| "eval_runtime": 10.9336, | |
| "eval_samples_per_second": 5.671, | |
| "eval_steps_per_second": 0.366, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_Answer": 0.42704626334519574, | |
| "eval_Answer(Summary)": 0.6071428571428571, | |
| "eval_Answer-Example": 0.5161290322580646, | |
| "eval_Answer-Organizationalsentence": 0.4, | |
| "eval_AuxiliaryInformation": 0.33333333333333337, | |
| "eval_Miscellaneous": 0.765432098765432, | |
| "eval_accuracy": 0.5024154589371981, | |
| "eval_loss": 0.20293939113616943, | |
| "eval_macro_f1": 0.5081805974741471, | |
| "eval_runtime": 11.8591, | |
| "eval_samples_per_second": 5.228, | |
| "eval_steps_per_second": 0.337, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_Answer": 0.46583850931677023, | |
| "eval_Answer(Summary)": 0.5294117647058824, | |
| "eval_Answer-Example": 0.6153846153846154, | |
| "eval_Answer-Organizationalsentence": 0.4, | |
| "eval_AuxiliaryInformation": 0.28571428571428575, | |
| "eval_Miscellaneous": 0.7297297297297297, | |
| "eval_accuracy": 0.4975845410628019, | |
| "eval_loss": 0.20877273380756378, | |
| "eval_macro_f1": 0.5043464841418805, | |
| "eval_runtime": 10.9599, | |
| "eval_samples_per_second": 5.657, | |
| "eval_steps_per_second": 0.365, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_Answer": 0.42909090909090913, | |
| "eval_Answer(Summary)": 0.5714285714285714, | |
| "eval_Answer-Example": 0.6611570247933884, | |
| "eval_Answer-Organizationalsentence": 0.0, | |
| "eval_AuxiliaryInformation": 0.3364485981308411, | |
| "eval_Miscellaneous": 0.736842105263158, | |
| "eval_accuracy": 0.5193236714975845, | |
| "eval_loss": 0.22424167394638062, | |
| "eval_macro_f1": 0.45582786811781134, | |
| "eval_runtime": 11.5376, | |
| "eval_samples_per_second": 5.374, | |
| "eval_steps_per_second": 0.347, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_Answer": 0.4677966101694915, | |
| "eval_Answer(Summary)": 0.5739910313901346, | |
| "eval_Answer-Example": 0.6721311475409836, | |
| "eval_Answer-Organizationalsentence": 0.4, | |
| "eval_AuxiliaryInformation": 0.31067961165048547, | |
| "eval_Miscellaneous": 0.7749999999999999, | |
| "eval_accuracy": 0.5362318840579711, | |
| "eval_loss": 0.23804587125778198, | |
| "eval_macro_f1": 0.5332664001251824, | |
| "eval_runtime": 10.943, | |
| "eval_samples_per_second": 5.666, | |
| "eval_steps_per_second": 0.366, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_Answer": 0.4834437086092715, | |
| "eval_Answer(Summary)": 0.5517241379310346, | |
| "eval_Answer-Example": 0.6666666666666667, | |
| "eval_Answer-Organizationalsentence": 0.5, | |
| "eval_AuxiliaryInformation": 0.3709677419354839, | |
| "eval_Miscellaneous": 0.75, | |
| "eval_accuracy": 0.5314009661835749, | |
| "eval_loss": 0.2845667898654938, | |
| "eval_macro_f1": 0.5538003758570761, | |
| "eval_runtime": 11.0141, | |
| "eval_samples_per_second": 5.629, | |
| "eval_steps_per_second": 0.363, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_Answer": 0.5100671140939598, | |
| "eval_Answer(Summary)": 0.5714285714285714, | |
| "eval_Answer-Example": 0.6464646464646465, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.4153846153846154, | |
| "eval_Miscellaneous": 0.7126436781609196, | |
| "eval_accuracy": 0.5483091787439613, | |
| "eval_loss": 0.2950053811073303, | |
| "eval_macro_f1": 0.5871092153665631, | |
| "eval_runtime": 11.553, | |
| "eval_samples_per_second": 5.367, | |
| "eval_steps_per_second": 0.346, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_Answer": 0.45637583892617456, | |
| "eval_Answer(Summary)": 0.5945945945945946, | |
| "eval_Answer-Example": 0.607843137254902, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.38333333333333336, | |
| "eval_Miscellaneous": 0.7848101265822784, | |
| "eval_accuracy": 0.5314009661835749, | |
| "eval_loss": 0.28483256697654724, | |
| "eval_macro_f1": 0.5822706162263249, | |
| "eval_runtime": 10.9374, | |
| "eval_samples_per_second": 5.669, | |
| "eval_steps_per_second": 0.366, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_Answer": 0.4557823129251701, | |
| "eval_Answer(Summary)": 0.5952380952380952, | |
| "eval_Answer-Example": 0.6923076923076923, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.3125, | |
| "eval_Miscellaneous": 0.7848101265822784, | |
| "eval_accuracy": 0.5434782608695652, | |
| "eval_loss": 0.29017218947410583, | |
| "eval_macro_f1": 0.5845508156199838, | |
| "eval_runtime": 11.3062, | |
| "eval_samples_per_second": 5.484, | |
| "eval_steps_per_second": 0.354, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_Answer": 0.4621212121212122, | |
| "eval_Answer(Summary)": 0.5844748858447489, | |
| "eval_Answer-Example": 0.6611570247933884, | |
| "eval_Answer-Organizationalsentence": 0.33333333333333337, | |
| "eval_AuxiliaryInformation": 0.43283582089552236, | |
| "eval_Miscellaneous": 0.7380952380952381, | |
| "eval_accuracy": 0.5458937198067633, | |
| "eval_loss": 0.3154158294200897, | |
| "eval_macro_f1": 0.5353362525139073, | |
| "eval_runtime": 11.5419, | |
| "eval_samples_per_second": 5.372, | |
| "eval_steps_per_second": 0.347, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_Answer": 0.46905537459283386, | |
| "eval_Answer(Summary)": 0.5526315789473685, | |
| "eval_Answer-Example": 0.6315789473684211, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.38775510204081637, | |
| "eval_Miscellaneous": 0.7692307692307693, | |
| "eval_accuracy": 0.533816425120773, | |
| "eval_loss": 0.3426768183708191, | |
| "eval_macro_f1": 0.5794864064744794, | |
| "eval_runtime": 11.3895, | |
| "eval_samples_per_second": 5.444, | |
| "eval_steps_per_second": 0.351, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_Answer": 0.5089820359281436, | |
| "eval_Answer(Summary)": 0.5700934579439253, | |
| "eval_Answer-Example": 0.45161290322580644, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.3921568627450981, | |
| "eval_Miscellaneous": 0.765432098765432, | |
| "eval_accuracy": 0.5289855072463768, | |
| "eval_loss": 0.3493908941745758, | |
| "eval_macro_f1": 0.5591573375458453, | |
| "eval_runtime": 11.3644, | |
| "eval_samples_per_second": 5.456, | |
| "eval_steps_per_second": 0.352, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_Answer": 0.4316546762589928, | |
| "eval_Answer(Summary)": 0.5701754385964912, | |
| "eval_Answer-Example": 0.6666666666666667, | |
| "eval_Answer-Organizationalsentence": 0.5, | |
| "eval_AuxiliaryInformation": 0.4, | |
| "eval_Miscellaneous": 0.7749999999999999, | |
| "eval_accuracy": 0.5265700483091788, | |
| "eval_loss": 0.34431466460227966, | |
| "eval_macro_f1": 0.5572494635870251, | |
| "eval_runtime": 11.6127, | |
| "eval_samples_per_second": 5.339, | |
| "eval_steps_per_second": 0.344, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_Answer": 0.4968944099378882, | |
| "eval_Answer(Summary)": 0.5909090909090909, | |
| "eval_Answer-Example": 0.4597701149425288, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.4000000000000001, | |
| "eval_Miscellaneous": 0.7749999999999999, | |
| "eval_accuracy": 0.5314009661835749, | |
| "eval_loss": 0.3664790987968445, | |
| "eval_macro_f1": 0.5648733804093624, | |
| "eval_runtime": 11.1595, | |
| "eval_samples_per_second": 5.556, | |
| "eval_steps_per_second": 0.358, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_Answer": 0.48965517241379314, | |
| "eval_Answer(Summary)": 0.6079295154185023, | |
| "eval_Answer-Example": 0.6666666666666667, | |
| "eval_Answer-Organizationalsentence": 0.5, | |
| "eval_AuxiliaryInformation": 0.416, | |
| "eval_Miscellaneous": 0.7749999999999999, | |
| "eval_accuracy": 0.5603864734299517, | |
| "eval_loss": 0.36973538994789124, | |
| "eval_macro_f1": 0.575875225749827, | |
| "eval_runtime": 11.3197, | |
| "eval_samples_per_second": 5.477, | |
| "eval_steps_per_second": 0.353, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_Answer": 0.4774193548387097, | |
| "eval_Answer(Summary)": 0.5833333333333334, | |
| "eval_Answer-Example": 0.5217391304347826, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.40944881889763785, | |
| "eval_Miscellaneous": 0.759493670886076, | |
| "eval_accuracy": 0.5265700483091788, | |
| "eval_loss": 0.3806516230106354, | |
| "eval_macro_f1": 0.5696834958428677, | |
| "eval_runtime": 11.2648, | |
| "eval_samples_per_second": 5.504, | |
| "eval_steps_per_second": 0.355, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_Answer": 0.4871794871794871, | |
| "eval_Answer(Summary)": 0.588785046728972, | |
| "eval_Answer-Example": 0.5833333333333334, | |
| "eval_Answer-Organizationalsentence": 0.5, | |
| "eval_AuxiliaryInformation": 0.3934426229508197, | |
| "eval_Miscellaneous": 0.759493670886076, | |
| "eval_accuracy": 0.5362318840579711, | |
| "eval_loss": 0.39177072048187256, | |
| "eval_macro_f1": 0.552039026846448, | |
| "eval_runtime": 11.3644, | |
| "eval_samples_per_second": 5.456, | |
| "eval_steps_per_second": 0.352, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_Answer": 0.5116279069767442, | |
| "eval_Answer(Summary)": 0.5915492957746479, | |
| "eval_Answer-Example": 0.7102803738317757, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.4032258064516129, | |
| "eval_Miscellaneous": 0.759493670886076, | |
| "eval_accuracy": 0.5652173913043478, | |
| "eval_loss": 0.3835026025772095, | |
| "eval_macro_f1": 0.6071406200979206, | |
| "eval_runtime": 11.3582, | |
| "eval_samples_per_second": 5.459, | |
| "eval_steps_per_second": 0.352, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_Answer": 0.5100671140939598, | |
| "eval_Answer(Summary)": 0.609090909090909, | |
| "eval_Answer-Example": 0.69811320754717, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.4132231404958678, | |
| "eval_Miscellaneous": 0.759493670886076, | |
| "eval_accuracy": 0.5700483091787439, | |
| "eval_loss": 0.3833402693271637, | |
| "eval_macro_f1": 0.6094424514634416, | |
| "eval_runtime": 11.4693, | |
| "eval_samples_per_second": 5.406, | |
| "eval_steps_per_second": 0.349, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 27.78, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.0945, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_Answer": 0.5167785234899329, | |
| "eval_Answer(Summary)": 0.5972850678733032, | |
| "eval_Answer-Example": 0.7222222222222223, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.4067796610169492, | |
| "eval_Miscellaneous": 0.759493670886076, | |
| "eval_accuracy": 0.572463768115942, | |
| "eval_loss": 0.39141198992729187, | |
| "eval_macro_f1": 0.6115376353591917, | |
| "eval_runtime": 11.4624, | |
| "eval_samples_per_second": 5.409, | |
| "eval_steps_per_second": 0.349, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_Answer": 0.5214521452145214, | |
| "eval_Answer(Summary)": 0.599078341013825, | |
| "eval_Answer-Example": 0.7102803738317757, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.4067796610169492, | |
| "eval_Miscellaneous": 0.759493670886076, | |
| "eval_accuracy": 0.572463768115942, | |
| "eval_loss": 0.3956356346607208, | |
| "eval_macro_f1": 0.610625143104969, | |
| "eval_runtime": 11.2747, | |
| "eval_samples_per_second": 5.499, | |
| "eval_steps_per_second": 0.355, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_Answer": 0.5197368421052633, | |
| "eval_Answer(Summary)": 0.5925925925925926, | |
| "eval_Answer-Example": 0.7102803738317757, | |
| "eval_Answer-Organizationalsentence": 0.6666666666666666, | |
| "eval_AuxiliaryInformation": 0.4067796610169492, | |
| "eval_Miscellaneous": 0.759493670886076, | |
| "eval_accuracy": 0.5700483091787439, | |
| "eval_loss": 0.39962947368621826, | |
| "eval_macro_f1": 0.6092583011832206, | |
| "eval_runtime": 11.3436, | |
| "eval_samples_per_second": 5.466, | |
| "eval_steps_per_second": 0.353, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "step": 540, | |
| "total_flos": 1.5886402423296e+16, | |
| "train_loss": 0.08773032142608254, | |
| "train_runtime": 1826.4445, | |
| "train_samples_per_second": 4.714, | |
| "train_steps_per_second": 0.296 | |
| } | |
| ], | |
| "max_steps": 540, | |
| "num_train_epochs": 30, | |
| "total_flos": 1.5886402423296e+16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |