{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.5644402634054563, "eval_steps": 500, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0, "eval/E1/acc_on_CONSISTENT": 0.16435185185185186, "eval/E1/acc_on_INCONSISTENT": 0.9875, "eval/E1/accuracy": 0.29296875, "eval/E1/f1_INC": 0.3038461538461539, "eval/E1/precision_INC": 0.17954545454545454, "eval/E1/recall_INC": 0.9875, "eval/E2/acc_on_CONSISTENT": 0.6785714285714286, "eval/E2/acc_on_INCONSISTENT": 0.9594594594594594, "eval/E2/accuracy": 0.759765625, "eval/E2/f1_INC": 0.6977886977886979, "eval/E2/precision_INC": 0.5482625482625483, "eval/E2/recall_INC": 0.9594594594594594, "eval/E3/acc_on_CONSISTENT": 0.07098765432098765, "eval/E3/acc_on_INCONSISTENT": 0.9680851063829787, "eval/E3/accuracy": 0.400390625, "eval/E3/f1_INC": 0.5424739195230999, "eval/E3/precision_INC": 0.37681159420289856, "eval/E3/recall_INC": 0.9680851063829787, "eval/E4/acc_on_CONSISTENT": 0.0529595015576324, "eval/E4/acc_on_INCONSISTENT": 0.9476439790575916, "eval/E4/accuracy": 0.38671875, "eval/E4/f1_INC": 0.5355029585798816, "eval/E4/precision_INC": 0.3731958762886598, "eval/E4/recall_INC": 0.9476439790575916, "eval/edge_macro_accuracy": 0.4599609375, "eval/malformed_rate": 0.0, "eval/n_eval": 512.0, "eval/overall/acc_on_CONSISTENT": 0.08133971291866028, "eval/overall/acc_on_INCONSISTENT": 0.966996699669967, "eval/overall/accuracy": 0.60546875, "eval/overall/f1_INC": 0.7436548223350254, "eval/overall/precision_INC": 0.6041237113402061, "eval/overall/recall_INC": 0.966996699669967, "eval/overall_exact_match": 0.064453125, "eval_label": "start", "step": 0 }, { "epoch": 0.01881467544684854, "grad_norm": 0.13588111102581024, "learning_rate": 5.9375e-05, "loss": 0.0936, "step": 20 }, { "epoch": 0.03762935089369708, "grad_norm": 0.15987196564674377, "learning_rate": 9.932104752667314e-05, "loss": 0.0348, "step": 40 }, { "epoch": 0.05644402634054563, "grad_norm": 0.14962074160575867, "learning_rate": 9.73811833171678e-05, "loss": 0.0273, "step": 60 }, { "epoch": 0.07525870178739416, "grad_norm": 0.1912640929222107, "learning_rate": 9.544131910766246e-05, "loss": 0.0248, "step": 80 }, { "epoch": 0.09407337723424271, "eval/E1/acc_on_CONSISTENT": 0.8819444444444444, "eval/E1/acc_on_INCONSISTENT": 0.4625, "eval/E1/accuracy": 0.81640625, "eval/E1/f1_INC": 0.44047619047619047, "eval/E1/precision_INC": 0.42045454545454547, "eval/E1/recall_INC": 0.4625, "eval/E2/acc_on_CONSISTENT": 0.9752747252747253, "eval/E2/acc_on_INCONSISTENT": 0.8243243243243243, "eval/E2/accuracy": 0.931640625, "eval/E2/f1_INC": 0.8745519713261649, "eval/E2/precision_INC": 0.9312977099236641, "eval/E2/recall_INC": 0.8243243243243243, "eval/E3/acc_on_CONSISTENT": 0.8179012345679012, "eval/E3/acc_on_INCONSISTENT": 0.6968085106382979, "eval/E3/accuracy": 0.7734375, "eval/E3/f1_INC": 0.6931216931216931, "eval/E3/precision_INC": 0.6894736842105263, "eval/E3/recall_INC": 0.6968085106382979, "eval/E4/acc_on_CONSISTENT": 0.8629283489096573, "eval/E4/acc_on_INCONSISTENT": 0.7068062827225131, "eval/E4/accuracy": 0.8046875, "eval/E4/f1_INC": 0.7297297297297297, "eval/E4/precision_INC": 0.7541899441340782, "eval/E4/recall_INC": 0.7068062827225131, "eval/edge_macro_accuracy": 0.83154296875, "eval/malformed_rate": 0.0, "eval/n_eval": 512.0, "eval/overall/acc_on_CONSISTENT": 0.8325358851674641, "eval/overall/acc_on_INCONSISTENT": 0.7887788778877888, "eval/overall/accuracy": 0.806640625, "eval/overall/f1_INC": 0.82842287694974, "eval/overall/precision_INC": 0.8722627737226277, "eval/overall/recall_INC": 0.7887788778877888, "eval/overall_exact_match": 0.576171875, "eval_label": "step_100", "step": 100 }, { "epoch": 0.09407337723424271, "grad_norm": 0.10156096518039703, "learning_rate": 9.350145489815713e-05, "loss": 0.0248, "step": 100 }, { "epoch": 0.11288805268109126, "grad_norm": 0.15984299778938293, "learning_rate": 9.15615906886518e-05, "loss": 0.0241, "step": 120 }, { "epoch": 0.1317027281279398, "grad_norm": 0.14268864691257477, "learning_rate": 8.962172647914647e-05, "loss": 0.0222, "step": 140 }, { "epoch": 0.15051740357478832, "grad_norm": 0.09480800479650497, "learning_rate": 8.768186226964112e-05, "loss": 0.022, "step": 160 }, { "epoch": 0.16933207902163688, "grad_norm": 0.09908359497785568, "learning_rate": 8.57419980601358e-05, "loss": 0.0205, "step": 180 }, { "epoch": 0.18814675446848542, "eval/E1/acc_on_CONSISTENT": 0.9236111111111112, "eval/E1/acc_on_INCONSISTENT": 0.4125, "eval/E1/accuracy": 0.84375, "eval/E1/f1_INC": 0.4520547945205479, "eval/E1/precision_INC": 0.5, "eval/E1/recall_INC": 0.4125, "eval/E2/acc_on_CONSISTENT": 0.945054945054945, "eval/E2/acc_on_INCONSISTENT": 0.9391891891891891, "eval/E2/accuracy": 0.943359375, "eval/E2/f1_INC": 0.9055374592833876, "eval/E2/precision_INC": 0.8742138364779874, "eval/E2/recall_INC": 0.9391891891891891, "eval/E3/acc_on_CONSISTENT": 0.8703703703703703, "eval/E3/acc_on_INCONSISTENT": 0.8191489361702128, "eval/E3/accuracy": 0.8515625, "eval/E3/f1_INC": 0.8020833333333333, "eval/E3/precision_INC": 0.7857142857142857, "eval/E3/recall_INC": 0.8191489361702128, "eval/E4/acc_on_CONSISTENT": 0.8411214953271028, "eval/E4/acc_on_INCONSISTENT": 0.7905759162303665, "eval/E4/accuracy": 0.822265625, "eval/E4/f1_INC": 0.7684478371501273, "eval/E4/precision_INC": 0.7475247524752475, "eval/E4/recall_INC": 0.7905759162303665, "eval/edge_macro_accuracy": 0.865234375, "eval/malformed_rate": 0.0, "eval/n_eval": 512.0, "eval/overall/acc_on_CONSISTENT": 0.7559808612440191, "eval/overall/acc_on_INCONSISTENT": 0.8712871287128713, "eval/overall/accuracy": 0.82421875, "eval/overall/f1_INC": 0.854368932038835, "eval/overall/precision_INC": 0.8380952380952381, "eval/overall/recall_INC": 0.8712871287128713, "eval/overall_exact_match": 0.63671875, "eval_label": "step_200", "step": 200 }, { "epoch": 0.18814675446848542, "grad_norm": 0.1302443891763687, "learning_rate": 8.380213385063046e-05, "loss": 0.0211, "step": 200 }, { "epoch": 0.20696142991533395, "grad_norm": 0.14304408431053162, "learning_rate": 8.186226964112513e-05, "loss": 0.0187, "step": 220 }, { "epoch": 0.2257761053621825, "grad_norm": 0.20394913852214813, "learning_rate": 7.99224054316198e-05, "loss": 0.0208, "step": 240 }, { "epoch": 0.24459078080903104, "grad_norm": 0.11161988973617554, "learning_rate": 7.798254122211446e-05, "loss": 0.019, "step": 260 }, { "epoch": 0.2634054562558796, "grad_norm": 0.11904545873403549, "learning_rate": 7.604267701260912e-05, "loss": 0.0204, "step": 280 }, { "epoch": 0.28222013170272814, "eval/E1/acc_on_CONSISTENT": 0.9652777777777778, "eval/E1/acc_on_INCONSISTENT": 0.3375, "eval/E1/accuracy": 0.8671875, "eval/E1/f1_INC": 0.4426229508196722, "eval/E1/precision_INC": 0.6428571428571429, "eval/E1/recall_INC": 0.3375, "eval/E2/acc_on_CONSISTENT": 0.967032967032967, "eval/E2/acc_on_INCONSISTENT": 0.9324324324324325, "eval/E2/accuracy": 0.95703125, "eval/E2/f1_INC": 0.9261744966442953, "eval/E2/precision_INC": 0.92, "eval/E2/recall_INC": 0.9324324324324325, "eval/E3/acc_on_CONSISTENT": 0.9382716049382716, "eval/E3/acc_on_INCONSISTENT": 0.8138297872340425, "eval/E3/accuracy": 0.892578125, "eval/E3/f1_INC": 0.8476454293628809, "eval/E3/precision_INC": 0.884393063583815, "eval/E3/recall_INC": 0.8138297872340425, "eval/E4/acc_on_CONSISTENT": 0.8940809968847352, "eval/E4/acc_on_INCONSISTENT": 0.8010471204188482, "eval/E4/accuracy": 0.859375, "eval/E4/f1_INC": 0.8095238095238095, "eval/E4/precision_INC": 0.8181818181818182, "eval/E4/recall_INC": 0.8010471204188482, "eval/edge_macro_accuracy": 0.89404296875, "eval/malformed_rate": 0.0, "eval/n_eval": 512.0, "eval/overall/acc_on_CONSISTENT": 0.8851674641148325, "eval/overall/acc_on_INCONSISTENT": 0.8448844884488449, "eval/overall/accuracy": 0.861328125, "eval/overall/f1_INC": 0.8782161234991424, "eval/overall/precision_INC": 0.9142857142857143, "eval/overall/recall_INC": 0.8448844884488449, "eval/overall_exact_match": 0.697265625, "eval_label": "step_300", "step": 300 }, { "epoch": 0.28222013170272814, "grad_norm": 0.1075235903263092, "learning_rate": 7.410281280310378e-05, "loss": 0.0212, "step": 300 }, { "epoch": 0.30103480714957664, "grad_norm": 0.08106118440628052, "learning_rate": 7.216294859359845e-05, "loss": 0.0177, "step": 320 }, { "epoch": 0.3198494825964252, "grad_norm": 0.07731106132268906, "learning_rate": 7.022308438409312e-05, "loss": 0.0182, "step": 340 }, { "epoch": 0.33866415804327377, "grad_norm": 0.1482355296611786, "learning_rate": 6.828322017458779e-05, "loss": 0.0178, "step": 360 }, { "epoch": 0.35747883349012227, "grad_norm": 0.07897721230983734, "learning_rate": 6.634335596508244e-05, "loss": 0.018, "step": 380 }, { "epoch": 0.37629350893697083, "eval/E1/acc_on_CONSISTENT": 0.9722222222222222, "eval/E1/acc_on_INCONSISTENT": 0.45, "eval/E1/accuracy": 0.890625, "eval/E1/f1_INC": 0.5625000000000001, "eval/E1/precision_INC": 0.75, "eval/E1/recall_INC": 0.45, "eval/E2/acc_on_CONSISTENT": 0.9697802197802198, "eval/E2/acc_on_INCONSISTENT": 0.9324324324324325, "eval/E2/accuracy": 0.958984375, "eval/E2/f1_INC": 0.9292929292929293, "eval/E2/precision_INC": 0.9261744966442953, "eval/E2/recall_INC": 0.9324324324324325, "eval/E3/acc_on_CONSISTENT": 0.9598765432098766, "eval/E3/acc_on_INCONSISTENT": 0.8138297872340425, "eval/E3/accuracy": 0.90625, "eval/E3/f1_INC": 0.8644067796610171, "eval/E3/precision_INC": 0.9216867469879518, "eval/E3/recall_INC": 0.8138297872340425, "eval/E4/acc_on_CONSISTENT": 0.9096573208722741, "eval/E4/acc_on_INCONSISTENT": 0.806282722513089, "eval/E4/accuracy": 0.87109375, "eval/E4/f1_INC": 0.823529411764706, "eval/E4/precision_INC": 0.8415300546448088, "eval/E4/recall_INC": 0.806282722513089, "eval/edge_macro_accuracy": 0.90673828125, "eval/malformed_rate": 0.0, "eval/n_eval": 512.0, "eval/overall/acc_on_CONSISTENT": 0.9282296650717703, "eval/overall/acc_on_INCONSISTENT": 0.8481848184818482, "eval/overall/accuracy": 0.880859375, "eval/overall/f1_INC": 0.8939130434782608, "eval/overall/precision_INC": 0.9448529411764706, "eval/overall/recall_INC": 0.8481848184818482, "eval/overall_exact_match": 0.728515625, "eval_label": "step_400", "step": 400 }, { "epoch": 0.37629350893697083, "grad_norm": 0.1120094284415245, "learning_rate": 6.440349175557712e-05, "loss": 0.0168, "step": 400 }, { "epoch": 0.3951081843838194, "grad_norm": 0.16071178019046783, "learning_rate": 6.246362754607178e-05, "loss": 0.016, "step": 420 }, { "epoch": 0.4139228598306679, "grad_norm": 0.10165177285671234, "learning_rate": 6.0523763336566445e-05, "loss": 0.0159, "step": 440 }, { "epoch": 0.43273753527751646, "grad_norm": 0.08946932107210159, "learning_rate": 5.8583899127061106e-05, "loss": 0.0167, "step": 460 }, { "epoch": 0.451552210724365, "grad_norm": 0.07830841839313507, "learning_rate": 5.664403491755578e-05, "loss": 0.016, "step": 480 }, { "epoch": 0.4703668861712135, "eval/E1/acc_on_CONSISTENT": 0.9444444444444444, "eval/E1/acc_on_INCONSISTENT": 0.575, "eval/E1/accuracy": 0.88671875, "eval/E1/f1_INC": 0.6133333333333333, "eval/E1/precision_INC": 0.6571428571428571, "eval/E1/recall_INC": 0.575, "eval/E2/acc_on_CONSISTENT": 0.9285714285714286, "eval/E2/acc_on_INCONSISTENT": 0.9594594594594594, "eval/E2/accuracy": 0.9375, "eval/E2/f1_INC": 0.8987341772151899, "eval/E2/precision_INC": 0.8452380952380952, "eval/E2/recall_INC": 0.9594594594594594, "eval/E3/acc_on_CONSISTENT": 0.8919753086419753, "eval/E3/acc_on_INCONSISTENT": 0.8882978723404256, "eval/E3/accuracy": 0.890625, "eval/E3/f1_INC": 0.8564102564102564, "eval/E3/precision_INC": 0.8267326732673267, "eval/E3/recall_INC": 0.8882978723404256, "eval/E4/acc_on_CONSISTENT": 0.8847352024922118, "eval/E4/acc_on_INCONSISTENT": 0.8638743455497382, "eval/E4/accuracy": 0.876953125, "eval/E4/f1_INC": 0.8396946564885497, "eval/E4/precision_INC": 0.8168316831683168, "eval/E4/recall_INC": 0.8638743455497382, "eval/edge_macro_accuracy": 0.89794921875, "eval/malformed_rate": 0.0, "eval/n_eval": 512.0, "eval/overall/acc_on_CONSISTENT": 0.8325358851674641, "eval/overall/acc_on_INCONSISTENT": 0.8976897689768977, "eval/overall/accuracy": 0.87109375, "eval/overall/f1_INC": 0.8918032786885246, "eval/overall/precision_INC": 0.8859934853420195, "eval/overall/recall_INC": 0.8976897689768977, "eval/overall_exact_match": 0.71484375, "eval_label": "step_500", "step": 500 }, { "epoch": 0.4703668861712135, "grad_norm": 0.23986610770225525, "learning_rate": 5.470417070805044e-05, "loss": 0.0168, "step": 500 }, { "epoch": 0.4891815616180621, "grad_norm": 0.07184334099292755, "learning_rate": 5.27643064985451e-05, "loss": 0.0169, "step": 520 }, { "epoch": 0.5079962370649106, "grad_norm": 0.0486169196665287, "learning_rate": 5.0824442289039763e-05, "loss": 0.0148, "step": 540 }, { "epoch": 0.5268109125117592, "grad_norm": 0.11095824092626572, "learning_rate": 4.888457807953444e-05, "loss": 0.0173, "step": 560 }, { "epoch": 0.5456255879586077, "grad_norm": 0.11410392075777054, "learning_rate": 4.69447138700291e-05, "loss": 0.0149, "step": 580 }, { "epoch": 0.5644402634054563, "eval/E1/acc_on_CONSISTENT": 0.9722222222222222, "eval/E1/acc_on_INCONSISTENT": 0.4375, "eval/E1/accuracy": 0.888671875, "eval/E1/f1_INC": 0.5511811023622046, "eval/E1/precision_INC": 0.7446808510638298, "eval/E1/recall_INC": 0.4375, "eval/E2/acc_on_CONSISTENT": 0.978021978021978, "eval/E2/acc_on_INCONSISTENT": 0.9391891891891891, "eval/E2/accuracy": 0.966796875, "eval/E2/f1_INC": 0.9423728813559321, "eval/E2/precision_INC": 0.9455782312925171, "eval/E2/recall_INC": 0.9391891891891891, "eval/E3/acc_on_CONSISTENT": 0.9691358024691358, "eval/E3/acc_on_INCONSISTENT": 0.75, "eval/E3/accuracy": 0.888671875, "eval/E3/f1_INC": 0.831858407079646, "eval/E3/precision_INC": 0.9337748344370861, "eval/E3/recall_INC": 0.75, "eval/E4/acc_on_CONSISTENT": 0.956386292834891, "eval/E4/acc_on_INCONSISTENT": 0.7801047120418848, "eval/E4/accuracy": 0.890625, "eval/E4/f1_INC": 0.8418079096045198, "eval/E4/precision_INC": 0.9141104294478528, "eval/E4/recall_INC": 0.7801047120418848, "eval/edge_macro_accuracy": 0.90869140625, "eval/malformed_rate": 0.0, "eval/n_eval": 512.0, "eval/overall/acc_on_CONSISTENT": 0.9473684210526315, "eval/overall/acc_on_INCONSISTENT": 0.8118811881188119, "eval/overall/accuracy": 0.8671875, "eval/overall/f1_INC": 0.8785714285714287, "eval/overall/precision_INC": 0.9571984435797666, "eval/overall/recall_INC": 0.8118811881188119, "eval/overall_exact_match": 0.728515625, "eval_label": "step_600", "step": 600 }, { "epoch": 0.5644402634054563, "grad_norm": 0.13476960361003876, "learning_rate": 4.500484966052377e-05, "loss": 0.0159, "step": 600 } ], "logging_steps": 20, "max_steps": 1063, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 300, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 8.738632608964936e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }