{ "best_metric": 0.35031697154045105, "best_model_checkpoint": "deberta-sent-multi/checkpoint-1800", "epoch": 1.0, "eval_steps": 500, "global_step": 1800, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.013888888888888888, "grad_norm": 1.9551440477371216, "learning_rate": 2.3148148148148148e-06, "loss": 0.6935, "step": 25 }, { "epoch": 0.027777777777777776, "grad_norm": 1.187191128730774, "learning_rate": 4.6296296296296296e-06, "loss": 0.6957, "step": 50 }, { "epoch": 0.041666666666666664, "grad_norm": 0.9179525375366211, "learning_rate": 6.944444444444445e-06, "loss": 0.6948, "step": 75 }, { "epoch": 0.05555555555555555, "grad_norm": 1.4890326261520386, "learning_rate": 9.259259259259259e-06, "loss": 0.6827, "step": 100 }, { "epoch": 0.06944444444444445, "grad_norm": 7.563885688781738, "learning_rate": 1.1574074074074075e-05, "loss": 0.5348, "step": 125 }, { "epoch": 0.08333333333333333, "grad_norm": 5.335231781005859, "learning_rate": 1.388888888888889e-05, "loss": 0.5517, "step": 150 }, { "epoch": 0.09722222222222222, "grad_norm": 31.28253746032715, "learning_rate": 1.6203703703703704e-05, "loss": 0.4477, "step": 175 }, { "epoch": 0.1111111111111111, "grad_norm": 3.2105932235717773, "learning_rate": 1.8518518518518518e-05, "loss": 0.5092, "step": 200 }, { "epoch": 0.125, "grad_norm": 4.380924701690674, "learning_rate": 2.0833333333333336e-05, "loss": 0.3824, "step": 225 }, { "epoch": 0.1388888888888889, "grad_norm": 2.6306023597717285, "learning_rate": 2.314814814814815e-05, "loss": 0.3407, "step": 250 }, { "epoch": 0.1527777777777778, "grad_norm": 54.588436126708984, "learning_rate": 2.5462962962962965e-05, "loss": 0.6848, "step": 275 }, { "epoch": 0.16666666666666666, "grad_norm": 9.066441535949707, "learning_rate": 2.777777777777778e-05, "loss": 0.5151, "step": 300 }, { "epoch": 0.18055555555555555, "grad_norm": 2.912062168121338, "learning_rate": 3.0092592592592593e-05, "loss": 0.3909, "step": 325 }, { "epoch": 0.19444444444444445, "grad_norm": 14.664259910583496, "learning_rate": 3.240740740740741e-05, "loss": 0.4754, "step": 350 }, { "epoch": 0.20833333333333334, "grad_norm": 17.37154769897461, "learning_rate": 3.472222222222222e-05, "loss": 0.4067, "step": 375 }, { "epoch": 0.2222222222222222, "grad_norm": 7.159306049346924, "learning_rate": 3.7037037037037037e-05, "loss": 0.5269, "step": 400 }, { "epoch": 0.2361111111111111, "grad_norm": 24.980106353759766, "learning_rate": 3.935185185185186e-05, "loss": 0.3985, "step": 425 }, { "epoch": 0.25, "grad_norm": 10.40503215789795, "learning_rate": 4.166666666666667e-05, "loss": 0.5491, "step": 450 }, { "epoch": 0.2638888888888889, "grad_norm": 2.9398627281188965, "learning_rate": 4.3981481481481486e-05, "loss": 0.3183, "step": 475 }, { "epoch": 0.2777777777777778, "grad_norm": 20.303274154663086, "learning_rate": 4.62962962962963e-05, "loss": 0.4905, "step": 500 }, { "epoch": 0.2916666666666667, "grad_norm": 5.474150657653809, "learning_rate": 4.8611111111111115e-05, "loss": 0.4931, "step": 525 }, { "epoch": 0.3055555555555556, "grad_norm": 18.128984451293945, "learning_rate": 4.9897119341563785e-05, "loss": 0.636, "step": 550 }, { "epoch": 0.3194444444444444, "grad_norm": 41.586219787597656, "learning_rate": 4.963991769547325e-05, "loss": 0.3997, "step": 575 }, { "epoch": 0.3333333333333333, "grad_norm": 11.251654624938965, "learning_rate": 4.938271604938271e-05, "loss": 0.6164, "step": 600 }, { "epoch": 0.3472222222222222, "grad_norm": 3.6719627380371094, "learning_rate": 4.912551440329218e-05, "loss": 0.4721, "step": 625 }, { "epoch": 0.3611111111111111, "grad_norm": 6.4116435050964355, "learning_rate": 4.886831275720165e-05, "loss": 0.5444, "step": 650 }, { "epoch": 0.375, "grad_norm": 8.13191032409668, "learning_rate": 4.8611111111111115e-05, "loss": 0.4481, "step": 675 }, { "epoch": 0.3888888888888889, "grad_norm": 2.3826730251312256, "learning_rate": 4.835390946502058e-05, "loss": 0.4876, "step": 700 }, { "epoch": 0.4027777777777778, "grad_norm": 9.0649995803833, "learning_rate": 4.809670781893004e-05, "loss": 0.5457, "step": 725 }, { "epoch": 0.4166666666666667, "grad_norm": 3.2031192779541016, "learning_rate": 4.783950617283951e-05, "loss": 0.4291, "step": 750 }, { "epoch": 0.4305555555555556, "grad_norm": 3.011695384979248, "learning_rate": 4.758230452674897e-05, "loss": 0.5681, "step": 775 }, { "epoch": 0.4444444444444444, "grad_norm": 17.774301528930664, "learning_rate": 4.732510288065844e-05, "loss": 0.5143, "step": 800 }, { "epoch": 0.4583333333333333, "grad_norm": 3.0613749027252197, "learning_rate": 4.70679012345679e-05, "loss": 0.603, "step": 825 }, { "epoch": 0.4722222222222222, "grad_norm": 7.315563201904297, "learning_rate": 4.6810699588477366e-05, "loss": 0.5161, "step": 850 }, { "epoch": 0.4861111111111111, "grad_norm": 10.456121444702148, "learning_rate": 4.6553497942386833e-05, "loss": 0.39, "step": 875 }, { "epoch": 0.5, "grad_norm": 9.36212158203125, "learning_rate": 4.62962962962963e-05, "loss": 0.4058, "step": 900 }, { "epoch": 0.5138888888888888, "grad_norm": 2.4673802852630615, "learning_rate": 4.603909465020577e-05, "loss": 0.3361, "step": 925 }, { "epoch": 0.5277777777777778, "grad_norm": 7.2702202796936035, "learning_rate": 4.578189300411523e-05, "loss": 0.3313, "step": 950 }, { "epoch": 0.5416666666666666, "grad_norm": 15.533823013305664, "learning_rate": 4.5524691358024696e-05, "loss": 0.4702, "step": 975 }, { "epoch": 0.5555555555555556, "grad_norm": 12.91049861907959, "learning_rate": 4.5267489711934157e-05, "loss": 0.4219, "step": 1000 }, { "epoch": 0.5694444444444444, "grad_norm": 8.368412017822266, "learning_rate": 4.5010288065843624e-05, "loss": 0.3685, "step": 1025 }, { "epoch": 0.5833333333333334, "grad_norm": 38.96614074707031, "learning_rate": 4.4753086419753084e-05, "loss": 0.4168, "step": 1050 }, { "epoch": 0.5972222222222222, "grad_norm": 5.273324489593506, "learning_rate": 4.449588477366255e-05, "loss": 0.5382, "step": 1075 }, { "epoch": 0.6111111111111112, "grad_norm": 9.082724571228027, "learning_rate": 4.423868312757202e-05, "loss": 0.525, "step": 1100 }, { "epoch": 0.625, "grad_norm": 12.262503623962402, "learning_rate": 4.3981481481481486e-05, "loss": 0.3931, "step": 1125 }, { "epoch": 0.6388888888888888, "grad_norm": 31.51844596862793, "learning_rate": 4.372427983539095e-05, "loss": 0.3836, "step": 1150 }, { "epoch": 0.6527777777777778, "grad_norm": 4.899830341339111, "learning_rate": 4.3467078189300414e-05, "loss": 0.4635, "step": 1175 }, { "epoch": 0.6666666666666666, "grad_norm": 8.966187477111816, "learning_rate": 4.3209876543209875e-05, "loss": 0.3706, "step": 1200 }, { "epoch": 0.6805555555555556, "grad_norm": 12.4137544631958, "learning_rate": 4.295267489711934e-05, "loss": 0.6281, "step": 1225 }, { "epoch": 0.6944444444444444, "grad_norm": 4.339605808258057, "learning_rate": 4.269547325102881e-05, "loss": 0.4616, "step": 1250 }, { "epoch": 0.7083333333333334, "grad_norm": 27.38968276977539, "learning_rate": 4.243827160493827e-05, "loss": 0.4681, "step": 1275 }, { "epoch": 0.7222222222222222, "grad_norm": 4.831294059753418, "learning_rate": 4.2181069958847744e-05, "loss": 0.4135, "step": 1300 }, { "epoch": 0.7361111111111112, "grad_norm": 9.18549633026123, "learning_rate": 4.1923868312757205e-05, "loss": 0.444, "step": 1325 }, { "epoch": 0.75, "grad_norm": 3.1465399265289307, "learning_rate": 4.166666666666667e-05, "loss": 0.3754, "step": 1350 }, { "epoch": 0.7638888888888888, "grad_norm": 16.972923278808594, "learning_rate": 4.140946502057613e-05, "loss": 0.466, "step": 1375 }, { "epoch": 0.7777777777777778, "grad_norm": 1.612660527229309, "learning_rate": 4.11522633744856e-05, "loss": 0.3867, "step": 1400 }, { "epoch": 0.7916666666666666, "grad_norm": 7.258045673370361, "learning_rate": 4.089506172839506e-05, "loss": 0.5741, "step": 1425 }, { "epoch": 0.8055555555555556, "grad_norm": 7.1187214851379395, "learning_rate": 4.063786008230453e-05, "loss": 0.4024, "step": 1450 }, { "epoch": 0.8194444444444444, "grad_norm": 15.787425994873047, "learning_rate": 4.038065843621399e-05, "loss": 0.4623, "step": 1475 }, { "epoch": 0.8333333333333334, "grad_norm": 2.3647310733795166, "learning_rate": 4.012345679012346e-05, "loss": 0.3146, "step": 1500 }, { "epoch": 0.8472222222222222, "grad_norm": 3.4890708923339844, "learning_rate": 3.986625514403292e-05, "loss": 0.3952, "step": 1525 }, { "epoch": 0.8611111111111112, "grad_norm": 17.58116912841797, "learning_rate": 3.960905349794239e-05, "loss": 0.4454, "step": 1550 }, { "epoch": 0.875, "grad_norm": 5.0533246994018555, "learning_rate": 3.935185185185186e-05, "loss": 0.4524, "step": 1575 }, { "epoch": 0.8888888888888888, "grad_norm": 23.176776885986328, "learning_rate": 3.909465020576132e-05, "loss": 0.401, "step": 1600 }, { "epoch": 0.9027777777777778, "grad_norm": 5.863523483276367, "learning_rate": 3.8837448559670786e-05, "loss": 0.4483, "step": 1625 }, { "epoch": 0.9166666666666666, "grad_norm": 12.402013778686523, "learning_rate": 3.8580246913580246e-05, "loss": 0.5053, "step": 1650 }, { "epoch": 0.9305555555555556, "grad_norm": 2.132073402404785, "learning_rate": 3.8323045267489713e-05, "loss": 0.3247, "step": 1675 }, { "epoch": 0.9444444444444444, "grad_norm": 2.4326186180114746, "learning_rate": 3.806584362139918e-05, "loss": 0.4661, "step": 1700 }, { "epoch": 0.9583333333333334, "grad_norm": 14.031126976013184, "learning_rate": 3.780864197530865e-05, "loss": 0.4249, "step": 1725 }, { "epoch": 0.9722222222222222, "grad_norm": 24.70652198791504, "learning_rate": 3.755144032921811e-05, "loss": 0.3789, "step": 1750 }, { "epoch": 0.9861111111111112, "grad_norm": 2.8892605304718018, "learning_rate": 3.7294238683127576e-05, "loss": 0.4248, "step": 1775 }, { "epoch": 1.0, "grad_norm": 6.622640132904053, "learning_rate": 3.7037037037037037e-05, "loss": 0.3189, "step": 1800 }, { "epoch": 1.0, "eval_accuracy": 0.865, "eval_auc": 0.9395007159925164, "eval_f1": 0.869073275862069, "eval_loss": 0.35031697154045105, "eval_precision": 0.8529878371232152, "eval_recall": 0.885777045579352, "eval_runtime": 1007.1559, "eval_samples_per_second": 3.574, "eval_steps_per_second": 0.223, "step": 1800 } ], "logging_steps": 25, "max_steps": 5400, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 5, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 947216786227200.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }