{
  "best_metric": 0.35031697154045105,
  "best_model_checkpoint": "deberta-sent-multi/checkpoint-1800",
  "epoch": 1.0,
  "eval_steps": 500,
  "global_step": 1800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.013888888888888888,
      "grad_norm": 1.9551440477371216,
      "learning_rate": 2.3148148148148148e-06,
      "loss": 0.6935,
      "step": 25
    },
    {
      "epoch": 0.027777777777777776,
      "grad_norm": 1.187191128730774,
      "learning_rate": 4.6296296296296296e-06,
      "loss": 0.6957,
      "step": 50
    },
    {
      "epoch": 0.041666666666666664,
      "grad_norm": 0.9179525375366211,
      "learning_rate": 6.944444444444445e-06,
      "loss": 0.6948,
      "step": 75
    },
    {
      "epoch": 0.05555555555555555,
      "grad_norm": 1.4890326261520386,
      "learning_rate": 9.259259259259259e-06,
      "loss": 0.6827,
      "step": 100
    },
    {
      "epoch": 0.06944444444444445,
      "grad_norm": 7.563885688781738,
      "learning_rate": 1.1574074074074075e-05,
      "loss": 0.5348,
      "step": 125
    },
    {
      "epoch": 0.08333333333333333,
      "grad_norm": 5.335231781005859,
      "learning_rate": 1.388888888888889e-05,
      "loss": 0.5517,
      "step": 150
    },
    {
      "epoch": 0.09722222222222222,
      "grad_norm": 31.28253746032715,
      "learning_rate": 1.6203703703703704e-05,
      "loss": 0.4477,
      "step": 175
    },
    {
      "epoch": 0.1111111111111111,
      "grad_norm": 3.2105932235717773,
      "learning_rate": 1.8518518518518518e-05,
      "loss": 0.5092,
      "step": 200
    },
    {
      "epoch": 0.125,
      "grad_norm": 4.380924701690674,
      "learning_rate": 2.0833333333333336e-05,
      "loss": 0.3824,
      "step": 225
    },
    {
      "epoch": 0.1388888888888889,
      "grad_norm": 2.6306023597717285,
      "learning_rate": 2.314814814814815e-05,
      "loss": 0.3407,
      "step": 250
    },
    {
      "epoch": 0.1527777777777778,
      "grad_norm": 54.588436126708984,
      "learning_rate": 2.5462962962962965e-05,
      "loss": 0.6848,
      "step": 275
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 9.066441535949707,
      "learning_rate": 2.777777777777778e-05,
      "loss": 0.5151,
      "step": 300
    },
    {
      "epoch": 0.18055555555555555,
      "grad_norm": 2.912062168121338,
      "learning_rate": 3.0092592592592593e-05,
      "loss": 0.3909,
      "step": 325
    },
    {
      "epoch": 0.19444444444444445,
      "grad_norm": 14.664259910583496,
      "learning_rate": 3.240740740740741e-05,
      "loss": 0.4754,
      "step": 350
    },
    {
      "epoch": 0.20833333333333334,
      "grad_norm": 17.37154769897461,
      "learning_rate": 3.472222222222222e-05,
      "loss": 0.4067,
      "step": 375
    },
    {
      "epoch": 0.2222222222222222,
      "grad_norm": 7.159306049346924,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.5269,
      "step": 400
    },
    {
      "epoch": 0.2361111111111111,
      "grad_norm": 24.980106353759766,
      "learning_rate": 3.935185185185186e-05,
      "loss": 0.3985,
      "step": 425
    },
    {
      "epoch": 0.25,
      "grad_norm": 10.40503215789795,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.5491,
      "step": 450
    },
    {
      "epoch": 0.2638888888888889,
      "grad_norm": 2.9398627281188965,
      "learning_rate": 4.3981481481481486e-05,
      "loss": 0.3183,
      "step": 475
    },
    {
      "epoch": 0.2777777777777778,
      "grad_norm": 20.303274154663086,
      "learning_rate": 4.62962962962963e-05,
      "loss": 0.4905,
      "step": 500
    },
    {
      "epoch": 0.2916666666666667,
      "grad_norm": 5.474150657653809,
      "learning_rate": 4.8611111111111115e-05,
      "loss": 0.4931,
      "step": 525
    },
    {
      "epoch": 0.3055555555555556,
      "grad_norm": 18.128984451293945,
      "learning_rate": 4.9897119341563785e-05,
      "loss": 0.636,
      "step": 550
    },
    {
      "epoch": 0.3194444444444444,
      "grad_norm": 41.586219787597656,
      "learning_rate": 4.963991769547325e-05,
      "loss": 0.3997,
      "step": 575
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 11.251654624938965,
      "learning_rate": 4.938271604938271e-05,
      "loss": 0.6164,
      "step": 600
    },
    {
      "epoch": 0.3472222222222222,
      "grad_norm": 3.6719627380371094,
      "learning_rate": 4.912551440329218e-05,
      "loss": 0.4721,
      "step": 625
    },
    {
      "epoch": 0.3611111111111111,
      "grad_norm": 6.4116435050964355,
      "learning_rate": 4.886831275720165e-05,
      "loss": 0.5444,
      "step": 650
    },
    {
      "epoch": 0.375,
      "grad_norm": 8.13191032409668,
      "learning_rate": 4.8611111111111115e-05,
      "loss": 0.4481,
      "step": 675
    },
    {
      "epoch": 0.3888888888888889,
      "grad_norm": 2.3826730251312256,
      "learning_rate": 4.835390946502058e-05,
      "loss": 0.4876,
      "step": 700
    },
    {
      "epoch": 0.4027777777777778,
      "grad_norm": 9.0649995803833,
      "learning_rate": 4.809670781893004e-05,
      "loss": 0.5457,
      "step": 725
    },
    {
      "epoch": 0.4166666666666667,
      "grad_norm": 3.2031192779541016,
      "learning_rate": 4.783950617283951e-05,
      "loss": 0.4291,
      "step": 750
    },
    {
      "epoch": 0.4305555555555556,
      "grad_norm": 3.011695384979248,
      "learning_rate": 4.758230452674897e-05,
      "loss": 0.5681,
      "step": 775
    },
    {
      "epoch": 0.4444444444444444,
      "grad_norm": 17.774301528930664,
      "learning_rate": 4.732510288065844e-05,
      "loss": 0.5143,
      "step": 800
    },
    {
      "epoch": 0.4583333333333333,
      "grad_norm": 3.0613749027252197,
      "learning_rate": 4.70679012345679e-05,
      "loss": 0.603,
      "step": 825
    },
    {
      "epoch": 0.4722222222222222,
      "grad_norm": 7.315563201904297,
      "learning_rate": 4.6810699588477366e-05,
      "loss": 0.5161,
      "step": 850
    },
    {
      "epoch": 0.4861111111111111,
      "grad_norm": 10.456121444702148,
      "learning_rate": 4.6553497942386833e-05,
      "loss": 0.39,
      "step": 875
    },
    {
      "epoch": 0.5,
      "grad_norm": 9.36212158203125,
      "learning_rate": 4.62962962962963e-05,
      "loss": 0.4058,
      "step": 900
    },
    {
      "epoch": 0.5138888888888888,
      "grad_norm": 2.4673802852630615,
      "learning_rate": 4.603909465020577e-05,
      "loss": 0.3361,
      "step": 925
    },
    {
      "epoch": 0.5277777777777778,
      "grad_norm": 7.2702202796936035,
      "learning_rate": 4.578189300411523e-05,
      "loss": 0.3313,
      "step": 950
    },
    {
      "epoch": 0.5416666666666666,
      "grad_norm": 15.533823013305664,
      "learning_rate": 4.5524691358024696e-05,
      "loss": 0.4702,
      "step": 975
    },
    {
      "epoch": 0.5555555555555556,
      "grad_norm": 12.91049861907959,
      "learning_rate": 4.5267489711934157e-05,
      "loss": 0.4219,
      "step": 1000
    },
    {
      "epoch": 0.5694444444444444,
      "grad_norm": 8.368412017822266,
      "learning_rate": 4.5010288065843624e-05,
      "loss": 0.3685,
      "step": 1025
    },
    {
      "epoch": 0.5833333333333334,
      "grad_norm": 38.96614074707031,
      "learning_rate": 4.4753086419753084e-05,
      "loss": 0.4168,
      "step": 1050
    },
    {
      "epoch": 0.5972222222222222,
      "grad_norm": 5.273324489593506,
      "learning_rate": 4.449588477366255e-05,
      "loss": 0.5382,
      "step": 1075
    },
    {
      "epoch": 0.6111111111111112,
      "grad_norm": 9.082724571228027,
      "learning_rate": 4.423868312757202e-05,
      "loss": 0.525,
      "step": 1100
    },
    {
      "epoch": 0.625,
      "grad_norm": 12.262503623962402,
      "learning_rate": 4.3981481481481486e-05,
      "loss": 0.3931,
      "step": 1125
    },
    {
      "epoch": 0.6388888888888888,
      "grad_norm": 31.51844596862793,
      "learning_rate": 4.372427983539095e-05,
      "loss": 0.3836,
      "step": 1150
    },
    {
      "epoch": 0.6527777777777778,
      "grad_norm": 4.899830341339111,
      "learning_rate": 4.3467078189300414e-05,
      "loss": 0.4635,
      "step": 1175
    },
    {
      "epoch": 0.6666666666666666,
      "grad_norm": 8.966187477111816,
      "learning_rate": 4.3209876543209875e-05,
      "loss": 0.3706,
      "step": 1200
    },
    {
      "epoch": 0.6805555555555556,
      "grad_norm": 12.4137544631958,
      "learning_rate": 4.295267489711934e-05,
      "loss": 0.6281,
      "step": 1225
    },
    {
      "epoch": 0.6944444444444444,
      "grad_norm": 4.339605808258057,
      "learning_rate": 4.269547325102881e-05,
      "loss": 0.4616,
      "step": 1250
    },
    {
      "epoch": 0.7083333333333334,
      "grad_norm": 27.38968276977539,
      "learning_rate": 4.243827160493827e-05,
      "loss": 0.4681,
      "step": 1275
    },
    {
      "epoch": 0.7222222222222222,
      "grad_norm": 4.831294059753418,
      "learning_rate": 4.2181069958847744e-05,
      "loss": 0.4135,
      "step": 1300
    },
    {
      "epoch": 0.7361111111111112,
      "grad_norm": 9.18549633026123,
      "learning_rate": 4.1923868312757205e-05,
      "loss": 0.444,
      "step": 1325
    },
    {
      "epoch": 0.75,
      "grad_norm": 3.1465399265289307,
      "learning_rate": 4.166666666666667e-05,
      "loss": 0.3754,
      "step": 1350
    },
    {
      "epoch": 0.7638888888888888,
      "grad_norm": 16.972923278808594,
      "learning_rate": 4.140946502057613e-05,
      "loss": 0.466,
      "step": 1375
    },
    {
      "epoch": 0.7777777777777778,
      "grad_norm": 1.612660527229309,
      "learning_rate": 4.11522633744856e-05,
      "loss": 0.3867,
      "step": 1400
    },
    {
      "epoch": 0.7916666666666666,
      "grad_norm": 7.258045673370361,
      "learning_rate": 4.089506172839506e-05,
      "loss": 0.5741,
      "step": 1425
    },
    {
      "epoch": 0.8055555555555556,
      "grad_norm": 7.1187214851379395,
      "learning_rate": 4.063786008230453e-05,
      "loss": 0.4024,
      "step": 1450
    },
    {
      "epoch": 0.8194444444444444,
      "grad_norm": 15.787425994873047,
      "learning_rate": 4.038065843621399e-05,
      "loss": 0.4623,
      "step": 1475
    },
    {
      "epoch": 0.8333333333333334,
      "grad_norm": 2.3647310733795166,
      "learning_rate": 4.012345679012346e-05,
      "loss": 0.3146,
      "step": 1500
    },
    {
      "epoch": 0.8472222222222222,
      "grad_norm": 3.4890708923339844,
      "learning_rate": 3.986625514403292e-05,
      "loss": 0.3952,
      "step": 1525
    },
    {
      "epoch": 0.8611111111111112,
      "grad_norm": 17.58116912841797,
      "learning_rate": 3.960905349794239e-05,
      "loss": 0.4454,
      "step": 1550
    },
    {
      "epoch": 0.875,
      "grad_norm": 5.0533246994018555,
      "learning_rate": 3.935185185185186e-05,
      "loss": 0.4524,
      "step": 1575
    },
    {
      "epoch": 0.8888888888888888,
      "grad_norm": 23.176776885986328,
      "learning_rate": 3.909465020576132e-05,
      "loss": 0.401,
      "step": 1600
    },
    {
      "epoch": 0.9027777777777778,
      "grad_norm": 5.863523483276367,
      "learning_rate": 3.8837448559670786e-05,
      "loss": 0.4483,
      "step": 1625
    },
    {
      "epoch": 0.9166666666666666,
      "grad_norm": 12.402013778686523,
      "learning_rate": 3.8580246913580246e-05,
      "loss": 0.5053,
      "step": 1650
    },
    {
      "epoch": 0.9305555555555556,
      "grad_norm": 2.132073402404785,
      "learning_rate": 3.8323045267489713e-05,
      "loss": 0.3247,
      "step": 1675
    },
    {
      "epoch": 0.9444444444444444,
      "grad_norm": 2.4326186180114746,
      "learning_rate": 3.806584362139918e-05,
      "loss": 0.4661,
      "step": 1700
    },
    {
      "epoch": 0.9583333333333334,
      "grad_norm": 14.031126976013184,
      "learning_rate": 3.780864197530865e-05,
      "loss": 0.4249,
      "step": 1725
    },
    {
      "epoch": 0.9722222222222222,
      "grad_norm": 24.70652198791504,
      "learning_rate": 3.755144032921811e-05,
      "loss": 0.3789,
      "step": 1750
    },
    {
      "epoch": 0.9861111111111112,
      "grad_norm": 2.8892605304718018,
      "learning_rate": 3.7294238683127576e-05,
      "loss": 0.4248,
      "step": 1775
    },
    {
      "epoch": 1.0,
      "grad_norm": 6.622640132904053,
      "learning_rate": 3.7037037037037037e-05,
      "loss": 0.3189,
      "step": 1800
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.865,
      "eval_auc": 0.9395007159925164,
      "eval_f1": 0.869073275862069,
      "eval_loss": 0.35031697154045105,
      "eval_precision": 0.8529878371232152,
      "eval_recall": 0.885777045579352,
      "eval_runtime": 1007.1559,
      "eval_samples_per_second": 3.574,
      "eval_steps_per_second": 0.223,
      "step": 1800
    }
  ],
  "logging_steps": 25,
  "max_steps": 5400,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 5,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 947216786227200.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
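For reference, a minimal sketch of how a state file like this is typically inspected after training. It assumes the JSON above is the trainer_state.json written by the Hugging Face Trainer inside the checkpoint directory named in "best_model_checkpoint"; the file path, the summary printout, and the plot are illustrative choices, not part of the original run.

import json

import matplotlib.pyplot as plt

# Load the trainer state; the path is hypothetical, taken from
# "best_model_checkpoint" in the JSON above.
with open("deberta-sent-multi/checkpoint-1800/trainer_state.json") as f:
    state = json.load(f)

# log_history mixes training records (which carry "loss") and
# evaluation records (which carry "eval_loss"); split them apart.
train_logs = [e for e in state["log_history"] if "loss" in e]
eval_logs = [e for e in state["log_history"] if "eval_loss" in e]

print(f"best eval loss: {state['best_metric']:.4f} "
      f"at {state['best_model_checkpoint']}")
for e in eval_logs:
    print(f"step {e['step']}: accuracy={e['eval_accuracy']}, "
          f"f1={e['eval_f1']}, auc={e['eval_auc']}")

# Plot the training-loss curve against global step, with the
# evaluation losses overlaid as points.
plt.plot([e["step"] for e in train_logs],
         [e["loss"] for e in train_logs], label="train loss")
plt.scatter([e["step"] for e in eval_logs],
            [e["eval_loss"] for e in eval_logs],
            color="red", zorder=3, label="eval loss")
plt.xlabel("global step")
plt.ylabel("loss")
plt.legend()
plt.savefig("loss_curve.png")

Run on this state, the script would report the single epoch-end evaluation (accuracy 0.865, F1 0.869, AUC 0.940 at step 1800) and plot the 72 logged training losses, one every 25 steps as set by "logging_steps".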