{
  "best_metric": 0.5355029585798816,
  "best_model_checkpoint": "/media/cse/HDD/Shawon/shawon/10 fold timesformer/VideoMAE_Base_wlasl_100_longtail_200/checkpoint-5587",
  "epoch": 50.00498611111111,
  "eval_steps": 500,
  "global_step": 9192,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.005,
      "grad_norm": 57.62034225463867,
      "learning_rate": 2.4444444444444447e-06,
      "loss": 18.7021,
      "step": 180
    },
    {
      "epoch": 0.005,
      "eval_accuracy": 0.011834319526627219,
      "eval_loss": 4.644242286682129,
      "eval_runtime": 29.4341,
      "eval_samples_per_second": 11.483,
      "eval_steps_per_second": 5.742,
      "step": 180
    },
    {
      "epoch": 1.0049930555555555,
      "grad_norm": 63.21147155761719,
      "learning_rate": 4.9444444444444444e-06,
      "loss": 18.6026,
      "step": 360
    },
    {
      "epoch": 1.0049930555555555,
      "eval_accuracy": 0.011834319526627219,
      "eval_loss": 4.631899356842041,
      "eval_runtime": 29.7352,
      "eval_samples_per_second": 11.367,
      "eval_steps_per_second": 5.684,
      "step": 360
    },
    {
      "epoch": 2.004986111111111,
      "grad_norm": 34.4942741394043,
      "learning_rate": 7.444444444444444e-06,
      "loss": 18.5393,
      "step": 540
    },
    {
      "epoch": 2.004986111111111,
      "eval_accuracy": 0.01775147928994083,
      "eval_loss": 4.623769283294678,
      "eval_runtime": 31.652,
      "eval_samples_per_second": 10.679,
      "eval_steps_per_second": 5.339,
      "step": 540
    },
    {
      "epoch": 3.0050069444444443,
      "grad_norm": 30.521425247192383,
      "learning_rate": 9.958333333333333e-06,
      "loss": 18.3789,
      "step": 721
    },
    {
      "epoch": 3.0050069444444443,
      "eval_accuracy": 0.01775147928994083,
      "eval_loss": 4.617008209228516,
      "eval_runtime": 28.7672,
      "eval_samples_per_second": 11.749,
      "eval_steps_per_second": 5.875,
      "step": 721
    },
    {
      "epoch": 4.005,
      "grad_norm": 32.52579116821289,
      "learning_rate": 1.2458333333333334e-05,
      "loss": 18.3763,
      "step": 901
    },
    {
      "epoch": 4.005,
      "eval_accuracy": 0.01775147928994083,
      "eval_loss": 4.6236653327941895,
      "eval_runtime": 30.9057,
      "eval_samples_per_second": 10.936,
      "eval_steps_per_second": 5.468,
      "step": 901
    },
    {
      "epoch": 5.0049930555555555,
      "grad_norm": 34.23224639892578,
      "learning_rate": 1.4958333333333336e-05,
      "loss": 18.334,
      "step": 1081
    },
    {
      "epoch": 5.0049930555555555,
      "eval_accuracy": 0.029585798816568046,
      "eval_loss": 4.634205341339111,
      "eval_runtime": 28.5668,
      "eval_samples_per_second": 11.832,
      "eval_steps_per_second": 5.916,
      "step": 1081
    },
    {
      "epoch": 6.004986111111111,
      "grad_norm": 35.624855041503906,
      "learning_rate": 1.7458333333333335e-05,
      "loss": 18.1779,
      "step": 1261
    },
    {
      "epoch": 6.004986111111111,
      "eval_accuracy": 0.029585798816568046,
      "eval_loss": 4.603573322296143,
      "eval_runtime": 28.2523,
      "eval_samples_per_second": 11.964,
      "eval_steps_per_second": 5.982,
      "step": 1261
    },
    {
      "epoch": 7.005006944444444,
      "grad_norm": 48.291839599609375,
      "learning_rate": 1.9972222222222223e-05,
      "loss": 17.9948,
      "step": 1442
    },
    {
      "epoch": 7.005006944444444,
      "eval_accuracy": 0.026627218934911243,
      "eval_loss": 4.590323448181152,
      "eval_runtime": 29.8277,
      "eval_samples_per_second": 11.332,
      "eval_steps_per_second": 5.666,
      "step": 1442
    },
    {
      "epoch": 8.005,
      "grad_norm": 36.94101333618164,
      "learning_rate": 2.2472222222222223e-05,
      "loss": 17.9333,
      "step": 1622
    },
    {
      "epoch": 8.005,
      "eval_accuracy": 0.023668639053254437,
      "eval_loss": 4.614376068115234,
      "eval_runtime": 27.5145,
      "eval_samples_per_second": 12.284,
      "eval_steps_per_second": 6.142,
      "step": 1622
    },
    {
      "epoch": 9.004993055555556,
      "grad_norm": 35.89722442626953,
      "learning_rate": 2.4972222222222226e-05,
      "loss": 17.7505,
      "step": 1802
    },
    {
      "epoch": 9.004993055555556,
      "eval_accuracy": 0.011834319526627219,
      "eval_loss": 4.586489200592041,
      "eval_runtime": 29.1474,
      "eval_samples_per_second": 11.596,
      "eval_steps_per_second": 5.798,
      "step": 1802
    },
    {
      "epoch": 10.00498611111111,
      "grad_norm": 36.66941452026367,
      "learning_rate": 2.7472222222222222e-05,
      "loss": 17.4917,
      "step": 1982
    },
    {
      "epoch": 10.00498611111111,
      "eval_accuracy": 0.020710059171597635,
      "eval_loss": 4.562595367431641,
      "eval_runtime": 27.9047,
      "eval_samples_per_second": 12.113,
      "eval_steps_per_second": 6.056,
      "step": 1982
    },
    {
      "epoch": 11.005006944444444,
      "grad_norm": 35.883052825927734,
      "learning_rate": 2.9986111111111116e-05,
      "loss": 16.9821,
      "step": 2163
    },
    {
      "epoch": 11.005006944444444,
      "eval_accuracy": 0.04437869822485207,
      "eval_loss": 4.36150598526001,
      "eval_runtime": 25.3334,
      "eval_samples_per_second": 13.342,
      "eval_steps_per_second": 6.671,
      "step": 2163
    },
    {
      "epoch": 12.005,
      "grad_norm": 48.5424690246582,
      "learning_rate": 3.248611111111111e-05,
      "loss": 16.2362,
      "step": 2343
    },
    {
      "epoch": 12.005,
      "eval_accuracy": 0.05325443786982249,
      "eval_loss": 4.151459693908691,
      "eval_runtime": 23.9608,
      "eval_samples_per_second": 14.106,
      "eval_steps_per_second": 7.053,
      "step": 2343
    },
    {
      "epoch": 13.004993055555556,
      "grad_norm": 41.33076095581055,
      "learning_rate": 3.4986111111111116e-05,
      "loss": 15.2255,
      "step": 2523
    },
    {
      "epoch": 13.004993055555556,
      "eval_accuracy": 0.07396449704142012,
      "eval_loss": 3.960317850112915,
      "eval_runtime": 18.4092,
      "eval_samples_per_second": 18.36,
      "eval_steps_per_second": 9.18,
      "step": 2523
    },
    {
      "epoch": 14.00498611111111,
      "grad_norm": 55.07318115234375,
      "learning_rate": 3.748611111111111e-05,
      "loss": 14.0646,
      "step": 2703
    },
    {
      "epoch": 14.00498611111111,
      "eval_accuracy": 0.08284023668639054,
      "eval_loss": 3.7594428062438965,
      "eval_runtime": 28.2986,
      "eval_samples_per_second": 11.944,
      "eval_steps_per_second": 5.972,
      "step": 2703
    },
    {
      "epoch": 15.005006944444444,
      "grad_norm": 41.32450485229492,
      "learning_rate": 4e-05,
      "loss": 12.8642,
      "step": 2884
    },
    {
      "epoch": 15.005006944444444,
      "eval_accuracy": 0.14201183431952663,
      "eval_loss": 3.4153764247894287,
      "eval_runtime": 26.9752,
      "eval_samples_per_second": 12.53,
      "eval_steps_per_second": 6.265,
      "step": 2884
    },
    {
      "epoch": 16.005,
      "grad_norm": 81.92693328857422,
      "learning_rate": 4.25e-05,
      "loss": 11.6502,
      "step": 3064
    },
    {
      "epoch": 16.005,
      "eval_accuracy": 0.16272189349112426,
      "eval_loss": 3.39172625541687,
      "eval_runtime": 27.2787,
      "eval_samples_per_second": 12.391,
      "eval_steps_per_second": 6.195,
      "step": 3064
    },
    {
      "epoch": 17.004993055555556,
      "grad_norm": 57.80864334106445,
      "learning_rate": 4.5e-05,
      "loss": 10.332,
      "step": 3244
    },
    {
      "epoch": 17.004993055555556,
      "eval_accuracy": 0.22485207100591717,
      "eval_loss": 3.0358824729919434,
      "eval_runtime": 28.8309,
      "eval_samples_per_second": 11.724,
      "eval_steps_per_second": 5.862,
      "step": 3244
    },
    {
      "epoch": 18.004986111111112,
      "grad_norm": 57.51765060424805,
      "learning_rate": 4.75e-05,
      "loss": 8.9465,
      "step": 3424
    },
    {
      "epoch": 18.004986111111112,
      "eval_accuracy": 0.28402366863905326,
      "eval_loss": 2.8625123500823975,
      "eval_runtime": 26.3032,
      "eval_samples_per_second": 12.85,
      "eval_steps_per_second": 6.425,
      "step": 3424
    },
    {
      "epoch": 19.005006944444446,
      "grad_norm": 61.120155334472656,
      "learning_rate": 5e-05,
      "loss": 7.6629,
      "step": 3605
    },
    {
      "epoch": 19.005006944444446,
      "eval_accuracy": 0.3106508875739645,
      "eval_loss": 2.820160388946533,
      "eval_runtime": 25.9784,
      "eval_samples_per_second": 13.011,
      "eval_steps_per_second": 6.505,
      "step": 3605
    },
    {
      "epoch": 20.005,
      "grad_norm": 66.28811645507812,
      "learning_rate": 4.972222222222223e-05,
      "loss": 6.2517,
      "step": 3785
    },
    {
      "epoch": 20.005,
      "eval_accuracy": 0.3343195266272189,
      "eval_loss": 2.647813320159912,
      "eval_runtime": 28.2017,
      "eval_samples_per_second": 11.985,
      "eval_steps_per_second": 5.993,
      "step": 3785
    },
    {
      "epoch": 21.004993055555556,
      "grad_norm": 83.8398666381836,
      "learning_rate": 4.9444444444444446e-05,
      "loss": 5.1876,
      "step": 3965
    },
    {
      "epoch": 21.004993055555556,
      "eval_accuracy": 0.3727810650887574,
      "eval_loss": 2.4981963634490967,
      "eval_runtime": 29.7423,
      "eval_samples_per_second": 11.364,
      "eval_steps_per_second": 5.682,
      "step": 3965
    },
    {
      "epoch": 22.004986111111112,
      "grad_norm": 70.95669555664062,
      "learning_rate": 4.9166666666666665e-05,
      "loss": 4.0929,
      "step": 4145
    },
    {
      "epoch": 22.004986111111112,
      "eval_accuracy": 0.3875739644970414,
      "eval_loss": 2.389131546020508,
      "eval_runtime": 29.0106,
      "eval_samples_per_second": 11.651,
      "eval_steps_per_second": 5.825,
      "step": 4145
    },
    {
      "epoch": 23.005006944444446,
      "grad_norm": 73.58196258544922,
      "learning_rate": 4.8887345679012346e-05,
      "loss": 3.0425,
      "step": 4326
    },
    {
      "epoch": 23.005006944444446,
      "eval_accuracy": 0.40828402366863903,
      "eval_loss": 2.2211599349975586,
      "eval_runtime": 29.3983,
      "eval_samples_per_second": 11.497,
      "eval_steps_per_second": 5.749,
      "step": 4326
    },
    {
      "epoch": 24.005,
      "grad_norm": 38.20242691040039,
      "learning_rate": 4.860956790123457e-05,
      "loss": 2.3667,
      "step": 4506
    },
    {
      "epoch": 24.005,
      "eval_accuracy": 0.45857988165680474,
      "eval_loss": 2.1608965396881104,
      "eval_runtime": 26.1013,
      "eval_samples_per_second": 12.95,
      "eval_steps_per_second": 6.475,
      "step": 4506
    },
    {
      "epoch": 25.004993055555556,
      "grad_norm": 55.99176025390625,
      "learning_rate": 4.8331790123456797e-05,
      "loss": 1.7821,
      "step": 4686
    },
    {
      "epoch": 25.004993055555556,
      "eval_accuracy": 0.4260355029585799,
      "eval_loss": 2.2471060752868652,
      "eval_runtime": 29.7721,
      "eval_samples_per_second": 11.353,
      "eval_steps_per_second": 5.676,
      "step": 4686
    },
    {
      "epoch": 26.004986111111112,
      "grad_norm": 37.733924865722656,
      "learning_rate": 4.8054012345679015e-05,
      "loss": 1.4215,
      "step": 4866
    },
    {
      "epoch": 26.004986111111112,
      "eval_accuracy": 0.46745562130177515,
      "eval_loss": 2.2263102531433105,
      "eval_runtime": 28.251,
      "eval_samples_per_second": 11.964,
      "eval_steps_per_second": 5.982,
      "step": 4866
    },
    {
      "epoch": 27.005006944444446,
      "grad_norm": 23.38159942626953,
      "learning_rate": 4.777469135802469e-05,
      "loss": 1.1324,
      "step": 5047
    },
    {
      "epoch": 27.005006944444446,
      "eval_accuracy": 0.4556213017751479,
      "eval_loss": 2.235961437225342,
      "eval_runtime": 29.6258,
      "eval_samples_per_second": 11.409,
      "eval_steps_per_second": 5.704,
      "step": 5047
    },
    {
      "epoch": 28.005,
      "grad_norm": 37.854759216308594,
      "learning_rate": 4.7496913580246915e-05,
      "loss": 0.9114,
      "step": 5227
    },
    {
      "epoch": 28.005,
      "eval_accuracy": 0.48520710059171596,
      "eval_loss": 2.202089548110962,
      "eval_runtime": 30.2271,
      "eval_samples_per_second": 11.182,
      "eval_steps_per_second": 5.591,
      "step": 5227
    },
    {
      "epoch": 29.004993055555556,
      "grad_norm": 11.60957145690918,
      "learning_rate": 4.721913580246914e-05,
      "loss": 0.6966,
      "step": 5407
    },
    {
      "epoch": 29.004993055555556,
      "eval_accuracy": 0.4408284023668639,
      "eval_loss": 2.312260627746582,
      "eval_runtime": 27.0737,
      "eval_samples_per_second": 12.484,
      "eval_steps_per_second": 6.242,
      "step": 5407
    },
    {
      "epoch": 30.004986111111112,
      "grad_norm": 11.04503345489502,
      "learning_rate": 4.694135802469136e-05,
      "loss": 0.5676,
      "step": 5587
    },
    {
      "epoch": 30.004986111111112,
      "eval_accuracy": 0.5355029585798816,
      "eval_loss": 2.119814395904541,
      "eval_runtime": 26.3784,
      "eval_samples_per_second": 12.814,
      "eval_steps_per_second": 6.407,
      "step": 5587
    },
    {
      "epoch": 31.005006944444446,
      "grad_norm": 23.7001895904541,
      "learning_rate": 4.666203703703704e-05,
      "loss": 0.4494,
      "step": 5768
    },
    {
      "epoch": 31.005006944444446,
      "eval_accuracy": 0.4911242603550296,
      "eval_loss": 2.2495245933532715,
      "eval_runtime": 26.6338,
      "eval_samples_per_second": 12.691,
      "eval_steps_per_second": 6.345,
      "step": 5768
    },
    {
      "epoch": 32.005,
      "grad_norm": 8.150625228881836,
      "learning_rate": 4.638425925925926e-05,
      "loss": 0.3321,
      "step": 5948
    },
    {
      "epoch": 32.005,
      "eval_accuracy": 0.5088757396449705,
      "eval_loss": 2.2658286094665527,
      "eval_runtime": 12.0221,
      "eval_samples_per_second": 28.115,
      "eval_steps_per_second": 14.057,
      "step": 5948
    },
    {
      "epoch": 33.00499305555556,
      "grad_norm": 4.771080017089844,
      "learning_rate": 4.6106481481481484e-05,
      "loss": 0.227,
      "step": 6128
    },
    {
      "epoch": 33.00499305555556,
      "eval_accuracy": 0.4881656804733728,
      "eval_loss": 2.442265033721924,
      "eval_runtime": 28.4938,
      "eval_samples_per_second": 11.862,
      "eval_steps_per_second": 5.931,
      "step": 6128
    },
    {
      "epoch": 34.00498611111111,
      "grad_norm": 31.75286293029785,
      "learning_rate": 4.582870370370371e-05,
      "loss": 0.2203,
      "step": 6308
    },
    {
      "epoch": 34.00498611111111,
      "eval_accuracy": 0.47633136094674555,
      "eval_loss": 2.435835123062134,
      "eval_runtime": 12.5987,
      "eval_samples_per_second": 26.828,
      "eval_steps_per_second": 13.414,
      "step": 6308
    },
    {
      "epoch": 35.005006944444446,
      "grad_norm": 17.9893798828125,
      "learning_rate": 4.5549382716049384e-05,
      "loss": 0.2643,
      "step": 6489
    },
    {
      "epoch": 35.005006944444446,
      "eval_accuracy": 0.46745562130177515,
      "eval_loss": 2.552072525024414,
      "eval_runtime": 11.7733,
      "eval_samples_per_second": 28.709,
      "eval_steps_per_second": 14.354,
      "step": 6489
    },
    {
      "epoch": 36.005,
      "grad_norm": 1.910334825515747,
      "learning_rate": 4.527160493827161e-05,
      "loss": 0.111,
      "step": 6669
    },
    {
      "epoch": 36.005,
      "eval_accuracy": 0.4881656804733728,
      "eval_loss": 2.578749895095825,
      "eval_runtime": 11.8894,
      "eval_samples_per_second": 28.429,
      "eval_steps_per_second": 14.214,
      "step": 6669
    },
    {
      "epoch": 37.00499305555556,
      "grad_norm": 0.18931302428245544,
      "learning_rate": 4.499382716049383e-05,
      "loss": 0.1009,
      "step": 6849
    },
    {
      "epoch": 37.00499305555556,
      "eval_accuracy": 0.5059171597633136,
      "eval_loss": 2.402237892150879,
      "eval_runtime": 21.2587,
      "eval_samples_per_second": 15.899,
      "eval_steps_per_second": 7.95,
      "step": 6849
    },
    {
      "epoch": 38.00498611111111,
      "grad_norm": 0.6850244998931885,
      "learning_rate": 4.4717592592592597e-05,
      "loss": 0.1275,
      "step": 7029
    },
    {
      "epoch": 38.00498611111111,
      "eval_accuracy": 0.5,
      "eval_loss": 2.5450520515441895,
      "eval_runtime": 29.372,
      "eval_samples_per_second": 11.508,
      "eval_steps_per_second": 5.754,
      "step": 7029
    },
    {
      "epoch": 39.005006944444446,
      "grad_norm": 1.1550191640853882,
      "learning_rate": 4.443827160493828e-05,
      "loss": 0.1874,
      "step": 7210
    },
    {
      "epoch": 39.005006944444446,
      "eval_accuracy": 0.45857988165680474,
      "eval_loss": 2.8338911533355713,
      "eval_runtime": 27.5638,
      "eval_samples_per_second": 12.262,
      "eval_steps_per_second": 6.131,
      "step": 7210
    },
    {
      "epoch": 40.005,
      "grad_norm": 0.09220674633979797,
      "learning_rate": 4.4160493827160497e-05,
      "loss": 0.1695,
      "step": 7390
    },
    {
      "epoch": 40.005,
      "eval_accuracy": 0.4319526627218935,
      "eval_loss": 3.031979560852051,
      "eval_runtime": 28.426,
      "eval_samples_per_second": 11.891,
      "eval_steps_per_second": 5.945,
      "step": 7390
    },
    {
      "epoch": 41.00499305555556,
      "grad_norm": 0.9716654419898987,
      "learning_rate": 4.3882716049382715e-05,
      "loss": 0.1735,
      "step": 7570
    },
    {
      "epoch": 41.00499305555556,
      "eval_accuracy": 0.4940828402366864,
      "eval_loss": 2.696147918701172,
      "eval_runtime": 24.5611,
      "eval_samples_per_second": 13.762,
      "eval_steps_per_second": 6.881,
      "step": 7570
    },
    {
      "epoch": 42.00498611111111,
      "grad_norm": 1.0446219444274902,
      "learning_rate": 4.360493827160494e-05,
      "loss": 0.1299,
      "step": 7750
    },
    {
      "epoch": 42.00498611111111,
      "eval_accuracy": 0.46745562130177515,
      "eval_loss": 2.9589343070983887,
      "eval_runtime": 27.9316,
      "eval_samples_per_second": 12.101,
      "eval_steps_per_second": 6.051,
      "step": 7750
    },
    {
      "epoch": 43.005006944444446,
      "grad_norm": 37.8647346496582,
      "learning_rate": 4.332561728395062e-05,
      "loss": 0.1399,
      "step": 7931
    },
    {
      "epoch": 43.005006944444446,
      "eval_accuracy": 0.5325443786982249,
      "eval_loss": 2.6799256801605225,
      "eval_runtime": 29.2099,
      "eval_samples_per_second": 11.571,
      "eval_steps_per_second": 5.786,
      "step": 7931
    },
    {
      "epoch": 44.005,
      "grad_norm": 8.789149284362793,
      "learning_rate": 4.304783950617284e-05,
      "loss": 0.118,
      "step": 8111
    },
    {
      "epoch": 44.005,
      "eval_accuracy": 0.5,
      "eval_loss": 2.8731420040130615,
      "eval_runtime": 26.167,
      "eval_samples_per_second": 12.917,
      "eval_steps_per_second": 6.459,
      "step": 8111
    },
    {
      "epoch": 45.00499305555556,
      "grad_norm": 10.257121086120605,
      "learning_rate": 4.277006172839506e-05,
      "loss": 0.1583,
      "step": 8291
    },
    {
      "epoch": 45.00499305555556,
      "eval_accuracy": 0.4970414201183432,
      "eval_loss": 2.875749349594116,
      "eval_runtime": 26.8437,
      "eval_samples_per_second": 12.591,
      "eval_steps_per_second": 6.296,
      "step": 8291
    },
    {
      "epoch": 46.00498611111111,
      "grad_norm": 5.601294040679932,
      "learning_rate": 4.2492283950617284e-05,
      "loss": 0.1667,
      "step": 8471
    },
    {
      "epoch": 46.00498611111111,
      "eval_accuracy": 0.4940828402366864,
      "eval_loss": 2.9293906688690186,
      "eval_runtime": 28.8864,
      "eval_samples_per_second": 11.701,
      "eval_steps_per_second": 5.85,
      "step": 8471
    },
    {
      "epoch": 47.005006944444446,
      "grad_norm": 0.18206937611103058,
      "learning_rate": 4.2212962962962965e-05,
      "loss": 0.202,
      "step": 8652
    },
    {
      "epoch": 47.005006944444446,
      "eval_accuracy": 0.46153846153846156,
      "eval_loss": 3.144300937652588,
      "eval_runtime": 29.5485,
      "eval_samples_per_second": 11.439,
      "eval_steps_per_second": 5.719,
      "step": 8652
    },
    {
      "epoch": 48.005,
      "grad_norm": 0.22736448049545288,
      "learning_rate": 4.193518518518519e-05,
      "loss": 0.1301,
      "step": 8832
    },
    {
      "epoch": 48.005,
      "eval_accuracy": 0.5207100591715976,
      "eval_loss": 2.8941469192504883,
      "eval_runtime": 29.2679,
      "eval_samples_per_second": 11.549,
      "eval_steps_per_second": 5.774,
      "step": 8832
    },
    {
      "epoch": 49.00499305555556,
      "grad_norm": 11.593667030334473,
      "learning_rate": 4.165895061728395e-05,
      "loss": 0.2298,
      "step": 9012
    },
    {
      "epoch": 49.00499305555556,
      "eval_accuracy": 0.47041420118343197,
      "eval_loss": 3.1270129680633545,
      "eval_runtime": 26.2692,
      "eval_samples_per_second": 12.867,
      "eval_steps_per_second": 6.433,
      "step": 9012
    },
    {
      "epoch": 50.00498611111111,
      "grad_norm": 9.116874694824219,
      "learning_rate": 4.138117283950618e-05,
      "loss": 0.1858,
      "step": 9192
    },
    {
      "epoch": 50.00498611111111,
      "eval_accuracy": 0.4822485207100592,
      "eval_loss": 2.973588228225708,
      "eval_runtime": 28.2275,
      "eval_samples_per_second": 11.974,
      "eval_steps_per_second": 5.987,
      "step": 9192
    },
    {
      "epoch": 50.00498611111111,
      "step": 9192,
      "total_flos": 9.171817168536207e+19,
      "train_loss": 6.774725560419864,
      "train_runtime": 9513.7238,
      "train_samples_per_second": 30.272,
      "train_steps_per_second": 3.784
    }
  ],
  "logging_steps": 500,
  "max_steps": 36000,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 20,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 20
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 9.171817168536207e+19,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}