{
  "best_metric": 0.91875,
  "best_model_checkpoint": "mvit_v1_rwf-2000/checkpoint-1140",
  "epoch": 6.125,
  "eval_steps": 500,
  "global_step": 1330,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.006578947368421052, "grad_norm": 24.263521194458008, "learning_rate": 1.986842105263158e-05, "loss": 6.6259, "step": 10 },
    { "epoch": 0.013157894736842105, "grad_norm": 23.018878936767578, "learning_rate": 1.9736842105263158e-05, "loss": 5.6792, "step": 20 },
    { "epoch": 0.019736842105263157, "grad_norm": 30.53267478942871, "learning_rate": 1.960526315789474e-05, "loss": 4.2589, "step": 30 },
    { "epoch": 0.02631578947368421, "grad_norm": 20.766124725341797, "learning_rate": 1.9473684210526318e-05, "loss": 2.8341, "step": 40 },
    { "epoch": 0.03289473684210526, "grad_norm": 17.489206314086914, "learning_rate": 1.9342105263157896e-05, "loss": 2.2023, "step": 50 },
    { "epoch": 0.039473684210526314, "grad_norm": 20.925432205200195, "learning_rate": 1.9210526315789474e-05, "loss": 1.8171, "step": 60 },
    { "epoch": 0.046052631578947366, "grad_norm": 27.107561111450195, "learning_rate": 1.9078947368421056e-05, "loss": 1.3806, "step": 70 },
    { "epoch": 0.05263157894736842, "grad_norm": 11.384747505187988, "learning_rate": 1.894736842105263e-05, "loss": 1.1143, "step": 80 },
    { "epoch": 0.05921052631578947, "grad_norm": 22.29890251159668, "learning_rate": 1.8815789473684213e-05, "loss": 0.9791, "step": 90 },
    { "epoch": 0.06578947368421052, "grad_norm": 7.384612560272217, "learning_rate": 1.868421052631579e-05, "loss": 0.6817, "step": 100 },
    { "epoch": 0.07236842105263158, "grad_norm": 20.520063400268555, "learning_rate": 1.8552631578947373e-05, "loss": 0.7152, "step": 110 },
    { "epoch": 0.07894736842105263, "grad_norm": 20.498456954956055, "learning_rate": 1.8421052631578947e-05, "loss": 0.6505, "step": 120 },
    { "epoch": 0.08552631578947369, "grad_norm": 21.175556182861328, "learning_rate": 1.828947368421053e-05, "loss": 0.574, "step": 130 },
    { "epoch": 0.09210526315789473, "grad_norm": 17.214597702026367, "learning_rate": 1.8157894736842107e-05, "loss": 0.7436, "step": 140 },
    { "epoch": 0.09868421052631579, "grad_norm": 7.9284467697143555, "learning_rate": 1.8026315789473685e-05, "loss": 0.3877, "step": 150 },
    { "epoch": 0.10526315789473684, "grad_norm": 14.26107120513916, "learning_rate": 1.7894736842105264e-05, "loss": 0.6372, "step": 160 },
    { "epoch": 0.1118421052631579, "grad_norm": 23.547595977783203, "learning_rate": 1.7763157894736845e-05, "loss": 0.6531, "step": 170 },
    { "epoch": 0.11842105263157894, "grad_norm": 11.07791805267334, "learning_rate": 1.763157894736842e-05, "loss": 0.3413, "step": 180 },
    { "epoch": 0.125, "grad_norm": 6.498636722564697, "learning_rate": 1.7500000000000002e-05, "loss": 0.5548, "step": 190 },
    { "epoch": 0.125, "eval_accuracy": 0.875, "eval_f1": 0.8737373737373737, "eval_loss": 0.40524157881736755, "eval_precision": 0.890625, "eval_runtime": 75.8397, "eval_samples_per_second": 2.11, "eval_steps_per_second": 0.264, "step": 190 },
    { "epoch": 1.006578947368421, "grad_norm": 10.57153034210205, "learning_rate": 1.736842105263158e-05, "loss": 0.5478, "step": 200 },
    { "epoch": 1.013157894736842, "grad_norm": 16.34922218322754, "learning_rate": 1.723684210526316e-05, "loss": 0.4674, "step": 210 },
    { "epoch": 1.019736842105263, "grad_norm": 14.608505249023438, "learning_rate": 1.7105263157894737e-05, "loss": 0.3183, "step": 220 },
    { "epoch": 1.0263157894736843, "grad_norm": 12.857906341552734, "learning_rate": 1.6973684210526318e-05, "loss": 0.3537, "step": 230 },
    { "epoch": 1.0328947368421053, "grad_norm": 5.8704142570495605, "learning_rate": 1.6842105263157896e-05, "loss": 0.3518, "step": 240 },
    { "epoch": 1.0394736842105263, "grad_norm": 8.11811351776123, "learning_rate": 1.6710526315789475e-05, "loss": 0.2841, "step": 250 },
    { "epoch": 1.0460526315789473, "grad_norm": 5.711803913116455, "learning_rate": 1.6578947368421053e-05, "loss": 0.2999, "step": 260 },
    { "epoch": 1.0526315789473684, "grad_norm": 18.15171241760254, "learning_rate": 1.644736842105263e-05, "loss": 0.3002, "step": 270 },
    { "epoch": 1.0592105263157894, "grad_norm": 16.901477813720703, "learning_rate": 1.6315789473684213e-05, "loss": 0.4953, "step": 280 },
    { "epoch": 1.0657894736842106, "grad_norm": 0.6202385425567627, "learning_rate": 1.618421052631579e-05, "loss": 0.3806, "step": 290 },
    { "epoch": 1.0723684210526316, "grad_norm": 10.928872108459473, "learning_rate": 1.605263157894737e-05, "loss": 0.3549, "step": 300 },
    { "epoch": 1.0789473684210527, "grad_norm": 11.599842071533203, "learning_rate": 1.5921052631578948e-05, "loss": 0.2657, "step": 310 },
    { "epoch": 1.0855263157894737, "grad_norm": 3.240905284881592, "learning_rate": 1.578947368421053e-05, "loss": 0.2715, "step": 320 },
    { "epoch": 1.0921052631578947, "grad_norm": 3.5071094036102295, "learning_rate": 1.5657894736842107e-05, "loss": 0.2921, "step": 330 },
    { "epoch": 1.0986842105263157, "grad_norm": 20.44765281677246, "learning_rate": 1.5526315789473686e-05, "loss": 0.4525, "step": 340 },
    { "epoch": 1.1052631578947367, "grad_norm": 4.01133394241333, "learning_rate": 1.5394736842105264e-05, "loss": 0.4148, "step": 350 },
    { "epoch": 1.111842105263158, "grad_norm": 6.698035717010498, "learning_rate": 1.5263157894736846e-05, "loss": 0.3606, "step": 360 },
    { "epoch": 1.118421052631579, "grad_norm": 7.709211826324463, "learning_rate": 1.5131578947368422e-05, "loss": 0.2781, "step": 370 },
    { "epoch": 1.125, "grad_norm": 20.90215301513672, "learning_rate": 1.5000000000000002e-05, "loss": 0.5137, "step": 380 },
    { "epoch": 1.125, "eval_accuracy": 0.86875, "eval_f1": 0.8681266925703521, "eval_loss": 0.3075089454650879, "eval_precision": 0.875856028029941, "eval_runtime": 73.4214, "eval_samples_per_second": 2.179, "eval_steps_per_second": 0.272, "step": 380 },
    { "epoch": 2.0065789473684212, "grad_norm": 11.481311798095703, "learning_rate": 1.486842105263158e-05, "loss": 0.0835, "step": 390 },
    { "epoch": 2.013157894736842, "grad_norm": 12.361924171447754, "learning_rate": 1.4736842105263159e-05, "loss": 0.3356, "step": 400 },
    { "epoch": 2.0197368421052633, "grad_norm": 31.301998138427734, "learning_rate": 1.4605263157894739e-05, "loss": 0.4863, "step": 410 },
    { "epoch": 2.026315789473684, "grad_norm": 18.023582458496094, "learning_rate": 1.4473684210526317e-05, "loss": 0.2546, "step": 420 },
    { "epoch": 2.0328947368421053, "grad_norm": 2.6840920448303223, "learning_rate": 1.4342105263157895e-05, "loss": 0.1389, "step": 430 },
    { "epoch": 2.039473684210526, "grad_norm": 30.538143157958984, "learning_rate": 1.4210526315789475e-05, "loss": 0.4029, "step": 440 },
    { "epoch": 2.0460526315789473, "grad_norm": 0.3170570731163025, "learning_rate": 1.4078947368421055e-05, "loss": 0.3008, "step": 450 },
    { "epoch": 2.0526315789473686, "grad_norm": 30.189603805541992, "learning_rate": 1.3947368421052631e-05, "loss": 0.1997, "step": 460 },
    { "epoch": 2.0592105263157894, "grad_norm": 15.058737754821777, "learning_rate": 1.3815789473684211e-05, "loss": 0.2271, "step": 470 },
    { "epoch": 2.0657894736842106, "grad_norm": 15.625029563903809, "learning_rate": 1.3684210526315791e-05, "loss": 0.2911, "step": 480 },
    { "epoch": 2.0723684210526314, "grad_norm": 10.246795654296875, "learning_rate": 1.3552631578947371e-05, "loss": 0.4835, "step": 490 },
    { "epoch": 2.0789473684210527, "grad_norm": 30.28328514099121, "learning_rate": 1.3421052631578948e-05, "loss": 0.2619, "step": 500 },
    { "epoch": 2.085526315789474, "grad_norm": 21.168128967285156, "learning_rate": 1.3289473684210528e-05, "loss": 0.2278, "step": 510 },
    { "epoch": 2.0921052631578947, "grad_norm": 35.779266357421875, "learning_rate": 1.3157894736842108e-05, "loss": 0.4695, "step": 520 },
    { "epoch": 2.098684210526316, "grad_norm": 34.01453399658203, "learning_rate": 1.3026315789473684e-05, "loss": 0.2589, "step": 530 },
    { "epoch": 2.1052631578947367, "grad_norm": 21.93201446533203, "learning_rate": 1.2894736842105264e-05, "loss": 0.2671, "step": 540 },
    { "epoch": 2.111842105263158, "grad_norm": 27.154544830322266, "learning_rate": 1.2763157894736844e-05, "loss": 0.2239, "step": 550 },
    { "epoch": 2.1184210526315788, "grad_norm": 18.04817771911621, "learning_rate": 1.263157894736842e-05, "loss": 0.3137, "step": 560 },
    { "epoch": 2.125, "grad_norm": 22.873342514038086, "learning_rate": 1.25e-05, "loss": 0.3765, "step": 570 },
    { "epoch": 2.125, "eval_accuracy": 0.86875, "eval_f1": 0.8678777869529316, "eval_loss": 0.3078489899635315, "eval_precision": 0.8787514042689777, "eval_runtime": 72.0845, "eval_samples_per_second": 2.22, "eval_steps_per_second": 0.277, "step": 570 },
    { "epoch": 3.0065789473684212, "grad_norm": 19.09450912475586, "learning_rate": 1.236842105263158e-05, "loss": 0.3152, "step": 580 },
    { "epoch": 3.013157894736842, "grad_norm": 1.042327642440796, "learning_rate": 1.2236842105263159e-05, "loss": 0.2481, "step": 590 },
    { "epoch": 3.0197368421052633, "grad_norm": 0.14646773040294647, "learning_rate": 1.2105263157894737e-05, "loss": 0.2742, "step": 600 },
    { "epoch": 3.026315789473684, "grad_norm": 10.439544677734375, "learning_rate": 1.1973684210526317e-05, "loss": 0.1442, "step": 610 },
    { "epoch": 3.0328947368421053, "grad_norm": 16.675586700439453, "learning_rate": 1.1842105263157895e-05, "loss": 0.2714, "step": 620 },
    { "epoch": 3.039473684210526, "grad_norm": 25.11818504333496, "learning_rate": 1.1710526315789475e-05, "loss": 0.2881, "step": 630 },
    { "epoch": 3.0460526315789473, "grad_norm": 2.4255502223968506, "learning_rate": 1.1578947368421053e-05, "loss": 0.2611, "step": 640 },
    { "epoch": 3.0526315789473686, "grad_norm": 40.66857147216797, "learning_rate": 1.1447368421052632e-05, "loss": 0.1137, "step": 650 },
    { "epoch": 3.0592105263157894, "grad_norm": 0.9761056900024414, "learning_rate": 1.1315789473684212e-05, "loss": 0.0536, "step": 660 },
    { "epoch": 3.0657894736842106, "grad_norm": 11.886970520019531, "learning_rate": 1.1184210526315792e-05, "loss": 0.4967, "step": 670 },
    { "epoch": 3.0723684210526314, "grad_norm": 38.012176513671875, "learning_rate": 1.105263157894737e-05, "loss": 0.472, "step": 680 },
    { "epoch": 3.0789473684210527, "grad_norm": 20.898456573486328, "learning_rate": 1.0921052631578948e-05, "loss": 0.2362, "step": 690 },
    { "epoch": 3.085526315789474, "grad_norm": 15.948339462280273, "learning_rate": 1.0789473684210528e-05, "loss": 0.4394, "step": 700 },
    { "epoch": 3.0921052631578947, "grad_norm": 2.8146371841430664, "learning_rate": 1.0657894736842108e-05, "loss": 0.0875, "step": 710 },
    { "epoch": 3.098684210526316, "grad_norm": 3.5820298194885254, "learning_rate": 1.0526315789473684e-05, "loss": 0.2212, "step": 720 },
    { "epoch": 3.1052631578947367, "grad_norm": 2.0417447090148926, "learning_rate": 1.0394736842105264e-05, "loss": 0.405, "step": 730 },
    { "epoch": 3.111842105263158, "grad_norm": 25.29057502746582, "learning_rate": 1.0263157894736844e-05, "loss": 0.2517, "step": 740 },
    { "epoch": 3.1184210526315788, "grad_norm": 12.358236312866211, "learning_rate": 1.0131578947368421e-05, "loss": 0.2932, "step": 750 },
    { "epoch": 3.125, "grad_norm": 42.99756622314453, "learning_rate": 1e-05, "loss": 0.3435, "step": 760 },
    { "epoch": 3.125, "eval_accuracy": 0.9125, "eval_f1": 0.912445278298937, "eval_loss": 0.26522132754325867, "eval_precision": 0.9135338345864662, "eval_runtime": 68.3847, "eval_samples_per_second": 2.34, "eval_steps_per_second": 0.292, "step": 760 },
    { "epoch": 4.006578947368421, "grad_norm": 6.114632606506348, "learning_rate": 9.868421052631579e-06, "loss": 0.2472, "step": 770 },
    { "epoch": 4.0131578947368425, "grad_norm": 7.220970630645752, "learning_rate": 9.736842105263159e-06, "loss": 0.126, "step": 780 },
    { "epoch": 4.019736842105263, "grad_norm": 12.066508293151855, "learning_rate": 9.605263157894737e-06, "loss": 0.2556, "step": 790 },
    { "epoch": 4.026315789473684, "grad_norm": 8.093453407287598, "learning_rate": 9.473684210526315e-06, "loss": 0.3412, "step": 800 },
    { "epoch": 4.032894736842105, "grad_norm": 0.10468351095914841, "learning_rate": 9.342105263157895e-06, "loss": 0.2042, "step": 810 },
    { "epoch": 4.0394736842105265, "grad_norm": 1.4523197412490845, "learning_rate": 9.210526315789474e-06, "loss": 0.151, "step": 820 },
    { "epoch": 4.046052631578948, "grad_norm": 30.899551391601562, "learning_rate": 9.078947368421054e-06, "loss": 0.3615, "step": 830 },
    { "epoch": 4.052631578947368, "grad_norm": 12.887349128723145, "learning_rate": 8.947368421052632e-06, "loss": 0.2108, "step": 840 },
    { "epoch": 4.059210526315789, "grad_norm": 39.25297546386719, "learning_rate": 8.81578947368421e-06, "loss": 0.3158, "step": 850 },
    { "epoch": 4.065789473684211, "grad_norm": 0.8256903290748596, "learning_rate": 8.68421052631579e-06, "loss": 0.1581, "step": 860 },
    { "epoch": 4.072368421052632, "grad_norm": 34.28030014038086, "learning_rate": 8.552631578947368e-06, "loss": 0.2332, "step": 870 },
    { "epoch": 4.078947368421052, "grad_norm": 2.9141697883605957, "learning_rate": 8.421052631578948e-06, "loss": 0.1121, "step": 880 },
    { "epoch": 4.0855263157894735, "grad_norm": 21.433950424194336, "learning_rate": 8.289473684210526e-06, "loss": 0.1999, "step": 890 },
    { "epoch": 4.092105263157895, "grad_norm": 23.863176345825195, "learning_rate": 8.157894736842106e-06, "loss": 0.2711, "step": 900 },
    { "epoch": 4.098684210526316, "grad_norm": 1.389123797416687, "learning_rate": 8.026315789473685e-06, "loss": 0.0866, "step": 910 },
    { "epoch": 4.105263157894737, "grad_norm": 0.18721434473991394, "learning_rate": 7.894736842105265e-06, "loss": 0.1126, "step": 920 },
    { "epoch": 4.1118421052631575, "grad_norm": 0.1629679799079895, "learning_rate": 7.763157894736843e-06, "loss": 0.1087, "step": 930 },
    { "epoch": 4.118421052631579, "grad_norm": 28.87115478515625, "learning_rate": 7.631578947368423e-06, "loss": 0.2668, "step": 940 },
    { "epoch": 4.125, "grad_norm": 7.828221797943115, "learning_rate": 7.500000000000001e-06, "loss": 0.0875, "step": 950 },
    { "epoch": 4.125, "eval_accuracy": 0.9125, "eval_f1": 0.9123767798466593, "eval_loss": 0.31034404039382935, "eval_precision": 0.9148334380892521, "eval_runtime": 64.4753, "eval_samples_per_second": 2.482, "eval_steps_per_second": 0.31, "step": 950 },
    { "epoch": 5.006578947368421, "grad_norm": 33.18913269042969, "learning_rate": 7.368421052631579e-06, "loss": 0.1835, "step": 960 },
    { "epoch": 5.0131578947368425, "grad_norm": 29.68429183959961, "learning_rate": 7.236842105263158e-06, "loss": 0.2858, "step": 970 },
    { "epoch": 5.019736842105263, "grad_norm": 9.609414100646973, "learning_rate": 7.1052631578947375e-06, "loss": 0.1843, "step": 980 },
    { "epoch": 5.026315789473684, "grad_norm": 28.97188949584961, "learning_rate": 6.973684210526316e-06, "loss": 0.1405, "step": 990 },
    { "epoch": 5.032894736842105, "grad_norm": 21.269493103027344, "learning_rate": 6.842105263157896e-06, "loss": 0.1006, "step": 1000 },
    { "epoch": 5.0394736842105265, "grad_norm": 12.934701919555664, "learning_rate": 6.710526315789474e-06, "loss": 0.0653, "step": 1010 },
    { "epoch": 5.046052631578948, "grad_norm": 22.229690551757812, "learning_rate": 6.578947368421054e-06, "loss": 0.2763, "step": 1020 },
    { "epoch": 5.052631578947368, "grad_norm": 0.046190641820430756, "learning_rate": 6.447368421052632e-06, "loss": 0.1705, "step": 1030 },
    { "epoch": 5.059210526315789, "grad_norm": 11.431427001953125, "learning_rate": 6.31578947368421e-06, "loss": 0.3233, "step": 1040 },
    { "epoch": 5.065789473684211, "grad_norm": 34.42988204956055, "learning_rate": 6.18421052631579e-06, "loss": 0.2078, "step": 1050 },
    { "epoch": 5.072368421052632, "grad_norm": 0.727376401424408, "learning_rate": 6.0526315789473685e-06, "loss": 0.1895, "step": 1060 },
    { "epoch": 5.078947368421052, "grad_norm": 0.2818295955657959, "learning_rate": 5.921052631578948e-06, "loss": 0.2082, "step": 1070 },
    { "epoch": 5.0855263157894735, "grad_norm": 20.29454803466797, "learning_rate": 5.789473684210527e-06, "loss": 0.3015, "step": 1080 },
    { "epoch": 5.092105263157895, "grad_norm": 0.514033317565918, "learning_rate": 5.657894736842106e-06, "loss": 0.3415, "step": 1090 },
    { "epoch": 5.098684210526316, "grad_norm": 0.6979091167449951, "learning_rate": 5.526315789473685e-06, "loss": 0.2626, "step": 1100 },
    { "epoch": 5.105263157894737, "grad_norm": 12.886882781982422, "learning_rate": 5.394736842105264e-06, "loss": 0.289, "step": 1110 },
    { "epoch": 5.1118421052631575, "grad_norm": 45.84357833862305, "learning_rate": 5.263157894736842e-06, "loss": 0.1984, "step": 1120 },
    { "epoch": 5.118421052631579, "grad_norm": 0.4069100618362427, "learning_rate": 5.131578947368422e-06, "loss": 0.1014, "step": 1130 },
    { "epoch": 5.125, "grad_norm": 45.21007537841797, "learning_rate": 5e-06, "loss": 0.3563, "step": 1140 },
    { "epoch": 5.125, "eval_accuracy": 0.91875, "eval_f1": 0.9185941841806583, "eval_loss": 0.3360721468925476, "eval_precision": 0.9219807904267044, "eval_runtime": 72.7002, "eval_samples_per_second": 2.201, "eval_steps_per_second": 0.275, "step": 1140 },
    { "epoch": 6.006578947368421, "grad_norm": 15.24558162689209, "learning_rate": 4.8684210526315795e-06, "loss": 0.2636, "step": 1150 },
    { "epoch": 6.0131578947368425, "grad_norm": 12.842823028564453, "learning_rate": 4.736842105263158e-06, "loss": 0.051, "step": 1160 },
    { "epoch": 6.019736842105263, "grad_norm": 0.09382585436105728, "learning_rate": 4.605263157894737e-06, "loss": 0.2079, "step": 1170 },
    { "epoch": 6.026315789473684, "grad_norm": 1.5985610485076904, "learning_rate": 4.473684210526316e-06, "loss": 0.2978, "step": 1180 },
    { "epoch": 6.032894736842105, "grad_norm": 35.310848236083984, "learning_rate": 4.342105263157895e-06, "loss": 0.2338, "step": 1190 },
    { "epoch": 6.0394736842105265, "grad_norm": 34.83409881591797, "learning_rate": 4.210526315789474e-06, "loss": 0.1555, "step": 1200 },
    { "epoch": 6.046052631578948, "grad_norm": 31.529682159423828, "learning_rate": 4.078947368421053e-06, "loss": 0.28, "step": 1210 },
    { "epoch": 6.052631578947368, "grad_norm": 14.309679985046387, "learning_rate": 3.947368421052632e-06, "loss": 0.212, "step": 1220 },
    { "epoch": 6.059210526315789, "grad_norm": 8.03559684753418, "learning_rate": 3.815789473684211e-06, "loss": 0.1348, "step": 1230 },
    { "epoch": 6.065789473684211, "grad_norm": 7.191884517669678, "learning_rate": 3.6842105263157896e-06, "loss": 0.3452, "step": 1240 },
    { "epoch": 6.072368421052632, "grad_norm": 1.6156086921691895, "learning_rate": 3.5526315789473687e-06, "loss": 0.1401, "step": 1250 },
    { "epoch": 6.078947368421052, "grad_norm": 26.94179344177246, "learning_rate": 3.421052631578948e-06, "loss": 0.3836, "step": 1260 },
    { "epoch": 6.0855263157894735, "grad_norm": 0.12195608764886856, "learning_rate": 3.289473684210527e-06, "loss": 0.0756, "step": 1270 },
    { "epoch": 6.092105263157895, "grad_norm": 0.1377830058336258, "learning_rate": 3.157894736842105e-06, "loss": 0.114, "step": 1280 },
    { "epoch": 6.098684210526316, "grad_norm": 0.08636432886123657, "learning_rate": 3.0263157894736843e-06, "loss": 0.2905, "step": 1290 },
    { "epoch": 6.105263157894737, "grad_norm": 1.403969407081604, "learning_rate": 2.8947368421052634e-06, "loss": 0.0935, "step": 1300 },
    { "epoch": 6.1118421052631575, "grad_norm": 16.330184936523438, "learning_rate": 2.7631578947368424e-06, "loss": 0.0748, "step": 1310 },
    { "epoch": 6.118421052631579, "grad_norm": 0.6670919060707092, "learning_rate": 2.631578947368421e-06, "loss": 0.1215, "step": 1320 },
    { "epoch": 6.125, "grad_norm": 0.2528199851512909, "learning_rate": 2.5e-06, "loss": 0.2176, "step": 1330 },
    { "epoch": 6.125, "eval_accuracy": 0.91875, "eval_f1": 0.9186705767350929, "eval_loss": 0.3505542278289795, "eval_precision": 0.920392156862745, "eval_runtime": 75.0919, "eval_samples_per_second": 2.131, "eval_steps_per_second": 0.266, "step": 1330 },
    { "epoch": 6.125, "step": 1330, "total_flos": 0.0, "train_loss": 0.469042217059243, "train_runtime": 6646.0028, "train_samples_per_second": 1.83, "train_steps_per_second": 0.229 },
    { "epoch": 6.125, "eval_accuracy": 0.89375, "eval_f1": 0.8937299083107899, "eval_loss": 0.4446839988231659, "eval_precision": 0.8940479987990918, "eval_runtime": 366.1036, "eval_samples_per_second": 2.185, "eval_steps_per_second": 0.273, "step": 1330 }
  ],
  "logging_steps": 10,
  "max_steps": 1520,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 9223372036854775807,
  "save_steps": 500,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 3,
        "early_stopping_threshold": 0.01
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}