| { | |
| "best_metric": 0.78125, | |
| "best_model_checkpoint": "Swin-dmae-DA5-N-Colab\\checkpoint-1361", | |
| "epoch": 116.36363636363636, | |
| "eval_steps": 500, | |
| "global_step": 2880, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.4, | |
| "learning_rate": 1.3888888888888892e-06, | |
| "loss": 6.7613, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 2.7777777777777783e-06, | |
| "loss": 6.8626, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_accuracy": 0.125, | |
| "eval_loss": 7.802581787109375, | |
| "eval_runtime": 0.4701, | |
| "eval_samples_per_second": 68.069, | |
| "eval_steps_per_second": 4.254, | |
| "step": 24 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 6.8425, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 6.6392, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "eval_accuracy": 0.125, | |
| "eval_loss": 7.5823211669921875, | |
| "eval_runtime": 0.4001, | |
| "eval_samples_per_second": 79.982, | |
| "eval_steps_per_second": 4.999, | |
| "step": 49 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "learning_rate": 6.944444444444445e-06, | |
| "loss": 6.5894, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 6.3711, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 9.722222222222223e-06, | |
| "loss": 6.3514, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_accuracy": 0.125, | |
| "eval_loss": 6.665298938751221, | |
| "eval_runtime": 0.4046, | |
| "eval_samples_per_second": 79.091, | |
| "eval_steps_per_second": 4.943, | |
| "step": 74 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 1.1111111111111113e-05, | |
| "loss": 5.8289, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 1.25e-05, | |
| "loss": 5.3109, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.125, | |
| "eval_loss": 5.279791355133057, | |
| "eval_runtime": 0.4241, | |
| "eval_samples_per_second": 75.455, | |
| "eval_steps_per_second": 4.716, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 4.04, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 4.9844, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "learning_rate": 1.5277777777777777e-05, | |
| "loss": 4.5015, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 4.85, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 3.7476, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "eval_accuracy": 0.125, | |
| "eval_loss": 3.618558168411255, | |
| "eval_runtime": 0.4101, | |
| "eval_samples_per_second": 78.031, | |
| "eval_steps_per_second": 4.877, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 5.25, | |
| "learning_rate": 1.8055555555555558e-05, | |
| "loss": 3.1884, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 5.66, | |
| "learning_rate": 1.9444444444444445e-05, | |
| "loss": 2.7138, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "eval_accuracy": 0.125, | |
| "eval_loss": 1.937683343887329, | |
| "eval_runtime": 0.4036, | |
| "eval_samples_per_second": 79.286, | |
| "eval_steps_per_second": 4.955, | |
| "step": 148 | |
| }, | |
| { | |
| "epoch": 6.06, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": 2.1007, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 6.46, | |
| "learning_rate": 2.2222222222222227e-05, | |
| "loss": 1.7134, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 6.87, | |
| "learning_rate": 2.3611111111111114e-05, | |
| "loss": 1.4116, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_accuracy": 0.125, | |
| "eval_loss": 1.4680219888687134, | |
| "eval_runtime": 0.3951, | |
| "eval_samples_per_second": 80.993, | |
| "eval_steps_per_second": 5.062, | |
| "step": 173 | |
| }, | |
| { | |
| "epoch": 7.27, | |
| "learning_rate": 2.5e-05, | |
| "loss": 1.4216, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 7.68, | |
| "learning_rate": 2.6388888888888892e-05, | |
| "loss": 1.3932, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5, | |
| "eval_loss": 1.3818670511245728, | |
| "eval_runtime": 0.4901, | |
| "eval_samples_per_second": 65.291, | |
| "eval_steps_per_second": 4.081, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 8.08, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 1.3888, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 2.9166666666666666e-05, | |
| "loss": 1.3077, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "learning_rate": 3.0555555555555554e-05, | |
| "loss": 1.2566, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "eval_accuracy": 0.15625, | |
| "eval_loss": 1.591198205947876, | |
| "eval_runtime": 0.4171, | |
| "eval_samples_per_second": 76.721, | |
| "eval_steps_per_second": 4.795, | |
| "step": 222 | |
| }, | |
| { | |
| "epoch": 9.29, | |
| "learning_rate": 3.194444444444445e-05, | |
| "loss": 1.1679, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 9.7, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 1.1332, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 9.98, | |
| "eval_accuracy": 0.4375, | |
| "eval_loss": 1.3336095809936523, | |
| "eval_runtime": 0.4176, | |
| "eval_samples_per_second": 76.628, | |
| "eval_steps_per_second": 4.789, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 10.1, | |
| "learning_rate": 3.472222222222223e-05, | |
| "loss": 1.0699, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 10.51, | |
| "learning_rate": 3.6111111111111116e-05, | |
| "loss": 0.9928, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 10.91, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.9511, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "eval_accuracy": 0.28125, | |
| "eval_loss": 1.1492191553115845, | |
| "eval_runtime": 0.4006, | |
| "eval_samples_per_second": 79.881, | |
| "eval_steps_per_second": 4.993, | |
| "step": 272 | |
| }, | |
| { | |
| "epoch": 11.31, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.9286, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 11.72, | |
| "learning_rate": 3.996913580246914e-05, | |
| "loss": 0.8905, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.53125, | |
| "eval_loss": 1.0571255683898926, | |
| "eval_runtime": 0.4566, | |
| "eval_samples_per_second": 70.082, | |
| "eval_steps_per_second": 4.38, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 12.12, | |
| "learning_rate": 3.981481481481482e-05, | |
| "loss": 0.8031, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 12.53, | |
| "learning_rate": 3.9660493827160496e-05, | |
| "loss": 0.8253, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 12.93, | |
| "learning_rate": 3.950617283950617e-05, | |
| "loss": 0.8317, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 12.97, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 0.8473750948905945, | |
| "eval_runtime": 0.4201, | |
| "eval_samples_per_second": 76.173, | |
| "eval_steps_per_second": 4.761, | |
| "step": 321 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "learning_rate": 3.935185185185186e-05, | |
| "loss": 0.7445, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 13.74, | |
| "learning_rate": 3.9197530864197535e-05, | |
| "loss": 0.6611, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 13.98, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 0.7519997954368591, | |
| "eval_runtime": 0.4141, | |
| "eval_samples_per_second": 77.277, | |
| "eval_steps_per_second": 4.83, | |
| "step": 346 | |
| }, | |
| { | |
| "epoch": 14.14, | |
| "learning_rate": 3.904320987654321e-05, | |
| "loss": 0.6514, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 14.55, | |
| "learning_rate": 3.888888888888889e-05, | |
| "loss": 0.6022, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "learning_rate": 3.8734567901234575e-05, | |
| "loss": 0.5683, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 0.6655727624893188, | |
| "eval_runtime": 0.4031, | |
| "eval_samples_per_second": 79.386, | |
| "eval_steps_per_second": 4.962, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 15.35, | |
| "learning_rate": 3.8580246913580246e-05, | |
| "loss": 0.5743, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 15.76, | |
| "learning_rate": 3.842592592592593e-05, | |
| "loss": 0.569, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.53125, | |
| "eval_loss": 0.8109104037284851, | |
| "eval_runtime": 0.4166, | |
| "eval_samples_per_second": 76.812, | |
| "eval_steps_per_second": 4.801, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 16.16, | |
| "learning_rate": 3.827160493827161e-05, | |
| "loss": 0.4743, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 16.57, | |
| "learning_rate": 3.8117283950617286e-05, | |
| "loss": 0.502, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 16.97, | |
| "learning_rate": 3.7962962962962964e-05, | |
| "loss": 0.4702, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 16.97, | |
| "eval_accuracy": 0.625, | |
| "eval_loss": 0.703583836555481, | |
| "eval_runtime": 0.4076, | |
| "eval_samples_per_second": 78.509, | |
| "eval_steps_per_second": 4.907, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 17.37, | |
| "learning_rate": 3.780864197530865e-05, | |
| "loss": 0.4441, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 17.78, | |
| "learning_rate": 3.7654320987654326e-05, | |
| "loss": 0.4244, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 17.98, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 0.8169162273406982, | |
| "eval_runtime": 0.4216, | |
| "eval_samples_per_second": 75.9, | |
| "eval_steps_per_second": 4.744, | |
| "step": 445 | |
| }, | |
| { | |
| "epoch": 18.18, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.4277, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 18.59, | |
| "learning_rate": 3.734567901234568e-05, | |
| "loss": 0.3584, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "learning_rate": 3.719135802469136e-05, | |
| "loss": 0.3483, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 0.7075902223587036, | |
| "eval_runtime": 0.4306, | |
| "eval_samples_per_second": 74.312, | |
| "eval_steps_per_second": 4.645, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 19.39, | |
| "learning_rate": 3.703703703703704e-05, | |
| "loss": 0.3005, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 19.8, | |
| "learning_rate": 3.688271604938272e-05, | |
| "loss": 0.3853, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 0.8644444942474365, | |
| "eval_runtime": 0.4716, | |
| "eval_samples_per_second": 67.852, | |
| "eval_steps_per_second": 4.241, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 20.2, | |
| "learning_rate": 3.67283950617284e-05, | |
| "loss": 0.307, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 20.61, | |
| "learning_rate": 3.6574074074074076e-05, | |
| "loss": 0.3038, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 20.97, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 0.86528080701828, | |
| "eval_runtime": 0.4181, | |
| "eval_samples_per_second": 76.538, | |
| "eval_steps_per_second": 4.784, | |
| "step": 519 | |
| }, | |
| { | |
| "epoch": 21.01, | |
| "learning_rate": 3.641975308641976e-05, | |
| "loss": 0.3084, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 21.41, | |
| "learning_rate": 3.626543209876543e-05, | |
| "loss": 0.2967, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 21.82, | |
| "learning_rate": 3.6111111111111116e-05, | |
| "loss": 0.2885, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 21.98, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.0443750619888306, | |
| "eval_runtime": 0.4421, | |
| "eval_samples_per_second": 72.382, | |
| "eval_steps_per_second": 4.524, | |
| "step": 544 | |
| }, | |
| { | |
| "epoch": 22.22, | |
| "learning_rate": 3.5956790123456794e-05, | |
| "loss": 0.2644, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 22.63, | |
| "learning_rate": 3.580246913580247e-05, | |
| "loss": 0.2014, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "eval_accuracy": 0.59375, | |
| "eval_loss": 1.06843900680542, | |
| "eval_runtime": 0.3991, | |
| "eval_samples_per_second": 80.182, | |
| "eval_steps_per_second": 5.011, | |
| "step": 569 | |
| }, | |
| { | |
| "epoch": 23.03, | |
| "learning_rate": 3.564814814814815e-05, | |
| "loss": 0.309, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 23.43, | |
| "learning_rate": 3.5493827160493834e-05, | |
| "loss": 0.2515, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 23.84, | |
| "learning_rate": 3.533950617283951e-05, | |
| "loss": 0.2764, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 1.1421856880187988, | |
| "eval_runtime": 0.4621, | |
| "eval_samples_per_second": 69.248, | |
| "eval_steps_per_second": 4.328, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 24.24, | |
| "learning_rate": 3.518518518518519e-05, | |
| "loss": 0.2519, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 24.65, | |
| "learning_rate": 3.503086419753087e-05, | |
| "loss": 0.2493, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 24.97, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.102545976638794, | |
| "eval_runtime": 0.4256, | |
| "eval_samples_per_second": 75.187, | |
| "eval_steps_per_second": 4.699, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 25.05, | |
| "learning_rate": 3.4876543209876545e-05, | |
| "loss": 0.2863, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 25.45, | |
| "learning_rate": 3.472222222222223e-05, | |
| "loss": 0.2352, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 25.86, | |
| "learning_rate": 3.45679012345679e-05, | |
| "loss": 0.2754, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 25.98, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.0370571613311768, | |
| "eval_runtime": 0.4276, | |
| "eval_samples_per_second": 74.835, | |
| "eval_steps_per_second": 4.677, | |
| "step": 643 | |
| }, | |
| { | |
| "epoch": 26.26, | |
| "learning_rate": 3.4413580246913584e-05, | |
| "loss": 0.2549, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 26.67, | |
| "learning_rate": 3.425925925925926e-05, | |
| "loss": 0.1793, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 26.99, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 1.1623690128326416, | |
| "eval_runtime": 0.4181, | |
| "eval_samples_per_second": 76.534, | |
| "eval_steps_per_second": 4.783, | |
| "step": 668 | |
| }, | |
| { | |
| "epoch": 27.07, | |
| "learning_rate": 3.410493827160494e-05, | |
| "loss": 0.2305, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 27.47, | |
| "learning_rate": 3.395061728395062e-05, | |
| "loss": 0.1836, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 27.88, | |
| "learning_rate": 3.37962962962963e-05, | |
| "loss": 0.1971, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.3177158832550049, | |
| "eval_runtime": 0.3941, | |
| "eval_samples_per_second": 81.2, | |
| "eval_steps_per_second": 5.075, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 28.28, | |
| "learning_rate": 3.364197530864198e-05, | |
| "loss": 0.1514, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 28.69, | |
| "learning_rate": 3.348765432098766e-05, | |
| "loss": 0.1881, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 28.97, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.2813467979431152, | |
| "eval_runtime": 0.3961, | |
| "eval_samples_per_second": 80.79, | |
| "eval_steps_per_second": 5.049, | |
| "step": 717 | |
| }, | |
| { | |
| "epoch": 29.09, | |
| "learning_rate": 3.3333333333333335e-05, | |
| "loss": 0.1854, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 29.49, | |
| "learning_rate": 3.317901234567901e-05, | |
| "loss": 0.2045, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 29.9, | |
| "learning_rate": 3.30246913580247e-05, | |
| "loss": 0.167, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 29.98, | |
| "eval_accuracy": 0.625, | |
| "eval_loss": 1.5564466714859009, | |
| "eval_runtime": 0.4011, | |
| "eval_samples_per_second": 79.783, | |
| "eval_steps_per_second": 4.986, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 30.3, | |
| "learning_rate": 3.2870370370370375e-05, | |
| "loss": 0.1761, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 30.71, | |
| "learning_rate": 3.271604938271605e-05, | |
| "loss": 0.1872, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 30.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.3761742115020752, | |
| "eval_runtime": 0.4136, | |
| "eval_samples_per_second": 77.369, | |
| "eval_steps_per_second": 4.836, | |
| "step": 767 | |
| }, | |
| { | |
| "epoch": 31.11, | |
| "learning_rate": 3.256172839506173e-05, | |
| "loss": 0.1697, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 31.52, | |
| "learning_rate": 3.2407407407407415e-05, | |
| "loss": 0.1806, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 31.92, | |
| "learning_rate": 3.2253086419753086e-05, | |
| "loss": 0.1374, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.625, | |
| "eval_loss": 1.4406580924987793, | |
| "eval_runtime": 0.4051, | |
| "eval_samples_per_second": 78.994, | |
| "eval_steps_per_second": 4.937, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 32.32, | |
| "learning_rate": 3.209876543209877e-05, | |
| "loss": 0.1848, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 32.73, | |
| "learning_rate": 3.194444444444445e-05, | |
| "loss": 0.1841, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 32.97, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.4038010835647583, | |
| "eval_runtime": 0.4176, | |
| "eval_samples_per_second": 76.627, | |
| "eval_steps_per_second": 4.789, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 33.13, | |
| "learning_rate": 3.1790123456790125e-05, | |
| "loss": 0.1365, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 33.54, | |
| "learning_rate": 3.16358024691358e-05, | |
| "loss": 0.1309, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 33.94, | |
| "learning_rate": 3.148148148148149e-05, | |
| "loss": 0.167, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 33.98, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.3768980503082275, | |
| "eval_runtime": 0.4141, | |
| "eval_samples_per_second": 77.277, | |
| "eval_steps_per_second": 4.83, | |
| "step": 841 | |
| }, | |
| { | |
| "epoch": 34.34, | |
| "learning_rate": 3.1327160493827165e-05, | |
| "loss": 0.1689, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 34.75, | |
| "learning_rate": 3.117283950617284e-05, | |
| "loss": 0.1614, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 34.99, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 1.5350613594055176, | |
| "eval_runtime": 0.4051, | |
| "eval_samples_per_second": 78.994, | |
| "eval_steps_per_second": 4.937, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 35.15, | |
| "learning_rate": 3.101851851851852e-05, | |
| "loss": 0.1439, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 35.56, | |
| "learning_rate": 3.08641975308642e-05, | |
| "loss": 0.1706, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 35.96, | |
| "learning_rate": 3.070987654320988e-05, | |
| "loss": 0.1835, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.4466042518615723, | |
| "eval_runtime": 0.4396, | |
| "eval_samples_per_second": 72.793, | |
| "eval_steps_per_second": 4.55, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 36.36, | |
| "learning_rate": 3.0555555555555554e-05, | |
| "loss": 0.1558, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 36.77, | |
| "learning_rate": 3.0401234567901238e-05, | |
| "loss": 0.1917, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 36.97, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.349318265914917, | |
| "eval_runtime": 0.4031, | |
| "eval_samples_per_second": 79.387, | |
| "eval_steps_per_second": 4.962, | |
| "step": 915 | |
| }, | |
| { | |
| "epoch": 37.17, | |
| "learning_rate": 3.0246913580246916e-05, | |
| "loss": 0.1761, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 37.58, | |
| "learning_rate": 3.0092592592592593e-05, | |
| "loss": 0.1861, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 37.98, | |
| "learning_rate": 2.9938271604938275e-05, | |
| "loss": 0.1171, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 37.98, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.4756131172180176, | |
| "eval_runtime": 0.4051, | |
| "eval_samples_per_second": 78.994, | |
| "eval_steps_per_second": 4.937, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 38.38, | |
| "learning_rate": 2.9783950617283956e-05, | |
| "loss": 0.1234, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 38.79, | |
| "learning_rate": 2.962962962962963e-05, | |
| "loss": 0.163, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 38.99, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.4373202323913574, | |
| "eval_runtime": 0.4221, | |
| "eval_samples_per_second": 75.809, | |
| "eval_steps_per_second": 4.738, | |
| "step": 965 | |
| }, | |
| { | |
| "epoch": 39.19, | |
| "learning_rate": 2.947530864197531e-05, | |
| "loss": 0.1452, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 39.6, | |
| "learning_rate": 2.9320987654320992e-05, | |
| "loss": 0.088, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 2.9166666666666666e-05, | |
| "loss": 0.1688, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.4082034826278687, | |
| "eval_runtime": 0.4786, | |
| "eval_samples_per_second": 66.859, | |
| "eval_steps_per_second": 4.179, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 40.4, | |
| "learning_rate": 2.9012345679012347e-05, | |
| "loss": 0.1359, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 40.81, | |
| "learning_rate": 2.885802469135803e-05, | |
| "loss": 0.1318, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 40.97, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.5907220840454102, | |
| "eval_runtime": 0.4061, | |
| "eval_samples_per_second": 78.797, | |
| "eval_steps_per_second": 4.925, | |
| "step": 1014 | |
| }, | |
| { | |
| "epoch": 41.21, | |
| "learning_rate": 2.8703703703703706e-05, | |
| "loss": 0.1547, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 41.62, | |
| "learning_rate": 2.8549382716049384e-05, | |
| "loss": 0.1107, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 41.98, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.7461936473846436, | |
| "eval_runtime": 0.4081, | |
| "eval_samples_per_second": 78.414, | |
| "eval_steps_per_second": 4.901, | |
| "step": 1039 | |
| }, | |
| { | |
| "epoch": 42.02, | |
| "learning_rate": 2.8395061728395065e-05, | |
| "loss": 0.1144, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 42.42, | |
| "learning_rate": 2.8240740740740743e-05, | |
| "loss": 0.1239, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 42.83, | |
| "learning_rate": 2.8086419753086424e-05, | |
| "loss": 0.1064, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 42.99, | |
| "eval_accuracy": 0.5625, | |
| "eval_loss": 1.8704116344451904, | |
| "eval_runtime": 0.4081, | |
| "eval_samples_per_second": 78.414, | |
| "eval_steps_per_second": 4.901, | |
| "step": 1064 | |
| }, | |
| { | |
| "epoch": 43.23, | |
| "learning_rate": 2.79320987654321e-05, | |
| "loss": 0.0978, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 43.64, | |
| "learning_rate": 2.777777777777778e-05, | |
| "loss": 0.1423, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.5625, | |
| "eval_loss": 1.715477705001831, | |
| "eval_runtime": 0.4051, | |
| "eval_samples_per_second": 78.994, | |
| "eval_steps_per_second": 4.937, | |
| "step": 1089 | |
| }, | |
| { | |
| "epoch": 44.04, | |
| "learning_rate": 2.762345679012346e-05, | |
| "loss": 0.1031, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 44.44, | |
| "learning_rate": 2.746913580246914e-05, | |
| "loss": 0.1473, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 44.85, | |
| "learning_rate": 2.7314814814814816e-05, | |
| "loss": 0.082, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 44.97, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.5552070140838623, | |
| "eval_runtime": 0.3991, | |
| "eval_samples_per_second": 80.182, | |
| "eval_steps_per_second": 5.011, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 45.25, | |
| "learning_rate": 2.7160493827160497e-05, | |
| "loss": 0.1206, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 45.66, | |
| "learning_rate": 2.7006172839506174e-05, | |
| "loss": 0.1012, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 45.98, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.4189940690994263, | |
| "eval_runtime": 0.4136, | |
| "eval_samples_per_second": 77.37, | |
| "eval_steps_per_second": 4.836, | |
| "step": 1138 | |
| }, | |
| { | |
| "epoch": 46.06, | |
| "learning_rate": 2.6851851851851852e-05, | |
| "loss": 0.1124, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 46.46, | |
| "learning_rate": 2.6697530864197533e-05, | |
| "loss": 0.114, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 46.87, | |
| "learning_rate": 2.654320987654321e-05, | |
| "loss": 0.1001, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 46.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.6800808906555176, | |
| "eval_runtime": 0.4021, | |
| "eval_samples_per_second": 79.584, | |
| "eval_steps_per_second": 4.974, | |
| "step": 1163 | |
| }, | |
| { | |
| "epoch": 47.27, | |
| "learning_rate": 2.6388888888888892e-05, | |
| "loss": 0.1309, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 47.68, | |
| "learning_rate": 2.623456790123457e-05, | |
| "loss": 0.1037, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.686426043510437, | |
| "eval_runtime": 0.4541, | |
| "eval_samples_per_second": 70.469, | |
| "eval_steps_per_second": 4.404, | |
| "step": 1188 | |
| }, | |
| { | |
| "epoch": 48.08, | |
| "learning_rate": 2.6080246913580247e-05, | |
| "loss": 0.0909, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 48.48, | |
| "learning_rate": 2.5925925925925928e-05, | |
| "loss": 0.1296, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 48.89, | |
| "learning_rate": 2.577160493827161e-05, | |
| "loss": 0.1089, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 48.97, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.5225051641464233, | |
| "eval_runtime": 0.4051, | |
| "eval_samples_per_second": 78.995, | |
| "eval_steps_per_second": 4.937, | |
| "step": 1212 | |
| }, | |
| { | |
| "epoch": 49.29, | |
| "learning_rate": 2.5617283950617284e-05, | |
| "loss": 0.1184, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 49.7, | |
| "learning_rate": 2.5462962962962965e-05, | |
| "loss": 0.0835, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 49.98, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.979781150817871, | |
| "eval_runtime": 0.4161, | |
| "eval_samples_per_second": 76.903, | |
| "eval_steps_per_second": 4.806, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 50.1, | |
| "learning_rate": 2.5308641975308646e-05, | |
| "loss": 0.1079, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 50.51, | |
| "learning_rate": 2.515432098765432e-05, | |
| "loss": 0.1204, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 50.91, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0818, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 50.99, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 1.726827621459961, | |
| "eval_runtime": 0.4051, | |
| "eval_samples_per_second": 78.995, | |
| "eval_steps_per_second": 4.937, | |
| "step": 1262 | |
| }, | |
| { | |
| "epoch": 51.31, | |
| "learning_rate": 2.4845679012345682e-05, | |
| "loss": 0.0759, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 51.72, | |
| "learning_rate": 2.469135802469136e-05, | |
| "loss": 0.1134, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.599599838256836, | |
| "eval_runtime": 0.4061, | |
| "eval_samples_per_second": 78.8, | |
| "eval_steps_per_second": 4.925, | |
| "step": 1287 | |
| }, | |
| { | |
| "epoch": 52.12, | |
| "learning_rate": 2.4537037037037038e-05, | |
| "loss": 0.0988, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 52.53, | |
| "learning_rate": 2.438271604938272e-05, | |
| "loss": 0.0851, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 52.93, | |
| "learning_rate": 2.4228395061728396e-05, | |
| "loss": 0.1115, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 52.97, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 1.7280734777450562, | |
| "eval_runtime": 0.4181, | |
| "eval_samples_per_second": 76.538, | |
| "eval_steps_per_second": 4.784, | |
| "step": 1311 | |
| }, | |
| { | |
| "epoch": 53.33, | |
| "learning_rate": 2.4074074074074077e-05, | |
| "loss": 0.0995, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 53.74, | |
| "learning_rate": 2.391975308641976e-05, | |
| "loss": 0.0929, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 53.98, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.6346489191055298, | |
| "eval_runtime": 0.4061, | |
| "eval_samples_per_second": 78.8, | |
| "eval_steps_per_second": 4.925, | |
| "step": 1336 | |
| }, | |
| { | |
| "epoch": 54.14, | |
| "learning_rate": 2.3765432098765433e-05, | |
| "loss": 0.1187, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 54.55, | |
| "learning_rate": 2.3611111111111114e-05, | |
| "loss": 0.1477, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 54.95, | |
| "learning_rate": 2.3456790123456795e-05, | |
| "loss": 0.0909, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 54.99, | |
| "eval_accuracy": 0.78125, | |
| "eval_loss": 1.4369856119155884, | |
| "eval_runtime": 0.4041, | |
| "eval_samples_per_second": 79.19, | |
| "eval_steps_per_second": 4.949, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 55.35, | |
| "learning_rate": 2.330246913580247e-05, | |
| "loss": 0.1106, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 55.76, | |
| "learning_rate": 2.314814814814815e-05, | |
| "loss": 0.1076, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.78125, | |
| "eval_loss": 1.550971508026123, | |
| "eval_runtime": 0.4091, | |
| "eval_samples_per_second": 78.222, | |
| "eval_steps_per_second": 4.889, | |
| "step": 1386 | |
| }, | |
| { | |
| "epoch": 56.16, | |
| "learning_rate": 2.299382716049383e-05, | |
| "loss": 0.1301, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 56.57, | |
| "learning_rate": 2.2839506172839506e-05, | |
| "loss": 0.0807, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 56.97, | |
| "learning_rate": 2.2685185185185187e-05, | |
| "loss": 0.0948, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 56.97, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.6382544040679932, | |
| "eval_runtime": 0.4751, | |
| "eval_samples_per_second": 67.353, | |
| "eval_steps_per_second": 4.21, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 57.37, | |
| "learning_rate": 2.2530864197530865e-05, | |
| "loss": 0.0961, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 57.78, | |
| "learning_rate": 2.2376543209876546e-05, | |
| "loss": 0.0914, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 57.98, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.6937541961669922, | |
| "eval_runtime": 0.3996, | |
| "eval_samples_per_second": 80.08, | |
| "eval_steps_per_second": 5.005, | |
| "step": 1435 | |
| }, | |
| { | |
| "epoch": 58.18, | |
| "learning_rate": 2.2222222222222227e-05, | |
| "loss": 0.0716, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 58.59, | |
| "learning_rate": 2.20679012345679e-05, | |
| "loss": 0.0911, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 58.99, | |
| "learning_rate": 2.1913580246913582e-05, | |
| "loss": 0.0598, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 58.99, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.6290702819824219, | |
| "eval_runtime": 0.3996, | |
| "eval_samples_per_second": 80.081, | |
| "eval_steps_per_second": 5.005, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 59.39, | |
| "learning_rate": 2.1759259259259263e-05, | |
| "loss": 0.087, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 59.8, | |
| "learning_rate": 2.1604938271604937e-05, | |
| "loss": 0.0769, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.6593661308288574, | |
| "eval_runtime": 0.4171, | |
| "eval_samples_per_second": 76.721, | |
| "eval_steps_per_second": 4.795, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 60.2, | |
| "learning_rate": 2.145061728395062e-05, | |
| "loss": 0.0964, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 60.61, | |
| "learning_rate": 2.12962962962963e-05, | |
| "loss": 0.0894, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 60.97, | |
| "eval_accuracy": 0.78125, | |
| "eval_loss": 1.630151629447937, | |
| "eval_runtime": 0.4706, | |
| "eval_samples_per_second": 67.996, | |
| "eval_steps_per_second": 4.25, | |
| "step": 1509 | |
| }, | |
| { | |
| "epoch": 61.01, | |
| "learning_rate": 2.1141975308641974e-05, | |
| "loss": 0.0664, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 61.41, | |
| "learning_rate": 2.0987654320987655e-05, | |
| "loss": 0.0701, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 61.82, | |
| "learning_rate": 2.0833333333333336e-05, | |
| "loss": 0.0999, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 61.98, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.656170129776001, | |
| "eval_runtime": 0.7332, | |
| "eval_samples_per_second": 43.646, | |
| "eval_steps_per_second": 2.728, | |
| "step": 1534 | |
| }, | |
| { | |
| "epoch": 62.22, | |
| "learning_rate": 2.0679012345679014e-05, | |
| "loss": 0.0559, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 62.63, | |
| "learning_rate": 2.0524691358024695e-05, | |
| "loss": 0.0759, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 62.99, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.598868489265442, | |
| "eval_runtime": 0.4032, | |
| "eval_samples_per_second": 79.364, | |
| "eval_steps_per_second": 4.96, | |
| "step": 1559 | |
| }, | |
| { | |
| "epoch": 63.03, | |
| "learning_rate": 2.0370370370370372e-05, | |
| "loss": 0.1008, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 63.43, | |
| "learning_rate": 2.021604938271605e-05, | |
| "loss": 0.06, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 63.84, | |
| "learning_rate": 2.006172839506173e-05, | |
| "loss": 0.102, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.78125, | |
| "eval_loss": 1.660170078277588, | |
| "eval_runtime": 0.4806, | |
| "eval_samples_per_second": 66.581, | |
| "eval_steps_per_second": 4.161, | |
| "step": 1584 | |
| }, | |
| { | |
| "epoch": 64.24, | |
| "learning_rate": 1.990740740740741e-05, | |
| "loss": 0.0836, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 64.65, | |
| "learning_rate": 1.9753086419753087e-05, | |
| "loss": 0.0864, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 64.97, | |
| "eval_accuracy": 0.78125, | |
| "eval_loss": 1.7385599613189697, | |
| "eval_runtime": 0.4181, | |
| "eval_samples_per_second": 76.538, | |
| "eval_steps_per_second": 4.784, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 65.05, | |
| "learning_rate": 1.9598765432098768e-05, | |
| "loss": 0.076, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 65.45, | |
| "learning_rate": 1.9444444444444445e-05, | |
| "loss": 0.0763, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 65.86, | |
| "learning_rate": 1.9290123456790123e-05, | |
| "loss": 0.0722, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 65.98, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 2.0494906902313232, | |
| "eval_runtime": 0.4086, | |
| "eval_samples_per_second": 78.317, | |
| "eval_steps_per_second": 4.895, | |
| "step": 1633 | |
| }, | |
| { | |
| "epoch": 66.26, | |
| "learning_rate": 1.9135802469135804e-05, | |
| "loss": 0.0929, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 66.67, | |
| "learning_rate": 1.8981481481481482e-05, | |
| "loss": 0.0956, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 66.99, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.9748592376708984, | |
| "eval_runtime": 0.4206, | |
| "eval_samples_per_second": 76.081, | |
| "eval_steps_per_second": 4.755, | |
| "step": 1658 | |
| }, | |
| { | |
| "epoch": 67.07, | |
| "learning_rate": 1.8827160493827163e-05, | |
| "loss": 0.071, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 67.47, | |
| "learning_rate": 1.867283950617284e-05, | |
| "loss": 0.0844, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 67.88, | |
| "learning_rate": 1.851851851851852e-05, | |
| "loss": 0.0698, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 2.0089621543884277, | |
| "eval_runtime": 0.4176, | |
| "eval_samples_per_second": 76.628, | |
| "eval_steps_per_second": 4.789, | |
| "step": 1683 | |
| }, | |
| { | |
| "epoch": 68.28, | |
| "learning_rate": 1.83641975308642e-05, | |
| "loss": 0.0837, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 68.69, | |
| "learning_rate": 1.820987654320988e-05, | |
| "loss": 0.0635, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 68.97, | |
| "eval_accuracy": 0.625, | |
| "eval_loss": 2.160045862197876, | |
| "eval_runtime": 0.4306, | |
| "eval_samples_per_second": 74.314, | |
| "eval_steps_per_second": 4.645, | |
| "step": 1707 | |
| }, | |
| { | |
| "epoch": 69.09, | |
| "learning_rate": 1.8055555555555558e-05, | |
| "loss": 0.0719, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 69.49, | |
| "learning_rate": 1.7901234567901236e-05, | |
| "loss": 0.0656, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 69.9, | |
| "learning_rate": 1.7746913580246917e-05, | |
| "loss": 0.0726, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 69.98, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.8476567268371582, | |
| "eval_runtime": 0.4076, | |
| "eval_samples_per_second": 78.508, | |
| "eval_steps_per_second": 4.907, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 70.3, | |
| "learning_rate": 1.7592592592592595e-05, | |
| "loss": 0.0651, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 70.71, | |
| "learning_rate": 1.7438271604938272e-05, | |
| "loss": 0.0905, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 70.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9970349073410034, | |
| "eval_runtime": 0.4801, | |
| "eval_samples_per_second": 66.652, | |
| "eval_steps_per_second": 4.166, | |
| "step": 1757 | |
| }, | |
| { | |
| "epoch": 71.11, | |
| "learning_rate": 1.728395061728395e-05, | |
| "loss": 0.0705, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 71.52, | |
| "learning_rate": 1.712962962962963e-05, | |
| "loss": 0.053, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 71.92, | |
| "learning_rate": 1.697530864197531e-05, | |
| "loss": 0.0955, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.9001073837280273, | |
| "eval_runtime": 0.4716, | |
| "eval_samples_per_second": 67.852, | |
| "eval_steps_per_second": 4.241, | |
| "step": 1782 | |
| }, | |
| { | |
| "epoch": 72.32, | |
| "learning_rate": 1.682098765432099e-05, | |
| "loss": 0.0943, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 72.73, | |
| "learning_rate": 1.6666666666666667e-05, | |
| "loss": 0.0614, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 72.97, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 1.9346568584442139, | |
| "eval_runtime": 0.4161, | |
| "eval_samples_per_second": 76.906, | |
| "eval_steps_per_second": 4.807, | |
| "step": 1806 | |
| }, | |
| { | |
| "epoch": 73.13, | |
| "learning_rate": 1.651234567901235e-05, | |
| "loss": 0.0652, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 73.54, | |
| "learning_rate": 1.6358024691358026e-05, | |
| "loss": 0.0634, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 73.94, | |
| "learning_rate": 1.6203703703703707e-05, | |
| "loss": 0.0721, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 73.98, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.900674819946289, | |
| "eval_runtime": 0.4776, | |
| "eval_samples_per_second": 66.996, | |
| "eval_steps_per_second": 4.187, | |
| "step": 1831 | |
| }, | |
| { | |
| "epoch": 74.34, | |
| "learning_rate": 1.6049382716049385e-05, | |
| "loss": 0.0633, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 74.75, | |
| "learning_rate": 1.5895061728395063e-05, | |
| "loss": 0.0868, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 74.99, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 2.0204198360443115, | |
| "eval_runtime": 0.4036, | |
| "eval_samples_per_second": 79.286, | |
| "eval_steps_per_second": 4.955, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 75.15, | |
| "learning_rate": 1.5740740740740744e-05, | |
| "loss": 0.0798, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 75.56, | |
| "learning_rate": 1.558641975308642e-05, | |
| "loss": 0.0783, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 75.96, | |
| "learning_rate": 1.54320987654321e-05, | |
| "loss": 0.0817, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9806559085845947, | |
| "eval_runtime": 0.4096, | |
| "eval_samples_per_second": 78.123, | |
| "eval_steps_per_second": 4.883, | |
| "step": 1881 | |
| }, | |
| { | |
| "epoch": 76.36, | |
| "learning_rate": 1.5277777777777777e-05, | |
| "loss": 0.0823, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 76.77, | |
| "learning_rate": 1.5123456790123458e-05, | |
| "loss": 0.0533, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 76.97, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.978171467781067, | |
| "eval_runtime": 0.4191, | |
| "eval_samples_per_second": 76.353, | |
| "eval_steps_per_second": 4.772, | |
| "step": 1905 | |
| }, | |
| { | |
| "epoch": 77.17, | |
| "learning_rate": 1.4969135802469137e-05, | |
| "loss": 0.0866, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 77.58, | |
| "learning_rate": 1.4814814814814815e-05, | |
| "loss": 0.0737, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 77.98, | |
| "learning_rate": 1.4660493827160496e-05, | |
| "loss": 0.0682, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 77.98, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.831963062286377, | |
| "eval_runtime": 0.4216, | |
| "eval_samples_per_second": 75.902, | |
| "eval_steps_per_second": 4.744, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 78.38, | |
| "learning_rate": 1.4506172839506174e-05, | |
| "loss": 0.0819, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 78.79, | |
| "learning_rate": 1.4351851851851853e-05, | |
| "loss": 0.078, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 78.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.835146427154541, | |
| "eval_runtime": 0.4216, | |
| "eval_samples_per_second": 75.902, | |
| "eval_steps_per_second": 4.744, | |
| "step": 1955 | |
| }, | |
| { | |
| "epoch": 79.19, | |
| "learning_rate": 1.4197530864197532e-05, | |
| "loss": 0.0574, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 79.6, | |
| "learning_rate": 1.4043209876543212e-05, | |
| "loss": 0.0468, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "learning_rate": 1.388888888888889e-05, | |
| "loss": 0.0991, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9694030284881592, | |
| "eval_runtime": 0.4798, | |
| "eval_samples_per_second": 66.7, | |
| "eval_steps_per_second": 4.169, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 80.4, | |
| "learning_rate": 1.373456790123457e-05, | |
| "loss": 0.0466, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 80.81, | |
| "learning_rate": 1.3580246913580248e-05, | |
| "loss": 0.0601, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 80.97, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.8794611692428589, | |
| "eval_runtime": 0.4141, | |
| "eval_samples_per_second": 77.276, | |
| "eval_steps_per_second": 4.83, | |
| "step": 2004 | |
| }, | |
| { | |
| "epoch": 81.21, | |
| "learning_rate": 1.3425925925925926e-05, | |
| "loss": 0.0567, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 81.62, | |
| "learning_rate": 1.3271604938271605e-05, | |
| "loss": 0.072, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 81.98, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 2.02938175201416, | |
| "eval_runtime": 0.4232, | |
| "eval_samples_per_second": 75.623, | |
| "eval_steps_per_second": 4.726, | |
| "step": 2029 | |
| }, | |
| { | |
| "epoch": 82.02, | |
| "learning_rate": 1.3117283950617285e-05, | |
| "loss": 0.073, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 82.42, | |
| "learning_rate": 1.2962962962962964e-05, | |
| "loss": 0.0788, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 82.83, | |
| "learning_rate": 1.2808641975308642e-05, | |
| "loss": 0.0746, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 82.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.8438613414764404, | |
| "eval_runtime": 0.3956, | |
| "eval_samples_per_second": 80.89, | |
| "eval_steps_per_second": 5.056, | |
| "step": 2054 | |
| }, | |
| { | |
| "epoch": 83.23, | |
| "learning_rate": 1.2654320987654323e-05, | |
| "loss": 0.0626, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 83.64, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.0547, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.932082176208496, | |
| "eval_runtime": 0.4141, | |
| "eval_samples_per_second": 77.273, | |
| "eval_steps_per_second": 4.83, | |
| "step": 2079 | |
| }, | |
| { | |
| "epoch": 84.04, | |
| "learning_rate": 1.234567901234568e-05, | |
| "loss": 0.0632, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 84.44, | |
| "learning_rate": 1.219135802469136e-05, | |
| "loss": 0.0478, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 84.85, | |
| "learning_rate": 1.2037037037037039e-05, | |
| "loss": 0.0497, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 84.97, | |
| "eval_accuracy": 0.78125, | |
| "eval_loss": 1.8862378597259521, | |
| "eval_runtime": 0.4806, | |
| "eval_samples_per_second": 66.582, | |
| "eval_steps_per_second": 4.161, | |
| "step": 2103 | |
| }, | |
| { | |
| "epoch": 85.25, | |
| "learning_rate": 1.1882716049382716e-05, | |
| "loss": 0.0544, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 85.66, | |
| "learning_rate": 1.1728395061728398e-05, | |
| "loss": 0.0566, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 85.98, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 2.0067098140716553, | |
| "eval_runtime": 0.5932, | |
| "eval_samples_per_second": 53.947, | |
| "eval_steps_per_second": 3.372, | |
| "step": 2128 | |
| }, | |
| { | |
| "epoch": 86.06, | |
| "learning_rate": 1.1574074074074075e-05, | |
| "loss": 0.0557, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 86.46, | |
| "learning_rate": 1.1419753086419753e-05, | |
| "loss": 0.0872, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 86.87, | |
| "learning_rate": 1.1265432098765432e-05, | |
| "loss": 0.0353, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 86.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 2.095703363418579, | |
| "eval_runtime": 0.4511, | |
| "eval_samples_per_second": 70.942, | |
| "eval_steps_per_second": 4.434, | |
| "step": 2153 | |
| }, | |
| { | |
| "epoch": 87.27, | |
| "learning_rate": 1.1111111111111113e-05, | |
| "loss": 0.0482, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 87.68, | |
| "learning_rate": 1.0956790123456791e-05, | |
| "loss": 0.0634, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.65625, | |
| "eval_loss": 2.15714168548584, | |
| "eval_runtime": 0.4471, | |
| "eval_samples_per_second": 71.57, | |
| "eval_steps_per_second": 4.473, | |
| "step": 2178 | |
| }, | |
| { | |
| "epoch": 88.08, | |
| "learning_rate": 1.0802469135802469e-05, | |
| "loss": 0.0632, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 88.48, | |
| "learning_rate": 1.064814814814815e-05, | |
| "loss": 0.0504, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 88.89, | |
| "learning_rate": 1.0493827160493827e-05, | |
| "loss": 0.0477, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 88.97, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 2.0384438037872314, | |
| "eval_runtime": 0.4271, | |
| "eval_samples_per_second": 74.924, | |
| "eval_steps_per_second": 4.683, | |
| "step": 2202 | |
| }, | |
| { | |
| "epoch": 89.29, | |
| "learning_rate": 1.0339506172839507e-05, | |
| "loss": 0.0446, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 89.7, | |
| "learning_rate": 1.0185185185185186e-05, | |
| "loss": 0.0513, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 89.98, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.9145632982254028, | |
| "eval_runtime": 0.4711, | |
| "eval_samples_per_second": 67.923, | |
| "eval_steps_per_second": 4.245, | |
| "step": 2227 | |
| }, | |
| { | |
| "epoch": 90.1, | |
| "learning_rate": 1.0030864197530866e-05, | |
| "loss": 0.0626, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 90.51, | |
| "learning_rate": 9.876543209876543e-06, | |
| "loss": 0.0354, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 90.91, | |
| "learning_rate": 9.722222222222223e-06, | |
| "loss": 0.0717, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 90.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.8837898969650269, | |
| "eval_runtime": 0.4031, | |
| "eval_samples_per_second": 79.378, | |
| "eval_steps_per_second": 4.961, | |
| "step": 2252 | |
| }, | |
| { | |
| "epoch": 91.31, | |
| "learning_rate": 9.567901234567902e-06, | |
| "loss": 0.0486, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 91.72, | |
| "learning_rate": 9.413580246913581e-06, | |
| "loss": 0.0644, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.9186292886734009, | |
| "eval_runtime": 0.4621, | |
| "eval_samples_per_second": 69.247, | |
| "eval_steps_per_second": 4.328, | |
| "step": 2277 | |
| }, | |
| { | |
| "epoch": 92.12, | |
| "learning_rate": 9.25925925925926e-06, | |
| "loss": 0.048, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 92.53, | |
| "learning_rate": 9.10493827160494e-06, | |
| "loss": 0.0711, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 92.93, | |
| "learning_rate": 8.950617283950618e-06, | |
| "loss": 0.0848, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 92.97, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.882826805114746, | |
| "eval_runtime": 0.4741, | |
| "eval_samples_per_second": 67.495, | |
| "eval_steps_per_second": 4.218, | |
| "step": 2301 | |
| }, | |
| { | |
| "epoch": 93.33, | |
| "learning_rate": 8.796296296296297e-06, | |
| "loss": 0.038, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 93.74, | |
| "learning_rate": 8.641975308641975e-06, | |
| "loss": 0.0393, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 93.98, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9442168474197388, | |
| "eval_runtime": 0.4091, | |
| "eval_samples_per_second": 78.217, | |
| "eval_steps_per_second": 4.889, | |
| "step": 2326 | |
| }, | |
| { | |
| "epoch": 94.14, | |
| "learning_rate": 8.487654320987654e-06, | |
| "loss": 0.038, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 94.55, | |
| "learning_rate": 8.333333333333334e-06, | |
| "loss": 0.0703, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 94.95, | |
| "learning_rate": 8.179012345679013e-06, | |
| "loss": 0.046, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 94.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.8865550756454468, | |
| "eval_runtime": 0.3931, | |
| "eval_samples_per_second": 81.403, | |
| "eval_steps_per_second": 5.088, | |
| "step": 2351 | |
| }, | |
| { | |
| "epoch": 95.35, | |
| "learning_rate": 8.024691358024692e-06, | |
| "loss": 0.0762, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 95.76, | |
| "learning_rate": 7.870370370370372e-06, | |
| "loss": 0.0487, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.9787421226501465, | |
| "eval_runtime": 0.4221, | |
| "eval_samples_per_second": 75.807, | |
| "eval_steps_per_second": 4.738, | |
| "step": 2376 | |
| }, | |
| { | |
| "epoch": 96.16, | |
| "learning_rate": 7.71604938271605e-06, | |
| "loss": 0.0724, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 96.57, | |
| "learning_rate": 7.561728395061729e-06, | |
| "loss": 0.0563, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 96.97, | |
| "learning_rate": 7.4074074074074075e-06, | |
| "loss": 0.074, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 96.97, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 2.0081026554107666, | |
| "eval_runtime": 0.6192, | |
| "eval_samples_per_second": 51.682, | |
| "eval_steps_per_second": 3.23, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 97.37, | |
| "learning_rate": 7.253086419753087e-06, | |
| "loss": 0.0624, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 97.78, | |
| "learning_rate": 7.098765432098766e-06, | |
| "loss": 0.0435, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 97.98, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.8838809728622437, | |
| "eval_runtime": 0.4211, | |
| "eval_samples_per_second": 75.988, | |
| "eval_steps_per_second": 4.749, | |
| "step": 2425 | |
| }, | |
| { | |
| "epoch": 98.18, | |
| "learning_rate": 6.944444444444445e-06, | |
| "loss": 0.0688, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 98.59, | |
| "learning_rate": 6.790123456790124e-06, | |
| "loss": 0.0441, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 98.99, | |
| "learning_rate": 6.635802469135803e-06, | |
| "loss": 0.0509, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 98.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9207700490951538, | |
| "eval_runtime": 0.4015, | |
| "eval_samples_per_second": 79.693, | |
| "eval_steps_per_second": 4.981, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 99.39, | |
| "learning_rate": 6.481481481481482e-06, | |
| "loss": 0.0557, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 99.8, | |
| "learning_rate": 6.3271604938271615e-06, | |
| "loss": 0.0571, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9770445823669434, | |
| "eval_runtime": 0.4191, | |
| "eval_samples_per_second": 76.363, | |
| "eval_steps_per_second": 4.773, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 100.2, | |
| "learning_rate": 6.17283950617284e-06, | |
| "loss": 0.0505, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 100.61, | |
| "learning_rate": 6.018518518518519e-06, | |
| "loss": 0.0327, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 100.97, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.969954013824463, | |
| "eval_runtime": 0.5071, | |
| "eval_samples_per_second": 63.101, | |
| "eval_steps_per_second": 3.944, | |
| "step": 2499 | |
| }, | |
| { | |
| "epoch": 101.01, | |
| "learning_rate": 5.864197530864199e-06, | |
| "loss": 0.0472, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 101.41, | |
| "learning_rate": 5.7098765432098764e-06, | |
| "loss": 0.0512, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 101.82, | |
| "learning_rate": 5.555555555555557e-06, | |
| "loss": 0.0387, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 101.98, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.9250848293304443, | |
| "eval_runtime": 0.4241, | |
| "eval_samples_per_second": 75.45, | |
| "eval_steps_per_second": 4.716, | |
| "step": 2524 | |
| }, | |
| { | |
| "epoch": 102.22, | |
| "learning_rate": 5.401234567901234e-06, | |
| "loss": 0.0614, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 102.63, | |
| "learning_rate": 5.246913580246914e-06, | |
| "loss": 0.029, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 102.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9489786624908447, | |
| "eval_runtime": 0.4031, | |
| "eval_samples_per_second": 79.385, | |
| "eval_steps_per_second": 4.962, | |
| "step": 2549 | |
| }, | |
| { | |
| "epoch": 103.03, | |
| "learning_rate": 5.092592592592593e-06, | |
| "loss": 0.052, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 103.43, | |
| "learning_rate": 4.938271604938272e-06, | |
| "loss": 0.0696, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 103.84, | |
| "learning_rate": 4.783950617283951e-06, | |
| "loss": 0.0478, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 104.0, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9358038902282715, | |
| "eval_runtime": 0.4036, | |
| "eval_samples_per_second": 79.283, | |
| "eval_steps_per_second": 4.955, | |
| "step": 2574 | |
| }, | |
| { | |
| "epoch": 104.24, | |
| "learning_rate": 4.62962962962963e-06, | |
| "loss": 0.0358, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 104.65, | |
| "learning_rate": 4.475308641975309e-06, | |
| "loss": 0.0587, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 104.97, | |
| "eval_accuracy": 0.75, | |
| "eval_loss": 1.9197365045547485, | |
| "eval_runtime": 0.4296, | |
| "eval_samples_per_second": 74.488, | |
| "eval_steps_per_second": 4.655, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 105.05, | |
| "learning_rate": 4.3209876543209875e-06, | |
| "loss": 0.0417, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 105.45, | |
| "learning_rate": 4.166666666666667e-06, | |
| "loss": 0.0644, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 105.86, | |
| "learning_rate": 4.012345679012346e-06, | |
| "loss": 0.0523, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 105.98, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9309440851211548, | |
| "eval_runtime": 0.4621, | |
| "eval_samples_per_second": 69.248, | |
| "eval_steps_per_second": 4.328, | |
| "step": 2623 | |
| }, | |
| { | |
| "epoch": 106.26, | |
| "learning_rate": 3.858024691358025e-06, | |
| "loss": 0.0365, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 106.67, | |
| "learning_rate": 3.7037037037037037e-06, | |
| "loss": 0.0581, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 106.99, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9828894138336182, | |
| "eval_runtime": 0.4227, | |
| "eval_samples_per_second": 75.712, | |
| "eval_steps_per_second": 4.732, | |
| "step": 2648 | |
| }, | |
| { | |
| "epoch": 107.07, | |
| "learning_rate": 3.549382716049383e-06, | |
| "loss": 0.0459, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 107.47, | |
| "learning_rate": 3.395061728395062e-06, | |
| "loss": 0.031, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 107.88, | |
| "learning_rate": 3.240740740740741e-06, | |
| "loss": 0.0352, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 108.0, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 2.00472354888916, | |
| "eval_runtime": 0.4442, | |
| "eval_samples_per_second": 72.042, | |
| "eval_steps_per_second": 4.503, | |
| "step": 2673 | |
| }, | |
| { | |
| "epoch": 108.28, | |
| "learning_rate": 3.08641975308642e-06, | |
| "loss": 0.0553, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 108.69, | |
| "learning_rate": 2.9320987654320994e-06, | |
| "loss": 0.0373, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 108.97, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.9897255897521973, | |
| "eval_runtime": 0.4256, | |
| "eval_samples_per_second": 75.187, | |
| "eval_steps_per_second": 4.699, | |
| "step": 2697 | |
| }, | |
| { | |
| "epoch": 109.09, | |
| "learning_rate": 2.7777777777777783e-06, | |
| "loss": 0.049, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 109.49, | |
| "learning_rate": 2.623456790123457e-06, | |
| "loss": 0.0294, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 109.9, | |
| "learning_rate": 2.469135802469136e-06, | |
| "loss": 0.0258, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 109.98, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.938352346420288, | |
| "eval_runtime": 0.4216, | |
| "eval_samples_per_second": 75.898, | |
| "eval_steps_per_second": 4.744, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 110.3, | |
| "learning_rate": 2.314814814814815e-06, | |
| "loss": 0.0484, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 110.71, | |
| "learning_rate": 2.1604938271604937e-06, | |
| "loss": 0.039, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 110.99, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.9356164932250977, | |
| "eval_runtime": 0.4076, | |
| "eval_samples_per_second": 78.509, | |
| "eval_steps_per_second": 4.907, | |
| "step": 2747 | |
| }, | |
| { | |
| "epoch": 111.11, | |
| "learning_rate": 2.006172839506173e-06, | |
| "loss": 0.0326, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 111.52, | |
| "learning_rate": 1.8518518518518519e-06, | |
| "loss": 0.066, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 111.92, | |
| "learning_rate": 1.697530864197531e-06, | |
| "loss": 0.0333, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 112.0, | |
| "eval_accuracy": 0.71875, | |
| "eval_loss": 1.980454683303833, | |
| "eval_runtime": 0.4112, | |
| "eval_samples_per_second": 77.828, | |
| "eval_steps_per_second": 4.864, | |
| "step": 2772 | |
| }, | |
| { | |
| "epoch": 112.32, | |
| "learning_rate": 1.54320987654321e-06, | |
| "loss": 0.0524, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 112.73, | |
| "learning_rate": 1.3888888888888892e-06, | |
| "loss": 0.0641, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 112.97, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.9813566207885742, | |
| "eval_runtime": 0.5541, | |
| "eval_samples_per_second": 57.748, | |
| "eval_steps_per_second": 3.609, | |
| "step": 2796 | |
| }, | |
| { | |
| "epoch": 113.13, | |
| "learning_rate": 1.234567901234568e-06, | |
| "loss": 0.0505, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 113.54, | |
| "learning_rate": 1.0802469135802469e-06, | |
| "loss": 0.0222, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 113.94, | |
| "learning_rate": 9.259259259259259e-07, | |
| "loss": 0.0649, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 113.98, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.972588062286377, | |
| "eval_runtime": 0.4431, | |
| "eval_samples_per_second": 72.211, | |
| "eval_steps_per_second": 4.513, | |
| "step": 2821 | |
| }, | |
| { | |
| "epoch": 114.34, | |
| "learning_rate": 7.71604938271605e-07, | |
| "loss": 0.0354, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 114.75, | |
| "learning_rate": 6.17283950617284e-07, | |
| "loss": 0.0241, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 114.99, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.9736533164978027, | |
| "eval_runtime": 0.4011, | |
| "eval_samples_per_second": 79.777, | |
| "eval_steps_per_second": 4.986, | |
| "step": 2846 | |
| }, | |
| { | |
| "epoch": 115.15, | |
| "learning_rate": 4.6296296296296297e-07, | |
| "loss": 0.0418, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 115.56, | |
| "learning_rate": 3.08641975308642e-07, | |
| "loss": 0.0382, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 115.96, | |
| "learning_rate": 1.54320987654321e-07, | |
| "loss": 0.0356, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 116.0, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.985576868057251, | |
| "eval_runtime": 0.4721, | |
| "eval_samples_per_second": 67.78, | |
| "eval_steps_per_second": 4.236, | |
| "step": 2871 | |
| }, | |
| { | |
| "epoch": 116.36, | |
| "learning_rate": 0.0, | |
| "loss": 0.0601, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 116.36, | |
| "eval_accuracy": 0.6875, | |
| "eval_loss": 1.9852969646453857, | |
| "eval_runtime": 0.4181, | |
| "eval_samples_per_second": 76.535, | |
| "eval_steps_per_second": 4.783, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 116.36, | |
| "step": 2880, | |
| "total_flos": 5.970642494234296e+18, | |
| "train_loss": 0.4532645735475752, | |
| "train_runtime": 3508.1723, | |
| "train_samples_per_second": 53.943, | |
| "train_steps_per_second": 0.821 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 2880, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 120, | |
| "save_steps": 500, | |
| "total_flos": 5.970642494234296e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |