| { | |
| "best_metric": 0.8598130841121495, | |
| "best_model_checkpoint": "SW2-RHS-DA\\checkpoint-1100", | |
| "epoch": 39.436619718309856, | |
| "eval_steps": 500, | |
| "global_step": 1400, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 3.971428571428572e-05, | |
| "loss": 3.6129, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 3.9428571428571435e-05, | |
| "loss": 3.1033, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 3.9142857142857145e-05, | |
| "loss": 2.0538, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 0.411214953271028, | |
| "eval_loss": 1.2866225242614746, | |
| "eval_runtime": 2.5936, | |
| "eval_samples_per_second": 41.255, | |
| "eval_steps_per_second": 2.699, | |
| "step": 35 | |
| }, | |
| { | |
| "epoch": 1.13, | |
| "learning_rate": 3.885714285714286e-05, | |
| "loss": 1.274, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 3.857142857142858e-05, | |
| "loss": 0.7171, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.828571428571429e-05, | |
| "loss": 0.7163, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.8e-05, | |
| "loss": 0.7464, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.5887850467289719, | |
| "eval_loss": 0.6779574751853943, | |
| "eval_runtime": 2.9282, | |
| "eval_samples_per_second": 36.541, | |
| "eval_steps_per_second": 2.391, | |
| "step": 71 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "learning_rate": 3.771428571428572e-05, | |
| "loss": 0.7044, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "learning_rate": 3.7428571428571434e-05, | |
| "loss": 0.6984, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "learning_rate": 3.714285714285715e-05, | |
| "loss": 0.7061, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_accuracy": 0.5887850467289719, | |
| "eval_loss": 0.6850669980049133, | |
| "eval_runtime": 2.5306, | |
| "eval_samples_per_second": 42.282, | |
| "eval_steps_per_second": 2.766, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 3.685714285714286e-05, | |
| "loss": 0.6987, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 3.38, | |
| "learning_rate": 3.6571428571428576e-05, | |
| "loss": 0.6953, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 3.66, | |
| "learning_rate": 3.628571428571429e-05, | |
| "loss": 0.7011, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 3.6e-05, | |
| "loss": 0.6951, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.5887850467289719, | |
| "eval_loss": 0.6741566061973572, | |
| "eval_runtime": 2.4201, | |
| "eval_samples_per_second": 44.212, | |
| "eval_steps_per_second": 2.892, | |
| "step": 142 | |
| }, | |
| { | |
| "epoch": 4.23, | |
| "learning_rate": 3.571428571428572e-05, | |
| "loss": 0.7019, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 3.542857142857143e-05, | |
| "loss": 0.6844, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 4.79, | |
| "learning_rate": 3.514285714285714e-05, | |
| "loss": 0.6928, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 4.99, | |
| "eval_accuracy": 0.4485981308411215, | |
| "eval_loss": 0.6916905045509338, | |
| "eval_runtime": 2.487, | |
| "eval_samples_per_second": 43.024, | |
| "eval_steps_per_second": 2.815, | |
| "step": 177 | |
| }, | |
| { | |
| "epoch": 5.07, | |
| "learning_rate": 3.485714285714286e-05, | |
| "loss": 0.6884, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 5.35, | |
| "learning_rate": 3.4571428571428574e-05, | |
| "loss": 0.6884, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 5.63, | |
| "learning_rate": 3.4285714285714284e-05, | |
| "loss": 0.6871, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 5.92, | |
| "learning_rate": 3.4e-05, | |
| "loss": 0.683, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.5794392523364486, | |
| "eval_loss": 0.6530604362487793, | |
| "eval_runtime": 2.4047, | |
| "eval_samples_per_second": 44.497, | |
| "eval_steps_per_second": 2.911, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 3.3714285714285716e-05, | |
| "loss": 0.7098, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 6.48, | |
| "learning_rate": 3.342857142857143e-05, | |
| "loss": 0.6721, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 6.76, | |
| "learning_rate": 3.314285714285715e-05, | |
| "loss": 0.7013, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 6.99, | |
| "eval_accuracy": 0.4485981308411215, | |
| "eval_loss": 0.7000461220741272, | |
| "eval_runtime": 2.4932, | |
| "eval_samples_per_second": 42.916, | |
| "eval_steps_per_second": 2.808, | |
| "step": 248 | |
| }, | |
| { | |
| "epoch": 7.04, | |
| "learning_rate": 3.285714285714286e-05, | |
| "loss": 0.7038, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 7.32, | |
| "learning_rate": 3.257142857142857e-05, | |
| "loss": 0.6696, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 3.228571428571429e-05, | |
| "loss": 0.6589, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 7.89, | |
| "learning_rate": 3.2000000000000005e-05, | |
| "loss": 0.6921, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.5514018691588785, | |
| "eval_loss": 0.7518867254257202, | |
| "eval_runtime": 2.4222, | |
| "eval_samples_per_second": 44.175, | |
| "eval_steps_per_second": 2.89, | |
| "step": 284 | |
| }, | |
| { | |
| "epoch": 8.17, | |
| "learning_rate": 3.1714285714285715e-05, | |
| "loss": 0.6944, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 8.45, | |
| "learning_rate": 3.142857142857143e-05, | |
| "loss": 0.6419, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 8.73, | |
| "learning_rate": 3.114285714285715e-05, | |
| "loss": 0.6166, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 8.99, | |
| "eval_accuracy": 0.6822429906542056, | |
| "eval_loss": 0.5946707129478455, | |
| "eval_runtime": 2.466, | |
| "eval_samples_per_second": 43.39, | |
| "eval_steps_per_second": 2.839, | |
| "step": 319 | |
| }, | |
| { | |
| "epoch": 9.01, | |
| "learning_rate": 3.085714285714286e-05, | |
| "loss": 0.6401, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 9.3, | |
| "learning_rate": 3.057142857142857e-05, | |
| "loss": 0.6273, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 9.58, | |
| "learning_rate": 3.0285714285714288e-05, | |
| "loss": 0.598, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 9.86, | |
| "learning_rate": 3.0000000000000004e-05, | |
| "loss": 0.6128, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7850467289719626, | |
| "eval_loss": 0.5433729290962219, | |
| "eval_runtime": 2.6984, | |
| "eval_samples_per_second": 39.653, | |
| "eval_steps_per_second": 2.594, | |
| "step": 355 | |
| }, | |
| { | |
| "epoch": 10.14, | |
| "learning_rate": 2.9714285714285717e-05, | |
| "loss": 0.6519, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 10.42, | |
| "learning_rate": 2.9428571428571433e-05, | |
| "loss": 0.6322, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 10.7, | |
| "learning_rate": 2.9142857142857146e-05, | |
| "loss": 0.601, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "learning_rate": 2.8857142857142858e-05, | |
| "loss": 0.5737, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 10.99, | |
| "eval_accuracy": 0.7570093457943925, | |
| "eval_loss": 0.5533133149147034, | |
| "eval_runtime": 2.4276, | |
| "eval_samples_per_second": 44.076, | |
| "eval_steps_per_second": 2.883, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 11.27, | |
| "learning_rate": 2.8571428571428574e-05, | |
| "loss": 0.5554, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 11.55, | |
| "learning_rate": 2.828571428571429e-05, | |
| "loss": 0.587, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 11.83, | |
| "learning_rate": 2.8e-05, | |
| "loss": 0.5376, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7102803738317757, | |
| "eval_loss": 0.5347476005554199, | |
| "eval_runtime": 2.4287, | |
| "eval_samples_per_second": 44.056, | |
| "eval_steps_per_second": 2.882, | |
| "step": 426 | |
| }, | |
| { | |
| "epoch": 12.11, | |
| "learning_rate": 2.7714285714285716e-05, | |
| "loss": 0.5263, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 12.39, | |
| "learning_rate": 2.742857142857143e-05, | |
| "loss": 0.5883, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 12.68, | |
| "learning_rate": 2.7142857142857148e-05, | |
| "loss": 0.5797, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 12.96, | |
| "learning_rate": 2.6857142857142857e-05, | |
| "loss": 0.5056, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "eval_accuracy": 0.7663551401869159, | |
| "eval_loss": 0.4949099123477936, | |
| "eval_runtime": 2.4981, | |
| "eval_samples_per_second": 42.833, | |
| "eval_steps_per_second": 2.802, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 13.24, | |
| "learning_rate": 2.6571428571428573e-05, | |
| "loss": 0.5289, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 13.52, | |
| "learning_rate": 2.628571428571429e-05, | |
| "loss": 0.5917, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 13.8, | |
| "learning_rate": 2.6000000000000002e-05, | |
| "loss": 0.5396, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7476635514018691, | |
| "eval_loss": 0.5150982737541199, | |
| "eval_runtime": 2.4567, | |
| "eval_samples_per_second": 43.555, | |
| "eval_steps_per_second": 2.849, | |
| "step": 497 | |
| }, | |
| { | |
| "epoch": 14.08, | |
| "learning_rate": 2.5714285714285718e-05, | |
| "loss": 0.5083, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 14.37, | |
| "learning_rate": 2.542857142857143e-05, | |
| "loss": 0.4903, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 14.65, | |
| "learning_rate": 2.5142857142857143e-05, | |
| "loss": 0.4745, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 14.93, | |
| "learning_rate": 2.485714285714286e-05, | |
| "loss": 0.4826, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 14.99, | |
| "eval_accuracy": 0.719626168224299, | |
| "eval_loss": 0.5669169425964355, | |
| "eval_runtime": 2.4462, | |
| "eval_samples_per_second": 43.742, | |
| "eval_steps_per_second": 2.862, | |
| "step": 532 | |
| }, | |
| { | |
| "epoch": 15.21, | |
| "learning_rate": 2.4571428571428575e-05, | |
| "loss": 0.4987, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 15.49, | |
| "learning_rate": 2.4285714285714285e-05, | |
| "loss": 0.4501, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 15.77, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.4269, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7570093457943925, | |
| "eval_loss": 0.47963711619377136, | |
| "eval_runtime": 2.4721, | |
| "eval_samples_per_second": 43.283, | |
| "eval_steps_per_second": 2.832, | |
| "step": 568 | |
| }, | |
| { | |
| "epoch": 16.06, | |
| "learning_rate": 2.3714285714285717e-05, | |
| "loss": 0.4652, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 16.34, | |
| "learning_rate": 2.3428571428571433e-05, | |
| "loss": 0.4804, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 16.62, | |
| "learning_rate": 2.3142857142857145e-05, | |
| "loss": 0.4738, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 16.9, | |
| "learning_rate": 2.2857142857142858e-05, | |
| "loss": 0.5004, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 16.99, | |
| "eval_accuracy": 0.8037383177570093, | |
| "eval_loss": 0.4488687813282013, | |
| "eval_runtime": 2.4276, | |
| "eval_samples_per_second": 44.077, | |
| "eval_steps_per_second": 2.884, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 17.18, | |
| "learning_rate": 2.2571428571428574e-05, | |
| "loss": 0.424, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 17.46, | |
| "learning_rate": 2.2285714285714287e-05, | |
| "loss": 0.4187, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 17.75, | |
| "learning_rate": 2.2000000000000003e-05, | |
| "loss": 0.4116, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.822429906542056, | |
| "eval_loss": 0.4361763000488281, | |
| "eval_runtime": 2.4211, | |
| "eval_samples_per_second": 44.194, | |
| "eval_steps_per_second": 2.891, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 18.03, | |
| "learning_rate": 2.1714285714285715e-05, | |
| "loss": 0.4238, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 18.31, | |
| "learning_rate": 2.1428571428571428e-05, | |
| "loss": 0.3971, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 18.59, | |
| "learning_rate": 2.1142857142857144e-05, | |
| "loss": 0.3699, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 18.87, | |
| "learning_rate": 2.085714285714286e-05, | |
| "loss": 0.3776, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 18.99, | |
| "eval_accuracy": 0.7570093457943925, | |
| "eval_loss": 0.5299957990646362, | |
| "eval_runtime": 2.4947, | |
| "eval_samples_per_second": 42.892, | |
| "eval_steps_per_second": 2.806, | |
| "step": 674 | |
| }, | |
| { | |
| "epoch": 19.15, | |
| "learning_rate": 2.057142857142857e-05, | |
| "loss": 0.43, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "learning_rate": 2.0285714285714286e-05, | |
| "loss": 0.3729, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 19.72, | |
| "learning_rate": 2e-05, | |
| "loss": 0.4071, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 1.9714285714285718e-05, | |
| "loss": 0.3646, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.8037383177570093, | |
| "eval_loss": 0.417547345161438, | |
| "eval_runtime": 2.539, | |
| "eval_samples_per_second": 42.142, | |
| "eval_steps_per_second": 2.757, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 20.28, | |
| "learning_rate": 1.942857142857143e-05, | |
| "loss": 0.3292, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 20.56, | |
| "learning_rate": 1.9142857142857146e-05, | |
| "loss": 0.3608, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 20.85, | |
| "learning_rate": 1.885714285714286e-05, | |
| "loss": 0.3683, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 20.99, | |
| "eval_accuracy": 0.822429906542056, | |
| "eval_loss": 0.4699637293815613, | |
| "eval_runtime": 2.4331, | |
| "eval_samples_per_second": 43.977, | |
| "eval_steps_per_second": 2.877, | |
| "step": 745 | |
| }, | |
| { | |
| "epoch": 21.13, | |
| "learning_rate": 1.8571428571428575e-05, | |
| "loss": 0.3365, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 21.41, | |
| "learning_rate": 1.8285714285714288e-05, | |
| "loss": 0.3775, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 21.69, | |
| "learning_rate": 1.8e-05, | |
| "loss": 0.3147, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 21.97, | |
| "learning_rate": 1.7714285714285717e-05, | |
| "loss": 0.3277, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.8130841121495327, | |
| "eval_loss": 0.470651239156723, | |
| "eval_runtime": 2.6037, | |
| "eval_samples_per_second": 41.095, | |
| "eval_steps_per_second": 2.688, | |
| "step": 781 | |
| }, | |
| { | |
| "epoch": 22.25, | |
| "learning_rate": 1.742857142857143e-05, | |
| "loss": 0.2866, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 22.54, | |
| "learning_rate": 1.7142857142857142e-05, | |
| "loss": 0.3304, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 22.82, | |
| "learning_rate": 1.6857142857142858e-05, | |
| "loss": 0.3534, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "eval_accuracy": 0.8130841121495327, | |
| "eval_loss": 0.523983359336853, | |
| "eval_runtime": 2.439, | |
| "eval_samples_per_second": 43.871, | |
| "eval_steps_per_second": 2.87, | |
| "step": 816 | |
| }, | |
| { | |
| "epoch": 23.1, | |
| "learning_rate": 1.6571428571428574e-05, | |
| "loss": 0.3282, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 23.38, | |
| "learning_rate": 1.6285714285714287e-05, | |
| "loss": 0.2964, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 23.66, | |
| "learning_rate": 1.6000000000000003e-05, | |
| "loss": 0.3044, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 23.94, | |
| "learning_rate": 1.5714285714285715e-05, | |
| "loss": 0.3083, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.8130841121495327, | |
| "eval_loss": 0.5011804103851318, | |
| "eval_runtime": 2.4268, | |
| "eval_samples_per_second": 44.092, | |
| "eval_steps_per_second": 2.885, | |
| "step": 852 | |
| }, | |
| { | |
| "epoch": 24.23, | |
| "learning_rate": 1.542857142857143e-05, | |
| "loss": 0.3059, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 24.51, | |
| "learning_rate": 1.5142857142857144e-05, | |
| "loss": 0.3426, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 24.79, | |
| "learning_rate": 1.4857142857142858e-05, | |
| "loss": 0.2829, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 24.99, | |
| "eval_accuracy": 0.8317757009345794, | |
| "eval_loss": 0.442084401845932, | |
| "eval_runtime": 2.8308, | |
| "eval_samples_per_second": 37.798, | |
| "eval_steps_per_second": 2.473, | |
| "step": 887 | |
| }, | |
| { | |
| "epoch": 25.07, | |
| "learning_rate": 1.4571428571428573e-05, | |
| "loss": 0.2844, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 25.35, | |
| "learning_rate": 1.4285714285714287e-05, | |
| "loss": 0.2989, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 25.63, | |
| "learning_rate": 1.4e-05, | |
| "loss": 0.2708, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 25.92, | |
| "learning_rate": 1.3714285714285716e-05, | |
| "loss": 0.2564, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.822429906542056, | |
| "eval_loss": 0.4547804296016693, | |
| "eval_runtime": 2.4376, | |
| "eval_samples_per_second": 43.896, | |
| "eval_steps_per_second": 2.872, | |
| "step": 923 | |
| }, | |
| { | |
| "epoch": 26.2, | |
| "learning_rate": 1.3428571428571429e-05, | |
| "loss": 0.2668, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 26.48, | |
| "learning_rate": 1.3142857142857145e-05, | |
| "loss": 0.266, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 26.76, | |
| "learning_rate": 1.2857142857142859e-05, | |
| "loss": 0.3136, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 26.99, | |
| "eval_accuracy": 0.8317757009345794, | |
| "eval_loss": 0.4374113380908966, | |
| "eval_runtime": 2.5027, | |
| "eval_samples_per_second": 42.754, | |
| "eval_steps_per_second": 2.797, | |
| "step": 958 | |
| }, | |
| { | |
| "epoch": 27.04, | |
| "learning_rate": 1.2571428571428572e-05, | |
| "loss": 0.2296, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 27.32, | |
| "learning_rate": 1.2285714285714288e-05, | |
| "loss": 0.3007, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 27.61, | |
| "learning_rate": 1.2e-05, | |
| "loss": 0.2848, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 27.89, | |
| "learning_rate": 1.1714285714285716e-05, | |
| "loss": 0.2443, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.8130841121495327, | |
| "eval_loss": 0.5276730060577393, | |
| "eval_runtime": 2.5662, | |
| "eval_samples_per_second": 41.696, | |
| "eval_steps_per_second": 2.728, | |
| "step": 994 | |
| }, | |
| { | |
| "epoch": 28.17, | |
| "learning_rate": 1.1428571428571429e-05, | |
| "loss": 0.2526, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 28.45, | |
| "learning_rate": 1.1142857142857143e-05, | |
| "loss": 0.2539, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 28.73, | |
| "learning_rate": 1.0857142857142858e-05, | |
| "loss": 0.258, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 28.99, | |
| "eval_accuracy": 0.822429906542056, | |
| "eval_loss": 0.46009838581085205, | |
| "eval_runtime": 2.4468, | |
| "eval_samples_per_second": 43.73, | |
| "eval_steps_per_second": 2.861, | |
| "step": 1029 | |
| }, | |
| { | |
| "epoch": 29.01, | |
| "learning_rate": 1.0571428571428572e-05, | |
| "loss": 0.2614, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 29.3, | |
| "learning_rate": 1.0285714285714285e-05, | |
| "loss": 0.241, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 29.58, | |
| "learning_rate": 1e-05, | |
| "loss": 0.2606, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 29.86, | |
| "learning_rate": 9.714285714285715e-06, | |
| "loss": 0.2673, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.8317757009345794, | |
| "eval_loss": 0.45198604464530945, | |
| "eval_runtime": 2.4355, | |
| "eval_samples_per_second": 43.934, | |
| "eval_steps_per_second": 2.874, | |
| "step": 1065 | |
| }, | |
| { | |
| "epoch": 30.14, | |
| "learning_rate": 9.42857142857143e-06, | |
| "loss": 0.2763, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 30.42, | |
| "learning_rate": 9.142857142857144e-06, | |
| "loss": 0.2448, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 30.7, | |
| "learning_rate": 8.857142857142858e-06, | |
| "loss": 0.2573, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 30.99, | |
| "learning_rate": 8.571428571428571e-06, | |
| "loss": 0.2233, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 30.99, | |
| "eval_accuracy": 0.8598130841121495, | |
| "eval_loss": 0.45412737131118774, | |
| "eval_runtime": 2.5403, | |
| "eval_samples_per_second": 42.12, | |
| "eval_steps_per_second": 2.756, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 31.27, | |
| "learning_rate": 8.285714285714287e-06, | |
| "loss": 0.295, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 31.55, | |
| "learning_rate": 8.000000000000001e-06, | |
| "loss": 0.1998, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 31.83, | |
| "learning_rate": 7.714285714285716e-06, | |
| "loss": 0.2276, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.8504672897196262, | |
| "eval_loss": 0.4247356951236725, | |
| "eval_runtime": 2.4319, | |
| "eval_samples_per_second": 43.999, | |
| "eval_steps_per_second": 2.878, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 32.11, | |
| "learning_rate": 7.428571428571429e-06, | |
| "loss": 0.2076, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 32.39, | |
| "learning_rate": 7.1428571428571436e-06, | |
| "loss": 0.2375, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 32.68, | |
| "learning_rate": 6.857142857142858e-06, | |
| "loss": 0.2523, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 32.96, | |
| "learning_rate": 6.571428571428572e-06, | |
| "loss": 0.2653, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 32.99, | |
| "eval_accuracy": 0.8504672897196262, | |
| "eval_loss": 0.4091378450393677, | |
| "eval_runtime": 2.4461, | |
| "eval_samples_per_second": 43.744, | |
| "eval_steps_per_second": 2.862, | |
| "step": 1171 | |
| }, | |
| { | |
| "epoch": 33.24, | |
| "learning_rate": 6.285714285714286e-06, | |
| "loss": 0.2188, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 33.52, | |
| "learning_rate": 6e-06, | |
| "loss": 0.2332, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 33.8, | |
| "learning_rate": 5.7142857142857145e-06, | |
| "loss": 0.2007, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.8504672897196262, | |
| "eval_loss": 0.47192811965942383, | |
| "eval_runtime": 2.4405, | |
| "eval_samples_per_second": 43.843, | |
| "eval_steps_per_second": 2.868, | |
| "step": 1207 | |
| }, | |
| { | |
| "epoch": 34.08, | |
| "learning_rate": 5.428571428571429e-06, | |
| "loss": 0.2196, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 34.37, | |
| "learning_rate": 5.142857142857142e-06, | |
| "loss": 0.2328, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 34.65, | |
| "learning_rate": 4.857142857142858e-06, | |
| "loss": 0.1952, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 34.93, | |
| "learning_rate": 4.571428571428572e-06, | |
| "loss": 0.2082, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 34.99, | |
| "eval_accuracy": 0.8411214953271028, | |
| "eval_loss": 0.4623873233795166, | |
| "eval_runtime": 3.047, | |
| "eval_samples_per_second": 35.117, | |
| "eval_steps_per_second": 2.297, | |
| "step": 1242 | |
| }, | |
| { | |
| "epoch": 35.21, | |
| "learning_rate": 4.2857142857142855e-06, | |
| "loss": 0.1957, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 35.49, | |
| "learning_rate": 4.000000000000001e-06, | |
| "loss": 0.2021, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 35.77, | |
| "learning_rate": 3.7142857142857146e-06, | |
| "loss": 0.1794, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.8317757009345794, | |
| "eval_loss": 0.48562705516815186, | |
| "eval_runtime": 2.5128, | |
| "eval_samples_per_second": 42.582, | |
| "eval_steps_per_second": 2.786, | |
| "step": 1278 | |
| }, | |
| { | |
| "epoch": 36.06, | |
| "learning_rate": 3.428571428571429e-06, | |
| "loss": 0.2378, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 36.34, | |
| "learning_rate": 3.142857142857143e-06, | |
| "loss": 0.2363, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 36.62, | |
| "learning_rate": 2.8571428571428573e-06, | |
| "loss": 0.2538, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 36.9, | |
| "learning_rate": 2.571428571428571e-06, | |
| "loss": 0.1987, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 36.99, | |
| "eval_accuracy": 0.822429906542056, | |
| "eval_loss": 0.49035122990608215, | |
| "eval_runtime": 2.426, | |
| "eval_samples_per_second": 44.106, | |
| "eval_steps_per_second": 2.885, | |
| "step": 1313 | |
| }, | |
| { | |
| "epoch": 37.18, | |
| "learning_rate": 2.285714285714286e-06, | |
| "loss": 0.2088, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 37.46, | |
| "learning_rate": 2.0000000000000003e-06, | |
| "loss": 0.2154, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 37.75, | |
| "learning_rate": 1.7142857142857145e-06, | |
| "loss": 0.2066, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.8504672897196262, | |
| "eval_loss": 0.47410905361175537, | |
| "eval_runtime": 2.4985, | |
| "eval_samples_per_second": 42.825, | |
| "eval_steps_per_second": 2.802, | |
| "step": 1349 | |
| }, | |
| { | |
| "epoch": 38.03, | |
| "learning_rate": 1.4285714285714286e-06, | |
| "loss": 0.2381, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 38.31, | |
| "learning_rate": 1.142857142857143e-06, | |
| "loss": 0.2242, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 38.59, | |
| "learning_rate": 8.571428571428572e-07, | |
| "loss": 0.2374, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 38.87, | |
| "learning_rate": 5.714285714285715e-07, | |
| "loss": 0.1972, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 38.99, | |
| "eval_accuracy": 0.8504672897196262, | |
| "eval_loss": 0.452989786863327, | |
| "eval_runtime": 2.4866, | |
| "eval_samples_per_second": 43.031, | |
| "eval_steps_per_second": 2.815, | |
| "step": 1384 | |
| }, | |
| { | |
| "epoch": 39.15, | |
| "learning_rate": 2.8571428571428575e-07, | |
| "loss": 0.2104, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 39.44, | |
| "learning_rate": 0.0, | |
| "loss": 0.2319, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 39.44, | |
| "eval_accuracy": 0.8504672897196262, | |
| "eval_loss": 0.45361945033073425, | |
| "eval_runtime": 2.5324, | |
| "eval_samples_per_second": 42.252, | |
| "eval_steps_per_second": 2.764, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 39.44, | |
| "step": 1400, | |
| "total_flos": 2.8973120909884785e+18, | |
| "train_loss": 0.4776322265182223, | |
| "train_runtime": 1583.2431, | |
| "train_samples_per_second": 57.047, | |
| "train_steps_per_second": 0.884 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1400, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 40, | |
| "save_steps": 500, | |
| "total_flos": 2.8973120909884785e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |