| { | |
| "best_metric": 0.03933868557214737, | |
| "best_model_checkpoint": "./rorshark_outputs/checkpoint-1840", | |
| "epoch": 5.0, | |
| "eval_steps": 500, | |
| "global_step": 1840, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 1.989130434782609e-05, | |
| "loss": 0.5675, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "learning_rate": 1.9782608695652176e-05, | |
| "loss": 0.4112, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 1.9673913043478263e-05, | |
| "loss": 0.3002, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 1.956521739130435e-05, | |
| "loss": 0.3774, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "learning_rate": 1.9456521739130436e-05, | |
| "loss": 0.3295, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 1.9347826086956523e-05, | |
| "loss": 0.3067, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 1.923913043478261e-05, | |
| "loss": 0.2457, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 1.9130434782608697e-05, | |
| "loss": 0.3318, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 1.9021739130434784e-05, | |
| "loss": 0.1932, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "learning_rate": 1.891304347826087e-05, | |
| "loss": 0.1948, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "learning_rate": 1.8804347826086958e-05, | |
| "loss": 0.2475, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 1.8695652173913045e-05, | |
| "loss": 0.1432, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.8586956521739132e-05, | |
| "loss": 0.2069, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.847826086956522e-05, | |
| "loss": 0.1986, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 1.8369565217391306e-05, | |
| "loss": 0.2156, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 1.8260869565217393e-05, | |
| "loss": 0.1187, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "learning_rate": 1.815217391304348e-05, | |
| "loss": 0.1192, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 1.8043478260869567e-05, | |
| "loss": 0.1748, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 1.7934782608695654e-05, | |
| "loss": 0.0779, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.782608695652174e-05, | |
| "loss": 0.1075, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 1.7717391304347828e-05, | |
| "loss": 0.1298, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 1.7608695652173915e-05, | |
| "loss": 0.0728, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "learning_rate": 1.7500000000000002e-05, | |
| "loss": 0.1189, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 1.739130434782609e-05, | |
| "loss": 0.1102, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "learning_rate": 1.7282608695652176e-05, | |
| "loss": 0.1183, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 1.7173913043478263e-05, | |
| "loss": 0.3006, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 1.706521739130435e-05, | |
| "loss": 0.1408, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 1.6956521739130437e-05, | |
| "loss": 0.141, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 1.6847826086956524e-05, | |
| "loss": 0.1208, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 1.673913043478261e-05, | |
| "loss": 0.1004, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 1.6630434782608698e-05, | |
| "loss": 0.206, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 1.6521739130434785e-05, | |
| "loss": 0.12, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 1.641304347826087e-05, | |
| "loss": 0.0705, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 1.630434782608696e-05, | |
| "loss": 0.1018, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 1.6195652173913045e-05, | |
| "loss": 0.1501, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 1.6086956521739132e-05, | |
| "loss": 0.0597, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.9865125240847784, | |
| "eval_loss": 0.05456383526325226, | |
| "eval_runtime": 6.5116, | |
| "eval_samples_per_second": 79.704, | |
| "eval_steps_per_second": 9.982, | |
| "step": 368 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 1.597826086956522e-05, | |
| "loss": 0.0878, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 1.5869565217391306e-05, | |
| "loss": 0.1651, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 1.576086956521739e-05, | |
| "loss": 0.0645, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 1.565217391304348e-05, | |
| "loss": 0.1085, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 1.5543478260869567e-05, | |
| "loss": 0.0967, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 1.5434782608695654e-05, | |
| "loss": 0.1178, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 1.532608695652174e-05, | |
| "loss": 0.0605, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 1.5217391304347828e-05, | |
| "loss": 0.1394, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 1.5108695652173915e-05, | |
| "loss": 0.1113, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 1.5000000000000002e-05, | |
| "loss": 0.0225, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 1.4891304347826087e-05, | |
| "loss": 0.1861, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 1.4782608695652174e-05, | |
| "loss": 0.0879, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 1.4673913043478263e-05, | |
| "loss": 0.094, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 1.456521739130435e-05, | |
| "loss": 0.1837, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 1.4456521739130435e-05, | |
| "loss": 0.057, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 1.4347826086956522e-05, | |
| "loss": 0.0504, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 1.423913043478261e-05, | |
| "loss": 0.03, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 1.4130434782608698e-05, | |
| "loss": 0.0637, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 1.4021739130434783e-05, | |
| "loss": 0.1572, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 1.391304347826087e-05, | |
| "loss": 0.2074, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 1.3804347826086957e-05, | |
| "loss": 0.1031, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "learning_rate": 1.3695652173913046e-05, | |
| "loss": 0.075, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 1.3586956521739133e-05, | |
| "loss": 0.0854, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 1.3478260869565218e-05, | |
| "loss": 0.0897, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 1.3369565217391305e-05, | |
| "loss": 0.1017, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 1.3260869565217392e-05, | |
| "loss": 0.132, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 1.315217391304348e-05, | |
| "loss": 0.0471, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 1.3043478260869566e-05, | |
| "loss": 0.0707, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 1.77, | |
| "learning_rate": 1.2934782608695653e-05, | |
| "loss": 0.0506, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 1.282608695652174e-05, | |
| "loss": 0.1308, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.2717391304347828e-05, | |
| "loss": 0.1188, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.2608695652173915e-05, | |
| "loss": 0.1021, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.1199, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.2391304347826088e-05, | |
| "loss": 0.1068, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.93, | |
| "learning_rate": 1.2282608695652175e-05, | |
| "loss": 0.0535, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 1.2173913043478263e-05, | |
| "loss": 0.0723, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.98, | |
| "learning_rate": 1.2065217391304348e-05, | |
| "loss": 0.2009, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.9865125240847784, | |
| "eval_loss": 0.05307452380657196, | |
| "eval_runtime": 6.4841, | |
| "eval_samples_per_second": 80.043, | |
| "eval_steps_per_second": 10.025, | |
| "step": 736 | |
| }, | |
| { | |
| "epoch": 2.01, | |
| "learning_rate": 1.1956521739130435e-05, | |
| "loss": 0.0156, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 1.1847826086956522e-05, | |
| "loss": 0.169, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 1.1739130434782611e-05, | |
| "loss": 0.0866, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "learning_rate": 1.1630434782608698e-05, | |
| "loss": 0.0973, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 1.1521739130434783e-05, | |
| "loss": 0.0427, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 1.141304347826087e-05, | |
| "loss": 0.1296, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 1.1304347826086957e-05, | |
| "loss": 0.0265, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 1.1195652173913046e-05, | |
| "loss": 0.1574, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "learning_rate": 1.1086956521739131e-05, | |
| "loss": 0.0655, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 2.26, | |
| "learning_rate": 1.0978260869565218e-05, | |
| "loss": 0.0785, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 1.0869565217391305e-05, | |
| "loss": 0.1273, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "learning_rate": 1.076086956521739e-05, | |
| "loss": 0.0374, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 1.0652173913043479e-05, | |
| "loss": 0.2576, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 1.0543478260869566e-05, | |
| "loss": 0.0417, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 1.0434782608695653e-05, | |
| "loss": 0.115, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 2.42, | |
| "learning_rate": 1.032608695652174e-05, | |
| "loss": 0.105, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 2.45, | |
| "learning_rate": 1.0217391304347829e-05, | |
| "loss": 0.1704, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 2.47, | |
| "learning_rate": 1.0108695652173914e-05, | |
| "loss": 0.0442, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 1e-05, | |
| "loss": 0.079, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 9.891304347826088e-06, | |
| "loss": 0.0214, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 2.55, | |
| "learning_rate": 9.782608695652175e-06, | |
| "loss": 0.112, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 2.58, | |
| "learning_rate": 9.673913043478262e-06, | |
| "loss": 0.0467, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 9.565217391304349e-06, | |
| "loss": 0.0944, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "learning_rate": 9.456521739130436e-06, | |
| "loss": 0.0195, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "learning_rate": 9.347826086956523e-06, | |
| "loss": 0.1084, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 9.23913043478261e-06, | |
| "loss": 0.0598, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 9.130434782608697e-06, | |
| "loss": 0.0563, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "learning_rate": 9.021739130434784e-06, | |
| "loss": 0.1212, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 8.91304347826087e-06, | |
| "loss": 0.103, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 8.804347826086957e-06, | |
| "loss": 0.0708, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 8.695652173913044e-06, | |
| "loss": 0.0639, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 8.586956521739131e-06, | |
| "loss": 0.0153, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "learning_rate": 8.478260869565218e-06, | |
| "loss": 0.028, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 2.91, | |
| "learning_rate": 8.369565217391305e-06, | |
| "loss": 0.029, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 8.260869565217392e-06, | |
| "loss": 0.0915, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 2.96, | |
| "learning_rate": 8.15217391304348e-06, | |
| "loss": 0.0186, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 8.043478260869566e-06, | |
| "loss": 0.0114, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.9903660886319846, | |
| "eval_loss": 0.04182479530572891, | |
| "eval_runtime": 6.3668, | |
| "eval_samples_per_second": 81.517, | |
| "eval_steps_per_second": 10.209, | |
| "step": 1104 | |
| }, | |
| { | |
| "epoch": 3.02, | |
| "learning_rate": 7.934782608695653e-06, | |
| "loss": 0.2106, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 7.82608695652174e-06, | |
| "loss": 0.0515, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 3.07, | |
| "learning_rate": 7.717391304347827e-06, | |
| "loss": 0.0406, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 3.1, | |
| "learning_rate": 7.608695652173914e-06, | |
| "loss": 0.0355, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 3.12, | |
| "learning_rate": 7.500000000000001e-06, | |
| "loss": 0.1842, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 7.391304347826087e-06, | |
| "loss": 0.0545, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 7.282608695652175e-06, | |
| "loss": 0.1349, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 3.21, | |
| "learning_rate": 7.173913043478261e-06, | |
| "loss": 0.0104, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 3.23, | |
| "learning_rate": 7.065217391304349e-06, | |
| "loss": 0.1324, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 6.956521739130435e-06, | |
| "loss": 0.0934, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 3.29, | |
| "learning_rate": 6.847826086956523e-06, | |
| "loss": 0.0966, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 3.32, | |
| "learning_rate": 6.739130434782609e-06, | |
| "loss": 0.0588, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 3.34, | |
| "learning_rate": 6.630434782608696e-06, | |
| "loss": 0.0802, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 6.521739130434783e-06, | |
| "loss": 0.0576, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 3.4, | |
| "learning_rate": 6.41304347826087e-06, | |
| "loss": 0.0419, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 3.42, | |
| "learning_rate": 6.304347826086958e-06, | |
| "loss": 0.0481, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 3.45, | |
| "learning_rate": 6.195652173913044e-06, | |
| "loss": 0.0861, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 6.086956521739132e-06, | |
| "loss": 0.1023, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 3.51, | |
| "learning_rate": 5.978260869565218e-06, | |
| "loss": 0.0584, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 3.53, | |
| "learning_rate": 5.8695652173913055e-06, | |
| "loss": 0.1282, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 3.56, | |
| "learning_rate": 5.760869565217392e-06, | |
| "loss": 0.0277, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 5.652173913043479e-06, | |
| "loss": 0.1837, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 3.61, | |
| "learning_rate": 5.543478260869566e-06, | |
| "loss": 0.0264, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 5.4347826086956525e-06, | |
| "loss": 0.1224, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 3.67, | |
| "learning_rate": 5.3260869565217395e-06, | |
| "loss": 0.0434, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 5.2173913043478265e-06, | |
| "loss": 0.1337, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 3.72, | |
| "learning_rate": 5.108695652173914e-06, | |
| "loss": 0.0071, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 3.75, | |
| "learning_rate": 5e-06, | |
| "loss": 0.0568, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 3.78, | |
| "learning_rate": 4.891304347826087e-06, | |
| "loss": 0.043, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 4.782608695652174e-06, | |
| "loss": 0.0719, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 4.673913043478261e-06, | |
| "loss": 0.1128, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 3.86, | |
| "learning_rate": 4.565217391304348e-06, | |
| "loss": 0.0477, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 3.89, | |
| "learning_rate": 4.456521739130435e-06, | |
| "loss": 0.0791, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 4.347826086956522e-06, | |
| "loss": 0.037, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 3.94, | |
| "learning_rate": 4.239130434782609e-06, | |
| "loss": 0.1466, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 3.97, | |
| "learning_rate": 4.130434782608696e-06, | |
| "loss": 0.0467, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 3.99, | |
| "learning_rate": 4.021739130434783e-06, | |
| "loss": 0.0998, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.9903660886319846, | |
| "eval_loss": 0.04251210391521454, | |
| "eval_runtime": 6.5268, | |
| "eval_samples_per_second": 79.518, | |
| "eval_steps_per_second": 9.959, | |
| "step": 1472 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 3.91304347826087e-06, | |
| "loss": 0.1285, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 4.05, | |
| "learning_rate": 3.804347826086957e-06, | |
| "loss": 0.1634, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 4.08, | |
| "learning_rate": 3.6956521739130436e-06, | |
| "loss": 0.0462, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 4.1, | |
| "learning_rate": 3.5869565217391305e-06, | |
| "loss": 0.0846, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 3.4782608695652175e-06, | |
| "loss": 0.1239, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 4.16, | |
| "learning_rate": 3.3695652173913045e-06, | |
| "loss": 0.1818, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 4.18, | |
| "learning_rate": 3.2608695652173914e-06, | |
| "loss": 0.021, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 3.152173913043479e-06, | |
| "loss": 0.0741, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 3.043478260869566e-06, | |
| "loss": 0.182, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 4.27, | |
| "learning_rate": 2.9347826086956528e-06, | |
| "loss": 0.0433, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 4.29, | |
| "learning_rate": 2.8260869565217393e-06, | |
| "loss": 0.0437, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 4.32, | |
| "learning_rate": 2.7173913043478263e-06, | |
| "loss": 0.0382, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 2.6086956521739132e-06, | |
| "loss": 0.046, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 4.38, | |
| "learning_rate": 2.5e-06, | |
| "loss": 0.0213, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 4.4, | |
| "learning_rate": 2.391304347826087e-06, | |
| "loss": 0.0186, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 4.43, | |
| "learning_rate": 2.282608695652174e-06, | |
| "loss": 0.0671, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 2.173913043478261e-06, | |
| "loss": 0.0908, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 4.48, | |
| "learning_rate": 2.065217391304348e-06, | |
| "loss": 0.0697, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 4.51, | |
| "learning_rate": 1.956521739130435e-06, | |
| "loss": 0.0637, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 4.54, | |
| "learning_rate": 1.8478260869565218e-06, | |
| "loss": 0.0819, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 1.7391304347826088e-06, | |
| "loss": 0.0623, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 4.59, | |
| "learning_rate": 1.6304347826086957e-06, | |
| "loss": 0.0114, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 4.62, | |
| "learning_rate": 1.521739130434783e-06, | |
| "loss": 0.0342, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 4.65, | |
| "learning_rate": 1.4130434782608697e-06, | |
| "loss": 0.0859, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 1.3043478260869566e-06, | |
| "loss": 0.0462, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 4.7, | |
| "learning_rate": 1.1956521739130436e-06, | |
| "loss": 0.1022, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 4.73, | |
| "learning_rate": 1.0869565217391306e-06, | |
| "loss": 0.0571, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 4.76, | |
| "learning_rate": 9.782608695652175e-07, | |
| "loss": 0.0108, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 8.695652173913044e-07, | |
| "loss": 0.0893, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 4.81, | |
| "learning_rate": 7.608695652173914e-07, | |
| "loss": 0.0214, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 4.84, | |
| "learning_rate": 6.521739130434783e-07, | |
| "loss": 0.0416, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 5.434782608695653e-07, | |
| "loss": 0.1022, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 4.347826086956522e-07, | |
| "loss": 0.0628, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 4.92, | |
| "learning_rate": 3.2608695652173915e-07, | |
| "loss": 0.0691, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 2.173913043478261e-07, | |
| "loss": 0.0371, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 4.97, | |
| "learning_rate": 1.0869565217391305e-07, | |
| "loss": 0.0714, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.1244, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.9922928709055877, | |
| "eval_loss": 0.03933868557214737, | |
| "eval_runtime": 6.3674, | |
| "eval_samples_per_second": 81.509, | |
| "eval_steps_per_second": 10.208, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "step": 1840, | |
| "total_flos": 1.1387447873864294e+18, | |
| "train_loss": 0.10440107471431079, | |
| "train_runtime": 430.0921, | |
| "train_samples_per_second": 34.167, | |
| "train_steps_per_second": 4.278 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 1840, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 5, | |
| "save_steps": 500, | |
| "total_flos": 1.1387447873864294e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |