| { | |
| "best_metric": 0.04137137532234192, | |
| "best_model_checkpoint": "./vit-base-beans/checkpoint-1480", | |
| "epoch": 20.0, | |
| "global_step": 1840, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.11, | |
| "learning_rate": 9.945652173913043e-05, | |
| "loss": 3.3812, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 9.891304347826087e-05, | |
| "loss": 3.3288, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 9.836956521739132e-05, | |
| "loss": 3.3101, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "learning_rate": 9.782608695652174e-05, | |
| "loss": 3.2579, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_accuracy": 0.17882919005613473, | |
| "eval_loss": 3.1847527027130127, | |
| "eval_runtime": 13.4342, | |
| "eval_samples_per_second": 92.823, | |
| "eval_steps_per_second": 11.612, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 9.728260869565217e-05, | |
| "loss": 3.1255, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 9.673913043478261e-05, | |
| "loss": 3.0162, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 9.619565217391306e-05, | |
| "loss": 2.8706, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 9.565217391304348e-05, | |
| "loss": 2.7157, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.43785084202085006, | |
| "eval_loss": 2.5922651290893555, | |
| "eval_runtime": 13.9416, | |
| "eval_samples_per_second": 89.445, | |
| "eval_steps_per_second": 11.19, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 9.510869565217391e-05, | |
| "loss": 2.5322, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.456521739130435e-05, | |
| "loss": 2.3504, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 9.402173913043478e-05, | |
| "loss": 2.1887, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 9.347826086956522e-05, | |
| "loss": 2.0664, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_accuracy": 0.6696070569366479, | |
| "eval_loss": 1.9748882055282593, | |
| "eval_runtime": 13.4691, | |
| "eval_samples_per_second": 92.582, | |
| "eval_steps_per_second": 11.582, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 9.293478260869566e-05, | |
| "loss": 1.8705, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 9.239130434782609e-05, | |
| "loss": 1.688, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 9.184782608695652e-05, | |
| "loss": 1.5939, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "learning_rate": 9.130434782608696e-05, | |
| "loss": 1.4765, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_accuracy": 0.917401764234162, | |
| "eval_loss": 1.3413872718811035, | |
| "eval_runtime": 13.9419, | |
| "eval_samples_per_second": 89.442, | |
| "eval_steps_per_second": 11.189, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 9.07608695652174e-05, | |
| "loss": 1.3014, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 9.021739130434783e-05, | |
| "loss": 1.201, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "learning_rate": 8.967391304347826e-05, | |
| "loss": 1.0165, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "learning_rate": 8.91304347826087e-05, | |
| "loss": 0.965, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_accuracy": 0.9615076182838813, | |
| "eval_loss": 0.9264132380485535, | |
| "eval_runtime": 13.1542, | |
| "eval_samples_per_second": 94.798, | |
| "eval_steps_per_second": 11.859, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 8.858695652173914e-05, | |
| "loss": 0.9096, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.39, | |
| "learning_rate": 8.804347826086957e-05, | |
| "loss": 0.8527, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.5, | |
| "learning_rate": 8.75e-05, | |
| "loss": 0.759, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 8.695652173913044e-05, | |
| "loss": 0.7163, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_accuracy": 0.9647153167602245, | |
| "eval_loss": 0.6652109622955322, | |
| "eval_runtime": 14.0687, | |
| "eval_samples_per_second": 88.637, | |
| "eval_steps_per_second": 11.088, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "learning_rate": 8.641304347826087e-05, | |
| "loss": 0.6403, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.83, | |
| "learning_rate": 8.586956521739131e-05, | |
| "loss": 0.5857, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 8.532608695652174e-05, | |
| "loss": 0.5406, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "learning_rate": 8.478260869565218e-05, | |
| "loss": 0.5061, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.04, | |
| "eval_accuracy": 0.9687249398556536, | |
| "eval_loss": 0.5080122947692871, | |
| "eval_runtime": 13.0785, | |
| "eval_samples_per_second": 95.347, | |
| "eval_steps_per_second": 11.928, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 3.15, | |
| "learning_rate": 8.423913043478261e-05, | |
| "loss": 0.4622, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 3.26, | |
| "learning_rate": 8.369565217391305e-05, | |
| "loss": 0.4919, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 3.37, | |
| "learning_rate": 8.315217391304349e-05, | |
| "loss": 0.371, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "learning_rate": 8.260869565217392e-05, | |
| "loss": 0.3883, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.48, | |
| "eval_accuracy": 0.9759422614274258, | |
| "eval_loss": 0.3574630916118622, | |
| "eval_runtime": 13.9479, | |
| "eval_samples_per_second": 89.404, | |
| "eval_steps_per_second": 11.184, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.59, | |
| "learning_rate": 8.206521739130435e-05, | |
| "loss": 0.3831, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.7, | |
| "learning_rate": 8.152173913043478e-05, | |
| "loss": 0.3329, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.8, | |
| "learning_rate": 8.097826086956523e-05, | |
| "loss": 0.3383, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "learning_rate": 8.043478260869566e-05, | |
| "loss": 0.3328, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.91, | |
| "eval_accuracy": 0.9839615076182838, | |
| "eval_loss": 0.27629122138023376, | |
| "eval_runtime": 13.7308, | |
| "eval_samples_per_second": 90.818, | |
| "eval_steps_per_second": 11.361, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 7.989130434782609e-05, | |
| "loss": 0.2387, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 4.13, | |
| "learning_rate": 7.934782608695653e-05, | |
| "loss": 0.2509, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 4.24, | |
| "learning_rate": 7.880434782608696e-05, | |
| "loss": 0.2259, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "learning_rate": 7.82608695652174e-05, | |
| "loss": 0.2049, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.35, | |
| "eval_accuracy": 0.9855653568564555, | |
| "eval_loss": 0.2094665914773941, | |
| "eval_runtime": 13.8999, | |
| "eval_samples_per_second": 89.713, | |
| "eval_steps_per_second": 11.223, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 4.46, | |
| "learning_rate": 7.771739130434783e-05, | |
| "loss": 0.1979, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 4.57, | |
| "learning_rate": 7.717391304347827e-05, | |
| "loss": 0.1703, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 7.66304347826087e-05, | |
| "loss": 0.1771, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "learning_rate": 7.608695652173914e-05, | |
| "loss": 0.2078, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 4.78, | |
| "eval_accuracy": 0.9871692060946271, | |
| "eval_loss": 0.19693857431411743, | |
| "eval_runtime": 13.7091, | |
| "eval_samples_per_second": 90.961, | |
| "eval_steps_per_second": 11.379, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 4.89, | |
| "learning_rate": 7.554347826086957e-05, | |
| "loss": 0.1564, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 7.500000000000001e-05, | |
| "loss": 0.1512, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 5.11, | |
| "learning_rate": 7.445652173913044e-05, | |
| "loss": 0.1339, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "learning_rate": 7.391304347826086e-05, | |
| "loss": 0.1447, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 5.22, | |
| "eval_accuracy": 0.9871692060946271, | |
| "eval_loss": 0.14835722744464874, | |
| "eval_runtime": 13.9008, | |
| "eval_samples_per_second": 89.707, | |
| "eval_steps_per_second": 11.222, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 7.336956521739132e-05, | |
| "loss": 0.1207, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 5.43, | |
| "learning_rate": 7.282608695652175e-05, | |
| "loss": 0.1294, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 5.54, | |
| "learning_rate": 7.228260869565217e-05, | |
| "loss": 0.116, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "learning_rate": 7.17391304347826e-05, | |
| "loss": 0.1401, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 5.65, | |
| "eval_accuracy": 0.9839615076182838, | |
| "eval_loss": 0.14811548590660095, | |
| "eval_runtime": 13.5209, | |
| "eval_samples_per_second": 92.228, | |
| "eval_steps_per_second": 11.538, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 5.76, | |
| "learning_rate": 7.119565217391306e-05, | |
| "loss": 0.0904, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 5.87, | |
| "learning_rate": 7.065217391304349e-05, | |
| "loss": 0.1099, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 7.010869565217391e-05, | |
| "loss": 0.1599, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "learning_rate": 6.956521739130436e-05, | |
| "loss": 0.1232, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 6.09, | |
| "eval_accuracy": 0.991980753809142, | |
| "eval_loss": 0.11416751146316528, | |
| "eval_runtime": 13.9361, | |
| "eval_samples_per_second": 89.48, | |
| "eval_steps_per_second": 11.194, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 6.2, | |
| "learning_rate": 6.902173913043478e-05, | |
| "loss": 0.1381, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 6.3, | |
| "learning_rate": 6.847826086956522e-05, | |
| "loss": 0.1001, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 6.41, | |
| "learning_rate": 6.793478260869565e-05, | |
| "loss": 0.0823, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "learning_rate": 6.73913043478261e-05, | |
| "loss": 0.0725, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 6.52, | |
| "eval_accuracy": 0.9879711307137129, | |
| "eval_loss": 0.10076911747455597, | |
| "eval_runtime": 13.8114, | |
| "eval_samples_per_second": 90.288, | |
| "eval_steps_per_second": 11.295, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 6.63, | |
| "learning_rate": 6.684782608695652e-05, | |
| "loss": 0.0852, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 6.74, | |
| "learning_rate": 6.630434782608695e-05, | |
| "loss": 0.0723, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 6.85, | |
| "learning_rate": 6.576086956521739e-05, | |
| "loss": 0.0881, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "learning_rate": 6.521739130434783e-05, | |
| "loss": 0.0934, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 6.96, | |
| "eval_accuracy": 0.9895749799518845, | |
| "eval_loss": 0.09398525953292847, | |
| "eval_runtime": 14.2025, | |
| "eval_samples_per_second": 87.801, | |
| "eval_steps_per_second": 10.984, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 7.07, | |
| "learning_rate": 6.467391304347826e-05, | |
| "loss": 0.0668, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 7.17, | |
| "learning_rate": 6.413043478260869e-05, | |
| "loss": 0.0586, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 7.28, | |
| "learning_rate": 6.358695652173913e-05, | |
| "loss": 0.0543, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "learning_rate": 6.304347826086957e-05, | |
| "loss": 0.053, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 7.39, | |
| "eval_accuracy": 0.9895749799518845, | |
| "eval_loss": 0.08539092540740967, | |
| "eval_runtime": 13.9817, | |
| "eval_samples_per_second": 89.188, | |
| "eval_steps_per_second": 11.157, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 7.5, | |
| "learning_rate": 6.25e-05, | |
| "loss": 0.0514, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 7.61, | |
| "learning_rate": 6.195652173913043e-05, | |
| "loss": 0.0491, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 7.72, | |
| "learning_rate": 6.141304347826087e-05, | |
| "loss": 0.0481, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "learning_rate": 6.086956521739131e-05, | |
| "loss": 0.0469, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 7.83, | |
| "eval_accuracy": 0.9903769045709703, | |
| "eval_loss": 0.06862174719572067, | |
| "eval_runtime": 14.4287, | |
| "eval_samples_per_second": 86.425, | |
| "eval_steps_per_second": 10.812, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 7.93, | |
| "learning_rate": 6.032608695652174e-05, | |
| "loss": 0.0693, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 5.9782608695652175e-05, | |
| "loss": 0.0664, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 8.15, | |
| "learning_rate": 5.923913043478261e-05, | |
| "loss": 0.0502, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "learning_rate": 5.869565217391305e-05, | |
| "loss": 0.0429, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 8.26, | |
| "eval_accuracy": 0.9863672814755413, | |
| "eval_loss": 0.0824466422200203, | |
| "eval_runtime": 13.8977, | |
| "eval_samples_per_second": 89.727, | |
| "eval_steps_per_second": 11.225, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 8.37, | |
| "learning_rate": 5.815217391304349e-05, | |
| "loss": 0.0622, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 8.48, | |
| "learning_rate": 5.7608695652173915e-05, | |
| "loss": 0.0394, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 8.59, | |
| "learning_rate": 5.706521739130435e-05, | |
| "loss": 0.0375, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "learning_rate": 5.652173913043478e-05, | |
| "loss": 0.0371, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 8.7, | |
| "eval_accuracy": 0.991980753809142, | |
| "eval_loss": 0.07010400295257568, | |
| "eval_runtime": 13.4894, | |
| "eval_samples_per_second": 92.443, | |
| "eval_steps_per_second": 11.565, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 8.8, | |
| "learning_rate": 5.5978260869565226e-05, | |
| "loss": 0.036, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 8.91, | |
| "learning_rate": 5.5434782608695654e-05, | |
| "loss": 0.0352, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 9.02, | |
| "learning_rate": 5.489130434782609e-05, | |
| "loss": 0.0344, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "learning_rate": 5.4347826086956524e-05, | |
| "loss": 0.033, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 9.13, | |
| "eval_accuracy": 0.991980753809142, | |
| "eval_loss": 0.06847481429576874, | |
| "eval_runtime": 13.9465, | |
| "eval_samples_per_second": 89.413, | |
| "eval_steps_per_second": 11.186, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 9.24, | |
| "learning_rate": 5.380434782608695e-05, | |
| "loss": 0.0327, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 5.32608695652174e-05, | |
| "loss": 0.0318, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 9.46, | |
| "learning_rate": 5.271739130434783e-05, | |
| "loss": 0.0315, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "learning_rate": 5.217391304347826e-05, | |
| "loss": 0.0308, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 9.57, | |
| "eval_accuracy": 0.991980753809142, | |
| "eval_loss": 0.06314855068922043, | |
| "eval_runtime": 13.4895, | |
| "eval_samples_per_second": 92.442, | |
| "eval_steps_per_second": 11.565, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 9.67, | |
| "learning_rate": 5.163043478260869e-05, | |
| "loss": 0.0502, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 9.78, | |
| "learning_rate": 5.108695652173914e-05, | |
| "loss": 0.03, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 9.89, | |
| "learning_rate": 5.054347826086957e-05, | |
| "loss": 0.0294, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 5e-05, | |
| "loss": 0.0398, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.9927826784282278, | |
| "eval_loss": 0.05900084227323532, | |
| "eval_runtime": 14.0073, | |
| "eval_samples_per_second": 89.025, | |
| "eval_steps_per_second": 11.137, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 10.11, | |
| "learning_rate": 4.945652173913044e-05, | |
| "loss": 0.03, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 10.22, | |
| "learning_rate": 4.891304347826087e-05, | |
| "loss": 0.029, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 10.33, | |
| "learning_rate": 4.836956521739131e-05, | |
| "loss": 0.0273, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 10.43, | |
| "learning_rate": 4.782608695652174e-05, | |
| "loss": 0.0453, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 10.43, | |
| "eval_accuracy": 0.9895749799518845, | |
| "eval_loss": 0.062146905809640884, | |
| "eval_runtime": 14.1053, | |
| "eval_samples_per_second": 88.406, | |
| "eval_steps_per_second": 11.06, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 10.54, | |
| "learning_rate": 4.7282608695652177e-05, | |
| "loss": 0.0415, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "learning_rate": 4.673913043478261e-05, | |
| "loss": 0.0268, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 10.76, | |
| "learning_rate": 4.6195652173913046e-05, | |
| "loss": 0.0282, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "learning_rate": 4.565217391304348e-05, | |
| "loss": 0.026, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 10.87, | |
| "eval_accuracy": 0.9855653568564555, | |
| "eval_loss": 0.0649920180439949, | |
| "eval_runtime": 13.8769, | |
| "eval_samples_per_second": 89.861, | |
| "eval_steps_per_second": 11.242, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 10.98, | |
| "learning_rate": 4.5108695652173916e-05, | |
| "loss": 0.0255, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 11.09, | |
| "learning_rate": 4.456521739130435e-05, | |
| "loss": 0.0246, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 11.2, | |
| "learning_rate": 4.4021739130434786e-05, | |
| "loss": 0.0264, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 11.3, | |
| "learning_rate": 4.347826086956522e-05, | |
| "loss": 0.0257, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 11.3, | |
| "eval_accuracy": 0.9927826784282278, | |
| "eval_loss": 0.04654848575592041, | |
| "eval_runtime": 13.6877, | |
| "eval_samples_per_second": 91.103, | |
| "eval_steps_per_second": 11.397, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 11.41, | |
| "learning_rate": 4.2934782608695655e-05, | |
| "loss": 0.0237, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 11.52, | |
| "learning_rate": 4.239130434782609e-05, | |
| "loss": 0.0233, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 11.63, | |
| "learning_rate": 4.1847826086956525e-05, | |
| "loss": 0.0231, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 11.74, | |
| "learning_rate": 4.130434782608696e-05, | |
| "loss": 0.041, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 11.74, | |
| "eval_accuracy": 0.9927826784282278, | |
| "eval_loss": 0.04421408474445343, | |
| "eval_runtime": 14.1229, | |
| "eval_samples_per_second": 88.296, | |
| "eval_steps_per_second": 11.046, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 11.85, | |
| "learning_rate": 4.076086956521739e-05, | |
| "loss": 0.0234, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "learning_rate": 4.021739130434783e-05, | |
| "loss": 0.0221, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 12.07, | |
| "learning_rate": 3.9673913043478264e-05, | |
| "loss": 0.0251, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "learning_rate": 3.91304347826087e-05, | |
| "loss": 0.0223, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 12.17, | |
| "eval_accuracy": 0.9863672814755413, | |
| "eval_loss": 0.06379802525043488, | |
| "eval_runtime": 13.3726, | |
| "eval_samples_per_second": 93.25, | |
| "eval_steps_per_second": 11.666, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 12.28, | |
| "learning_rate": 3.8586956521739134e-05, | |
| "loss": 0.0222, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 12.39, | |
| "learning_rate": 3.804347826086957e-05, | |
| "loss": 0.0208, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 12.5, | |
| "learning_rate": 3.7500000000000003e-05, | |
| "loss": 0.0207, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 12.61, | |
| "learning_rate": 3.695652173913043e-05, | |
| "loss": 0.0205, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 12.61, | |
| "eval_accuracy": 0.9911788291900562, | |
| "eval_loss": 0.050300538539886475, | |
| "eval_runtime": 14.0669, | |
| "eval_samples_per_second": 88.648, | |
| "eval_steps_per_second": 11.09, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 12.72, | |
| "learning_rate": 3.641304347826087e-05, | |
| "loss": 0.0331, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 12.83, | |
| "learning_rate": 3.58695652173913e-05, | |
| "loss": 0.021, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 12.93, | |
| "learning_rate": 3.532608695652174e-05, | |
| "loss": 0.0203, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "learning_rate": 3.478260869565218e-05, | |
| "loss": 0.0221, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 13.04, | |
| "eval_accuracy": 0.991980753809142, | |
| "eval_loss": 0.047799013555049896, | |
| "eval_runtime": 13.3712, | |
| "eval_samples_per_second": 93.26, | |
| "eval_steps_per_second": 11.667, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 13.15, | |
| "learning_rate": 3.423913043478261e-05, | |
| "loss": 0.0191, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 13.26, | |
| "learning_rate": 3.369565217391305e-05, | |
| "loss": 0.0195, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 13.37, | |
| "learning_rate": 3.3152173913043475e-05, | |
| "loss": 0.0188, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 13.48, | |
| "learning_rate": 3.260869565217392e-05, | |
| "loss": 0.0188, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 13.48, | |
| "eval_accuracy": 0.9911788291900562, | |
| "eval_loss": 0.04699365794658661, | |
| "eval_runtime": 13.8942, | |
| "eval_samples_per_second": 89.75, | |
| "eval_steps_per_second": 11.228, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 13.59, | |
| "learning_rate": 3.2065217391304345e-05, | |
| "loss": 0.019, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 13.7, | |
| "learning_rate": 3.152173913043479e-05, | |
| "loss": 0.0184, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 13.8, | |
| "learning_rate": 3.0978260869565215e-05, | |
| "loss": 0.0179, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 13.91, | |
| "learning_rate": 3.0434782608695656e-05, | |
| "loss": 0.0302, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 13.91, | |
| "eval_accuracy": 0.9927826784282278, | |
| "eval_loss": 0.04419828951358795, | |
| "eval_runtime": 13.9931, | |
| "eval_samples_per_second": 89.115, | |
| "eval_steps_per_second": 11.148, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "learning_rate": 2.9891304347826088e-05, | |
| "loss": 0.0182, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 14.13, | |
| "learning_rate": 2.9347826086956526e-05, | |
| "loss": 0.0216, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 14.24, | |
| "learning_rate": 2.8804347826086957e-05, | |
| "loss": 0.0174, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "learning_rate": 2.826086956521739e-05, | |
| "loss": 0.0171, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 14.35, | |
| "eval_accuracy": 0.9935846030473136, | |
| "eval_loss": 0.04177280142903328, | |
| "eval_runtime": 13.9993, | |
| "eval_samples_per_second": 89.076, | |
| "eval_steps_per_second": 11.143, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 14.46, | |
| "learning_rate": 2.7717391304347827e-05, | |
| "loss": 0.0172, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 14.57, | |
| "learning_rate": 2.7173913043478262e-05, | |
| "loss": 0.0173, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "learning_rate": 2.66304347826087e-05, | |
| "loss": 0.0259, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 14.78, | |
| "learning_rate": 2.608695652173913e-05, | |
| "loss": 0.0197, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 14.78, | |
| "eval_accuracy": 0.991980753809142, | |
| "eval_loss": 0.04225374758243561, | |
| "eval_runtime": 14.4748, | |
| "eval_samples_per_second": 86.15, | |
| "eval_steps_per_second": 10.777, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 14.89, | |
| "learning_rate": 2.554347826086957e-05, | |
| "loss": 0.0166, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0163, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 15.11, | |
| "learning_rate": 2.4456521739130436e-05, | |
| "loss": 0.0164, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 15.22, | |
| "learning_rate": 2.391304347826087e-05, | |
| "loss": 0.0162, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 15.22, | |
| "eval_accuracy": 0.9927826784282278, | |
| "eval_loss": 0.04216426983475685, | |
| "eval_runtime": 14.0671, | |
| "eval_samples_per_second": 88.646, | |
| "eval_steps_per_second": 11.09, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 15.33, | |
| "learning_rate": 2.3369565217391306e-05, | |
| "loss": 0.0172, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 15.43, | |
| "learning_rate": 2.282608695652174e-05, | |
| "loss": 0.016, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 15.54, | |
| "learning_rate": 2.2282608695652175e-05, | |
| "loss": 0.0158, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 15.65, | |
| "learning_rate": 2.173913043478261e-05, | |
| "loss": 0.0159, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 15.65, | |
| "eval_accuracy": 0.991980753809142, | |
| "eval_loss": 0.043235816061496735, | |
| "eval_runtime": 13.435, | |
| "eval_samples_per_second": 92.817, | |
| "eval_steps_per_second": 11.611, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 15.76, | |
| "learning_rate": 2.1195652173913045e-05, | |
| "loss": 0.0158, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 15.87, | |
| "learning_rate": 2.065217391304348e-05, | |
| "loss": 0.0252, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 15.98, | |
| "learning_rate": 2.0108695652173915e-05, | |
| "loss": 0.0156, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 16.09, | |
| "learning_rate": 1.956521739130435e-05, | |
| "loss": 0.0155, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 16.09, | |
| "eval_accuracy": 0.9911788291900562, | |
| "eval_loss": 0.04137137532234192, | |
| "eval_runtime": 13.9815, | |
| "eval_samples_per_second": 89.189, | |
| "eval_steps_per_second": 11.158, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 16.2, | |
| "learning_rate": 1.9021739130434784e-05, | |
| "loss": 0.0175, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 16.3, | |
| "learning_rate": 1.8478260869565216e-05, | |
| "loss": 0.0155, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 16.41, | |
| "learning_rate": 1.793478260869565e-05, | |
| "loss": 0.0258, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 16.52, | |
| "learning_rate": 1.739130434782609e-05, | |
| "loss": 0.015, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 16.52, | |
| "eval_accuracy": 0.9911788291900562, | |
| "eval_loss": 0.0487416572868824, | |
| "eval_runtime": 13.4779, | |
| "eval_samples_per_second": 92.522, | |
| "eval_steps_per_second": 11.575, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 16.63, | |
| "learning_rate": 1.6847826086956524e-05, | |
| "loss": 0.0152, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 16.74, | |
| "learning_rate": 1.630434782608696e-05, | |
| "loss": 0.0174, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 16.85, | |
| "learning_rate": 1.5760869565217393e-05, | |
| "loss": 0.0147, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 16.96, | |
| "learning_rate": 1.5217391304347828e-05, | |
| "loss": 0.015, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 16.96, | |
| "eval_accuracy": 0.991980753809142, | |
| "eval_loss": 0.04399973526597023, | |
| "eval_runtime": 14.0057, | |
| "eval_samples_per_second": 89.035, | |
| "eval_steps_per_second": 11.138, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 17.07, | |
| "learning_rate": 1.4673913043478263e-05, | |
| "loss": 0.0148, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 17.17, | |
| "learning_rate": 1.4130434782608694e-05, | |
| "loss": 0.0147, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 17.28, | |
| "learning_rate": 1.3586956521739131e-05, | |
| "loss": 0.0148, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 17.39, | |
| "learning_rate": 1.3043478260869566e-05, | |
| "loss": 0.0146, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 17.39, | |
| "eval_accuracy": 0.991980753809142, | |
| "eval_loss": 0.04343697056174278, | |
| "eval_runtime": 14.4989, | |
| "eval_samples_per_second": 86.007, | |
| "eval_steps_per_second": 10.759, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 17.5, | |
| "learning_rate": 1.25e-05, | |
| "loss": 0.0145, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 17.61, | |
| "learning_rate": 1.1956521739130435e-05, | |
| "loss": 0.0144, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 17.72, | |
| "learning_rate": 1.141304347826087e-05, | |
| "loss": 0.0149, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 17.83, | |
| "learning_rate": 1.0869565217391305e-05, | |
| "loss": 0.0143, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 17.83, | |
| "eval_accuracy": 0.991980753809142, | |
| "eval_loss": 0.042883455753326416, | |
| "eval_runtime": 14.0877, | |
| "eval_samples_per_second": 88.517, | |
| "eval_steps_per_second": 11.073, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 17.93, | |
| "learning_rate": 1.032608695652174e-05, | |
| "loss": 0.0142, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 18.04, | |
| "learning_rate": 9.782608695652175e-06, | |
| "loss": 0.0225, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 18.15, | |
| "learning_rate": 9.239130434782608e-06, | |
| "loss": 0.0146, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 18.26, | |
| "learning_rate": 8.695652173913044e-06, | |
| "loss": 0.0143, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 18.26, | |
| "eval_accuracy": 0.9911788291900562, | |
| "eval_loss": 0.04524253308773041, | |
| "eval_runtime": 14.4999, | |
| "eval_samples_per_second": 86.001, | |
| "eval_steps_per_second": 10.759, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 18.37, | |
| "learning_rate": 8.15217391304348e-06, | |
| "loss": 0.0154, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 18.48, | |
| "learning_rate": 7.608695652173914e-06, | |
| "loss": 0.0144, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 18.59, | |
| "learning_rate": 7.065217391304347e-06, | |
| "loss": 0.014, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 18.7, | |
| "learning_rate": 6.521739130434783e-06, | |
| "loss": 0.014, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 18.7, | |
| "eval_accuracy": 0.9911788291900562, | |
| "eval_loss": 0.04453733563423157, | |
| "eval_runtime": 14.0735, | |
| "eval_samples_per_second": 88.606, | |
| "eval_steps_per_second": 11.085, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 18.8, | |
| "learning_rate": 5.978260869565218e-06, | |
| "loss": 0.018, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 18.91, | |
| "learning_rate": 5.4347826086956525e-06, | |
| "loss": 0.014, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 19.02, | |
| "learning_rate": 4.891304347826087e-06, | |
| "loss": 0.0155, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 19.13, | |
| "learning_rate": 4.347826086956522e-06, | |
| "loss": 0.0141, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 19.13, | |
| "eval_accuracy": 0.9911788291900562, | |
| "eval_loss": 0.048826370388269424, | |
| "eval_runtime": 14.2162, | |
| "eval_samples_per_second": 87.717, | |
| "eval_steps_per_second": 10.973, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 19.24, | |
| "learning_rate": 3.804347826086957e-06, | |
| "loss": 0.0139, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "learning_rate": 3.2608695652173914e-06, | |
| "loss": 0.0139, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 19.46, | |
| "learning_rate": 2.7173913043478263e-06, | |
| "loss": 0.0139, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 19.57, | |
| "learning_rate": 2.173913043478261e-06, | |
| "loss": 0.0138, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 19.57, | |
| "eval_accuracy": 0.9911788291900562, | |
| "eval_loss": 0.048504043370485306, | |
| "eval_runtime": 13.6564, | |
| "eval_samples_per_second": 91.312, | |
| "eval_steps_per_second": 11.423, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 19.67, | |
| "learning_rate": 1.6304347826086957e-06, | |
| "loss": 0.0144, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 19.78, | |
| "learning_rate": 1.0869565217391306e-06, | |
| "loss": 0.0155, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 19.89, | |
| "learning_rate": 5.434782608695653e-07, | |
| "loss": 0.0141, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 0.0, | |
| "loss": 0.0138, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.9911788291900562, | |
| "eval_loss": 0.0495075099170208, | |
| "eval_runtime": 14.095, | |
| "eval_samples_per_second": 88.471, | |
| "eval_steps_per_second": 11.068, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "step": 1840, | |
| "total_flos": 3.419773941089157e+18, | |
| "train_loss": 0.33101742866894474, | |
| "train_runtime": 1771.2395, | |
| "train_samples_per_second": 24.909, | |
| "train_steps_per_second": 1.039 | |
| } | |
| ], | |
| "max_steps": 1840, | |
| "num_train_epochs": 20, | |
| "total_flos": 3.419773941089157e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |