| { | |
| "best_metric": 0.020489266142249107, | |
| "best_model_checkpoint": "./models/tiny-vit-GENERICO/checkpoint-1350", | |
| "epoch": 1.967930029154519, | |
| "global_step": 1350, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "learning_rate": 0.0001991253644314869, | |
| "loss": 2.9673, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 0.00019766763848396503, | |
| "loss": 0.551, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.00019620991253644317, | |
| "loss": 0.4255, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 0.00019475218658892128, | |
| "loss": 0.3714, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "learning_rate": 0.00019329446064139942, | |
| "loss": 0.4272, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 0.9147173489278753, | |
| "eval_loss": 0.24443647265434265, | |
| "eval_runtime": 286.3815, | |
| "eval_samples_per_second": 14.331, | |
| "eval_steps_per_second": 1.791, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019183673469387756, | |
| "loss": 0.4202, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 0.0001903790087463557, | |
| "loss": 0.2785, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00018892128279883382, | |
| "loss": 0.2276, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 0.00018746355685131196, | |
| "loss": 0.2418, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "learning_rate": 0.0001860058309037901, | |
| "loss": 0.2214, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 0.8379629629629629, | |
| "eval_loss": 0.4828641414642334, | |
| "eval_runtime": 278.5231, | |
| "eval_samples_per_second": 14.735, | |
| "eval_steps_per_second": 1.842, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.00018454810495626821, | |
| "loss": 0.2319, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "learning_rate": 0.00018309037900874638, | |
| "loss": 0.2103, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.0001816326530612245, | |
| "loss": 0.2667, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00018017492711370264, | |
| "loss": 0.3514, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 0.00017871720116618075, | |
| "loss": 0.3004, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_accuracy": 0.935672514619883, | |
| "eval_loss": 0.18615978956222534, | |
| "eval_runtime": 210.6691, | |
| "eval_samples_per_second": 19.481, | |
| "eval_steps_per_second": 2.435, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "learning_rate": 0.00017725947521865892, | |
| "loss": 0.2605, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "learning_rate": 0.00017580174927113703, | |
| "loss": 0.1776, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 0.00017434402332361517, | |
| "loss": 0.1727, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.00017288629737609328, | |
| "loss": 0.191, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.00017142857142857143, | |
| "loss": 0.1255, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 0.9432261208576999, | |
| "eval_loss": 0.19172662496566772, | |
| "eval_runtime": 227.2135, | |
| "eval_samples_per_second": 18.062, | |
| "eval_steps_per_second": 2.258, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "learning_rate": 0.00016997084548104957, | |
| "loss": 0.1654, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 0.0001685131195335277, | |
| "loss": 0.2456, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "learning_rate": 0.00016705539358600585, | |
| "loss": 0.2103, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 0.00016559766763848396, | |
| "loss": 0.2826, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "learning_rate": 0.00016413994169096213, | |
| "loss": 0.2402, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 0.9522417153996101, | |
| "eval_loss": 0.14588728547096252, | |
| "eval_runtime": 228.2383, | |
| "eval_samples_per_second": 17.981, | |
| "eval_steps_per_second": 2.248, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 0.00016268221574344024, | |
| "loss": 0.2109, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "learning_rate": 0.00016122448979591838, | |
| "loss": 0.2142, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0001597667638483965, | |
| "loss": 0.1838, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 0.00015830903790087464, | |
| "loss": 0.1783, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "learning_rate": 0.00015685131195335278, | |
| "loss": 0.059, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 0.9485867446393762, | |
| "eval_loss": 0.1581156998872757, | |
| "eval_runtime": 222.9736, | |
| "eval_samples_per_second": 18.406, | |
| "eval_steps_per_second": 2.301, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00015539358600583092, | |
| "loss": 0.1959, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00015393586005830903, | |
| "loss": 0.0957, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 0.00015247813411078717, | |
| "loss": 0.1786, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "learning_rate": 0.0001510204081632653, | |
| "loss": 0.1695, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 0.00014956268221574345, | |
| "loss": 0.1513, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 0.9627192982456141, | |
| "eval_loss": 0.11844132840633392, | |
| "eval_runtime": 238.6523, | |
| "eval_samples_per_second": 17.197, | |
| "eval_steps_per_second": 2.15, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "learning_rate": 0.0001481049562682216, | |
| "loss": 0.1532, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 0.0001466472303206997, | |
| "loss": 0.125, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "learning_rate": 0.00014518950437317785, | |
| "loss": 0.1735, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.000143731778425656, | |
| "loss": 0.1229, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 0.00014227405247813413, | |
| "loss": 0.1444, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_accuracy": 0.9517543859649122, | |
| "eval_loss": 0.1490934193134308, | |
| "eval_runtime": 287.634, | |
| "eval_samples_per_second": 14.268, | |
| "eval_steps_per_second": 1.784, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "learning_rate": 0.00014081632653061224, | |
| "loss": 0.1257, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00013935860058309038, | |
| "loss": 0.1251, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "learning_rate": 0.00013790087463556852, | |
| "loss": 0.0909, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 0.00013644314868804666, | |
| "loss": 0.1236, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "learning_rate": 0.00013498542274052478, | |
| "loss": 0.1385, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_accuracy": 0.9675925925925926, | |
| "eval_loss": 0.09978482127189636, | |
| "eval_runtime": 326.8281, | |
| "eval_samples_per_second": 12.557, | |
| "eval_steps_per_second": 1.57, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 0.00013352769679300292, | |
| "loss": 0.1466, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00013206997084548106, | |
| "loss": 0.1286, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 0.00013061224489795917, | |
| "loss": 0.1099, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "learning_rate": 0.00012915451895043734, | |
| "loss": 0.1559, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.00012769679300291545, | |
| "loss": 0.1263, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 0.9624756335282652, | |
| "eval_loss": 0.11673439294099808, | |
| "eval_runtime": 291.2631, | |
| "eval_samples_per_second": 14.09, | |
| "eval_steps_per_second": 1.761, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 0.0001262390670553936, | |
| "loss": 0.2311, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "learning_rate": 0.0001247813411078717, | |
| "loss": 0.103, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00012332361516034988, | |
| "loss": 0.0728, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "learning_rate": 0.000121865889212828, | |
| "loss": 0.1353, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 0.00012040816326530613, | |
| "loss": 0.1307, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 0.9471247563352827, | |
| "eval_loss": 0.16630055010318756, | |
| "eval_runtime": 312.9758, | |
| "eval_samples_per_second": 13.113, | |
| "eval_steps_per_second": 1.639, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "learning_rate": 0.00011895043731778426, | |
| "loss": 0.1407, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 0.00011749271137026238, | |
| "loss": 0.081, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "learning_rate": 0.00011603498542274054, | |
| "loss": 0.1272, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.00011457725947521867, | |
| "loss": 0.1224, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "learning_rate": 0.0001131195335276968, | |
| "loss": 0.1517, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_accuracy": 0.9451754385964912, | |
| "eval_loss": 0.18923337757587433, | |
| "eval_runtime": 328.1373, | |
| "eval_samples_per_second": 12.507, | |
| "eval_steps_per_second": 1.563, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "learning_rate": 0.00011166180758017492, | |
| "loss": 0.1336, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.00011020408163265306, | |
| "loss": 0.1245, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "learning_rate": 0.0001087463556851312, | |
| "loss": 0.0762, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00010728862973760934, | |
| "loss": 0.1901, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "learning_rate": 0.00010583090379008747, | |
| "loss": 0.1051, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 0.973196881091618, | |
| "eval_loss": 0.07433269917964935, | |
| "eval_runtime": 295.1825, | |
| "eval_samples_per_second": 13.903, | |
| "eval_steps_per_second": 1.738, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 0.0001043731778425656, | |
| "loss": 0.0844, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00010291545189504375, | |
| "loss": 0.0764, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 0.00010145772594752188, | |
| "loss": 0.0786, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "learning_rate": 0.0001, | |
| "loss": 0.0626, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 9.854227405247813e-05, | |
| "loss": 0.04, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "eval_accuracy": 0.979775828460039, | |
| "eval_loss": 0.06021393463015556, | |
| "eval_runtime": 294.0751, | |
| "eval_samples_per_second": 13.956, | |
| "eval_steps_per_second": 1.744, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 9.708454810495627e-05, | |
| "loss": 0.0473, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 1.05, | |
| "learning_rate": 9.56268221574344e-05, | |
| "loss": 0.101, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 9.416909620991254e-05, | |
| "loss": 0.0604, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "learning_rate": 9.271137026239067e-05, | |
| "loss": 0.0917, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "learning_rate": 9.125364431486881e-05, | |
| "loss": 0.047, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_accuracy": 0.976364522417154, | |
| "eval_loss": 0.07237815111875534, | |
| "eval_runtime": 314.914, | |
| "eval_samples_per_second": 13.032, | |
| "eval_steps_per_second": 1.629, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "learning_rate": 8.979591836734695e-05, | |
| "loss": 0.0216, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 8.833819241982508e-05, | |
| "loss": 0.0568, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 8.688046647230322e-05, | |
| "loss": 0.0664, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 1.15, | |
| "learning_rate": 8.542274052478134e-05, | |
| "loss": 0.0488, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "learning_rate": 8.396501457725948e-05, | |
| "loss": 0.0893, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_accuracy": 0.9834307992202729, | |
| "eval_loss": 0.05777524411678314, | |
| "eval_runtime": 310.6824, | |
| "eval_samples_per_second": 13.21, | |
| "eval_steps_per_second": 1.651, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 8.250728862973761e-05, | |
| "loss": 0.0414, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 1.2, | |
| "learning_rate": 8.104956268221575e-05, | |
| "loss": 0.0306, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 7.959183673469388e-05, | |
| "loss": 0.0164, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 7.8134110787172e-05, | |
| "loss": 0.0465, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "learning_rate": 7.667638483965015e-05, | |
| "loss": 0.0254, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_accuracy": 0.9790448343079922, | |
| "eval_loss": 0.09394790232181549, | |
| "eval_runtime": 292.1931, | |
| "eval_samples_per_second": 14.046, | |
| "eval_steps_per_second": 1.756, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "learning_rate": 7.521865889212827e-05, | |
| "loss": 0.049, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "learning_rate": 7.376093294460641e-05, | |
| "loss": 0.0482, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 1.28, | |
| "learning_rate": 7.230320699708455e-05, | |
| "loss": 0.1339, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 7.08454810495627e-05, | |
| "loss": 0.0658, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 6.938775510204082e-05, | |
| "loss": 0.1217, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_accuracy": 0.979775828460039, | |
| "eval_loss": 0.07414576411247253, | |
| "eval_runtime": 296.0615, | |
| "eval_samples_per_second": 13.862, | |
| "eval_steps_per_second": 1.733, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 1.33, | |
| "learning_rate": 6.793002915451895e-05, | |
| "loss": 0.0514, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 6.647230320699709e-05, | |
| "loss": 0.0903, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 1.36, | |
| "learning_rate": 6.501457725947522e-05, | |
| "loss": 0.0495, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "learning_rate": 6.355685131195336e-05, | |
| "loss": 0.0214, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "learning_rate": 6.209912536443149e-05, | |
| "loss": 0.081, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.38, | |
| "eval_accuracy": 0.9839181286549707, | |
| "eval_loss": 0.05142759159207344, | |
| "eval_runtime": 325.6943, | |
| "eval_samples_per_second": 12.601, | |
| "eval_steps_per_second": 1.575, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 6.0641399416909626e-05, | |
| "loss": 0.1152, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 1.41, | |
| "learning_rate": 5.918367346938776e-05, | |
| "loss": 0.0445, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 5.77259475218659e-05, | |
| "loss": 0.0555, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 1.44, | |
| "learning_rate": 5.626822157434403e-05, | |
| "loss": 0.0717, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "learning_rate": 5.4810495626822155e-05, | |
| "loss": 0.0607, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.46, | |
| "eval_accuracy": 0.9846491228070176, | |
| "eval_loss": 0.040749311447143555, | |
| "eval_runtime": 293.4556, | |
| "eval_samples_per_second": 13.985, | |
| "eval_steps_per_second": 1.748, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 5.3352769679300295e-05, | |
| "loss": 0.0147, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 1.49, | |
| "learning_rate": 5.189504373177842e-05, | |
| "loss": 0.042, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 5.043731778425656e-05, | |
| "loss": 0.0513, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "learning_rate": 4.89795918367347e-05, | |
| "loss": 0.0177, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "learning_rate": 4.752186588921283e-05, | |
| "loss": 0.0985, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_accuracy": 0.9890350877192983, | |
| "eval_loss": 0.0381675623357296, | |
| "eval_runtime": 285.8353, | |
| "eval_samples_per_second": 14.358, | |
| "eval_steps_per_second": 1.795, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 4.6064139941690965e-05, | |
| "loss": 0.0308, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "learning_rate": 4.46064139941691e-05, | |
| "loss": 0.0297, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 1.57, | |
| "learning_rate": 4.314868804664723e-05, | |
| "loss": 0.1126, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 4.1690962099125366e-05, | |
| "loss": 0.0457, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "learning_rate": 4.02332361516035e-05, | |
| "loss": 0.0492, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_accuracy": 0.9883040935672515, | |
| "eval_loss": 0.03809972107410431, | |
| "eval_runtime": 315.6444, | |
| "eval_samples_per_second": 13.002, | |
| "eval_steps_per_second": 1.625, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 1.62, | |
| "learning_rate": 3.8775510204081634e-05, | |
| "loss": 0.0364, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 3.731778425655977e-05, | |
| "loss": 0.0521, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "learning_rate": 3.58600583090379e-05, | |
| "loss": 0.0393, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "learning_rate": 3.4402332361516035e-05, | |
| "loss": 0.0541, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 3.294460641399417e-05, | |
| "loss": 0.0504, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_accuracy": 0.9880604288499025, | |
| "eval_loss": 0.03482714295387268, | |
| "eval_runtime": 330.0497, | |
| "eval_samples_per_second": 12.434, | |
| "eval_steps_per_second": 1.554, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "learning_rate": 3.148688046647231e-05, | |
| "loss": 0.0493, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 3.0029154518950437e-05, | |
| "loss": 0.034, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 1.72, | |
| "learning_rate": 2.857142857142857e-05, | |
| "loss": 0.0598, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "learning_rate": 2.7113702623906705e-05, | |
| "loss": 0.0415, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 2.5655976676384842e-05, | |
| "loss": 0.0354, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "eval_accuracy": 0.9902534113060428, | |
| "eval_loss": 0.025796251371502876, | |
| "eval_runtime": 316.0263, | |
| "eval_samples_per_second": 12.986, | |
| "eval_steps_per_second": 1.623, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "learning_rate": 2.4198250728862976e-05, | |
| "loss": 0.027, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 2.2740524781341106e-05, | |
| "loss": 0.031, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 2.1282798833819244e-05, | |
| "loss": 0.0448, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "learning_rate": 1.9825072886297377e-05, | |
| "loss": 0.0077, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "learning_rate": 1.836734693877551e-05, | |
| "loss": 0.0604, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.82, | |
| "eval_accuracy": 0.990009746588694, | |
| "eval_loss": 0.029700743034482002, | |
| "eval_runtime": 261.4803, | |
| "eval_samples_per_second": 15.695, | |
| "eval_steps_per_second": 1.962, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "learning_rate": 1.6909620991253645e-05, | |
| "loss": 0.0278, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 1.85, | |
| "learning_rate": 1.545189504373178e-05, | |
| "loss": 0.0219, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 1.3994169096209913e-05, | |
| "loss": 0.0366, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 1.88, | |
| "learning_rate": 1.2536443148688048e-05, | |
| "loss": 0.0307, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "learning_rate": 1.1078717201166182e-05, | |
| "loss": 0.0918, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.9, | |
| "eval_accuracy": 0.990009746588694, | |
| "eval_loss": 0.026875579729676247, | |
| "eval_runtime": 245.5691, | |
| "eval_samples_per_second": 16.712, | |
| "eval_steps_per_second": 2.089, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 9.620991253644314e-06, | |
| "loss": 0.0259, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "learning_rate": 8.163265306122448e-06, | |
| "loss": 0.0613, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "learning_rate": 6.705539358600584e-06, | |
| "loss": 0.0326, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 1.95, | |
| "learning_rate": 5.247813411078718e-06, | |
| "loss": 0.0315, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "learning_rate": 3.7900874635568516e-06, | |
| "loss": 0.0555, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_accuracy": 0.9926900584795322, | |
| "eval_loss": 0.020489266142249107, | |
| "eval_runtime": 250.5348, | |
| "eval_samples_per_second": 16.381, | |
| "eval_steps_per_second": 2.048, | |
| "step": 1350 | |
| } | |
| ], | |
| "max_steps": 1372, | |
| "num_train_epochs": 2, | |
| "total_flos": 2.154507190843392e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |