{ "best_metric": 0.020489266142249107, "best_model_checkpoint": "./models/tiny-vit-GENERICO/checkpoint-1350", "epoch": 1.967930029154519, "global_step": 1350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.01, "learning_rate": 0.0001991253644314869, "loss": 2.9673, "step": 10 }, { "epoch": 0.03, "learning_rate": 0.00019766763848396503, "loss": 0.551, "step": 20 }, { "epoch": 0.04, "learning_rate": 0.00019620991253644317, "loss": 0.4255, "step": 30 }, { "epoch": 0.06, "learning_rate": 0.00019475218658892128, "loss": 0.3714, "step": 40 }, { "epoch": 0.07, "learning_rate": 0.00019329446064139942, "loss": 0.4272, "step": 50 }, { "epoch": 0.07, "eval_accuracy": 0.9147173489278753, "eval_loss": 0.24443647265434265, "eval_runtime": 286.3815, "eval_samples_per_second": 14.331, "eval_steps_per_second": 1.791, "step": 50 }, { "epoch": 0.09, "learning_rate": 0.00019183673469387756, "loss": 0.4202, "step": 60 }, { "epoch": 0.1, "learning_rate": 0.0001903790087463557, "loss": 0.2785, "step": 70 }, { "epoch": 0.12, "learning_rate": 0.00018892128279883382, "loss": 0.2276, "step": 80 }, { "epoch": 0.13, "learning_rate": 0.00018746355685131196, "loss": 0.2418, "step": 90 }, { "epoch": 0.15, "learning_rate": 0.0001860058309037901, "loss": 0.2214, "step": 100 }, { "epoch": 0.15, "eval_accuracy": 0.8379629629629629, "eval_loss": 0.4828641414642334, "eval_runtime": 278.5231, "eval_samples_per_second": 14.735, "eval_steps_per_second": 1.842, "step": 100 }, { "epoch": 0.16, "learning_rate": 0.00018454810495626821, "loss": 0.2319, "step": 110 }, { "epoch": 0.17, "learning_rate": 0.00018309037900874638, "loss": 0.2103, "step": 120 }, { "epoch": 0.19, "learning_rate": 0.0001816326530612245, "loss": 0.2667, "step": 130 }, { "epoch": 0.2, "learning_rate": 0.00018017492711370264, "loss": 0.3514, "step": 140 }, { "epoch": 0.22, "learning_rate": 0.00017871720116618075, "loss": 0.3004, "step": 150 }, { "epoch": 0.22, "eval_accuracy": 0.935672514619883, "eval_loss": 0.18615978956222534, "eval_runtime": 210.6691, "eval_samples_per_second": 19.481, "eval_steps_per_second": 2.435, "step": 150 }, { "epoch": 0.23, "learning_rate": 0.00017725947521865892, "loss": 0.2605, "step": 160 }, { "epoch": 0.25, "learning_rate": 0.00017580174927113703, "loss": 0.1776, "step": 170 }, { "epoch": 0.26, "learning_rate": 0.00017434402332361517, "loss": 0.1727, "step": 180 }, { "epoch": 0.28, "learning_rate": 0.00017288629737609328, "loss": 0.191, "step": 190 }, { "epoch": 0.29, "learning_rate": 0.00017142857142857143, "loss": 0.1255, "step": 200 }, { "epoch": 0.29, "eval_accuracy": 0.9432261208576999, "eval_loss": 0.19172662496566772, "eval_runtime": 227.2135, "eval_samples_per_second": 18.062, "eval_steps_per_second": 2.258, "step": 200 }, { "epoch": 0.31, "learning_rate": 0.00016997084548104957, "loss": 0.1654, "step": 210 }, { "epoch": 0.32, "learning_rate": 0.0001685131195335277, "loss": 0.2456, "step": 220 }, { "epoch": 0.34, "learning_rate": 0.00016705539358600585, "loss": 0.2103, "step": 230 }, { "epoch": 0.35, "learning_rate": 0.00016559766763848396, "loss": 0.2826, "step": 240 }, { "epoch": 0.36, "learning_rate": 0.00016413994169096213, "loss": 0.2402, "step": 250 }, { "epoch": 0.36, "eval_accuracy": 0.9522417153996101, "eval_loss": 0.14588728547096252, "eval_runtime": 228.2383, "eval_samples_per_second": 17.981, "eval_steps_per_second": 2.248, "step": 250 }, { "epoch": 0.38, "learning_rate": 0.00016268221574344024, "loss": 0.2109, "step": 260 }, { "epoch": 0.39, "learning_rate": 0.00016122448979591838, "loss": 0.2142, "step": 270 }, { "epoch": 0.41, "learning_rate": 0.0001597667638483965, "loss": 0.1838, "step": 280 }, { "epoch": 0.42, "learning_rate": 0.00015830903790087464, "loss": 0.1783, "step": 290 }, { "epoch": 0.44, "learning_rate": 0.00015685131195335278, "loss": 0.059, "step": 300 }, { "epoch": 0.44, "eval_accuracy": 0.9485867446393762, "eval_loss": 0.1581156998872757, "eval_runtime": 222.9736, "eval_samples_per_second": 18.406, "eval_steps_per_second": 2.301, "step": 300 }, { "epoch": 0.45, "learning_rate": 0.00015539358600583092, "loss": 0.1959, "step": 310 }, { "epoch": 0.47, "learning_rate": 0.00015393586005830903, "loss": 0.0957, "step": 320 }, { "epoch": 0.48, "learning_rate": 0.00015247813411078717, "loss": 0.1786, "step": 330 }, { "epoch": 0.5, "learning_rate": 0.0001510204081632653, "loss": 0.1695, "step": 340 }, { "epoch": 0.51, "learning_rate": 0.00014956268221574345, "loss": 0.1513, "step": 350 }, { "epoch": 0.51, "eval_accuracy": 0.9627192982456141, "eval_loss": 0.11844132840633392, "eval_runtime": 238.6523, "eval_samples_per_second": 17.197, "eval_steps_per_second": 2.15, "step": 350 }, { "epoch": 0.52, "learning_rate": 0.0001481049562682216, "loss": 0.1532, "step": 360 }, { "epoch": 0.54, "learning_rate": 0.0001466472303206997, "loss": 0.125, "step": 370 }, { "epoch": 0.55, "learning_rate": 0.00014518950437317785, "loss": 0.1735, "step": 380 }, { "epoch": 0.57, "learning_rate": 0.000143731778425656, "loss": 0.1229, "step": 390 }, { "epoch": 0.58, "learning_rate": 0.00014227405247813413, "loss": 0.1444, "step": 400 }, { "epoch": 0.58, "eval_accuracy": 0.9517543859649122, "eval_loss": 0.1490934193134308, "eval_runtime": 287.634, "eval_samples_per_second": 14.268, "eval_steps_per_second": 1.784, "step": 400 }, { "epoch": 0.6, "learning_rate": 0.00014081632653061224, "loss": 0.1257, "step": 410 }, { "epoch": 0.61, "learning_rate": 0.00013935860058309038, "loss": 0.1251, "step": 420 }, { "epoch": 0.63, "learning_rate": 0.00013790087463556852, "loss": 0.0909, "step": 430 }, { "epoch": 0.64, "learning_rate": 0.00013644314868804666, "loss": 0.1236, "step": 440 }, { "epoch": 0.66, "learning_rate": 0.00013498542274052478, "loss": 0.1385, "step": 450 }, { "epoch": 0.66, "eval_accuracy": 0.9675925925925926, "eval_loss": 0.09978482127189636, "eval_runtime": 326.8281, "eval_samples_per_second": 12.557, "eval_steps_per_second": 1.57, "step": 450 }, { "epoch": 0.67, "learning_rate": 0.00013352769679300292, "loss": 0.1466, "step": 460 }, { "epoch": 0.69, "learning_rate": 0.00013206997084548106, "loss": 0.1286, "step": 470 }, { "epoch": 0.7, "learning_rate": 0.00013061224489795917, "loss": 0.1099, "step": 480 }, { "epoch": 0.71, "learning_rate": 0.00012915451895043734, "loss": 0.1559, "step": 490 }, { "epoch": 0.73, "learning_rate": 0.00012769679300291545, "loss": 0.1263, "step": 500 }, { "epoch": 0.73, "eval_accuracy": 0.9624756335282652, "eval_loss": 0.11673439294099808, "eval_runtime": 291.2631, "eval_samples_per_second": 14.09, "eval_steps_per_second": 1.761, "step": 500 }, { "epoch": 0.74, "learning_rate": 0.0001262390670553936, "loss": 0.2311, "step": 510 }, { "epoch": 0.76, "learning_rate": 0.0001247813411078717, "loss": 0.103, "step": 520 }, { "epoch": 0.77, "learning_rate": 0.00012332361516034988, "loss": 0.0728, "step": 530 }, { "epoch": 0.79, "learning_rate": 0.000121865889212828, "loss": 0.1353, "step": 540 }, { "epoch": 0.8, "learning_rate": 0.00012040816326530613, "loss": 0.1307, "step": 550 }, { "epoch": 0.8, "eval_accuracy": 0.9471247563352827, "eval_loss": 0.16630055010318756, "eval_runtime": 312.9758, "eval_samples_per_second": 13.113, "eval_steps_per_second": 1.639, "step": 550 }, { "epoch": 0.82, "learning_rate": 0.00011895043731778426, "loss": 0.1407, "step": 560 }, { "epoch": 0.83, "learning_rate": 0.00011749271137026238, "loss": 0.081, "step": 570 }, { "epoch": 0.85, "learning_rate": 0.00011603498542274054, "loss": 0.1272, "step": 580 }, { "epoch": 0.86, "learning_rate": 0.00011457725947521867, "loss": 0.1224, "step": 590 }, { "epoch": 0.87, "learning_rate": 0.0001131195335276968, "loss": 0.1517, "step": 600 }, { "epoch": 0.87, "eval_accuracy": 0.9451754385964912, "eval_loss": 0.18923337757587433, "eval_runtime": 328.1373, "eval_samples_per_second": 12.507, "eval_steps_per_second": 1.563, "step": 600 }, { "epoch": 0.89, "learning_rate": 0.00011166180758017492, "loss": 0.1336, "step": 610 }, { "epoch": 0.9, "learning_rate": 0.00011020408163265306, "loss": 0.1245, "step": 620 }, { "epoch": 0.92, "learning_rate": 0.0001087463556851312, "loss": 0.0762, "step": 630 }, { "epoch": 0.93, "learning_rate": 0.00010728862973760934, "loss": 0.1901, "step": 640 }, { "epoch": 0.95, "learning_rate": 0.00010583090379008747, "loss": 0.1051, "step": 650 }, { "epoch": 0.95, "eval_accuracy": 0.973196881091618, "eval_loss": 0.07433269917964935, "eval_runtime": 295.1825, "eval_samples_per_second": 13.903, "eval_steps_per_second": 1.738, "step": 650 }, { "epoch": 0.96, "learning_rate": 0.0001043731778425656, "loss": 0.0844, "step": 660 }, { "epoch": 0.98, "learning_rate": 0.00010291545189504375, "loss": 0.0764, "step": 670 }, { "epoch": 0.99, "learning_rate": 0.00010145772594752188, "loss": 0.0786, "step": 680 }, { "epoch": 1.01, "learning_rate": 0.0001, "loss": 0.0626, "step": 690 }, { "epoch": 1.02, "learning_rate": 9.854227405247813e-05, "loss": 0.04, "step": 700 }, { "epoch": 1.02, "eval_accuracy": 0.979775828460039, "eval_loss": 0.06021393463015556, "eval_runtime": 294.0751, "eval_samples_per_second": 13.956, "eval_steps_per_second": 1.744, "step": 700 }, { "epoch": 1.03, "learning_rate": 9.708454810495627e-05, "loss": 0.0473, "step": 710 }, { "epoch": 1.05, "learning_rate": 9.56268221574344e-05, "loss": 0.101, "step": 720 }, { "epoch": 1.06, "learning_rate": 9.416909620991254e-05, "loss": 0.0604, "step": 730 }, { "epoch": 1.08, "learning_rate": 9.271137026239067e-05, "loss": 0.0917, "step": 740 }, { "epoch": 1.09, "learning_rate": 9.125364431486881e-05, "loss": 0.047, "step": 750 }, { "epoch": 1.09, "eval_accuracy": 0.976364522417154, "eval_loss": 0.07237815111875534, "eval_runtime": 314.914, "eval_samples_per_second": 13.032, "eval_steps_per_second": 1.629, "step": 750 }, { "epoch": 1.11, "learning_rate": 8.979591836734695e-05, "loss": 0.0216, "step": 760 }, { "epoch": 1.12, "learning_rate": 8.833819241982508e-05, "loss": 0.0568, "step": 770 }, { "epoch": 1.14, "learning_rate": 8.688046647230322e-05, "loss": 0.0664, "step": 780 }, { "epoch": 1.15, "learning_rate": 8.542274052478134e-05, "loss": 0.0488, "step": 790 }, { "epoch": 1.17, "learning_rate": 8.396501457725948e-05, "loss": 0.0893, "step": 800 }, { "epoch": 1.17, "eval_accuracy": 0.9834307992202729, "eval_loss": 0.05777524411678314, "eval_runtime": 310.6824, "eval_samples_per_second": 13.21, "eval_steps_per_second": 1.651, "step": 800 }, { "epoch": 1.18, "learning_rate": 8.250728862973761e-05, "loss": 0.0414, "step": 810 }, { "epoch": 1.2, "learning_rate": 8.104956268221575e-05, "loss": 0.0306, "step": 820 }, { "epoch": 1.21, "learning_rate": 7.959183673469388e-05, "loss": 0.0164, "step": 830 }, { "epoch": 1.22, "learning_rate": 7.8134110787172e-05, "loss": 0.0465, "step": 840 }, { "epoch": 1.24, "learning_rate": 7.667638483965015e-05, "loss": 0.0254, "step": 850 }, { "epoch": 1.24, "eval_accuracy": 0.9790448343079922, "eval_loss": 0.09394790232181549, "eval_runtime": 292.1931, "eval_samples_per_second": 14.046, "eval_steps_per_second": 1.756, "step": 850 }, { "epoch": 1.25, "learning_rate": 7.521865889212827e-05, "loss": 0.049, "step": 860 }, { "epoch": 1.27, "learning_rate": 7.376093294460641e-05, "loss": 0.0482, "step": 870 }, { "epoch": 1.28, "learning_rate": 7.230320699708455e-05, "loss": 0.1339, "step": 880 }, { "epoch": 1.3, "learning_rate": 7.08454810495627e-05, "loss": 0.0658, "step": 890 }, { "epoch": 1.31, "learning_rate": 6.938775510204082e-05, "loss": 0.1217, "step": 900 }, { "epoch": 1.31, "eval_accuracy": 0.979775828460039, "eval_loss": 0.07414576411247253, "eval_runtime": 296.0615, "eval_samples_per_second": 13.862, "eval_steps_per_second": 1.733, "step": 900 }, { "epoch": 1.33, "learning_rate": 6.793002915451895e-05, "loss": 0.0514, "step": 910 }, { "epoch": 1.34, "learning_rate": 6.647230320699709e-05, "loss": 0.0903, "step": 920 }, { "epoch": 1.36, "learning_rate": 6.501457725947522e-05, "loss": 0.0495, "step": 930 }, { "epoch": 1.37, "learning_rate": 6.355685131195336e-05, "loss": 0.0214, "step": 940 }, { "epoch": 1.38, "learning_rate": 6.209912536443149e-05, "loss": 0.081, "step": 950 }, { "epoch": 1.38, "eval_accuracy": 0.9839181286549707, "eval_loss": 0.05142759159207344, "eval_runtime": 325.6943, "eval_samples_per_second": 12.601, "eval_steps_per_second": 1.575, "step": 950 }, { "epoch": 1.4, "learning_rate": 6.0641399416909626e-05, "loss": 0.1152, "step": 960 }, { "epoch": 1.41, "learning_rate": 5.918367346938776e-05, "loss": 0.0445, "step": 970 }, { "epoch": 1.43, "learning_rate": 5.77259475218659e-05, "loss": 0.0555, "step": 980 }, { "epoch": 1.44, "learning_rate": 5.626822157434403e-05, "loss": 0.0717, "step": 990 }, { "epoch": 1.46, "learning_rate": 5.4810495626822155e-05, "loss": 0.0607, "step": 1000 }, { "epoch": 1.46, "eval_accuracy": 0.9846491228070176, "eval_loss": 0.040749311447143555, "eval_runtime": 293.4556, "eval_samples_per_second": 13.985, "eval_steps_per_second": 1.748, "step": 1000 }, { "epoch": 1.47, "learning_rate": 5.3352769679300295e-05, "loss": 0.0147, "step": 1010 }, { "epoch": 1.49, "learning_rate": 5.189504373177842e-05, "loss": 0.042, "step": 1020 }, { "epoch": 1.5, "learning_rate": 5.043731778425656e-05, "loss": 0.0513, "step": 1030 }, { "epoch": 1.52, "learning_rate": 4.89795918367347e-05, "loss": 0.0177, "step": 1040 }, { "epoch": 1.53, "learning_rate": 4.752186588921283e-05, "loss": 0.0985, "step": 1050 }, { "epoch": 1.53, "eval_accuracy": 0.9890350877192983, "eval_loss": 0.0381675623357296, "eval_runtime": 285.8353, "eval_samples_per_second": 14.358, "eval_steps_per_second": 1.795, "step": 1050 }, { "epoch": 1.55, "learning_rate": 4.6064139941690965e-05, "loss": 0.0308, "step": 1060 }, { "epoch": 1.56, "learning_rate": 4.46064139941691e-05, "loss": 0.0297, "step": 1070 }, { "epoch": 1.57, "learning_rate": 4.314868804664723e-05, "loss": 0.1126, "step": 1080 }, { "epoch": 1.59, "learning_rate": 4.1690962099125366e-05, "loss": 0.0457, "step": 1090 }, { "epoch": 1.6, "learning_rate": 4.02332361516035e-05, "loss": 0.0492, "step": 1100 }, { "epoch": 1.6, "eval_accuracy": 0.9883040935672515, "eval_loss": 0.03809972107410431, "eval_runtime": 315.6444, "eval_samples_per_second": 13.002, "eval_steps_per_second": 1.625, "step": 1100 }, { "epoch": 1.62, "learning_rate": 3.8775510204081634e-05, "loss": 0.0364, "step": 1110 }, { "epoch": 1.63, "learning_rate": 3.731778425655977e-05, "loss": 0.0521, "step": 1120 }, { "epoch": 1.65, "learning_rate": 3.58600583090379e-05, "loss": 0.0393, "step": 1130 }, { "epoch": 1.66, "learning_rate": 3.4402332361516035e-05, "loss": 0.0541, "step": 1140 }, { "epoch": 1.68, "learning_rate": 3.294460641399417e-05, "loss": 0.0504, "step": 1150 }, { "epoch": 1.68, "eval_accuracy": 0.9880604288499025, "eval_loss": 0.03482714295387268, "eval_runtime": 330.0497, "eval_samples_per_second": 12.434, "eval_steps_per_second": 1.554, "step": 1150 }, { "epoch": 1.69, "learning_rate": 3.148688046647231e-05, "loss": 0.0493, "step": 1160 }, { "epoch": 1.71, "learning_rate": 3.0029154518950437e-05, "loss": 0.034, "step": 1170 }, { "epoch": 1.72, "learning_rate": 2.857142857142857e-05, "loss": 0.0598, "step": 1180 }, { "epoch": 1.73, "learning_rate": 2.7113702623906705e-05, "loss": 0.0415, "step": 1190 }, { "epoch": 1.75, "learning_rate": 2.5655976676384842e-05, "loss": 0.0354, "step": 1200 }, { "epoch": 1.75, "eval_accuracy": 0.9902534113060428, "eval_loss": 0.025796251371502876, "eval_runtime": 316.0263, "eval_samples_per_second": 12.986, "eval_steps_per_second": 1.623, "step": 1200 }, { "epoch": 1.76, "learning_rate": 2.4198250728862976e-05, "loss": 0.027, "step": 1210 }, { "epoch": 1.78, "learning_rate": 2.2740524781341106e-05, "loss": 0.031, "step": 1220 }, { "epoch": 1.79, "learning_rate": 2.1282798833819244e-05, "loss": 0.0448, "step": 1230 }, { "epoch": 1.81, "learning_rate": 1.9825072886297377e-05, "loss": 0.0077, "step": 1240 }, { "epoch": 1.82, "learning_rate": 1.836734693877551e-05, "loss": 0.0604, "step": 1250 }, { "epoch": 1.82, "eval_accuracy": 0.990009746588694, "eval_loss": 0.029700743034482002, "eval_runtime": 261.4803, "eval_samples_per_second": 15.695, "eval_steps_per_second": 1.962, "step": 1250 }, { "epoch": 1.84, "learning_rate": 1.6909620991253645e-05, "loss": 0.0278, "step": 1260 }, { "epoch": 1.85, "learning_rate": 1.545189504373178e-05, "loss": 0.0219, "step": 1270 }, { "epoch": 1.87, "learning_rate": 1.3994169096209913e-05, "loss": 0.0366, "step": 1280 }, { "epoch": 1.88, "learning_rate": 1.2536443148688048e-05, "loss": 0.0307, "step": 1290 }, { "epoch": 1.9, "learning_rate": 1.1078717201166182e-05, "loss": 0.0918, "step": 1300 }, { "epoch": 1.9, "eval_accuracy": 0.990009746588694, "eval_loss": 0.026875579729676247, "eval_runtime": 245.5691, "eval_samples_per_second": 16.712, "eval_steps_per_second": 2.089, "step": 1300 }, { "epoch": 1.91, "learning_rate": 9.620991253644314e-06, "loss": 0.0259, "step": 1310 }, { "epoch": 1.92, "learning_rate": 8.163265306122448e-06, "loss": 0.0613, "step": 1320 }, { "epoch": 1.94, "learning_rate": 6.705539358600584e-06, "loss": 0.0326, "step": 1330 }, { "epoch": 1.95, "learning_rate": 5.247813411078718e-06, "loss": 0.0315, "step": 1340 }, { "epoch": 1.97, "learning_rate": 3.7900874635568516e-06, "loss": 0.0555, "step": 1350 }, { "epoch": 1.97, "eval_accuracy": 0.9926900584795322, "eval_loss": 0.020489266142249107, "eval_runtime": 250.5348, "eval_samples_per_second": 16.381, "eval_steps_per_second": 2.048, "step": 1350 } ], "max_steps": 1372, "num_train_epochs": 2, "total_flos": 2.154507190843392e+17, "trial_name": null, "trial_params": null }