{ "best_metric": 0.6208920187793427, "best_model_checkpoint": "large-algae-vit-wirs/checkpoint-120", "epoch": 30.0, "global_step": 3600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "learning_rate": 1.388888888888889e-06, "loss": 1.3612, "step": 10 }, { "epoch": 0.17, "learning_rate": 2.777777777777778e-06, "loss": 1.3039, "step": 20 }, { "epoch": 0.25, "learning_rate": 4.166666666666667e-06, "loss": 1.3076, "step": 30 }, { "epoch": 0.33, "learning_rate": 5.555555555555556e-06, "loss": 1.2573, "step": 40 }, { "epoch": 0.42, "learning_rate": 6.944444444444445e-06, "loss": 1.2478, "step": 50 }, { "epoch": 0.5, "learning_rate": 8.333333333333334e-06, "loss": 1.2011, "step": 60 }, { "epoch": 0.58, "learning_rate": 9.722222222222223e-06, "loss": 1.1795, "step": 70 }, { "epoch": 0.67, "learning_rate": 1.1111111111111112e-05, "loss": 1.1731, "step": 80 }, { "epoch": 0.75, "learning_rate": 1.25e-05, "loss": 1.2165, "step": 90 }, { "epoch": 0.83, "learning_rate": 1.388888888888889e-05, "loss": 1.2024, "step": 100 }, { "epoch": 0.92, "learning_rate": 1.527777777777778e-05, "loss": 1.1816, "step": 110 }, { "epoch": 1.0, "learning_rate": 1.6666666666666667e-05, "loss": 1.1662, "step": 120 }, { "epoch": 1.0, "eval_accuracy": 0.6208920187793427, "eval_loss": 0.9128149747848511, "eval_runtime": 23.9101, "eval_samples_per_second": 71.267, "eval_steps_per_second": 2.258, "step": 120 }, { "epoch": 1.08, "learning_rate": 1.8055555555555555e-05, "loss": 1.1644, "step": 130 }, { "epoch": 1.17, "learning_rate": 1.9444444444444445e-05, "loss": 1.1601, "step": 140 }, { "epoch": 1.25, "learning_rate": 2.0833333333333336e-05, "loss": 1.117, "step": 150 }, { "epoch": 1.33, "learning_rate": 2.2222222222222223e-05, "loss": 1.1592, "step": 160 }, { "epoch": 1.42, "learning_rate": 2.361111111111111e-05, "loss": 1.1402, "step": 170 }, { "epoch": 1.5, "learning_rate": 2.5e-05, "loss": 1.1438, "step": 180 }, { "epoch": 1.58, "learning_rate": 2.6388888888888892e-05, "loss": 1.1215, "step": 190 }, { "epoch": 1.67, "learning_rate": 2.777777777777778e-05, "loss": 1.1412, "step": 200 }, { "epoch": 1.75, "learning_rate": 2.916666666666667e-05, "loss": 1.1488, "step": 210 }, { "epoch": 1.83, "learning_rate": 3.055555555555556e-05, "loss": 1.1669, "step": 220 }, { "epoch": 1.92, "learning_rate": 3.194444444444444e-05, "loss": 1.12, "step": 230 }, { "epoch": 2.0, "learning_rate": 3.3333333333333335e-05, "loss": 1.0885, "step": 240 }, { "epoch": 2.0, "eval_accuracy": 0.613849765258216, "eval_loss": 0.9469314217567444, "eval_runtime": 23.792, "eval_samples_per_second": 71.621, "eval_steps_per_second": 2.27, "step": 240 }, { "epoch": 2.08, "learning_rate": 3.472222222222222e-05, "loss": 1.0651, "step": 250 }, { "epoch": 2.17, "learning_rate": 3.611111111111111e-05, "loss": 1.108, "step": 260 }, { "epoch": 2.25, "learning_rate": 3.7500000000000003e-05, "loss": 1.1074, "step": 270 }, { "epoch": 2.33, "learning_rate": 3.888888888888889e-05, "loss": 1.0908, "step": 280 }, { "epoch": 2.42, "learning_rate": 4.027777777777778e-05, "loss": 1.0659, "step": 290 }, { "epoch": 2.5, "learning_rate": 4.166666666666667e-05, "loss": 1.1009, "step": 300 }, { "epoch": 2.58, "learning_rate": 4.305555555555556e-05, "loss": 1.0968, "step": 310 }, { "epoch": 2.67, "learning_rate": 4.4444444444444447e-05, "loss": 1.0859, "step": 320 }, { "epoch": 2.75, "learning_rate": 4.5833333333333334e-05, "loss": 1.1342, "step": 330 }, { "epoch": 2.83, "learning_rate": 4.722222222222222e-05, "loss": 1.0747, "step": 340 }, { "epoch": 2.92, "learning_rate": 4.8611111111111115e-05, "loss": 1.0587, "step": 350 }, { "epoch": 3.0, "learning_rate": 5e-05, "loss": 1.1315, "step": 360 }, { "epoch": 3.0, "eval_accuracy": 0.5757042253521126, "eval_loss": 1.0919097661972046, "eval_runtime": 23.8029, "eval_samples_per_second": 71.588, "eval_steps_per_second": 2.269, "step": 360 }, { "epoch": 3.08, "learning_rate": 4.984567901234568e-05, "loss": 1.0413, "step": 370 }, { "epoch": 3.17, "learning_rate": 4.969135802469136e-05, "loss": 1.0521, "step": 380 }, { "epoch": 3.25, "learning_rate": 4.9537037037037035e-05, "loss": 1.0694, "step": 390 }, { "epoch": 3.33, "learning_rate": 4.938271604938271e-05, "loss": 1.0888, "step": 400 }, { "epoch": 3.42, "learning_rate": 4.92283950617284e-05, "loss": 1.0717, "step": 410 }, { "epoch": 3.5, "learning_rate": 4.9074074074074075e-05, "loss": 1.0258, "step": 420 }, { "epoch": 3.58, "learning_rate": 4.891975308641975e-05, "loss": 1.078, "step": 430 }, { "epoch": 3.67, "learning_rate": 4.876543209876544e-05, "loss": 1.0596, "step": 440 }, { "epoch": 3.75, "learning_rate": 4.8611111111111115e-05, "loss": 1.059, "step": 450 }, { "epoch": 3.83, "learning_rate": 4.845679012345679e-05, "loss": 1.0505, "step": 460 }, { "epoch": 3.92, "learning_rate": 4.830246913580247e-05, "loss": 1.0184, "step": 470 }, { "epoch": 4.0, "learning_rate": 4.814814814814815e-05, "loss": 1.0542, "step": 480 }, { "epoch": 4.0, "eval_accuracy": 0.5598591549295775, "eval_loss": 1.229072093963623, "eval_runtime": 24.2826, "eval_samples_per_second": 70.174, "eval_steps_per_second": 2.224, "step": 480 }, { "epoch": 4.08, "learning_rate": 4.799382716049383e-05, "loss": 1.0394, "step": 490 }, { "epoch": 4.17, "learning_rate": 4.783950617283951e-05, "loss": 1.0383, "step": 500 }, { "epoch": 4.25, "learning_rate": 4.768518518518519e-05, "loss": 1.0385, "step": 510 }, { "epoch": 4.33, "learning_rate": 4.7530864197530866e-05, "loss": 1.0761, "step": 520 }, { "epoch": 4.42, "learning_rate": 4.7376543209876543e-05, "loss": 1.0378, "step": 530 }, { "epoch": 4.5, "learning_rate": 4.722222222222222e-05, "loss": 1.0378, "step": 540 }, { "epoch": 4.58, "learning_rate": 4.70679012345679e-05, "loss": 1.0333, "step": 550 }, { "epoch": 4.67, "learning_rate": 4.691358024691358e-05, "loss": 1.0729, "step": 560 }, { "epoch": 4.75, "learning_rate": 4.675925925925926e-05, "loss": 1.0342, "step": 570 }, { "epoch": 4.83, "learning_rate": 4.6604938271604945e-05, "loss": 1.0316, "step": 580 }, { "epoch": 4.92, "learning_rate": 4.645061728395062e-05, "loss": 1.0213, "step": 590 }, { "epoch": 5.0, "learning_rate": 4.62962962962963e-05, "loss": 1.028, "step": 600 }, { "epoch": 5.0, "eval_accuracy": 0.5598591549295775, "eval_loss": 1.1931029558181763, "eval_runtime": 24.0021, "eval_samples_per_second": 70.994, "eval_steps_per_second": 2.25, "step": 600 }, { "epoch": 5.08, "learning_rate": 4.614197530864198e-05, "loss": 1.0657, "step": 610 }, { "epoch": 5.17, "learning_rate": 4.5987654320987656e-05, "loss": 1.0504, "step": 620 }, { "epoch": 5.25, "learning_rate": 4.5833333333333334e-05, "loss": 1.0345, "step": 630 }, { "epoch": 5.33, "learning_rate": 4.567901234567901e-05, "loss": 1.0113, "step": 640 }, { "epoch": 5.42, "learning_rate": 4.5524691358024696e-05, "loss": 1.0316, "step": 650 }, { "epoch": 5.5, "learning_rate": 4.5370370370370374e-05, "loss": 1.0287, "step": 660 }, { "epoch": 5.58, "learning_rate": 4.521604938271605e-05, "loss": 1.064, "step": 670 }, { "epoch": 5.67, "learning_rate": 4.506172839506173e-05, "loss": 1.0079, "step": 680 }, { "epoch": 5.75, "learning_rate": 4.490740740740741e-05, "loss": 1.007, "step": 690 }, { "epoch": 5.83, "learning_rate": 4.4753086419753084e-05, "loss": 0.9965, "step": 700 }, { "epoch": 5.92, "learning_rate": 4.459876543209877e-05, "loss": 1.035, "step": 710 }, { "epoch": 6.0, "learning_rate": 4.4444444444444447e-05, "loss": 1.0023, "step": 720 }, { "epoch": 6.0, "eval_accuracy": 0.5674882629107981, "eval_loss": 1.1548198461532593, "eval_runtime": 23.6784, "eval_samples_per_second": 71.964, "eval_steps_per_second": 2.281, "step": 720 }, { "epoch": 6.08, "learning_rate": 4.429012345679013e-05, "loss": 1.033, "step": 730 }, { "epoch": 6.17, "learning_rate": 4.413580246913581e-05, "loss": 1.0003, "step": 740 }, { "epoch": 6.25, "learning_rate": 4.3981481481481486e-05, "loss": 1.0196, "step": 750 }, { "epoch": 6.33, "learning_rate": 4.3827160493827164e-05, "loss": 1.0211, "step": 760 }, { "epoch": 6.42, "learning_rate": 4.367283950617284e-05, "loss": 0.9964, "step": 770 }, { "epoch": 6.5, "learning_rate": 4.351851851851852e-05, "loss": 1.0339, "step": 780 }, { "epoch": 6.58, "learning_rate": 4.33641975308642e-05, "loss": 0.9788, "step": 790 }, { "epoch": 6.67, "learning_rate": 4.3209876543209875e-05, "loss": 1.0567, "step": 800 }, { "epoch": 6.75, "learning_rate": 4.305555555555556e-05, "loss": 0.994, "step": 810 }, { "epoch": 6.83, "learning_rate": 4.290123456790124e-05, "loss": 1.0389, "step": 820 }, { "epoch": 6.92, "learning_rate": 4.2746913580246915e-05, "loss": 0.9833, "step": 830 }, { "epoch": 7.0, "learning_rate": 4.259259259259259e-05, "loss": 1.0176, "step": 840 }, { "epoch": 7.0, "eval_accuracy": 0.5757042253521126, "eval_loss": 1.0932233333587646, "eval_runtime": 23.5291, "eval_samples_per_second": 72.421, "eval_steps_per_second": 2.295, "step": 840 }, { "epoch": 7.08, "learning_rate": 4.243827160493827e-05, "loss": 1.021, "step": 850 }, { "epoch": 7.17, "learning_rate": 4.2283950617283955e-05, "loss": 0.9636, "step": 860 }, { "epoch": 7.25, "learning_rate": 4.212962962962963e-05, "loss": 0.9965, "step": 870 }, { "epoch": 7.33, "learning_rate": 4.197530864197531e-05, "loss": 0.9829, "step": 880 }, { "epoch": 7.42, "learning_rate": 4.1820987654320994e-05, "loss": 1.0139, "step": 890 }, { "epoch": 7.5, "learning_rate": 4.166666666666667e-05, "loss": 1.0446, "step": 900 }, { "epoch": 7.58, "learning_rate": 4.151234567901235e-05, "loss": 0.9917, "step": 910 }, { "epoch": 7.67, "learning_rate": 4.135802469135803e-05, "loss": 0.9904, "step": 920 }, { "epoch": 7.75, "learning_rate": 4.1203703703703705e-05, "loss": 1.0068, "step": 930 }, { "epoch": 7.83, "learning_rate": 4.104938271604938e-05, "loss": 0.974, "step": 940 }, { "epoch": 7.92, "learning_rate": 4.089506172839506e-05, "loss": 1.0291, "step": 950 }, { "epoch": 8.0, "learning_rate": 4.074074074074074e-05, "loss": 0.992, "step": 960 }, { "epoch": 8.0, "eval_accuracy": 0.5751173708920188, "eval_loss": 1.1386847496032715, "eval_runtime": 23.4444, "eval_samples_per_second": 72.683, "eval_steps_per_second": 2.303, "step": 960 }, { "epoch": 8.08, "learning_rate": 4.058641975308642e-05, "loss": 0.9983, "step": 970 }, { "epoch": 8.17, "learning_rate": 4.04320987654321e-05, "loss": 0.964, "step": 980 }, { "epoch": 8.25, "learning_rate": 4.027777777777778e-05, "loss": 0.9514, "step": 990 }, { "epoch": 8.33, "learning_rate": 4.012345679012346e-05, "loss": 1.003, "step": 1000 }, { "epoch": 8.42, "learning_rate": 3.996913580246914e-05, "loss": 0.9575, "step": 1010 }, { "epoch": 8.5, "learning_rate": 3.981481481481482e-05, "loss": 0.9965, "step": 1020 }, { "epoch": 8.58, "learning_rate": 3.9660493827160496e-05, "loss": 1.0445, "step": 1030 }, { "epoch": 8.67, "learning_rate": 3.950617283950617e-05, "loss": 0.9638, "step": 1040 }, { "epoch": 8.75, "learning_rate": 3.935185185185186e-05, "loss": 1.024, "step": 1050 }, { "epoch": 8.83, "learning_rate": 3.9197530864197535e-05, "loss": 1.0393, "step": 1060 }, { "epoch": 8.92, "learning_rate": 3.904320987654321e-05, "loss": 0.9897, "step": 1070 }, { "epoch": 9.0, "learning_rate": 3.888888888888889e-05, "loss": 0.9891, "step": 1080 }, { "epoch": 9.0, "eval_accuracy": 0.5463615023474179, "eval_loss": 1.238718867301941, "eval_runtime": 23.4716, "eval_samples_per_second": 72.598, "eval_steps_per_second": 2.301, "step": 1080 }, { "epoch": 9.08, "learning_rate": 3.873456790123457e-05, "loss": 1.0242, "step": 1090 }, { "epoch": 9.17, "learning_rate": 3.8580246913580246e-05, "loss": 0.9982, "step": 1100 }, { "epoch": 9.25, "learning_rate": 3.8425925925925924e-05, "loss": 0.9985, "step": 1110 }, { "epoch": 9.33, "learning_rate": 3.82716049382716e-05, "loss": 1.0509, "step": 1120 }, { "epoch": 9.42, "learning_rate": 3.8117283950617286e-05, "loss": 0.9643, "step": 1130 }, { "epoch": 9.5, "learning_rate": 3.7962962962962964e-05, "loss": 0.9595, "step": 1140 }, { "epoch": 9.58, "learning_rate": 3.780864197530865e-05, "loss": 1.0199, "step": 1150 }, { "epoch": 9.67, "learning_rate": 3.7654320987654326e-05, "loss": 0.948, "step": 1160 }, { "epoch": 9.75, "learning_rate": 3.7500000000000003e-05, "loss": 0.9953, "step": 1170 }, { "epoch": 9.83, "learning_rate": 3.734567901234568e-05, "loss": 0.9848, "step": 1180 }, { "epoch": 9.92, "learning_rate": 3.719135802469136e-05, "loss": 0.9249, "step": 1190 }, { "epoch": 10.0, "learning_rate": 3.7037037037037037e-05, "loss": 0.9635, "step": 1200 }, { "epoch": 10.0, "eval_accuracy": 0.5428403755868545, "eval_loss": 1.3771986961364746, "eval_runtime": 23.4919, "eval_samples_per_second": 72.536, "eval_steps_per_second": 2.299, "step": 1200 }, { "epoch": 10.08, "learning_rate": 3.6882716049382714e-05, "loss": 0.9894, "step": 1210 }, { "epoch": 10.17, "learning_rate": 3.67283950617284e-05, "loss": 1.0025, "step": 1220 }, { "epoch": 10.25, "learning_rate": 3.6574074074074076e-05, "loss": 0.9747, "step": 1230 }, { "epoch": 10.33, "learning_rate": 3.6419753086419754e-05, "loss": 0.9813, "step": 1240 }, { "epoch": 10.42, "learning_rate": 3.626543209876543e-05, "loss": 1.0371, "step": 1250 }, { "epoch": 10.5, "learning_rate": 3.611111111111111e-05, "loss": 0.9673, "step": 1260 }, { "epoch": 10.58, "learning_rate": 3.5956790123456794e-05, "loss": 0.9378, "step": 1270 }, { "epoch": 10.67, "learning_rate": 3.580246913580247e-05, "loss": 0.9659, "step": 1280 }, { "epoch": 10.75, "learning_rate": 3.564814814814815e-05, "loss": 0.9527, "step": 1290 }, { "epoch": 10.83, "learning_rate": 3.5493827160493834e-05, "loss": 0.9805, "step": 1300 }, { "epoch": 10.92, "learning_rate": 3.533950617283951e-05, "loss": 0.9974, "step": 1310 }, { "epoch": 11.0, "learning_rate": 3.518518518518519e-05, "loss": 0.9764, "step": 1320 }, { "epoch": 11.0, "eval_accuracy": 0.5258215962441315, "eval_loss": 1.4329094886779785, "eval_runtime": 23.2917, "eval_samples_per_second": 73.159, "eval_steps_per_second": 2.318, "step": 1320 }, { "epoch": 11.08, "learning_rate": 3.503086419753087e-05, "loss": 1.0079, "step": 1330 }, { "epoch": 11.17, "learning_rate": 3.4876543209876545e-05, "loss": 0.99, "step": 1340 }, { "epoch": 11.25, "learning_rate": 3.472222222222222e-05, "loss": 0.9836, "step": 1350 }, { "epoch": 11.33, "learning_rate": 3.45679012345679e-05, "loss": 0.9996, "step": 1360 }, { "epoch": 11.42, "learning_rate": 3.441358024691358e-05, "loss": 0.9794, "step": 1370 }, { "epoch": 11.5, "learning_rate": 3.425925925925926e-05, "loss": 0.9639, "step": 1380 }, { "epoch": 11.58, "learning_rate": 3.410493827160494e-05, "loss": 0.9945, "step": 1390 }, { "epoch": 11.67, "learning_rate": 3.395061728395062e-05, "loss": 0.9832, "step": 1400 }, { "epoch": 11.75, "learning_rate": 3.3796296296296295e-05, "loss": 0.9606, "step": 1410 }, { "epoch": 11.83, "learning_rate": 3.364197530864198e-05, "loss": 0.9581, "step": 1420 }, { "epoch": 11.92, "learning_rate": 3.348765432098766e-05, "loss": 0.9784, "step": 1430 }, { "epoch": 12.0, "learning_rate": 3.3333333333333335e-05, "loss": 0.9375, "step": 1440 }, { "epoch": 12.0, "eval_accuracy": 0.5522300469483568, "eval_loss": 1.2830231189727783, "eval_runtime": 23.496, "eval_samples_per_second": 72.523, "eval_steps_per_second": 2.298, "step": 1440 }, { "epoch": 12.08, "learning_rate": 3.317901234567901e-05, "loss": 0.9912, "step": 1450 }, { "epoch": 12.17, "learning_rate": 3.30246913580247e-05, "loss": 0.9722, "step": 1460 }, { "epoch": 12.25, "learning_rate": 3.2870370370370375e-05, "loss": 1.0089, "step": 1470 }, { "epoch": 12.33, "learning_rate": 3.271604938271605e-05, "loss": 0.954, "step": 1480 }, { "epoch": 12.42, "learning_rate": 3.256172839506173e-05, "loss": 0.9823, "step": 1490 }, { "epoch": 12.5, "learning_rate": 3.240740740740741e-05, "loss": 0.9813, "step": 1500 }, { "epoch": 12.58, "learning_rate": 3.2253086419753086e-05, "loss": 0.9827, "step": 1510 }, { "epoch": 12.67, "learning_rate": 3.209876543209876e-05, "loss": 0.9573, "step": 1520 }, { "epoch": 12.75, "learning_rate": 3.194444444444444e-05, "loss": 1.002, "step": 1530 }, { "epoch": 12.83, "learning_rate": 3.1790123456790125e-05, "loss": 0.9474, "step": 1540 }, { "epoch": 12.92, "learning_rate": 3.16358024691358e-05, "loss": 0.9435, "step": 1550 }, { "epoch": 13.0, "learning_rate": 3.148148148148148e-05, "loss": 0.9574, "step": 1560 }, { "epoch": 13.0, "eval_accuracy": 0.522887323943662, "eval_loss": 1.400294542312622, "eval_runtime": 23.5276, "eval_samples_per_second": 72.426, "eval_steps_per_second": 2.295, "step": 1560 }, { "epoch": 13.08, "learning_rate": 3.1327160493827165e-05, "loss": 0.9727, "step": 1570 }, { "epoch": 13.17, "learning_rate": 3.117283950617284e-05, "loss": 0.9709, "step": 1580 }, { "epoch": 13.25, "learning_rate": 3.101851851851852e-05, "loss": 0.9753, "step": 1590 }, { "epoch": 13.33, "learning_rate": 3.08641975308642e-05, "loss": 0.9267, "step": 1600 }, { "epoch": 13.42, "learning_rate": 3.0709876543209876e-05, "loss": 0.9905, "step": 1610 }, { "epoch": 13.5, "learning_rate": 3.055555555555556e-05, "loss": 0.9499, "step": 1620 }, { "epoch": 13.58, "learning_rate": 3.0401234567901238e-05, "loss": 1.0304, "step": 1630 }, { "epoch": 13.67, "learning_rate": 3.0246913580246916e-05, "loss": 1.0021, "step": 1640 }, { "epoch": 13.75, "learning_rate": 3.0092592592592593e-05, "loss": 0.9771, "step": 1650 }, { "epoch": 13.83, "learning_rate": 2.993827160493827e-05, "loss": 0.9278, "step": 1660 }, { "epoch": 13.92, "learning_rate": 2.9783950617283952e-05, "loss": 0.9555, "step": 1670 }, { "epoch": 14.0, "learning_rate": 2.962962962962963e-05, "loss": 0.9907, "step": 1680 }, { "epoch": 14.0, "eval_accuracy": 0.5422535211267606, "eval_loss": 1.344726324081421, "eval_runtime": 23.9181, "eval_samples_per_second": 71.243, "eval_steps_per_second": 2.258, "step": 1680 }, { "epoch": 14.08, "learning_rate": 2.9475308641975308e-05, "loss": 0.9437, "step": 1690 }, { "epoch": 14.17, "learning_rate": 2.9320987654320992e-05, "loss": 0.9556, "step": 1700 }, { "epoch": 14.25, "learning_rate": 2.916666666666667e-05, "loss": 0.9587, "step": 1710 }, { "epoch": 14.33, "learning_rate": 2.9012345679012347e-05, "loss": 0.9754, "step": 1720 }, { "epoch": 14.42, "learning_rate": 2.8858024691358025e-05, "loss": 0.9724, "step": 1730 }, { "epoch": 14.5, "learning_rate": 2.8703703703703706e-05, "loss": 0.9733, "step": 1740 }, { "epoch": 14.58, "learning_rate": 2.8549382716049384e-05, "loss": 0.9725, "step": 1750 }, { "epoch": 14.67, "learning_rate": 2.839506172839506e-05, "loss": 0.9776, "step": 1760 }, { "epoch": 14.75, "learning_rate": 2.824074074074074e-05, "loss": 0.95, "step": 1770 }, { "epoch": 14.83, "learning_rate": 2.8086419753086424e-05, "loss": 1.006, "step": 1780 }, { "epoch": 14.92, "learning_rate": 2.79320987654321e-05, "loss": 0.9395, "step": 1790 }, { "epoch": 15.0, "learning_rate": 2.777777777777778e-05, "loss": 0.9507, "step": 1800 }, { "epoch": 15.0, "eval_accuracy": 0.5604460093896714, "eval_loss": 1.290692925453186, "eval_runtime": 23.8671, "eval_samples_per_second": 71.395, "eval_steps_per_second": 2.263, "step": 1800 }, { "epoch": 15.08, "learning_rate": 2.762345679012346e-05, "loss": 0.9754, "step": 1810 }, { "epoch": 15.17, "learning_rate": 2.7469135802469138e-05, "loss": 0.9558, "step": 1820 }, { "epoch": 15.25, "learning_rate": 2.7314814814814816e-05, "loss": 0.9572, "step": 1830 }, { "epoch": 15.33, "learning_rate": 2.7160493827160493e-05, "loss": 0.9987, "step": 1840 }, { "epoch": 15.42, "learning_rate": 2.700617283950617e-05, "loss": 0.9429, "step": 1850 }, { "epoch": 15.5, "learning_rate": 2.6851851851851855e-05, "loss": 0.9161, "step": 1860 }, { "epoch": 15.58, "learning_rate": 2.6697530864197533e-05, "loss": 0.9153, "step": 1870 }, { "epoch": 15.67, "learning_rate": 2.654320987654321e-05, "loss": 1.0111, "step": 1880 }, { "epoch": 15.75, "learning_rate": 2.6388888888888892e-05, "loss": 0.9889, "step": 1890 }, { "epoch": 15.83, "learning_rate": 2.623456790123457e-05, "loss": 0.9616, "step": 1900 }, { "epoch": 15.92, "learning_rate": 2.6080246913580247e-05, "loss": 0.9414, "step": 1910 }, { "epoch": 16.0, "learning_rate": 2.5925925925925925e-05, "loss": 0.9866, "step": 1920 }, { "epoch": 16.0, "eval_accuracy": 0.5393192488262911, "eval_loss": 1.4577730894088745, "eval_runtime": 23.9597, "eval_samples_per_second": 71.119, "eval_steps_per_second": 2.254, "step": 1920 }, { "epoch": 16.08, "learning_rate": 2.5771604938271603e-05, "loss": 0.9638, "step": 1930 }, { "epoch": 16.17, "learning_rate": 2.5617283950617287e-05, "loss": 0.9562, "step": 1940 }, { "epoch": 16.25, "learning_rate": 2.5462962962962965e-05, "loss": 0.9464, "step": 1950 }, { "epoch": 16.33, "learning_rate": 2.5308641975308646e-05, "loss": 0.9533, "step": 1960 }, { "epoch": 16.42, "learning_rate": 2.5154320987654324e-05, "loss": 0.9763, "step": 1970 }, { "epoch": 16.5, "learning_rate": 2.5e-05, "loss": 0.9795, "step": 1980 }, { "epoch": 16.58, "learning_rate": 2.484567901234568e-05, "loss": 0.9827, "step": 1990 }, { "epoch": 16.67, "learning_rate": 2.4691358024691357e-05, "loss": 0.9269, "step": 2000 }, { "epoch": 16.75, "learning_rate": 2.4537037037037038e-05, "loss": 0.9397, "step": 2010 }, { "epoch": 16.83, "learning_rate": 2.438271604938272e-05, "loss": 0.9639, "step": 2020 }, { "epoch": 16.92, "learning_rate": 2.4228395061728396e-05, "loss": 0.9626, "step": 2030 }, { "epoch": 17.0, "learning_rate": 2.4074074074074074e-05, "loss": 0.9297, "step": 2040 }, { "epoch": 17.0, "eval_accuracy": 0.528169014084507, "eval_loss": 1.477900743484497, "eval_runtime": 23.9974, "eval_samples_per_second": 71.008, "eval_steps_per_second": 2.25, "step": 2040 }, { "epoch": 17.08, "learning_rate": 2.3919753086419755e-05, "loss": 0.9454, "step": 2050 }, { "epoch": 17.17, "learning_rate": 2.3765432098765433e-05, "loss": 0.9679, "step": 2060 }, { "epoch": 17.25, "learning_rate": 2.361111111111111e-05, "loss": 0.9748, "step": 2070 }, { "epoch": 17.33, "learning_rate": 2.345679012345679e-05, "loss": 0.9272, "step": 2080 }, { "epoch": 17.42, "learning_rate": 2.3302469135802473e-05, "loss": 0.9831, "step": 2090 }, { "epoch": 17.5, "learning_rate": 2.314814814814815e-05, "loss": 0.9593, "step": 2100 }, { "epoch": 17.58, "learning_rate": 2.2993827160493828e-05, "loss": 0.9239, "step": 2110 }, { "epoch": 17.67, "learning_rate": 2.2839506172839506e-05, "loss": 0.9255, "step": 2120 }, { "epoch": 17.75, "learning_rate": 2.2685185185185187e-05, "loss": 0.9606, "step": 2130 }, { "epoch": 17.83, "learning_rate": 2.2530864197530865e-05, "loss": 0.9407, "step": 2140 }, { "epoch": 17.92, "learning_rate": 2.2376543209876542e-05, "loss": 0.9578, "step": 2150 }, { "epoch": 18.0, "learning_rate": 2.2222222222222223e-05, "loss": 0.9385, "step": 2160 }, { "epoch": 18.0, "eval_accuracy": 0.5469483568075117, "eval_loss": 1.3874293565750122, "eval_runtime": 24.2732, "eval_samples_per_second": 70.201, "eval_steps_per_second": 2.225, "step": 2160 }, { "epoch": 18.08, "learning_rate": 2.2067901234567904e-05, "loss": 0.9642, "step": 2170 }, { "epoch": 18.17, "learning_rate": 2.1913580246913582e-05, "loss": 0.9263, "step": 2180 }, { "epoch": 18.25, "learning_rate": 2.175925925925926e-05, "loss": 0.9209, "step": 2190 }, { "epoch": 18.33, "learning_rate": 2.1604938271604937e-05, "loss": 0.9415, "step": 2200 }, { "epoch": 18.42, "learning_rate": 2.145061728395062e-05, "loss": 1.0009, "step": 2210 }, { "epoch": 18.5, "learning_rate": 2.1296296296296296e-05, "loss": 0.9318, "step": 2220 }, { "epoch": 18.58, "learning_rate": 2.1141975308641977e-05, "loss": 0.9256, "step": 2230 }, { "epoch": 18.67, "learning_rate": 2.0987654320987655e-05, "loss": 0.9422, "step": 2240 }, { "epoch": 18.75, "learning_rate": 2.0833333333333336e-05, "loss": 0.9357, "step": 2250 }, { "epoch": 18.83, "learning_rate": 2.0679012345679014e-05, "loss": 0.9546, "step": 2260 }, { "epoch": 18.92, "learning_rate": 2.052469135802469e-05, "loss": 0.9144, "step": 2270 }, { "epoch": 19.0, "learning_rate": 2.037037037037037e-05, "loss": 0.9951, "step": 2280 }, { "epoch": 19.0, "eval_accuracy": 0.5586854460093896, "eval_loss": 1.2975717782974243, "eval_runtime": 23.895, "eval_samples_per_second": 71.312, "eval_steps_per_second": 2.26, "step": 2280 }, { "epoch": 19.08, "learning_rate": 2.021604938271605e-05, "loss": 0.9519, "step": 2290 }, { "epoch": 19.17, "learning_rate": 2.006172839506173e-05, "loss": 0.9362, "step": 2300 }, { "epoch": 19.25, "learning_rate": 1.990740740740741e-05, "loss": 0.9226, "step": 2310 }, { "epoch": 19.33, "learning_rate": 1.9753086419753087e-05, "loss": 0.9176, "step": 2320 }, { "epoch": 19.42, "learning_rate": 1.9598765432098768e-05, "loss": 0.9464, "step": 2330 }, { "epoch": 19.5, "learning_rate": 1.9444444444444445e-05, "loss": 0.9369, "step": 2340 }, { "epoch": 19.58, "learning_rate": 1.9290123456790123e-05, "loss": 0.9732, "step": 2350 }, { "epoch": 19.67, "learning_rate": 1.91358024691358e-05, "loss": 0.9639, "step": 2360 }, { "epoch": 19.75, "learning_rate": 1.8981481481481482e-05, "loss": 0.9821, "step": 2370 }, { "epoch": 19.83, "learning_rate": 1.8827160493827163e-05, "loss": 0.9472, "step": 2380 }, { "epoch": 19.92, "learning_rate": 1.867283950617284e-05, "loss": 0.9674, "step": 2390 }, { "epoch": 20.0, "learning_rate": 1.8518518518518518e-05, "loss": 0.9794, "step": 2400 }, { "epoch": 20.0, "eval_accuracy": 0.556924882629108, "eval_loss": 1.3109557628631592, "eval_runtime": 23.3371, "eval_samples_per_second": 73.017, "eval_steps_per_second": 2.314, "step": 2400 }, { "epoch": 20.08, "learning_rate": 1.83641975308642e-05, "loss": 0.959, "step": 2410 }, { "epoch": 20.17, "learning_rate": 1.8209876543209877e-05, "loss": 0.9208, "step": 2420 }, { "epoch": 20.25, "learning_rate": 1.8055555555555555e-05, "loss": 0.8842, "step": 2430 }, { "epoch": 20.33, "learning_rate": 1.7901234567901236e-05, "loss": 0.9423, "step": 2440 }, { "epoch": 20.42, "learning_rate": 1.7746913580246917e-05, "loss": 0.909, "step": 2450 }, { "epoch": 20.5, "learning_rate": 1.7592592592592595e-05, "loss": 0.93, "step": 2460 }, { "epoch": 20.58, "learning_rate": 1.7438271604938272e-05, "loss": 0.94, "step": 2470 }, { "epoch": 20.67, "learning_rate": 1.728395061728395e-05, "loss": 0.9199, "step": 2480 }, { "epoch": 20.75, "learning_rate": 1.712962962962963e-05, "loss": 0.9436, "step": 2490 }, { "epoch": 20.83, "learning_rate": 1.697530864197531e-05, "loss": 0.9872, "step": 2500 }, { "epoch": 20.92, "learning_rate": 1.682098765432099e-05, "loss": 0.9622, "step": 2510 }, { "epoch": 21.0, "learning_rate": 1.6666666666666667e-05, "loss": 0.9974, "step": 2520 }, { "epoch": 21.0, "eval_accuracy": 0.5275821596244131, "eval_loss": 1.3648896217346191, "eval_runtime": 23.4473, "eval_samples_per_second": 72.674, "eval_steps_per_second": 2.303, "step": 2520 }, { "epoch": 21.08, "learning_rate": 1.651234567901235e-05, "loss": 0.9337, "step": 2530 }, { "epoch": 21.17, "learning_rate": 1.6358024691358026e-05, "loss": 0.9923, "step": 2540 }, { "epoch": 21.25, "learning_rate": 1.6203703703703704e-05, "loss": 0.9443, "step": 2550 }, { "epoch": 21.33, "learning_rate": 1.604938271604938e-05, "loss": 0.9586, "step": 2560 }, { "epoch": 21.42, "learning_rate": 1.5895061728395063e-05, "loss": 0.9377, "step": 2570 }, { "epoch": 21.5, "learning_rate": 1.574074074074074e-05, "loss": 0.9484, "step": 2580 }, { "epoch": 21.58, "learning_rate": 1.558641975308642e-05, "loss": 0.926, "step": 2590 }, { "epoch": 21.67, "learning_rate": 1.54320987654321e-05, "loss": 0.9426, "step": 2600 }, { "epoch": 21.75, "learning_rate": 1.527777777777778e-05, "loss": 0.9252, "step": 2610 }, { "epoch": 21.83, "learning_rate": 1.5123456790123458e-05, "loss": 0.9228, "step": 2620 }, { "epoch": 21.92, "learning_rate": 1.4969135802469136e-05, "loss": 0.9403, "step": 2630 }, { "epoch": 22.0, "learning_rate": 1.4814814814814815e-05, "loss": 0.9284, "step": 2640 }, { "epoch": 22.0, "eval_accuracy": 0.5363849765258216, "eval_loss": 1.3712799549102783, "eval_runtime": 23.3686, "eval_samples_per_second": 72.918, "eval_steps_per_second": 2.311, "step": 2640 }, { "epoch": 22.08, "learning_rate": 1.4660493827160496e-05, "loss": 0.9516, "step": 2650 }, { "epoch": 22.17, "learning_rate": 1.4506172839506174e-05, "loss": 0.932, "step": 2660 }, { "epoch": 22.25, "learning_rate": 1.4351851851851853e-05, "loss": 0.939, "step": 2670 }, { "epoch": 22.33, "learning_rate": 1.419753086419753e-05, "loss": 0.9343, "step": 2680 }, { "epoch": 22.42, "learning_rate": 1.4043209876543212e-05, "loss": 0.968, "step": 2690 }, { "epoch": 22.5, "learning_rate": 1.388888888888889e-05, "loss": 0.9169, "step": 2700 }, { "epoch": 22.58, "learning_rate": 1.3734567901234569e-05, "loss": 0.9549, "step": 2710 }, { "epoch": 22.67, "learning_rate": 1.3580246913580247e-05, "loss": 0.9106, "step": 2720 }, { "epoch": 22.75, "learning_rate": 1.3425925925925928e-05, "loss": 0.9678, "step": 2730 }, { "epoch": 22.83, "learning_rate": 1.3271604938271605e-05, "loss": 0.9296, "step": 2740 }, { "epoch": 22.92, "learning_rate": 1.3117283950617285e-05, "loss": 0.9827, "step": 2750 }, { "epoch": 23.0, "learning_rate": 1.2962962962962962e-05, "loss": 0.9144, "step": 2760 }, { "epoch": 23.0, "eval_accuracy": 0.534037558685446, "eval_loss": 1.4117422103881836, "eval_runtime": 23.4898, "eval_samples_per_second": 72.542, "eval_steps_per_second": 2.299, "step": 2760 }, { "epoch": 23.08, "learning_rate": 1.2808641975308644e-05, "loss": 0.946, "step": 2770 }, { "epoch": 23.17, "learning_rate": 1.2654320987654323e-05, "loss": 0.9184, "step": 2780 }, { "epoch": 23.25, "learning_rate": 1.25e-05, "loss": 0.8966, "step": 2790 }, { "epoch": 23.33, "learning_rate": 1.2345679012345678e-05, "loss": 0.9174, "step": 2800 }, { "epoch": 23.42, "learning_rate": 1.219135802469136e-05, "loss": 0.9383, "step": 2810 }, { "epoch": 23.5, "learning_rate": 1.2037037037037037e-05, "loss": 0.942, "step": 2820 }, { "epoch": 23.58, "learning_rate": 1.1882716049382716e-05, "loss": 0.9137, "step": 2830 }, { "epoch": 23.67, "learning_rate": 1.1728395061728396e-05, "loss": 0.933, "step": 2840 }, { "epoch": 23.75, "learning_rate": 1.1574074074074075e-05, "loss": 0.9348, "step": 2850 }, { "epoch": 23.83, "learning_rate": 1.1419753086419753e-05, "loss": 0.8839, "step": 2860 }, { "epoch": 23.92, "learning_rate": 1.1265432098765432e-05, "loss": 0.9548, "step": 2870 }, { "epoch": 24.0, "learning_rate": 1.1111111111111112e-05, "loss": 0.9771, "step": 2880 }, { "epoch": 24.0, "eval_accuracy": 0.5357981220657277, "eval_loss": 1.383649230003357, "eval_runtime": 23.4971, "eval_samples_per_second": 72.52, "eval_steps_per_second": 2.298, "step": 2880 }, { "epoch": 24.08, "learning_rate": 1.0956790123456791e-05, "loss": 0.9594, "step": 2890 }, { "epoch": 24.17, "learning_rate": 1.0802469135802469e-05, "loss": 0.9391, "step": 2900 }, { "epoch": 24.25, "learning_rate": 1.0648148148148148e-05, "loss": 0.9376, "step": 2910 }, { "epoch": 24.33, "learning_rate": 1.0493827160493827e-05, "loss": 0.9284, "step": 2920 }, { "epoch": 24.42, "learning_rate": 1.0339506172839507e-05, "loss": 0.9186, "step": 2930 }, { "epoch": 24.5, "learning_rate": 1.0185185185185185e-05, "loss": 0.8916, "step": 2940 }, { "epoch": 24.58, "learning_rate": 1.0030864197530866e-05, "loss": 0.9302, "step": 2950 }, { "epoch": 24.67, "learning_rate": 9.876543209876543e-06, "loss": 0.9572, "step": 2960 }, { "epoch": 24.75, "learning_rate": 9.722222222222223e-06, "loss": 0.8967, "step": 2970 }, { "epoch": 24.83, "learning_rate": 9.5679012345679e-06, "loss": 0.9554, "step": 2980 }, { "epoch": 24.92, "learning_rate": 9.413580246913581e-06, "loss": 0.9231, "step": 2990 }, { "epoch": 25.0, "learning_rate": 9.259259259259259e-06, "loss": 0.8994, "step": 3000 }, { "epoch": 25.0, "eval_accuracy": 0.528169014084507, "eval_loss": 1.5077193975448608, "eval_runtime": 23.4692, "eval_samples_per_second": 72.606, "eval_steps_per_second": 2.301, "step": 3000 }, { "epoch": 25.08, "learning_rate": 9.104938271604939e-06, "loss": 0.9479, "step": 3010 }, { "epoch": 25.17, "learning_rate": 8.950617283950618e-06, "loss": 0.9191, "step": 3020 }, { "epoch": 25.25, "learning_rate": 8.796296296296297e-06, "loss": 0.9563, "step": 3030 }, { "epoch": 25.33, "learning_rate": 8.641975308641975e-06, "loss": 0.9932, "step": 3040 }, { "epoch": 25.42, "learning_rate": 8.487654320987654e-06, "loss": 0.9327, "step": 3050 }, { "epoch": 25.5, "learning_rate": 8.333333333333334e-06, "loss": 0.9056, "step": 3060 }, { "epoch": 25.58, "learning_rate": 8.179012345679013e-06, "loss": 0.9228, "step": 3070 }, { "epoch": 25.67, "learning_rate": 8.02469135802469e-06, "loss": 0.9059, "step": 3080 }, { "epoch": 25.75, "learning_rate": 7.87037037037037e-06, "loss": 0.9224, "step": 3090 }, { "epoch": 25.83, "learning_rate": 7.71604938271605e-06, "loss": 0.8891, "step": 3100 }, { "epoch": 25.92, "learning_rate": 7.561728395061729e-06, "loss": 0.9142, "step": 3110 }, { "epoch": 26.0, "learning_rate": 7.4074074074074075e-06, "loss": 0.9061, "step": 3120 }, { "epoch": 26.0, "eval_accuracy": 0.5328638497652582, "eval_loss": 1.4622243642807007, "eval_runtime": 23.8116, "eval_samples_per_second": 71.562, "eval_steps_per_second": 2.268, "step": 3120 }, { "epoch": 26.08, "learning_rate": 7.253086419753087e-06, "loss": 0.9487, "step": 3130 }, { "epoch": 26.17, "learning_rate": 7.098765432098765e-06, "loss": 0.9639, "step": 3140 }, { "epoch": 26.25, "learning_rate": 6.944444444444445e-06, "loss": 0.9248, "step": 3150 }, { "epoch": 26.33, "learning_rate": 6.790123456790123e-06, "loss": 0.901, "step": 3160 }, { "epoch": 26.42, "learning_rate": 6.635802469135803e-06, "loss": 0.9158, "step": 3170 }, { "epoch": 26.5, "learning_rate": 6.481481481481481e-06, "loss": 0.9161, "step": 3180 }, { "epoch": 26.58, "learning_rate": 6.3271604938271615e-06, "loss": 0.9121, "step": 3190 }, { "epoch": 26.67, "learning_rate": 6.172839506172839e-06, "loss": 0.9275, "step": 3200 }, { "epoch": 26.75, "learning_rate": 6.0185185185185185e-06, "loss": 0.9336, "step": 3210 }, { "epoch": 26.83, "learning_rate": 5.864197530864198e-06, "loss": 0.9263, "step": 3220 }, { "epoch": 26.92, "learning_rate": 5.7098765432098764e-06, "loss": 0.932, "step": 3230 }, { "epoch": 27.0, "learning_rate": 5.555555555555556e-06, "loss": 0.9071, "step": 3240 }, { "epoch": 27.0, "eval_accuracy": 0.5393192488262911, "eval_loss": 1.430330753326416, "eval_runtime": 23.403, "eval_samples_per_second": 72.811, "eval_steps_per_second": 2.307, "step": 3240 }, { "epoch": 27.08, "learning_rate": 5.401234567901234e-06, "loss": 0.9078, "step": 3250 }, { "epoch": 27.17, "learning_rate": 5.246913580246914e-06, "loss": 0.9895, "step": 3260 }, { "epoch": 27.25, "learning_rate": 5.092592592592592e-06, "loss": 0.8999, "step": 3270 }, { "epoch": 27.33, "learning_rate": 4.938271604938272e-06, "loss": 0.9041, "step": 3280 }, { "epoch": 27.42, "learning_rate": 4.78395061728395e-06, "loss": 0.958, "step": 3290 }, { "epoch": 27.5, "learning_rate": 4.6296296296296296e-06, "loss": 0.9329, "step": 3300 }, { "epoch": 27.58, "learning_rate": 4.475308641975309e-06, "loss": 0.902, "step": 3310 }, { "epoch": 27.67, "learning_rate": 4.3209876543209875e-06, "loss": 0.9415, "step": 3320 }, { "epoch": 27.75, "learning_rate": 4.166666666666667e-06, "loss": 0.8901, "step": 3330 }, { "epoch": 27.83, "learning_rate": 4.012345679012345e-06, "loss": 0.9495, "step": 3340 }, { "epoch": 27.92, "learning_rate": 3.858024691358025e-06, "loss": 0.9019, "step": 3350 }, { "epoch": 28.0, "learning_rate": 3.7037037037037037e-06, "loss": 0.9288, "step": 3360 }, { "epoch": 28.0, "eval_accuracy": 0.5328638497652582, "eval_loss": 1.4555658102035522, "eval_runtime": 23.4988, "eval_samples_per_second": 72.514, "eval_steps_per_second": 2.298, "step": 3360 }, { "epoch": 28.08, "learning_rate": 3.5493827160493827e-06, "loss": 0.9258, "step": 3370 }, { "epoch": 28.17, "learning_rate": 3.3950617283950617e-06, "loss": 0.9017, "step": 3380 }, { "epoch": 28.25, "learning_rate": 3.2407407407407406e-06, "loss": 0.8841, "step": 3390 }, { "epoch": 28.33, "learning_rate": 3.0864197530864196e-06, "loss": 0.8903, "step": 3400 }, { "epoch": 28.42, "learning_rate": 2.932098765432099e-06, "loss": 0.9242, "step": 3410 }, { "epoch": 28.5, "learning_rate": 2.777777777777778e-06, "loss": 0.9272, "step": 3420 }, { "epoch": 28.58, "learning_rate": 2.623456790123457e-06, "loss": 0.8952, "step": 3430 }, { "epoch": 28.67, "learning_rate": 2.469135802469136e-06, "loss": 0.9263, "step": 3440 }, { "epoch": 28.75, "learning_rate": 2.3148148148148148e-06, "loss": 0.9586, "step": 3450 }, { "epoch": 28.83, "learning_rate": 2.1604938271604937e-06, "loss": 0.929, "step": 3460 }, { "epoch": 28.92, "learning_rate": 2.0061728395061727e-06, "loss": 0.9409, "step": 3470 }, { "epoch": 29.0, "learning_rate": 1.8518518518518519e-06, "loss": 0.9285, "step": 3480 }, { "epoch": 29.0, "eval_accuracy": 0.5446009389671361, "eval_loss": 1.3899754285812378, "eval_runtime": 23.3569, "eval_samples_per_second": 72.955, "eval_steps_per_second": 2.312, "step": 3480 }, { "epoch": 29.08, "learning_rate": 1.6975308641975308e-06, "loss": 0.9286, "step": 3490 }, { "epoch": 29.17, "learning_rate": 1.5432098765432098e-06, "loss": 0.9462, "step": 3500 }, { "epoch": 29.25, "learning_rate": 1.388888888888889e-06, "loss": 0.9352, "step": 3510 }, { "epoch": 29.33, "learning_rate": 1.234567901234568e-06, "loss": 0.9116, "step": 3520 }, { "epoch": 29.42, "learning_rate": 1.0802469135802469e-06, "loss": 0.8827, "step": 3530 }, { "epoch": 29.5, "learning_rate": 9.259259259259259e-07, "loss": 0.9091, "step": 3540 }, { "epoch": 29.58, "learning_rate": 7.716049382716049e-07, "loss": 0.9104, "step": 3550 }, { "epoch": 29.67, "learning_rate": 6.17283950617284e-07, "loss": 0.9446, "step": 3560 }, { "epoch": 29.75, "learning_rate": 4.6296296296296297e-07, "loss": 0.9111, "step": 3570 }, { "epoch": 29.83, "learning_rate": 3.08641975308642e-07, "loss": 0.9165, "step": 3580 }, { "epoch": 29.92, "learning_rate": 1.54320987654321e-07, "loss": 0.9068, "step": 3590 }, { "epoch": 30.0, "learning_rate": 0.0, "loss": 0.8955, "step": 3600 }, { "epoch": 30.0, "eval_accuracy": 0.5387323943661971, "eval_loss": 1.4081873893737793, "eval_runtime": 24.0466, "eval_samples_per_second": 70.862, "eval_steps_per_second": 2.246, "step": 3600 }, { "epoch": 30.0, "step": 3600, "total_flos": 3.5642168535562445e+19, "train_loss": 0.9839045113987392, "train_runtime": 19975.6243, "train_samples_per_second": 23.025, "train_steps_per_second": 0.18 } ], "max_steps": 3600, "num_train_epochs": 30, "total_flos": 3.5642168535562445e+19, "trial_name": null, "trial_params": null }