{ "best_metric": 0.6381103992462158, "best_model_checkpoint": "./vit-base-beans/checkpoint-1200", "epoch": 8.0, "global_step": 3120, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 0.00019935897435897437, "loss": 1.7678, "step": 10 }, { "epoch": 0.05, "learning_rate": 0.00019871794871794874, "loss": 1.7014, "step": 20 }, { "epoch": 0.08, "learning_rate": 0.0001980769230769231, "loss": 1.4957, "step": 30 }, { "epoch": 0.1, "learning_rate": 0.00019743589743589744, "loss": 1.5043, "step": 40 }, { "epoch": 0.1, "eval_accuracy": 0.44556677890011226, "eval_loss": 1.493240237236023, "eval_runtime": 59.3866, "eval_samples_per_second": 60.081, "eval_steps_per_second": 7.51, "step": 40 }, { "epoch": 0.13, "learning_rate": 0.00019679487179487178, "loss": 1.3931, "step": 50 }, { "epoch": 0.15, "learning_rate": 0.00019615384615384615, "loss": 1.3029, "step": 60 }, { "epoch": 0.18, "learning_rate": 0.0001955128205128205, "loss": 1.1753, "step": 70 }, { "epoch": 0.21, "learning_rate": 0.00019487179487179487, "loss": 1.3894, "step": 80 }, { "epoch": 0.21, "eval_accuracy": 0.5297418630751964, "eval_loss": 1.2430940866470337, "eval_runtime": 38.1635, "eval_samples_per_second": 93.492, "eval_steps_per_second": 11.687, "step": 80 }, { "epoch": 0.23, "learning_rate": 0.00019423076923076924, "loss": 1.118, "step": 90 }, { "epoch": 0.26, "learning_rate": 0.0001935897435897436, "loss": 1.1395, "step": 100 }, { "epoch": 0.28, "learning_rate": 0.00019294871794871797, "loss": 1.0946, "step": 110 }, { "epoch": 0.31, "learning_rate": 0.00019230769230769233, "loss": 1.208, "step": 120 }, { "epoch": 0.31, "eval_accuracy": 0.6464646464646465, "eval_loss": 0.9881709218025208, "eval_runtime": 34.6718, "eval_samples_per_second": 102.908, "eval_steps_per_second": 12.863, "step": 120 }, { "epoch": 0.33, "learning_rate": 0.00019166666666666667, "loss": 0.9775, "step": 130 }, { "epoch": 0.36, "learning_rate": 0.00019102564102564104, "loss": 1.0216, "step": 140 }, { "epoch": 0.38, "learning_rate": 0.00019038461538461538, "loss": 1.1942, "step": 150 }, { "epoch": 0.41, "learning_rate": 0.00018974358974358974, "loss": 0.9847, "step": 160 }, { "epoch": 0.41, "eval_accuracy": 0.6599326599326599, "eval_loss": 0.950343132019043, "eval_runtime": 40.3536, "eval_samples_per_second": 88.418, "eval_steps_per_second": 11.052, "step": 160 }, { "epoch": 0.44, "learning_rate": 0.0001891025641025641, "loss": 0.9707, "step": 170 }, { "epoch": 0.46, "learning_rate": 0.00018846153846153847, "loss": 1.1941, "step": 180 }, { "epoch": 0.49, "learning_rate": 0.00018782051282051283, "loss": 1.0309, "step": 190 }, { "epoch": 0.51, "learning_rate": 0.0001871794871794872, "loss": 1.102, "step": 200 }, { "epoch": 0.51, "eval_accuracy": 0.6374859708193041, "eval_loss": 0.9868837594985962, "eval_runtime": 32.1858, "eval_samples_per_second": 110.856, "eval_steps_per_second": 13.857, "step": 200 }, { "epoch": 0.54, "learning_rate": 0.00018653846153846154, "loss": 0.9899, "step": 210 }, { "epoch": 0.56, "learning_rate": 0.0001858974358974359, "loss": 0.8926, "step": 220 }, { "epoch": 0.59, "learning_rate": 0.00018525641025641027, "loss": 0.9552, "step": 230 }, { "epoch": 0.62, "learning_rate": 0.00018461538461538463, "loss": 0.9109, "step": 240 }, { "epoch": 0.62, "eval_accuracy": 0.6380471380471381, "eval_loss": 0.9887320399284363, "eval_runtime": 32.5312, "eval_samples_per_second": 109.679, "eval_steps_per_second": 13.71, "step": 240 }, { "epoch": 0.64, "learning_rate": 0.00018397435897435897, "loss": 0.832, "step": 250 }, { "epoch": 0.67, "learning_rate": 0.00018333333333333334, "loss": 0.9323, "step": 260 }, { "epoch": 0.69, "learning_rate": 0.0001826923076923077, "loss": 1.0061, "step": 270 }, { "epoch": 0.72, "learning_rate": 0.00018205128205128207, "loss": 0.9627, "step": 280 }, { "epoch": 0.72, "eval_accuracy": 0.6156004489337823, "eval_loss": 1.057395100593567, "eval_runtime": 34.1152, "eval_samples_per_second": 104.587, "eval_steps_per_second": 13.073, "step": 280 }, { "epoch": 0.74, "learning_rate": 0.00018141025641025643, "loss": 0.9613, "step": 290 }, { "epoch": 0.77, "learning_rate": 0.00018076923076923077, "loss": 0.8986, "step": 300 }, { "epoch": 0.79, "learning_rate": 0.00018012820512820513, "loss": 0.9344, "step": 310 }, { "epoch": 0.82, "learning_rate": 0.0001794871794871795, "loss": 0.7031, "step": 320 }, { "epoch": 0.82, "eval_accuracy": 0.7087542087542088, "eval_loss": 0.813530683517456, "eval_runtime": 39.1342, "eval_samples_per_second": 91.173, "eval_steps_per_second": 11.397, "step": 320 }, { "epoch": 0.85, "learning_rate": 0.00017884615384615386, "loss": 0.7596, "step": 330 }, { "epoch": 0.87, "learning_rate": 0.00017820512820512823, "loss": 0.7207, "step": 340 }, { "epoch": 0.9, "learning_rate": 0.00017756410256410257, "loss": 0.8904, "step": 350 }, { "epoch": 0.92, "learning_rate": 0.00017692307692307693, "loss": 0.7605, "step": 360 }, { "epoch": 0.92, "eval_accuracy": 0.7272727272727273, "eval_loss": 0.7620912790298462, "eval_runtime": 33.0054, "eval_samples_per_second": 108.103, "eval_steps_per_second": 13.513, "step": 360 }, { "epoch": 0.95, "learning_rate": 0.0001762820512820513, "loss": 0.8093, "step": 370 }, { "epoch": 0.97, "learning_rate": 0.00017564102564102566, "loss": 0.8198, "step": 380 }, { "epoch": 1.0, "learning_rate": 0.000175, "loss": 0.9515, "step": 390 }, { "epoch": 1.03, "learning_rate": 0.00017435897435897436, "loss": 0.8467, "step": 400 }, { "epoch": 1.03, "eval_accuracy": 0.7244668911335578, "eval_loss": 0.7974632978439331, "eval_runtime": 33.3512, "eval_samples_per_second": 106.983, "eval_steps_per_second": 13.373, "step": 400 }, { "epoch": 1.05, "learning_rate": 0.00017371794871794873, "loss": 0.5829, "step": 410 }, { "epoch": 1.08, "learning_rate": 0.0001730769230769231, "loss": 0.5576, "step": 420 }, { "epoch": 1.1, "learning_rate": 0.00017243589743589746, "loss": 0.6035, "step": 430 }, { "epoch": 1.13, "learning_rate": 0.0001717948717948718, "loss": 0.6653, "step": 440 }, { "epoch": 1.13, "eval_accuracy": 0.7317620650953984, "eval_loss": 0.7574185132980347, "eval_runtime": 32.4855, "eval_samples_per_second": 109.834, "eval_steps_per_second": 13.729, "step": 440 }, { "epoch": 1.15, "learning_rate": 0.00017115384615384616, "loss": 0.5067, "step": 450 }, { "epoch": 1.18, "learning_rate": 0.00017051282051282053, "loss": 0.4973, "step": 460 }, { "epoch": 1.21, "learning_rate": 0.00016987179487179486, "loss": 0.5721, "step": 470 }, { "epoch": 1.23, "learning_rate": 0.00016923076923076923, "loss": 0.5467, "step": 480 }, { "epoch": 1.23, "eval_accuracy": 0.7244668911335578, "eval_loss": 0.7471520900726318, "eval_runtime": 32.6752, "eval_samples_per_second": 109.196, "eval_steps_per_second": 13.65, "step": 480 }, { "epoch": 1.26, "learning_rate": 0.0001685897435897436, "loss": 0.5494, "step": 490 }, { "epoch": 1.28, "learning_rate": 0.00016794871794871796, "loss": 0.6158, "step": 500 }, { "epoch": 1.31, "learning_rate": 0.00016730769230769232, "loss": 0.5412, "step": 510 }, { "epoch": 1.33, "learning_rate": 0.0001666666666666667, "loss": 0.388, "step": 520 }, { "epoch": 1.33, "eval_accuracy": 0.7716049382716049, "eval_loss": 0.6508304476737976, "eval_runtime": 36.0929, "eval_samples_per_second": 98.856, "eval_steps_per_second": 12.357, "step": 520 }, { "epoch": 1.36, "learning_rate": 0.00016602564102564105, "loss": 0.4511, "step": 530 }, { "epoch": 1.38, "learning_rate": 0.0001653846153846154, "loss": 0.566, "step": 540 }, { "epoch": 1.41, "learning_rate": 0.00016474358974358976, "loss": 0.4476, "step": 550 }, { "epoch": 1.44, "learning_rate": 0.0001641025641025641, "loss": 0.4699, "step": 560 }, { "epoch": 1.44, "eval_accuracy": 0.7244668911335578, "eval_loss": 0.773759663105011, "eval_runtime": 31.7272, "eval_samples_per_second": 112.459, "eval_steps_per_second": 14.057, "step": 560 }, { "epoch": 1.46, "learning_rate": 0.00016346153846153846, "loss": 0.402, "step": 570 }, { "epoch": 1.49, "learning_rate": 0.00016282051282051282, "loss": 0.425, "step": 580 }, { "epoch": 1.51, "learning_rate": 0.0001621794871794872, "loss": 0.5171, "step": 590 }, { "epoch": 1.54, "learning_rate": 0.00016153846153846155, "loss": 0.5344, "step": 600 }, { "epoch": 1.54, "eval_accuracy": 0.7328843995510662, "eval_loss": 0.7624912858009338, "eval_runtime": 32.007, "eval_samples_per_second": 111.476, "eval_steps_per_second": 13.934, "step": 600 }, { "epoch": 1.56, "learning_rate": 0.00016089743589743592, "loss": 0.4913, "step": 610 }, { "epoch": 1.59, "learning_rate": 0.00016025641025641028, "loss": 0.5042, "step": 620 }, { "epoch": 1.62, "learning_rate": 0.00015961538461538462, "loss": 0.6051, "step": 630 }, { "epoch": 1.64, "learning_rate": 0.00015897435897435896, "loss": 0.5753, "step": 640 }, { "epoch": 1.64, "eval_accuracy": 0.7598204264870931, "eval_loss": 0.6743783950805664, "eval_runtime": 30.705, "eval_samples_per_second": 116.203, "eval_steps_per_second": 14.525, "step": 640 }, { "epoch": 1.67, "learning_rate": 0.00015833333333333332, "loss": 0.524, "step": 650 }, { "epoch": 1.69, "learning_rate": 0.0001576923076923077, "loss": 0.5007, "step": 660 }, { "epoch": 1.72, "learning_rate": 0.00015705128205128205, "loss": 0.57, "step": 670 }, { "epoch": 1.74, "learning_rate": 0.00015641025641025642, "loss": 0.5533, "step": 680 }, { "epoch": 1.74, "eval_accuracy": 0.734006734006734, "eval_loss": 0.7230738997459412, "eval_runtime": 33.3034, "eval_samples_per_second": 107.136, "eval_steps_per_second": 13.392, "step": 680 }, { "epoch": 1.77, "learning_rate": 0.00015576923076923078, "loss": 0.4399, "step": 690 }, { "epoch": 1.79, "learning_rate": 0.00015512820512820515, "loss": 0.453, "step": 700 }, { "epoch": 1.82, "learning_rate": 0.00015448717948717951, "loss": 0.4504, "step": 710 }, { "epoch": 1.85, "learning_rate": 0.00015384615384615385, "loss": 0.4244, "step": 720 }, { "epoch": 1.85, "eval_accuracy": 0.7687991021324355, "eval_loss": 0.6673141717910767, "eval_runtime": 32.6948, "eval_samples_per_second": 109.131, "eval_steps_per_second": 13.641, "step": 720 }, { "epoch": 1.87, "learning_rate": 0.00015320512820512822, "loss": 0.46, "step": 730 }, { "epoch": 1.9, "learning_rate": 0.00015256410256410255, "loss": 0.4132, "step": 740 }, { "epoch": 1.92, "learning_rate": 0.00015192307692307692, "loss": 0.4849, "step": 750 }, { "epoch": 1.95, "learning_rate": 0.00015128205128205128, "loss": 0.5423, "step": 760 }, { "epoch": 1.95, "eval_accuracy": 0.7413019079685746, "eval_loss": 0.7185856699943542, "eval_runtime": 35.9539, "eval_samples_per_second": 99.238, "eval_steps_per_second": 12.405, "step": 760 }, { "epoch": 1.97, "learning_rate": 0.00015064102564102565, "loss": 0.3997, "step": 770 }, { "epoch": 2.0, "learning_rate": 0.00015000000000000001, "loss": 0.4744, "step": 780 }, { "epoch": 2.03, "learning_rate": 0.00014935897435897438, "loss": 0.4717, "step": 790 }, { "epoch": 2.05, "learning_rate": 0.00014871794871794872, "loss": 0.3384, "step": 800 }, { "epoch": 2.05, "eval_accuracy": 0.7351290684624018, "eval_loss": 0.7269920706748962, "eval_runtime": 33.5897, "eval_samples_per_second": 106.223, "eval_steps_per_second": 13.278, "step": 800 }, { "epoch": 2.08, "learning_rate": 0.00014807692307692308, "loss": 0.2602, "step": 810 }, { "epoch": 2.1, "learning_rate": 0.00014743589743589745, "loss": 0.2345, "step": 820 }, { "epoch": 2.13, "learning_rate": 0.00014679487179487178, "loss": 0.2135, "step": 830 }, { "epoch": 2.15, "learning_rate": 0.00014615384615384615, "loss": 0.2797, "step": 840 }, { "epoch": 2.15, "eval_accuracy": 0.7497194163860831, "eval_loss": 0.7742622494697571, "eval_runtime": 31.2996, "eval_samples_per_second": 113.995, "eval_steps_per_second": 14.249, "step": 840 }, { "epoch": 2.18, "learning_rate": 0.00014551282051282051, "loss": 0.1862, "step": 850 }, { "epoch": 2.21, "learning_rate": 0.00014487179487179488, "loss": 0.2334, "step": 860 }, { "epoch": 2.23, "learning_rate": 0.00014423076923076924, "loss": 0.3361, "step": 870 }, { "epoch": 2.26, "learning_rate": 0.0001435897435897436, "loss": 0.2939, "step": 880 }, { "epoch": 2.26, "eval_accuracy": 0.7732884399551067, "eval_loss": 0.6985616087913513, "eval_runtime": 32.9047, "eval_samples_per_second": 108.435, "eval_steps_per_second": 13.554, "step": 880 }, { "epoch": 2.28, "learning_rate": 0.00014294871794871795, "loss": 0.2846, "step": 890 }, { "epoch": 2.31, "learning_rate": 0.0001423076923076923, "loss": 0.2555, "step": 900 }, { "epoch": 2.33, "learning_rate": 0.00014166666666666668, "loss": 0.2629, "step": 910 }, { "epoch": 2.36, "learning_rate": 0.00014102564102564104, "loss": 0.2288, "step": 920 }, { "epoch": 2.36, "eval_accuracy": 0.7480359147025814, "eval_loss": 0.761025607585907, "eval_runtime": 34.1426, "eval_samples_per_second": 104.503, "eval_steps_per_second": 13.063, "step": 920 }, { "epoch": 2.38, "learning_rate": 0.00014038461538461538, "loss": 0.2115, "step": 930 }, { "epoch": 2.41, "learning_rate": 0.00013974358974358974, "loss": 0.2036, "step": 940 }, { "epoch": 2.44, "learning_rate": 0.0001391025641025641, "loss": 0.2913, "step": 950 }, { "epoch": 2.46, "learning_rate": 0.00013846153846153847, "loss": 0.2204, "step": 960 }, { "epoch": 2.46, "eval_accuracy": 0.7570145903479237, "eval_loss": 0.7840890884399414, "eval_runtime": 35.7983, "eval_samples_per_second": 99.67, "eval_steps_per_second": 12.459, "step": 960 }, { "epoch": 2.49, "learning_rate": 0.00013782051282051284, "loss": 0.2759, "step": 970 }, { "epoch": 2.51, "learning_rate": 0.00013717948717948718, "loss": 0.3945, "step": 980 }, { "epoch": 2.54, "learning_rate": 0.00013653846153846154, "loss": 0.3639, "step": 990 }, { "epoch": 2.56, "learning_rate": 0.0001358974358974359, "loss": 0.4397, "step": 1000 }, { "epoch": 2.56, "eval_accuracy": 0.7789001122334456, "eval_loss": 0.6565700173377991, "eval_runtime": 31.6553, "eval_samples_per_second": 112.714, "eval_steps_per_second": 14.089, "step": 1000 }, { "epoch": 2.59, "learning_rate": 0.00013525641025641027, "loss": 0.2707, "step": 1010 }, { "epoch": 2.62, "learning_rate": 0.00013461538461538464, "loss": 0.3451, "step": 1020 }, { "epoch": 2.64, "learning_rate": 0.00013397435897435897, "loss": 0.3277, "step": 1030 }, { "epoch": 2.67, "learning_rate": 0.00013333333333333334, "loss": 0.2219, "step": 1040 }, { "epoch": 2.67, "eval_accuracy": 0.7581369248035915, "eval_loss": 0.6900522112846375, "eval_runtime": 31.7845, "eval_samples_per_second": 112.256, "eval_steps_per_second": 14.032, "step": 1040 }, { "epoch": 2.69, "learning_rate": 0.0001326923076923077, "loss": 0.3174, "step": 1050 }, { "epoch": 2.72, "learning_rate": 0.00013205128205128204, "loss": 0.189, "step": 1060 }, { "epoch": 2.74, "learning_rate": 0.0001314102564102564, "loss": 0.2619, "step": 1070 }, { "epoch": 2.77, "learning_rate": 0.00013076923076923077, "loss": 0.2297, "step": 1080 }, { "epoch": 2.77, "eval_accuracy": 0.7581369248035915, "eval_loss": 0.7095093131065369, "eval_runtime": 38.4561, "eval_samples_per_second": 92.781, "eval_steps_per_second": 11.598, "step": 1080 }, { "epoch": 2.79, "learning_rate": 0.00013012820512820514, "loss": 0.2201, "step": 1090 }, { "epoch": 2.82, "learning_rate": 0.0001294871794871795, "loss": 0.2261, "step": 1100 }, { "epoch": 2.85, "learning_rate": 0.00012884615384615387, "loss": 0.2095, "step": 1110 }, { "epoch": 2.87, "learning_rate": 0.00012820512820512823, "loss": 0.209, "step": 1120 }, { "epoch": 2.87, "eval_accuracy": 0.7738496071829405, "eval_loss": 0.7127913236618042, "eval_runtime": 32.9997, "eval_samples_per_second": 108.122, "eval_steps_per_second": 13.515, "step": 1120 }, { "epoch": 2.9, "learning_rate": 0.00012756410256410257, "loss": 0.3365, "step": 1130 }, { "epoch": 2.92, "learning_rate": 0.00012692307692307693, "loss": 0.2639, "step": 1140 }, { "epoch": 2.95, "learning_rate": 0.00012628205128205127, "loss": 0.1892, "step": 1150 }, { "epoch": 2.97, "learning_rate": 0.00012564102564102564, "loss": 0.1853, "step": 1160 }, { "epoch": 2.97, "eval_accuracy": 0.7710437710437711, "eval_loss": 0.6986069083213806, "eval_runtime": 32.2598, "eval_samples_per_second": 110.602, "eval_steps_per_second": 13.825, "step": 1160 }, { "epoch": 3.0, "learning_rate": 0.000125, "loss": 0.1807, "step": 1170 }, { "epoch": 3.03, "learning_rate": 0.00012435897435897437, "loss": 0.1554, "step": 1180 }, { "epoch": 3.05, "learning_rate": 0.00012371794871794873, "loss": 0.1845, "step": 1190 }, { "epoch": 3.08, "learning_rate": 0.0001230769230769231, "loss": 0.1322, "step": 1200 }, { "epoch": 3.08, "eval_accuracy": 0.7962962962962963, "eval_loss": 0.6381103992462158, "eval_runtime": 35.1257, "eval_samples_per_second": 101.578, "eval_steps_per_second": 12.697, "step": 1200 }, { "epoch": 3.1, "learning_rate": 0.00012243589743589746, "loss": 0.0598, "step": 1210 }, { "epoch": 3.13, "learning_rate": 0.00012179487179487179, "loss": 0.0853, "step": 1220 }, { "epoch": 3.15, "learning_rate": 0.00012115384615384615, "loss": 0.1616, "step": 1230 }, { "epoch": 3.18, "learning_rate": 0.00012051282051282052, "loss": 0.2603, "step": 1240 }, { "epoch": 3.18, "eval_accuracy": 0.7682379349046016, "eval_loss": 0.7860389351844788, "eval_runtime": 35.648, "eval_samples_per_second": 100.09, "eval_steps_per_second": 12.511, "step": 1240 }, { "epoch": 3.21, "learning_rate": 0.00011987179487179487, "loss": 0.1104, "step": 1250 }, { "epoch": 3.23, "learning_rate": 0.00011923076923076923, "loss": 0.0634, "step": 1260 }, { "epoch": 3.26, "learning_rate": 0.0001185897435897436, "loss": 0.0717, "step": 1270 }, { "epoch": 3.28, "learning_rate": 0.00011794871794871796, "loss": 0.1031, "step": 1280 }, { "epoch": 3.28, "eval_accuracy": 0.7895622895622896, "eval_loss": 0.7322177886962891, "eval_runtime": 30.6139, "eval_samples_per_second": 116.549, "eval_steps_per_second": 14.569, "step": 1280 }, { "epoch": 3.31, "learning_rate": 0.00011730769230769231, "loss": 0.054, "step": 1290 }, { "epoch": 3.33, "learning_rate": 0.00011666666666666668, "loss": 0.0883, "step": 1300 }, { "epoch": 3.36, "learning_rate": 0.00011602564102564104, "loss": 0.1443, "step": 1310 }, { "epoch": 3.38, "learning_rate": 0.00011538461538461538, "loss": 0.0763, "step": 1320 }, { "epoch": 3.38, "eval_accuracy": 0.7839506172839507, "eval_loss": 0.77773118019104, "eval_runtime": 31.4828, "eval_samples_per_second": 113.332, "eval_steps_per_second": 14.166, "step": 1320 }, { "epoch": 3.41, "learning_rate": 0.00011474358974358975, "loss": 0.1328, "step": 1330 }, { "epoch": 3.44, "learning_rate": 0.0001141025641025641, "loss": 0.0751, "step": 1340 }, { "epoch": 3.46, "learning_rate": 0.00011346153846153846, "loss": 0.1193, "step": 1350 }, { "epoch": 3.49, "learning_rate": 0.00011282051282051283, "loss": 0.1437, "step": 1360 }, { "epoch": 3.49, "eval_accuracy": 0.7665544332210998, "eval_loss": 0.9293356537818909, "eval_runtime": 31.1681, "eval_samples_per_second": 114.476, "eval_steps_per_second": 14.31, "step": 1360 }, { "epoch": 3.51, "learning_rate": 0.00011217948717948718, "loss": 0.1805, "step": 1370 }, { "epoch": 3.54, "learning_rate": 0.00011153846153846154, "loss": 0.0931, "step": 1380 }, { "epoch": 3.56, "learning_rate": 0.00011089743589743591, "loss": 0.1227, "step": 1390 }, { "epoch": 3.59, "learning_rate": 0.00011025641025641027, "loss": 0.0818, "step": 1400 }, { "epoch": 3.59, "eval_accuracy": 0.7940516273849607, "eval_loss": 0.796922504901886, "eval_runtime": 31.9044, "eval_samples_per_second": 111.834, "eval_steps_per_second": 13.979, "step": 1400 }, { "epoch": 3.62, "learning_rate": 0.00010961538461538463, "loss": 0.1193, "step": 1410 }, { "epoch": 3.64, "learning_rate": 0.00010897435897435896, "loss": 0.1237, "step": 1420 }, { "epoch": 3.67, "learning_rate": 0.00010833333333333333, "loss": 0.1074, "step": 1430 }, { "epoch": 3.69, "learning_rate": 0.0001076923076923077, "loss": 0.1355, "step": 1440 }, { "epoch": 3.69, "eval_accuracy": 0.7716049382716049, "eval_loss": 0.8145824670791626, "eval_runtime": 37.6519, "eval_samples_per_second": 94.763, "eval_steps_per_second": 11.845, "step": 1440 }, { "epoch": 3.72, "learning_rate": 0.00010705128205128206, "loss": 0.0844, "step": 1450 }, { "epoch": 3.74, "learning_rate": 0.00010641025641025641, "loss": 0.1067, "step": 1460 }, { "epoch": 3.77, "learning_rate": 0.00010576923076923077, "loss": 0.0342, "step": 1470 }, { "epoch": 3.79, "learning_rate": 0.00010512820512820514, "loss": 0.0802, "step": 1480 }, { "epoch": 3.79, "eval_accuracy": 0.8075196408529742, "eval_loss": 0.6976819038391113, "eval_runtime": 32.2393, "eval_samples_per_second": 110.672, "eval_steps_per_second": 13.834, "step": 1480 }, { "epoch": 3.82, "learning_rate": 0.0001044871794871795, "loss": 0.1143, "step": 1490 }, { "epoch": 3.85, "learning_rate": 0.00010384615384615386, "loss": 0.1332, "step": 1500 }, { "epoch": 3.87, "learning_rate": 0.00010320512820512822, "loss": 0.069, "step": 1510 }, { "epoch": 3.9, "learning_rate": 0.00010256410256410256, "loss": 0.032, "step": 1520 }, { "epoch": 3.9, "eval_accuracy": 0.7929292929292929, "eval_loss": 0.8349816203117371, "eval_runtime": 32.9637, "eval_samples_per_second": 108.24, "eval_steps_per_second": 13.53, "step": 1520 }, { "epoch": 3.92, "learning_rate": 0.00010192307692307692, "loss": 0.0722, "step": 1530 }, { "epoch": 3.95, "learning_rate": 0.00010128205128205129, "loss": 0.1383, "step": 1540 }, { "epoch": 3.97, "learning_rate": 0.00010064102564102564, "loss": 0.0861, "step": 1550 }, { "epoch": 4.0, "learning_rate": 0.0001, "loss": 0.1836, "step": 1560 }, { "epoch": 4.0, "eval_accuracy": 0.7968574635241302, "eval_loss": 0.775885820388794, "eval_runtime": 33.0589, "eval_samples_per_second": 107.928, "eval_steps_per_second": 13.491, "step": 1560 }, { "epoch": 4.03, "learning_rate": 9.935897435897437e-05, "loss": 0.0486, "step": 1570 }, { "epoch": 4.05, "learning_rate": 9.871794871794872e-05, "loss": 0.0369, "step": 1580 }, { "epoch": 4.08, "learning_rate": 9.807692307692307e-05, "loss": 0.0209, "step": 1590 }, { "epoch": 4.1, "learning_rate": 9.743589743589744e-05, "loss": 0.0391, "step": 1600 }, { "epoch": 4.1, "eval_accuracy": 0.8013468013468014, "eval_loss": 0.8052350282669067, "eval_runtime": 32.0537, "eval_samples_per_second": 111.313, "eval_steps_per_second": 13.914, "step": 1600 }, { "epoch": 4.13, "learning_rate": 9.67948717948718e-05, "loss": 0.0354, "step": 1610 }, { "epoch": 4.15, "learning_rate": 9.615384615384617e-05, "loss": 0.0738, "step": 1620 }, { "epoch": 4.18, "learning_rate": 9.551282051282052e-05, "loss": 0.0275, "step": 1630 }, { "epoch": 4.21, "learning_rate": 9.487179487179487e-05, "loss": 0.0406, "step": 1640 }, { "epoch": 4.21, "eval_accuracy": 0.7828282828282829, "eval_loss": 0.9251748919487, "eval_runtime": 31.6082, "eval_samples_per_second": 112.882, "eval_steps_per_second": 14.11, "step": 1640 }, { "epoch": 4.23, "learning_rate": 9.423076923076924e-05, "loss": 0.1377, "step": 1650 }, { "epoch": 4.26, "learning_rate": 9.35897435897436e-05, "loss": 0.0526, "step": 1660 }, { "epoch": 4.28, "learning_rate": 9.294871794871795e-05, "loss": 0.152, "step": 1670 }, { "epoch": 4.31, "learning_rate": 9.230769230769232e-05, "loss": 0.0488, "step": 1680 }, { "epoch": 4.31, "eval_accuracy": 0.7901234567901234, "eval_loss": 0.8203706741333008, "eval_runtime": 31.8606, "eval_samples_per_second": 111.988, "eval_steps_per_second": 13.998, "step": 1680 }, { "epoch": 4.33, "learning_rate": 9.166666666666667e-05, "loss": 0.0372, "step": 1690 }, { "epoch": 4.36, "learning_rate": 9.102564102564103e-05, "loss": 0.0968, "step": 1700 }, { "epoch": 4.38, "learning_rate": 9.038461538461538e-05, "loss": 0.0147, "step": 1710 }, { "epoch": 4.41, "learning_rate": 8.974358974358975e-05, "loss": 0.016, "step": 1720 }, { "epoch": 4.41, "eval_accuracy": 0.7794612794612794, "eval_loss": 0.9169319868087769, "eval_runtime": 33.724, "eval_samples_per_second": 105.8, "eval_steps_per_second": 13.225, "step": 1720 }, { "epoch": 4.44, "learning_rate": 8.910256410256411e-05, "loss": 0.0393, "step": 1730 }, { "epoch": 4.46, "learning_rate": 8.846153846153847e-05, "loss": 0.0258, "step": 1740 }, { "epoch": 4.49, "learning_rate": 8.782051282051283e-05, "loss": 0.009, "step": 1750 }, { "epoch": 4.51, "learning_rate": 8.717948717948718e-05, "loss": 0.0994, "step": 1760 }, { "epoch": 4.51, "eval_accuracy": 0.8120089786756454, "eval_loss": 0.7930631637573242, "eval_runtime": 34.0477, "eval_samples_per_second": 104.794, "eval_steps_per_second": 13.099, "step": 1760 }, { "epoch": 4.54, "learning_rate": 8.653846153846155e-05, "loss": 0.0418, "step": 1770 }, { "epoch": 4.56, "learning_rate": 8.58974358974359e-05, "loss": 0.0793, "step": 1780 }, { "epoch": 4.59, "learning_rate": 8.525641025641026e-05, "loss": 0.0122, "step": 1790 }, { "epoch": 4.62, "learning_rate": 8.461538461538461e-05, "loss": 0.07, "step": 1800 }, { "epoch": 4.62, "eval_accuracy": 0.8103254769921436, "eval_loss": 0.8245030045509338, "eval_runtime": 32.6583, "eval_samples_per_second": 109.252, "eval_steps_per_second": 13.657, "step": 1800 }, { "epoch": 4.64, "learning_rate": 8.397435897435898e-05, "loss": 0.0656, "step": 1810 }, { "epoch": 4.67, "learning_rate": 8.333333333333334e-05, "loss": 0.0364, "step": 1820 }, { "epoch": 4.69, "learning_rate": 8.26923076923077e-05, "loss": 0.0639, "step": 1830 }, { "epoch": 4.72, "learning_rate": 8.205128205128205e-05, "loss": 0.0088, "step": 1840 }, { "epoch": 4.72, "eval_accuracy": 0.7985409652076318, "eval_loss": 0.907173752784729, "eval_runtime": 40.0812, "eval_samples_per_second": 89.019, "eval_steps_per_second": 11.127, "step": 1840 }, { "epoch": 4.74, "learning_rate": 8.141025641025641e-05, "loss": 0.0557, "step": 1850 }, { "epoch": 4.77, "learning_rate": 8.076923076923078e-05, "loss": 0.0548, "step": 1860 }, { "epoch": 4.79, "learning_rate": 8.012820512820514e-05, "loss": 0.0055, "step": 1870 }, { "epoch": 4.82, "learning_rate": 7.948717948717948e-05, "loss": 0.0085, "step": 1880 }, { "epoch": 4.82, "eval_accuracy": 0.8114478114478114, "eval_loss": 0.7862226963043213, "eval_runtime": 33.9761, "eval_samples_per_second": 105.015, "eval_steps_per_second": 13.127, "step": 1880 }, { "epoch": 4.85, "learning_rate": 7.884615384615384e-05, "loss": 0.0423, "step": 1890 }, { "epoch": 4.87, "learning_rate": 7.820512820512821e-05, "loss": 0.0581, "step": 1900 }, { "epoch": 4.9, "learning_rate": 7.756410256410257e-05, "loss": 0.0043, "step": 1910 }, { "epoch": 4.92, "learning_rate": 7.692307692307693e-05, "loss": 0.083, "step": 1920 }, { "epoch": 4.92, "eval_accuracy": 0.8198653198653199, "eval_loss": 0.7796981334686279, "eval_runtime": 32.3864, "eval_samples_per_second": 110.17, "eval_steps_per_second": 13.771, "step": 1920 }, { "epoch": 4.95, "learning_rate": 7.628205128205128e-05, "loss": 0.1308, "step": 1930 }, { "epoch": 4.97, "learning_rate": 7.564102564102564e-05, "loss": 0.0063, "step": 1940 }, { "epoch": 5.0, "learning_rate": 7.500000000000001e-05, "loss": 0.0132, "step": 1950 }, { "epoch": 5.03, "learning_rate": 7.435897435897436e-05, "loss": 0.0055, "step": 1960 }, { "epoch": 5.03, "eval_accuracy": 0.8069584736251403, "eval_loss": 0.8723996877670288, "eval_runtime": 33.6185, "eval_samples_per_second": 106.132, "eval_steps_per_second": 13.267, "step": 1960 }, { "epoch": 5.05, "learning_rate": 7.371794871794872e-05, "loss": 0.0366, "step": 1970 }, { "epoch": 5.08, "learning_rate": 7.307692307692307e-05, "loss": 0.06, "step": 1980 }, { "epoch": 5.1, "learning_rate": 7.243589743589744e-05, "loss": 0.0065, "step": 1990 }, { "epoch": 5.13, "learning_rate": 7.17948717948718e-05, "loss": 0.0223, "step": 2000 }, { "epoch": 5.13, "eval_accuracy": 0.8249158249158249, "eval_loss": 0.7518730163574219, "eval_runtime": 43.1922, "eval_samples_per_second": 82.607, "eval_steps_per_second": 10.326, "step": 2000 }, { "epoch": 5.15, "learning_rate": 7.115384615384616e-05, "loss": 0.0215, "step": 2010 }, { "epoch": 5.18, "learning_rate": 7.051282051282052e-05, "loss": 0.0494, "step": 2020 }, { "epoch": 5.21, "learning_rate": 6.987179487179487e-05, "loss": 0.0441, "step": 2030 }, { "epoch": 5.23, "learning_rate": 6.923076923076924e-05, "loss": 0.0042, "step": 2040 }, { "epoch": 5.23, "eval_accuracy": 0.8232323232323232, "eval_loss": 0.7583897113800049, "eval_runtime": 31.893, "eval_samples_per_second": 111.874, "eval_steps_per_second": 13.984, "step": 2040 }, { "epoch": 5.26, "learning_rate": 6.858974358974359e-05, "loss": 0.0131, "step": 2050 }, { "epoch": 5.28, "learning_rate": 6.794871794871795e-05, "loss": 0.0037, "step": 2060 }, { "epoch": 5.31, "learning_rate": 6.730769230769232e-05, "loss": 0.02, "step": 2070 }, { "epoch": 5.33, "learning_rate": 6.666666666666667e-05, "loss": 0.0178, "step": 2080 }, { "epoch": 5.33, "eval_accuracy": 0.8080808080808081, "eval_loss": 0.8523600697517395, "eval_runtime": 32.5102, "eval_samples_per_second": 109.75, "eval_steps_per_second": 13.719, "step": 2080 }, { "epoch": 5.36, "learning_rate": 6.602564102564102e-05, "loss": 0.0035, "step": 2090 }, { "epoch": 5.38, "learning_rate": 6.538461538461539e-05, "loss": 0.0137, "step": 2100 }, { "epoch": 5.41, "learning_rate": 6.474358974358975e-05, "loss": 0.0033, "step": 2110 }, { "epoch": 5.44, "learning_rate": 6.410256410256412e-05, "loss": 0.0172, "step": 2120 }, { "epoch": 5.44, "eval_accuracy": 0.8215488215488216, "eval_loss": 0.7729219794273376, "eval_runtime": 31.7305, "eval_samples_per_second": 112.447, "eval_steps_per_second": 14.056, "step": 2120 }, { "epoch": 5.46, "learning_rate": 6.346153846153847e-05, "loss": 0.0252, "step": 2130 }, { "epoch": 5.49, "learning_rate": 6.282051282051282e-05, "loss": 0.0034, "step": 2140 }, { "epoch": 5.51, "learning_rate": 6.217948717948718e-05, "loss": 0.0045, "step": 2150 }, { "epoch": 5.54, "learning_rate": 6.153846153846155e-05, "loss": 0.0044, "step": 2160 }, { "epoch": 5.54, "eval_accuracy": 0.808641975308642, "eval_loss": 0.8701183795928955, "eval_runtime": 31.0299, "eval_samples_per_second": 114.986, "eval_steps_per_second": 14.373, "step": 2160 }, { "epoch": 5.56, "learning_rate": 6.089743589743589e-05, "loss": 0.0373, "step": 2170 }, { "epoch": 5.59, "learning_rate": 6.025641025641026e-05, "loss": 0.0029, "step": 2180 }, { "epoch": 5.62, "learning_rate": 5.9615384615384616e-05, "loss": 0.0032, "step": 2190 }, { "epoch": 5.64, "learning_rate": 5.897435897435898e-05, "loss": 0.003, "step": 2200 }, { "epoch": 5.64, "eval_accuracy": 0.8237934904601572, "eval_loss": 0.8060529828071594, "eval_runtime": 32.4421, "eval_samples_per_second": 109.98, "eval_steps_per_second": 13.748, "step": 2200 }, { "epoch": 5.67, "learning_rate": 5.833333333333334e-05, "loss": 0.0034, "step": 2210 }, { "epoch": 5.69, "learning_rate": 5.769230769230769e-05, "loss": 0.0025, "step": 2220 }, { "epoch": 5.72, "learning_rate": 5.705128205128205e-05, "loss": 0.0029, "step": 2230 }, { "epoch": 5.74, "learning_rate": 5.6410256410256414e-05, "loss": 0.0033, "step": 2240 }, { "epoch": 5.74, "eval_accuracy": 0.8226711560044894, "eval_loss": 0.813855767250061, "eval_runtime": 32.378, "eval_samples_per_second": 110.198, "eval_steps_per_second": 13.775, "step": 2240 }, { "epoch": 5.77, "learning_rate": 5.576923076923077e-05, "loss": 0.0129, "step": 2250 }, { "epoch": 5.79, "learning_rate": 5.512820512820514e-05, "loss": 0.0079, "step": 2260 }, { "epoch": 5.82, "learning_rate": 5.448717948717948e-05, "loss": 0.0117, "step": 2270 }, { "epoch": 5.85, "learning_rate": 5.384615384615385e-05, "loss": 0.0023, "step": 2280 }, { "epoch": 5.85, "eval_accuracy": 0.8164983164983165, "eval_loss": 0.8478395342826843, "eval_runtime": 35.321, "eval_samples_per_second": 101.016, "eval_steps_per_second": 12.627, "step": 2280 }, { "epoch": 5.87, "learning_rate": 5.3205128205128205e-05, "loss": 0.0075, "step": 2290 }, { "epoch": 5.9, "learning_rate": 5.256410256410257e-05, "loss": 0.0023, "step": 2300 }, { "epoch": 5.92, "learning_rate": 5.192307692307693e-05, "loss": 0.0058, "step": 2310 }, { "epoch": 5.95, "learning_rate": 5.128205128205128e-05, "loss": 0.003, "step": 2320 }, { "epoch": 5.95, "eval_accuracy": 0.8120089786756454, "eval_loss": 0.8443180918693542, "eval_runtime": 32.2001, "eval_samples_per_second": 110.807, "eval_steps_per_second": 13.851, "step": 2320 }, { "epoch": 5.97, "learning_rate": 5.0641025641025644e-05, "loss": 0.0512, "step": 2330 }, { "epoch": 6.0, "learning_rate": 5e-05, "loss": 0.0024, "step": 2340 }, { "epoch": 6.03, "learning_rate": 4.935897435897436e-05, "loss": 0.0022, "step": 2350 }, { "epoch": 6.05, "learning_rate": 4.871794871794872e-05, "loss": 0.031, "step": 2360 }, { "epoch": 6.05, "eval_accuracy": 0.8047138047138047, "eval_loss": 0.9272196888923645, "eval_runtime": 33.2145, "eval_samples_per_second": 107.423, "eval_steps_per_second": 13.428, "step": 2360 }, { "epoch": 6.08, "learning_rate": 4.8076923076923084e-05, "loss": 0.0456, "step": 2370 }, { "epoch": 6.1, "learning_rate": 4.7435897435897435e-05, "loss": 0.0024, "step": 2380 }, { "epoch": 6.13, "learning_rate": 4.67948717948718e-05, "loss": 0.0023, "step": 2390 }, { "epoch": 6.15, "learning_rate": 4.615384615384616e-05, "loss": 0.0021, "step": 2400 }, { "epoch": 6.15, "eval_accuracy": 0.8204264870931538, "eval_loss": 0.8369048237800598, "eval_runtime": 36.1395, "eval_samples_per_second": 98.728, "eval_steps_per_second": 12.341, "step": 2400 }, { "epoch": 6.18, "learning_rate": 4.5512820512820516e-05, "loss": 0.0021, "step": 2410 }, { "epoch": 6.21, "learning_rate": 4.4871794871794874e-05, "loss": 0.0026, "step": 2420 }, { "epoch": 6.23, "learning_rate": 4.423076923076923e-05, "loss": 0.0021, "step": 2430 }, { "epoch": 6.26, "learning_rate": 4.358974358974359e-05, "loss": 0.0019, "step": 2440 }, { "epoch": 6.26, "eval_accuracy": 0.8209876543209876, "eval_loss": 0.828059196472168, "eval_runtime": 32.1149, "eval_samples_per_second": 111.101, "eval_steps_per_second": 13.888, "step": 2440 }, { "epoch": 6.28, "learning_rate": 4.294871794871795e-05, "loss": 0.0024, "step": 2450 }, { "epoch": 6.31, "learning_rate": 4.230769230769231e-05, "loss": 0.0019, "step": 2460 }, { "epoch": 6.33, "learning_rate": 4.166666666666667e-05, "loss": 0.002, "step": 2470 }, { "epoch": 6.36, "learning_rate": 4.1025641025641023e-05, "loss": 0.0019, "step": 2480 }, { "epoch": 6.36, "eval_accuracy": 0.8198653198653199, "eval_loss": 0.834019124507904, "eval_runtime": 35.8439, "eval_samples_per_second": 99.543, "eval_steps_per_second": 12.443, "step": 2480 }, { "epoch": 6.38, "learning_rate": 4.038461538461539e-05, "loss": 0.002, "step": 2490 }, { "epoch": 6.41, "learning_rate": 3.974358974358974e-05, "loss": 0.0019, "step": 2500 }, { "epoch": 6.44, "learning_rate": 3.9102564102564105e-05, "loss": 0.0018, "step": 2510 }, { "epoch": 6.46, "learning_rate": 3.846153846153846e-05, "loss": 0.0018, "step": 2520 }, { "epoch": 6.46, "eval_accuracy": 0.8204264870931538, "eval_loss": 0.8339148759841919, "eval_runtime": 34.0288, "eval_samples_per_second": 104.853, "eval_steps_per_second": 13.107, "step": 2520 }, { "epoch": 6.49, "learning_rate": 3.782051282051282e-05, "loss": 0.0018, "step": 2530 }, { "epoch": 6.51, "learning_rate": 3.717948717948718e-05, "loss": 0.0018, "step": 2540 }, { "epoch": 6.54, "learning_rate": 3.653846153846154e-05, "loss": 0.0018, "step": 2550 }, { "epoch": 6.56, "learning_rate": 3.58974358974359e-05, "loss": 0.002, "step": 2560 }, { "epoch": 6.56, "eval_accuracy": 0.8209876543209876, "eval_loss": 0.8293877840042114, "eval_runtime": 33.9846, "eval_samples_per_second": 104.989, "eval_steps_per_second": 13.124, "step": 2560 }, { "epoch": 6.59, "learning_rate": 3.525641025641026e-05, "loss": 0.0017, "step": 2570 }, { "epoch": 6.62, "learning_rate": 3.461538461538462e-05, "loss": 0.0017, "step": 2580 }, { "epoch": 6.64, "learning_rate": 3.397435897435898e-05, "loss": 0.0018, "step": 2590 }, { "epoch": 6.67, "learning_rate": 3.3333333333333335e-05, "loss": 0.0017, "step": 2600 }, { "epoch": 6.67, "eval_accuracy": 0.8209876543209876, "eval_loss": 0.8266403079032898, "eval_runtime": 36.0162, "eval_samples_per_second": 99.067, "eval_steps_per_second": 12.383, "step": 2600 }, { "epoch": 6.69, "learning_rate": 3.269230769230769e-05, "loss": 0.002, "step": 2610 }, { "epoch": 6.72, "learning_rate": 3.205128205128206e-05, "loss": 0.0017, "step": 2620 }, { "epoch": 6.74, "learning_rate": 3.141025641025641e-05, "loss": 0.0017, "step": 2630 }, { "epoch": 6.77, "learning_rate": 3.0769230769230774e-05, "loss": 0.0017, "step": 2640 }, { "epoch": 6.77, "eval_accuracy": 0.8226711560044894, "eval_loss": 0.8177886009216309, "eval_runtime": 36.3329, "eval_samples_per_second": 98.203, "eval_steps_per_second": 12.275, "step": 2640 }, { "epoch": 6.79, "learning_rate": 3.012820512820513e-05, "loss": 0.0016, "step": 2650 }, { "epoch": 6.82, "learning_rate": 2.948717948717949e-05, "loss": 0.0016, "step": 2660 }, { "epoch": 6.85, "learning_rate": 2.8846153846153845e-05, "loss": 0.0016, "step": 2670 }, { "epoch": 6.87, "learning_rate": 2.8205128205128207e-05, "loss": 0.0017, "step": 2680 }, { "epoch": 6.87, "eval_accuracy": 0.824354657687991, "eval_loss": 0.8174560070037842, "eval_runtime": 32.3063, "eval_samples_per_second": 110.443, "eval_steps_per_second": 13.805, "step": 2680 }, { "epoch": 6.9, "learning_rate": 2.756410256410257e-05, "loss": 0.0017, "step": 2690 }, { "epoch": 6.92, "learning_rate": 2.6923076923076923e-05, "loss": 0.0017, "step": 2700 }, { "epoch": 6.95, "learning_rate": 2.6282051282051285e-05, "loss": 0.0017, "step": 2710 }, { "epoch": 6.97, "learning_rate": 2.564102564102564e-05, "loss": 0.0017, "step": 2720 }, { "epoch": 6.97, "eval_accuracy": 0.8237934904601572, "eval_loss": 0.8166114687919617, "eval_runtime": 33.2936, "eval_samples_per_second": 107.168, "eval_steps_per_second": 13.396, "step": 2720 }, { "epoch": 7.0, "learning_rate": 2.5e-05, "loss": 0.0015, "step": 2730 }, { "epoch": 7.03, "learning_rate": 2.435897435897436e-05, "loss": 0.0016, "step": 2740 }, { "epoch": 7.05, "learning_rate": 2.3717948717948718e-05, "loss": 0.0016, "step": 2750 }, { "epoch": 7.08, "learning_rate": 2.307692307692308e-05, "loss": 0.0015, "step": 2760 }, { "epoch": 7.08, "eval_accuracy": 0.824354657687991, "eval_loss": 0.8176218271255493, "eval_runtime": 32.3011, "eval_samples_per_second": 110.46, "eval_steps_per_second": 13.808, "step": 2760 }, { "epoch": 7.1, "learning_rate": 2.2435897435897437e-05, "loss": 0.0015, "step": 2770 }, { "epoch": 7.13, "learning_rate": 2.1794871794871795e-05, "loss": 0.0015, "step": 2780 }, { "epoch": 7.15, "learning_rate": 2.1153846153846154e-05, "loss": 0.0015, "step": 2790 }, { "epoch": 7.18, "learning_rate": 2.0512820512820512e-05, "loss": 0.0015, "step": 2800 }, { "epoch": 7.18, "eval_accuracy": 0.824354657687991, "eval_loss": 0.8185549378395081, "eval_runtime": 32.7126, "eval_samples_per_second": 109.071, "eval_steps_per_second": 13.634, "step": 2800 }, { "epoch": 7.21, "learning_rate": 1.987179487179487e-05, "loss": 0.0015, "step": 2810 }, { "epoch": 7.23, "learning_rate": 1.923076923076923e-05, "loss": 0.0015, "step": 2820 }, { "epoch": 7.26, "learning_rate": 1.858974358974359e-05, "loss": 0.0015, "step": 2830 }, { "epoch": 7.28, "learning_rate": 1.794871794871795e-05, "loss": 0.0015, "step": 2840 }, { "epoch": 7.28, "eval_accuracy": 0.8249158249158249, "eval_loss": 0.819684624671936, "eval_runtime": 35.8925, "eval_samples_per_second": 99.408, "eval_steps_per_second": 12.426, "step": 2840 }, { "epoch": 7.31, "learning_rate": 1.730769230769231e-05, "loss": 0.0016, "step": 2850 }, { "epoch": 7.33, "learning_rate": 1.6666666666666667e-05, "loss": 0.0015, "step": 2860 }, { "epoch": 7.36, "learning_rate": 1.602564102564103e-05, "loss": 0.0015, "step": 2870 }, { "epoch": 7.38, "learning_rate": 1.5384615384615387e-05, "loss": 0.0015, "step": 2880 }, { "epoch": 7.38, "eval_accuracy": 0.8249158249158249, "eval_loss": 0.8204275965690613, "eval_runtime": 31.8471, "eval_samples_per_second": 112.035, "eval_steps_per_second": 14.004, "step": 2880 }, { "epoch": 7.41, "learning_rate": 1.4743589743589745e-05, "loss": 0.0015, "step": 2890 }, { "epoch": 7.44, "learning_rate": 1.4102564102564104e-05, "loss": 0.0015, "step": 2900 }, { "epoch": 7.46, "learning_rate": 1.3461538461538462e-05, "loss": 0.0015, "step": 2910 }, { "epoch": 7.49, "learning_rate": 1.282051282051282e-05, "loss": 0.0015, "step": 2920 }, { "epoch": 7.49, "eval_accuracy": 0.8249158249158249, "eval_loss": 0.8213893175125122, "eval_runtime": 30.8496, "eval_samples_per_second": 115.658, "eval_steps_per_second": 14.457, "step": 2920 }, { "epoch": 7.51, "learning_rate": 1.217948717948718e-05, "loss": 0.0015, "step": 2930 }, { "epoch": 7.54, "learning_rate": 1.153846153846154e-05, "loss": 0.0015, "step": 2940 }, { "epoch": 7.56, "learning_rate": 1.0897435897435898e-05, "loss": 0.0015, "step": 2950 }, { "epoch": 7.59, "learning_rate": 1.0256410256410256e-05, "loss": 0.0015, "step": 2960 }, { "epoch": 7.59, "eval_accuracy": 0.8249158249158249, "eval_loss": 0.8220140933990479, "eval_runtime": 31.6074, "eval_samples_per_second": 112.885, "eval_steps_per_second": 14.111, "step": 2960 }, { "epoch": 7.62, "learning_rate": 9.615384615384616e-06, "loss": 0.0016, "step": 2970 }, { "epoch": 7.64, "learning_rate": 8.974358974358976e-06, "loss": 0.0014, "step": 2980 }, { "epoch": 7.67, "learning_rate": 8.333333333333334e-06, "loss": 0.0015, "step": 2990 }, { "epoch": 7.69, "learning_rate": 7.692307692307694e-06, "loss": 0.0015, "step": 3000 }, { "epoch": 7.69, "eval_accuracy": 0.8254769921436588, "eval_loss": 0.8220102787017822, "eval_runtime": 30.2325, "eval_samples_per_second": 118.019, "eval_steps_per_second": 14.752, "step": 3000 }, { "epoch": 7.72, "learning_rate": 7.051282051282052e-06, "loss": 0.0014, "step": 3010 }, { "epoch": 7.74, "learning_rate": 6.41025641025641e-06, "loss": 0.0014, "step": 3020 }, { "epoch": 7.77, "learning_rate": 5.76923076923077e-06, "loss": 0.0015, "step": 3030 }, { "epoch": 7.79, "learning_rate": 5.128205128205128e-06, "loss": 0.0015, "step": 3040 }, { "epoch": 7.79, "eval_accuracy": 0.8254769921436588, "eval_loss": 0.8217305541038513, "eval_runtime": 38.9983, "eval_samples_per_second": 91.491, "eval_steps_per_second": 11.436, "step": 3040 }, { "epoch": 7.82, "learning_rate": 4.487179487179488e-06, "loss": 0.0014, "step": 3050 }, { "epoch": 7.85, "learning_rate": 3.846153846153847e-06, "loss": 0.0015, "step": 3060 }, { "epoch": 7.87, "learning_rate": 3.205128205128205e-06, "loss": 0.0014, "step": 3070 }, { "epoch": 7.9, "learning_rate": 2.564102564102564e-06, "loss": 0.0014, "step": 3080 }, { "epoch": 7.9, "eval_accuracy": 0.8254769921436588, "eval_loss": 0.8224795460700989, "eval_runtime": 31.9073, "eval_samples_per_second": 111.824, "eval_steps_per_second": 13.978, "step": 3080 }, { "epoch": 7.92, "learning_rate": 1.9230769230769234e-06, "loss": 0.0014, "step": 3090 }, { "epoch": 7.95, "learning_rate": 1.282051282051282e-06, "loss": 0.0015, "step": 3100 }, { "epoch": 7.97, "learning_rate": 6.41025641025641e-07, "loss": 0.0014, "step": 3110 }, { "epoch": 8.0, "learning_rate": 0.0, "loss": 0.0014, "step": 3120 }, { "epoch": 8.0, "eval_accuracy": 0.8260381593714927, "eval_loss": 0.8224756717681885, "eval_runtime": 30.7985, "eval_samples_per_second": 115.85, "eval_steps_per_second": 14.481, "step": 3120 }, { "epoch": 8.0, "step": 3120, "total_flos": 3.8629591832685773e+18, "train_loss": 0.2533179052472592, "train_runtime": 4957.8804, "train_samples_per_second": 10.069, "train_steps_per_second": 0.629 } ], "max_steps": 3120, "num_train_epochs": 8, "total_flos": 3.8629591832685773e+18, "trial_name": null, "trial_params": null }