{ "best_metric": 0.8358047604560852, "best_model_checkpoint": "./vit-eGTZANplus\\checkpoint-480", "epoch": 50.0, "eval_steps": 10, "global_step": 5350, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 0.00019962616822429908, "loss": 2.4098, "step": 10 }, { "epoch": 0.09, "eval_accuracy": 0.14285714285714285, "eval_loss": 2.3848845958709717, "eval_runtime": 3.3561, "eval_samples_per_second": 56.316, "eval_steps_per_second": 7.151, "step": 10 }, { "epoch": 0.19, "learning_rate": 0.00019925233644859814, "loss": 2.3376, "step": 20 }, { "epoch": 0.19, "eval_accuracy": 0.21164021164021163, "eval_loss": 2.257709264755249, "eval_runtime": 3.2453, "eval_samples_per_second": 58.238, "eval_steps_per_second": 7.395, "step": 20 }, { "epoch": 0.28, "learning_rate": 0.0001988785046728972, "loss": 2.2047, "step": 30 }, { "epoch": 0.28, "eval_accuracy": 0.2962962962962963, "eval_loss": 2.158203125, "eval_runtime": 2.9918, "eval_samples_per_second": 63.172, "eval_steps_per_second": 8.022, "step": 30 }, { "epoch": 0.37, "learning_rate": 0.00019850467289719628, "loss": 2.1267, "step": 40 }, { "epoch": 0.37, "eval_accuracy": 0.3862433862433862, "eval_loss": 1.942036747932434, "eval_runtime": 3.0218, "eval_samples_per_second": 62.546, "eval_steps_per_second": 7.942, "step": 40 }, { "epoch": 0.47, "learning_rate": 0.00019813084112149535, "loss": 2.0365, "step": 50 }, { "epoch": 0.47, "eval_accuracy": 0.36507936507936506, "eval_loss": 1.9487409591674805, "eval_runtime": 2.938, "eval_samples_per_second": 64.329, "eval_steps_per_second": 8.169, "step": 50 }, { "epoch": 0.56, "learning_rate": 0.00019775700934579439, "loss": 1.9884, "step": 60 }, { "epoch": 0.56, "eval_accuracy": 0.4973544973544973, "eval_loss": 1.7399966716766357, "eval_runtime": 3.2957, "eval_samples_per_second": 57.348, "eval_steps_per_second": 7.282, "step": 60 }, { "epoch": 0.65, "learning_rate": 0.00019738317757009345, "loss": 1.8822, "step": 70 }, { "epoch": 0.65, "eval_accuracy": 0.455026455026455, "eval_loss": 1.6512662172317505, "eval_runtime": 3.1742, "eval_samples_per_second": 59.542, "eval_steps_per_second": 7.561, "step": 70 }, { "epoch": 0.75, "learning_rate": 0.00019700934579439255, "loss": 1.7083, "step": 80 }, { "epoch": 0.75, "eval_accuracy": 0.4603174603174603, "eval_loss": 1.560472011566162, "eval_runtime": 3.0275, "eval_samples_per_second": 62.429, "eval_steps_per_second": 7.927, "step": 80 }, { "epoch": 0.84, "learning_rate": 0.00019663551401869161, "loss": 1.6416, "step": 90 }, { "epoch": 0.84, "eval_accuracy": 0.49206349206349204, "eval_loss": 1.5726529359817505, "eval_runtime": 3.0636, "eval_samples_per_second": 61.691, "eval_steps_per_second": 7.834, "step": 90 }, { "epoch": 0.93, "learning_rate": 0.00019626168224299065, "loss": 1.6473, "step": 100 }, { "epoch": 0.93, "eval_accuracy": 0.4656084656084656, "eval_loss": 1.648504614830017, "eval_runtime": 3.2766, "eval_samples_per_second": 57.682, "eval_steps_per_second": 7.325, "step": 100 }, { "epoch": 1.03, "learning_rate": 0.0001959252336448598, "loss": 1.3355, "step": 110 }, { "epoch": 1.03, "eval_accuracy": 0.5343915343915344, "eval_loss": 1.4000248908996582, "eval_runtime": 3.1074, "eval_samples_per_second": 60.823, "eval_steps_per_second": 7.724, "step": 110 }, { "epoch": 1.12, "learning_rate": 0.00019555140186915888, "loss": 1.4677, "step": 120 }, { "epoch": 1.12, "eval_accuracy": 0.544973544973545, "eval_loss": 1.3445547819137573, "eval_runtime": 3.4486, "eval_samples_per_second": 54.805, "eval_steps_per_second": 6.959, "step": 120 }, { "epoch": 1.21, "learning_rate": 0.00019517757009345797, "loss": 1.3832, "step": 130 }, { "epoch": 1.21, "eval_accuracy": 0.5555555555555556, "eval_loss": 1.3656994104385376, "eval_runtime": 3.0673, "eval_samples_per_second": 61.618, "eval_steps_per_second": 7.824, "step": 130 }, { "epoch": 1.31, "learning_rate": 0.00019480373831775701, "loss": 1.3364, "step": 140 }, { "epoch": 1.31, "eval_accuracy": 0.582010582010582, "eval_loss": 1.255820631980896, "eval_runtime": 3.1137, "eval_samples_per_second": 60.7, "eval_steps_per_second": 7.708, "step": 140 }, { "epoch": 1.4, "learning_rate": 0.00019442990654205608, "loss": 1.3741, "step": 150 }, { "epoch": 1.4, "eval_accuracy": 0.5343915343915344, "eval_loss": 1.4308785200119019, "eval_runtime": 3.1126, "eval_samples_per_second": 60.721, "eval_steps_per_second": 7.711, "step": 150 }, { "epoch": 1.5, "learning_rate": 0.00019405607476635515, "loss": 1.3806, "step": 160 }, { "epoch": 1.5, "eval_accuracy": 0.5873015873015873, "eval_loss": 1.3130360841751099, "eval_runtime": 3.1851, "eval_samples_per_second": 59.339, "eval_steps_per_second": 7.535, "step": 160 }, { "epoch": 1.59, "learning_rate": 0.00019368224299065422, "loss": 1.096, "step": 170 }, { "epoch": 1.59, "eval_accuracy": 0.6084656084656085, "eval_loss": 1.2385209798812866, "eval_runtime": 3.165, "eval_samples_per_second": 59.716, "eval_steps_per_second": 7.583, "step": 170 }, { "epoch": 1.68, "learning_rate": 0.00019330841121495328, "loss": 1.2139, "step": 180 }, { "epoch": 1.68, "eval_accuracy": 0.5925925925925926, "eval_loss": 1.2447173595428467, "eval_runtime": 3.2024, "eval_samples_per_second": 59.019, "eval_steps_per_second": 7.494, "step": 180 }, { "epoch": 1.78, "learning_rate": 0.00019293457943925235, "loss": 1.1646, "step": 190 }, { "epoch": 1.78, "eval_accuracy": 0.6243386243386243, "eval_loss": 1.1505087614059448, "eval_runtime": 3.3803, "eval_samples_per_second": 55.912, "eval_steps_per_second": 7.1, "step": 190 }, { "epoch": 1.87, "learning_rate": 0.00019256074766355142, "loss": 1.1851, "step": 200 }, { "epoch": 1.87, "eval_accuracy": 0.5555555555555556, "eval_loss": 1.292531967163086, "eval_runtime": 3.3243, "eval_samples_per_second": 56.854, "eval_steps_per_second": 7.22, "step": 200 }, { "epoch": 1.96, "learning_rate": 0.00019218691588785048, "loss": 1.0773, "step": 210 }, { "epoch": 1.96, "eval_accuracy": 0.6825396825396826, "eval_loss": 1.025804042816162, "eval_runtime": 3.2438, "eval_samples_per_second": 58.265, "eval_steps_per_second": 7.399, "step": 210 }, { "epoch": 2.06, "learning_rate": 0.00019181308411214952, "loss": 1.2694, "step": 220 }, { "epoch": 2.06, "eval_accuracy": 0.5978835978835979, "eval_loss": 1.1972746849060059, "eval_runtime": 3.0541, "eval_samples_per_second": 61.884, "eval_steps_per_second": 7.858, "step": 220 }, { "epoch": 2.15, "learning_rate": 0.00019143925233644862, "loss": 0.8254, "step": 230 }, { "epoch": 2.15, "eval_accuracy": 0.7142857142857143, "eval_loss": 0.9814253449440002, "eval_runtime": 3.1455, "eval_samples_per_second": 60.086, "eval_steps_per_second": 7.63, "step": 230 }, { "epoch": 2.24, "learning_rate": 0.0001910654205607477, "loss": 0.8614, "step": 240 }, { "epoch": 2.24, "eval_accuracy": 0.656084656084656, "eval_loss": 1.1236768960952759, "eval_runtime": 3.1907, "eval_samples_per_second": 59.235, "eval_steps_per_second": 7.522, "step": 240 }, { "epoch": 2.34, "learning_rate": 0.00019069158878504673, "loss": 0.961, "step": 250 }, { "epoch": 2.34, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0817521810531616, "eval_runtime": 3.2851, "eval_samples_per_second": 57.533, "eval_steps_per_second": 7.306, "step": 250 }, { "epoch": 2.43, "learning_rate": 0.0001903177570093458, "loss": 0.8305, "step": 260 }, { "epoch": 2.43, "eval_accuracy": 0.6613756613756614, "eval_loss": 1.0328330993652344, "eval_runtime": 3.1629, "eval_samples_per_second": 59.756, "eval_steps_per_second": 7.588, "step": 260 }, { "epoch": 2.52, "learning_rate": 0.00018994392523364486, "loss": 0.882, "step": 270 }, { "epoch": 2.52, "eval_accuracy": 0.6349206349206349, "eval_loss": 1.1382650136947632, "eval_runtime": 3.1128, "eval_samples_per_second": 60.718, "eval_steps_per_second": 7.71, "step": 270 }, { "epoch": 2.62, "learning_rate": 0.00018957009345794396, "loss": 0.9153, "step": 280 }, { "epoch": 2.62, "eval_accuracy": 0.6507936507936508, "eval_loss": 1.0411267280578613, "eval_runtime": 3.1356, "eval_samples_per_second": 60.275, "eval_steps_per_second": 7.654, "step": 280 }, { "epoch": 2.71, "learning_rate": 0.000189196261682243, "loss": 0.8855, "step": 290 }, { "epoch": 2.71, "eval_accuracy": 0.6984126984126984, "eval_loss": 0.9475411772727966, "eval_runtime": 3.2668, "eval_samples_per_second": 57.854, "eval_steps_per_second": 7.347, "step": 290 }, { "epoch": 2.8, "learning_rate": 0.00018882242990654206, "loss": 0.8792, "step": 300 }, { "epoch": 2.8, "eval_accuracy": 0.5978835978835979, "eval_loss": 1.1673120260238647, "eval_runtime": 3.2828, "eval_samples_per_second": 57.574, "eval_steps_per_second": 7.311, "step": 300 }, { "epoch": 2.9, "learning_rate": 0.00018844859813084113, "loss": 0.8555, "step": 310 }, { "epoch": 2.9, "eval_accuracy": 0.7407407407407407, "eval_loss": 0.8777327537536621, "eval_runtime": 3.2563, "eval_samples_per_second": 58.041, "eval_steps_per_second": 7.37, "step": 310 }, { "epoch": 2.99, "learning_rate": 0.0001880747663551402, "loss": 0.8841, "step": 320 }, { "epoch": 2.99, "eval_accuracy": 0.671957671957672, "eval_loss": 1.0181235074996948, "eval_runtime": 3.2414, "eval_samples_per_second": 58.308, "eval_steps_per_second": 7.404, "step": 320 }, { "epoch": 3.08, "learning_rate": 0.00018770093457943926, "loss": 0.5579, "step": 330 }, { "epoch": 3.08, "eval_accuracy": 0.6613756613756614, "eval_loss": 1.052778959274292, "eval_runtime": 3.3555, "eval_samples_per_second": 56.326, "eval_steps_per_second": 7.153, "step": 330 }, { "epoch": 3.18, "learning_rate": 0.00018732710280373833, "loss": 0.5953, "step": 340 }, { "epoch": 3.18, "eval_accuracy": 0.6455026455026455, "eval_loss": 1.095900058746338, "eval_runtime": 3.2945, "eval_samples_per_second": 57.369, "eval_steps_per_second": 7.285, "step": 340 }, { "epoch": 3.27, "learning_rate": 0.0001869532710280374, "loss": 0.62, "step": 350 }, { "epoch": 3.27, "eval_accuracy": 0.7037037037037037, "eval_loss": 0.9120954275131226, "eval_runtime": 3.6012, "eval_samples_per_second": 52.483, "eval_steps_per_second": 6.664, "step": 350 }, { "epoch": 3.36, "learning_rate": 0.00018657943925233644, "loss": 0.7633, "step": 360 }, { "epoch": 3.36, "eval_accuracy": 0.6984126984126984, "eval_loss": 0.926105797290802, "eval_runtime": 3.4368, "eval_samples_per_second": 54.992, "eval_steps_per_second": 6.983, "step": 360 }, { "epoch": 3.46, "learning_rate": 0.0001862056074766355, "loss": 0.7009, "step": 370 }, { "epoch": 3.46, "eval_accuracy": 0.6296296296296297, "eval_loss": 1.1945137977600098, "eval_runtime": 3.788, "eval_samples_per_second": 49.895, "eval_steps_per_second": 6.336, "step": 370 }, { "epoch": 3.55, "learning_rate": 0.0001858317757009346, "loss": 0.8007, "step": 380 }, { "epoch": 3.55, "eval_accuracy": 0.6296296296296297, "eval_loss": 1.0851304531097412, "eval_runtime": 3.287, "eval_samples_per_second": 57.499, "eval_steps_per_second": 7.301, "step": 380 }, { "epoch": 3.64, "learning_rate": 0.00018545794392523367, "loss": 0.7921, "step": 390 }, { "epoch": 3.64, "eval_accuracy": 0.6984126984126984, "eval_loss": 0.935813307762146, "eval_runtime": 3.1749, "eval_samples_per_second": 59.529, "eval_steps_per_second": 7.559, "step": 390 }, { "epoch": 3.74, "learning_rate": 0.0001850841121495327, "loss": 0.5837, "step": 400 }, { "epoch": 3.74, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.987457811832428, "eval_runtime": 3.1206, "eval_samples_per_second": 60.565, "eval_steps_per_second": 7.691, "step": 400 }, { "epoch": 3.83, "learning_rate": 0.00018471028037383178, "loss": 0.6557, "step": 410 }, { "epoch": 3.83, "eval_accuracy": 0.6931216931216931, "eval_loss": 0.9543613791465759, "eval_runtime": 3.2455, "eval_samples_per_second": 58.235, "eval_steps_per_second": 7.395, "step": 410 }, { "epoch": 3.93, "learning_rate": 0.00018433644859813084, "loss": 0.8081, "step": 420 }, { "epoch": 3.93, "eval_accuracy": 0.656084656084656, "eval_loss": 1.087867259979248, "eval_runtime": 3.0993, "eval_samples_per_second": 60.981, "eval_steps_per_second": 7.744, "step": 420 }, { "epoch": 4.02, "learning_rate": 0.0001839626168224299, "loss": 0.7486, "step": 430 }, { "epoch": 4.02, "eval_accuracy": 0.6190476190476191, "eval_loss": 1.266100287437439, "eval_runtime": 3.1838, "eval_samples_per_second": 59.363, "eval_steps_per_second": 7.538, "step": 430 }, { "epoch": 4.11, "learning_rate": 0.00018358878504672898, "loss": 0.5166, "step": 440 }, { "epoch": 4.11, "eval_accuracy": 0.7142857142857143, "eval_loss": 0.9324920773506165, "eval_runtime": 3.1917, "eval_samples_per_second": 59.216, "eval_steps_per_second": 7.52, "step": 440 }, { "epoch": 4.21, "learning_rate": 0.00018321495327102804, "loss": 0.4375, "step": 450 }, { "epoch": 4.21, "eval_accuracy": 0.7037037037037037, "eval_loss": 0.8819901943206787, "eval_runtime": 3.1479, "eval_samples_per_second": 60.04, "eval_steps_per_second": 7.624, "step": 450 }, { "epoch": 4.3, "learning_rate": 0.0001828411214953271, "loss": 0.4839, "step": 460 }, { "epoch": 4.3, "eval_accuracy": 0.6825396825396826, "eval_loss": 1.0533747673034668, "eval_runtime": 3.1426, "eval_samples_per_second": 60.141, "eval_steps_per_second": 7.637, "step": 460 }, { "epoch": 4.39, "learning_rate": 0.00018246728971962618, "loss": 0.3932, "step": 470 }, { "epoch": 4.39, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.0015952587127686, "eval_runtime": 3.1739, "eval_samples_per_second": 59.548, "eval_steps_per_second": 7.562, "step": 470 }, { "epoch": 4.49, "learning_rate": 0.00018209345794392525, "loss": 0.4672, "step": 480 }, { "epoch": 4.49, "eval_accuracy": 0.746031746031746, "eval_loss": 0.8358047604560852, "eval_runtime": 3.1086, "eval_samples_per_second": 60.8, "eval_steps_per_second": 7.721, "step": 480 }, { "epoch": 4.58, "learning_rate": 0.0001817196261682243, "loss": 0.4839, "step": 490 }, { "epoch": 4.58, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.0010868310928345, "eval_runtime": 3.436, "eval_samples_per_second": 55.006, "eval_steps_per_second": 6.985, "step": 490 }, { "epoch": 4.67, "learning_rate": 0.00018134579439252338, "loss": 0.4536, "step": 500 }, { "epoch": 4.67, "eval_accuracy": 0.5925925925925926, "eval_loss": 1.4390077590942383, "eval_runtime": 3.2086, "eval_samples_per_second": 58.905, "eval_steps_per_second": 7.48, "step": 500 }, { "epoch": 4.77, "learning_rate": 0.00018097196261682242, "loss": 0.5925, "step": 510 }, { "epoch": 4.77, "eval_accuracy": 0.6455026455026455, "eval_loss": 1.0699883699417114, "eval_runtime": 3.2145, "eval_samples_per_second": 58.796, "eval_steps_per_second": 7.466, "step": 510 }, { "epoch": 4.86, "learning_rate": 0.0001805981308411215, "loss": 0.4905, "step": 520 }, { "epoch": 4.86, "eval_accuracy": 0.6772486772486772, "eval_loss": 1.0934034585952759, "eval_runtime": 3.1633, "eval_samples_per_second": 59.748, "eval_steps_per_second": 7.587, "step": 520 }, { "epoch": 4.95, "learning_rate": 0.00018022429906542058, "loss": 0.6323, "step": 530 }, { "epoch": 4.95, "eval_accuracy": 0.656084656084656, "eval_loss": 1.074127197265625, "eval_runtime": 3.0611, "eval_samples_per_second": 61.742, "eval_steps_per_second": 7.84, "step": 530 }, { "epoch": 5.05, "learning_rate": 0.00017985046728971965, "loss": 0.3686, "step": 540 }, { "epoch": 5.05, "eval_accuracy": 0.6613756613756614, "eval_loss": 1.17642080783844, "eval_runtime": 3.1625, "eval_samples_per_second": 59.764, "eval_steps_per_second": 7.589, "step": 540 }, { "epoch": 5.14, "learning_rate": 0.0001794766355140187, "loss": 0.3233, "step": 550 }, { "epoch": 5.14, "eval_accuracy": 0.6666666666666666, "eval_loss": 0.9969061017036438, "eval_runtime": 3.1186, "eval_samples_per_second": 60.604, "eval_steps_per_second": 7.696, "step": 550 }, { "epoch": 5.23, "learning_rate": 0.00017910280373831776, "loss": 0.2791, "step": 560 }, { "epoch": 5.23, "eval_accuracy": 0.6613756613756614, "eval_loss": 1.20182466506958, "eval_runtime": 3.2809, "eval_samples_per_second": 57.607, "eval_steps_per_second": 7.315, "step": 560 }, { "epoch": 5.33, "learning_rate": 0.00017872897196261682, "loss": 0.2798, "step": 570 }, { "epoch": 5.33, "eval_accuracy": 0.6772486772486772, "eval_loss": 1.0335559844970703, "eval_runtime": 3.1419, "eval_samples_per_second": 60.155, "eval_steps_per_second": 7.639, "step": 570 }, { "epoch": 5.42, "learning_rate": 0.0001783551401869159, "loss": 0.2892, "step": 580 }, { "epoch": 5.42, "eval_accuracy": 0.6296296296296297, "eval_loss": 1.3315926790237427, "eval_runtime": 3.2144, "eval_samples_per_second": 58.797, "eval_steps_per_second": 7.466, "step": 580 }, { "epoch": 5.51, "learning_rate": 0.00017798130841121496, "loss": 0.317, "step": 590 }, { "epoch": 5.51, "eval_accuracy": 0.708994708994709, "eval_loss": 1.0159733295440674, "eval_runtime": 3.1281, "eval_samples_per_second": 60.42, "eval_steps_per_second": 7.672, "step": 590 }, { "epoch": 5.61, "learning_rate": 0.00017760747663551403, "loss": 0.3673, "step": 600 }, { "epoch": 5.61, "eval_accuracy": 0.6190476190476191, "eval_loss": 1.3200335502624512, "eval_runtime": 2.9997, "eval_samples_per_second": 63.006, "eval_steps_per_second": 8.001, "step": 600 }, { "epoch": 5.7, "learning_rate": 0.0001772336448598131, "loss": 0.4733, "step": 610 }, { "epoch": 5.7, "eval_accuracy": 0.6455026455026455, "eval_loss": 1.2423778772354126, "eval_runtime": 3.2132, "eval_samples_per_second": 58.819, "eval_steps_per_second": 7.469, "step": 610 }, { "epoch": 5.79, "learning_rate": 0.00017685981308411216, "loss": 0.4683, "step": 620 }, { "epoch": 5.79, "eval_accuracy": 0.7248677248677249, "eval_loss": 0.8893383145332336, "eval_runtime": 3.1811, "eval_samples_per_second": 59.413, "eval_steps_per_second": 7.545, "step": 620 }, { "epoch": 5.89, "learning_rate": 0.00017648598130841123, "loss": 0.4179, "step": 630 }, { "epoch": 5.89, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.0192750692367554, "eval_runtime": 3.1066, "eval_samples_per_second": 60.839, "eval_steps_per_second": 7.726, "step": 630 }, { "epoch": 5.98, "learning_rate": 0.0001761121495327103, "loss": 0.3667, "step": 640 }, { "epoch": 5.98, "eval_accuracy": 0.7037037037037037, "eval_loss": 0.9950593113899231, "eval_runtime": 3.1029, "eval_samples_per_second": 60.911, "eval_steps_per_second": 7.735, "step": 640 }, { "epoch": 6.07, "learning_rate": 0.00017573831775700936, "loss": 0.2212, "step": 650 }, { "epoch": 6.07, "eval_accuracy": 0.7354497354497355, "eval_loss": 0.9019126296043396, "eval_runtime": 3.0407, "eval_samples_per_second": 62.156, "eval_steps_per_second": 7.893, "step": 650 }, { "epoch": 6.17, "learning_rate": 0.0001753644859813084, "loss": 0.1881, "step": 660 }, { "epoch": 6.17, "eval_accuracy": 0.656084656084656, "eval_loss": 1.1512494087219238, "eval_runtime": 3.0574, "eval_samples_per_second": 61.818, "eval_steps_per_second": 7.85, "step": 660 }, { "epoch": 6.26, "learning_rate": 0.0001749906542056075, "loss": 0.2403, "step": 670 }, { "epoch": 6.26, "eval_accuracy": 0.708994708994709, "eval_loss": 1.0092432498931885, "eval_runtime": 3.1346, "eval_samples_per_second": 60.295, "eval_steps_per_second": 7.657, "step": 670 }, { "epoch": 6.36, "learning_rate": 0.00017461682242990656, "loss": 0.2597, "step": 680 }, { "epoch": 6.36, "eval_accuracy": 0.6825396825396826, "eval_loss": 1.1817060708999634, "eval_runtime": 3.0592, "eval_samples_per_second": 61.781, "eval_steps_per_second": 7.845, "step": 680 }, { "epoch": 6.45, "learning_rate": 0.00017424299065420563, "loss": 0.2644, "step": 690 }, { "epoch": 6.45, "eval_accuracy": 0.6507936507936508, "eval_loss": 1.258557677268982, "eval_runtime": 3.1473, "eval_samples_per_second": 60.051, "eval_steps_per_second": 7.626, "step": 690 }, { "epoch": 6.54, "learning_rate": 0.00017386915887850467, "loss": 0.2562, "step": 700 }, { "epoch": 6.54, "eval_accuracy": 0.7195767195767195, "eval_loss": 1.0005096197128296, "eval_runtime": 3.1123, "eval_samples_per_second": 60.726, "eval_steps_per_second": 7.711, "step": 700 }, { "epoch": 6.64, "learning_rate": 0.00017349532710280374, "loss": 0.1487, "step": 710 }, { "epoch": 6.64, "eval_accuracy": 0.6507936507936508, "eval_loss": 1.2767467498779297, "eval_runtime": 3.1275, "eval_samples_per_second": 60.433, "eval_steps_per_second": 7.674, "step": 710 }, { "epoch": 6.73, "learning_rate": 0.00017312149532710283, "loss": 0.2953, "step": 720 }, { "epoch": 6.73, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.253779649734497, "eval_runtime": 3.2123, "eval_samples_per_second": 58.836, "eval_steps_per_second": 7.471, "step": 720 }, { "epoch": 6.82, "learning_rate": 0.00017274766355140187, "loss": 0.1725, "step": 730 }, { "epoch": 6.82, "eval_accuracy": 0.656084656084656, "eval_loss": 1.2238225936889648, "eval_runtime": 3.1844, "eval_samples_per_second": 59.353, "eval_steps_per_second": 7.537, "step": 730 }, { "epoch": 6.92, "learning_rate": 0.00017237383177570094, "loss": 0.2037, "step": 740 }, { "epoch": 6.92, "eval_accuracy": 0.6084656084656085, "eval_loss": 1.468080759048462, "eval_runtime": 3.1309, "eval_samples_per_second": 60.366, "eval_steps_per_second": 7.666, "step": 740 }, { "epoch": 7.01, "learning_rate": 0.000172, "loss": 0.2592, "step": 750 }, { "epoch": 7.01, "eval_accuracy": 0.708994708994709, "eval_loss": 1.2074000835418701, "eval_runtime": 3.2681, "eval_samples_per_second": 57.831, "eval_steps_per_second": 7.344, "step": 750 }, { "epoch": 7.1, "learning_rate": 0.00017162616822429907, "loss": 0.1851, "step": 760 }, { "epoch": 7.1, "eval_accuracy": 0.6825396825396826, "eval_loss": 1.1313153505325317, "eval_runtime": 3.1857, "eval_samples_per_second": 59.328, "eval_steps_per_second": 7.534, "step": 760 }, { "epoch": 7.2, "learning_rate": 0.00017125233644859814, "loss": 0.0958, "step": 770 }, { "epoch": 7.2, "eval_accuracy": 0.6931216931216931, "eval_loss": 1.222944974899292, "eval_runtime": 3.1213, "eval_samples_per_second": 60.551, "eval_steps_per_second": 7.689, "step": 770 }, { "epoch": 7.29, "learning_rate": 0.0001708785046728972, "loss": 0.0947, "step": 780 }, { "epoch": 7.29, "eval_accuracy": 0.6613756613756614, "eval_loss": 1.3873189687728882, "eval_runtime": 3.0939, "eval_samples_per_second": 61.087, "eval_steps_per_second": 7.757, "step": 780 }, { "epoch": 7.38, "learning_rate": 0.00017050467289719628, "loss": 0.2053, "step": 790 }, { "epoch": 7.38, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.4111433029174805, "eval_runtime": 3.0678, "eval_samples_per_second": 61.607, "eval_steps_per_second": 7.823, "step": 790 }, { "epoch": 7.48, "learning_rate": 0.00017013084112149534, "loss": 0.2165, "step": 800 }, { "epoch": 7.48, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.3087962865829468, "eval_runtime": 3.1862, "eval_samples_per_second": 59.319, "eval_steps_per_second": 7.533, "step": 800 }, { "epoch": 7.57, "learning_rate": 0.00016975700934579438, "loss": 0.2425, "step": 810 }, { "epoch": 7.57, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.3991620540618896, "eval_runtime": 3.2153, "eval_samples_per_second": 58.781, "eval_steps_per_second": 7.464, "step": 810 }, { "epoch": 7.66, "learning_rate": 0.00016938317757009348, "loss": 0.1644, "step": 820 }, { "epoch": 7.66, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.080649733543396, "eval_runtime": 3.2137, "eval_samples_per_second": 58.811, "eval_steps_per_second": 7.468, "step": 820 }, { "epoch": 7.76, "learning_rate": 0.00016900934579439254, "loss": 0.3093, "step": 830 }, { "epoch": 7.76, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.214190125465393, "eval_runtime": 3.1342, "eval_samples_per_second": 60.302, "eval_steps_per_second": 7.657, "step": 830 }, { "epoch": 7.85, "learning_rate": 0.0001686355140186916, "loss": 0.2525, "step": 840 }, { "epoch": 7.85, "eval_accuracy": 0.656084656084656, "eval_loss": 1.3408259153366089, "eval_runtime": 3.0897, "eval_samples_per_second": 61.171, "eval_steps_per_second": 7.768, "step": 840 }, { "epoch": 7.94, "learning_rate": 0.00016826168224299065, "loss": 0.3339, "step": 850 }, { "epoch": 7.94, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.1808757781982422, "eval_runtime": 3.2528, "eval_samples_per_second": 58.104, "eval_steps_per_second": 7.378, "step": 850 }, { "epoch": 8.04, "learning_rate": 0.00016788785046728972, "loss": 0.1044, "step": 860 }, { "epoch": 8.04, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.2688654661178589, "eval_runtime": 3.09, "eval_samples_per_second": 61.165, "eval_steps_per_second": 7.767, "step": 860 }, { "epoch": 8.13, "learning_rate": 0.0001675140186915888, "loss": 0.093, "step": 870 }, { "epoch": 8.13, "eval_accuracy": 0.708994708994709, "eval_loss": 1.209309697151184, "eval_runtime": 3.1281, "eval_samples_per_second": 60.421, "eval_steps_per_second": 7.672, "step": 870 }, { "epoch": 8.22, "learning_rate": 0.00016714018691588785, "loss": 0.2934, "step": 880 }, { "epoch": 8.22, "eval_accuracy": 0.671957671957672, "eval_loss": 1.1540151834487915, "eval_runtime": 3.27, "eval_samples_per_second": 57.798, "eval_steps_per_second": 7.339, "step": 880 }, { "epoch": 8.32, "learning_rate": 0.00016676635514018692, "loss": 0.2133, "step": 890 }, { "epoch": 8.32, "eval_accuracy": 0.6349206349206349, "eval_loss": 1.5835676193237305, "eval_runtime": 3.2878, "eval_samples_per_second": 57.485, "eval_steps_per_second": 7.3, "step": 890 }, { "epoch": 8.41, "learning_rate": 0.000166392523364486, "loss": 0.2045, "step": 900 }, { "epoch": 8.41, "eval_accuracy": 0.6507936507936508, "eval_loss": 1.2564616203308105, "eval_runtime": 3.1746, "eval_samples_per_second": 59.536, "eval_steps_per_second": 7.56, "step": 900 }, { "epoch": 8.5, "learning_rate": 0.00016601869158878506, "loss": 0.2886, "step": 910 }, { "epoch": 8.5, "eval_accuracy": 0.6613756613756614, "eval_loss": 1.3674818277359009, "eval_runtime": 3.2413, "eval_samples_per_second": 58.311, "eval_steps_per_second": 7.405, "step": 910 }, { "epoch": 8.6, "learning_rate": 0.00016564485981308412, "loss": 0.128, "step": 920 }, { "epoch": 8.6, "eval_accuracy": 0.671957671957672, "eval_loss": 1.2137342691421509, "eval_runtime": 3.222, "eval_samples_per_second": 58.658, "eval_steps_per_second": 7.449, "step": 920 }, { "epoch": 8.69, "learning_rate": 0.0001652710280373832, "loss": 0.0785, "step": 930 }, { "epoch": 8.69, "eval_accuracy": 0.6825396825396826, "eval_loss": 1.3206517696380615, "eval_runtime": 3.21, "eval_samples_per_second": 58.878, "eval_steps_per_second": 7.477, "step": 930 }, { "epoch": 8.79, "learning_rate": 0.00016489719626168226, "loss": 0.1409, "step": 940 }, { "epoch": 8.79, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.2279229164123535, "eval_runtime": 3.1127, "eval_samples_per_second": 60.719, "eval_steps_per_second": 7.71, "step": 940 }, { "epoch": 8.88, "learning_rate": 0.00016452336448598132, "loss": 0.1221, "step": 950 }, { "epoch": 8.88, "eval_accuracy": 0.7513227513227513, "eval_loss": 0.9318807125091553, "eval_runtime": 3.2281, "eval_samples_per_second": 58.549, "eval_steps_per_second": 7.435, "step": 950 }, { "epoch": 8.97, "learning_rate": 0.00016414953271028036, "loss": 0.112, "step": 960 }, { "epoch": 8.97, "eval_accuracy": 0.671957671957672, "eval_loss": 1.267318606376648, "eval_runtime": 3.1888, "eval_samples_per_second": 59.27, "eval_steps_per_second": 7.526, "step": 960 }, { "epoch": 9.07, "learning_rate": 0.00016377570093457946, "loss": 0.0863, "step": 970 }, { "epoch": 9.07, "eval_accuracy": 0.6931216931216931, "eval_loss": 1.3446311950683594, "eval_runtime": 3.2112, "eval_samples_per_second": 58.857, "eval_steps_per_second": 7.474, "step": 970 }, { "epoch": 9.16, "learning_rate": 0.00016340186915887853, "loss": 0.0915, "step": 980 }, { "epoch": 9.16, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.1720484495162964, "eval_runtime": 3.1413, "eval_samples_per_second": 60.167, "eval_steps_per_second": 7.64, "step": 980 }, { "epoch": 9.25, "learning_rate": 0.00016302803738317757, "loss": 0.0911, "step": 990 }, { "epoch": 9.25, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.2161829471588135, "eval_runtime": 3.2839, "eval_samples_per_second": 57.553, "eval_steps_per_second": 7.308, "step": 990 }, { "epoch": 9.35, "learning_rate": 0.00016265420560747663, "loss": 0.0763, "step": 1000 }, { "epoch": 9.35, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.1872042417526245, "eval_runtime": 3.1612, "eval_samples_per_second": 59.787, "eval_steps_per_second": 7.592, "step": 1000 }, { "epoch": 9.44, "learning_rate": 0.0001622803738317757, "loss": 0.0524, "step": 1010 }, { "epoch": 9.44, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.3205093145370483, "eval_runtime": 3.1614, "eval_samples_per_second": 59.784, "eval_steps_per_second": 7.592, "step": 1010 }, { "epoch": 9.53, "learning_rate": 0.0001619065420560748, "loss": 0.1508, "step": 1020 }, { "epoch": 9.53, "eval_accuracy": 0.6507936507936508, "eval_loss": 1.5726176500320435, "eval_runtime": 3.161, "eval_samples_per_second": 59.791, "eval_steps_per_second": 7.593, "step": 1020 }, { "epoch": 9.63, "learning_rate": 0.00016153271028037383, "loss": 0.0994, "step": 1030 }, { "epoch": 9.63, "eval_accuracy": 0.6931216931216931, "eval_loss": 1.3505302667617798, "eval_runtime": 3.1505, "eval_samples_per_second": 59.99, "eval_steps_per_second": 7.618, "step": 1030 }, { "epoch": 9.72, "learning_rate": 0.0001611588785046729, "loss": 0.223, "step": 1040 }, { "epoch": 9.72, "eval_accuracy": 0.671957671957672, "eval_loss": 1.3896968364715576, "eval_runtime": 3.1746, "eval_samples_per_second": 59.536, "eval_steps_per_second": 7.56, "step": 1040 }, { "epoch": 9.81, "learning_rate": 0.00016078504672897197, "loss": 0.1115, "step": 1050 }, { "epoch": 9.81, "eval_accuracy": 0.6772486772486772, "eval_loss": 1.3965896368026733, "eval_runtime": 3.2706, "eval_samples_per_second": 57.787, "eval_steps_per_second": 7.338, "step": 1050 }, { "epoch": 9.91, "learning_rate": 0.00016041121495327104, "loss": 0.1485, "step": 1060 }, { "epoch": 9.91, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.4686475992202759, "eval_runtime": 3.1507, "eval_samples_per_second": 59.987, "eval_steps_per_second": 7.617, "step": 1060 }, { "epoch": 10.0, "learning_rate": 0.0001600373831775701, "loss": 0.18, "step": 1070 }, { "epoch": 10.0, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.2233675718307495, "eval_runtime": 3.1543, "eval_samples_per_second": 59.919, "eval_steps_per_second": 7.609, "step": 1070 }, { "epoch": 10.09, "learning_rate": 0.00015966355140186917, "loss": 0.1366, "step": 1080 }, { "epoch": 10.09, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.4836784601211548, "eval_runtime": 3.2142, "eval_samples_per_second": 58.802, "eval_steps_per_second": 7.467, "step": 1080 }, { "epoch": 10.19, "learning_rate": 0.00015928971962616824, "loss": 0.149, "step": 1090 }, { "epoch": 10.19, "eval_accuracy": 0.671957671957672, "eval_loss": 1.4587175846099854, "eval_runtime": 3.2536, "eval_samples_per_second": 58.09, "eval_steps_per_second": 7.377, "step": 1090 }, { "epoch": 10.28, "learning_rate": 0.0001589158878504673, "loss": 0.1618, "step": 1100 }, { "epoch": 10.28, "eval_accuracy": 0.6666666666666666, "eval_loss": 1.3593031167984009, "eval_runtime": 3.1414, "eval_samples_per_second": 60.165, "eval_steps_per_second": 7.64, "step": 1100 }, { "epoch": 10.37, "learning_rate": 0.00015854205607476635, "loss": 0.1302, "step": 1110 }, { "epoch": 10.37, "eval_accuracy": 0.6349206349206349, "eval_loss": 1.5082346200942993, "eval_runtime": 3.1372, "eval_samples_per_second": 60.244, "eval_steps_per_second": 7.65, "step": 1110 }, { "epoch": 10.47, "learning_rate": 0.00015816822429906544, "loss": 0.0208, "step": 1120 }, { "epoch": 10.47, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.4162836074829102, "eval_runtime": 3.2255, "eval_samples_per_second": 58.596, "eval_steps_per_second": 7.441, "step": 1120 }, { "epoch": 10.56, "learning_rate": 0.0001577943925233645, "loss": 0.0314, "step": 1130 }, { "epoch": 10.56, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.2450639009475708, "eval_runtime": 3.2013, "eval_samples_per_second": 59.038, "eval_steps_per_second": 7.497, "step": 1130 }, { "epoch": 10.65, "learning_rate": 0.00015742056074766355, "loss": 0.0355, "step": 1140 }, { "epoch": 10.65, "eval_accuracy": 0.708994708994709, "eval_loss": 1.3142927885055542, "eval_runtime": 3.2738, "eval_samples_per_second": 57.732, "eval_steps_per_second": 7.331, "step": 1140 }, { "epoch": 10.75, "learning_rate": 0.00015704672897196261, "loss": 0.1024, "step": 1150 }, { "epoch": 10.75, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.4214942455291748, "eval_runtime": 3.2147, "eval_samples_per_second": 58.792, "eval_steps_per_second": 7.466, "step": 1150 }, { "epoch": 10.84, "learning_rate": 0.00015667289719626168, "loss": 0.0733, "step": 1160 }, { "epoch": 10.84, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.2427018880844116, "eval_runtime": 3.1456, "eval_samples_per_second": 60.085, "eval_steps_per_second": 7.63, "step": 1160 }, { "epoch": 10.93, "learning_rate": 0.00015629906542056078, "loss": 0.0542, "step": 1170 }, { "epoch": 10.93, "eval_accuracy": 0.6825396825396826, "eval_loss": 1.5809307098388672, "eval_runtime": 3.2372, "eval_samples_per_second": 58.384, "eval_steps_per_second": 7.414, "step": 1170 }, { "epoch": 11.03, "learning_rate": 0.00015592523364485982, "loss": 0.0995, "step": 1180 }, { "epoch": 11.03, "eval_accuracy": 0.6613756613756614, "eval_loss": 1.5994837284088135, "eval_runtime": 3.153, "eval_samples_per_second": 59.943, "eval_steps_per_second": 7.612, "step": 1180 }, { "epoch": 11.12, "learning_rate": 0.00015555140186915888, "loss": 0.0653, "step": 1190 }, { "epoch": 11.12, "eval_accuracy": 0.708994708994709, "eval_loss": 1.3932106494903564, "eval_runtime": 3.2168, "eval_samples_per_second": 58.753, "eval_steps_per_second": 7.461, "step": 1190 }, { "epoch": 11.21, "learning_rate": 0.00015517757009345795, "loss": 0.0339, "step": 1200 }, { "epoch": 11.21, "eval_accuracy": 0.708994708994709, "eval_loss": 1.2856649160385132, "eval_runtime": 3.0564, "eval_samples_per_second": 61.837, "eval_steps_per_second": 7.852, "step": 1200 }, { "epoch": 11.31, "learning_rate": 0.00015480373831775702, "loss": 0.1038, "step": 1210 }, { "epoch": 11.31, "eval_accuracy": 0.7407407407407407, "eval_loss": 1.2895965576171875, "eval_runtime": 3.1971, "eval_samples_per_second": 59.115, "eval_steps_per_second": 7.507, "step": 1210 }, { "epoch": 11.4, "learning_rate": 0.00015442990654205608, "loss": 0.0415, "step": 1220 }, { "epoch": 11.4, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.2908622026443481, "eval_runtime": 3.2149, "eval_samples_per_second": 58.789, "eval_steps_per_second": 7.465, "step": 1220 }, { "epoch": 11.5, "learning_rate": 0.00015405607476635515, "loss": 0.0629, "step": 1230 }, { "epoch": 11.5, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.3047831058502197, "eval_runtime": 3.228, "eval_samples_per_second": 58.55, "eval_steps_per_second": 7.435, "step": 1230 }, { "epoch": 11.59, "learning_rate": 0.00015368224299065422, "loss": 0.0137, "step": 1240 }, { "epoch": 11.59, "eval_accuracy": 0.7195767195767195, "eval_loss": 1.3543046712875366, "eval_runtime": 3.1086, "eval_samples_per_second": 60.8, "eval_steps_per_second": 7.721, "step": 1240 }, { "epoch": 11.68, "learning_rate": 0.0001533084112149533, "loss": 0.035, "step": 1250 }, { "epoch": 11.68, "eval_accuracy": 0.7301587301587301, "eval_loss": 1.2130463123321533, "eval_runtime": 3.4203, "eval_samples_per_second": 55.258, "eval_steps_per_second": 7.017, "step": 1250 }, { "epoch": 11.78, "learning_rate": 0.00015293457943925233, "loss": 0.0102, "step": 1260 }, { "epoch": 11.78, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.2087303400039673, "eval_runtime": 3.3746, "eval_samples_per_second": 56.006, "eval_steps_per_second": 7.112, "step": 1260 }, { "epoch": 11.87, "learning_rate": 0.00015256074766355142, "loss": 0.0409, "step": 1270 }, { "epoch": 11.87, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.2329652309417725, "eval_runtime": 3.1081, "eval_samples_per_second": 60.809, "eval_steps_per_second": 7.722, "step": 1270 }, { "epoch": 11.96, "learning_rate": 0.0001521869158878505, "loss": 0.0659, "step": 1280 }, { "epoch": 11.96, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.3084936141967773, "eval_runtime": 3.3561, "eval_samples_per_second": 56.315, "eval_steps_per_second": 7.151, "step": 1280 }, { "epoch": 12.06, "learning_rate": 0.00015181308411214953, "loss": 0.035, "step": 1290 }, { "epoch": 12.06, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.405351161956787, "eval_runtime": 3.4254, "eval_samples_per_second": 55.175, "eval_steps_per_second": 7.006, "step": 1290 }, { "epoch": 12.15, "learning_rate": 0.0001514392523364486, "loss": 0.103, "step": 1300 }, { "epoch": 12.15, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.464030146598816, "eval_runtime": 3.2557, "eval_samples_per_second": 58.053, "eval_steps_per_second": 7.372, "step": 1300 }, { "epoch": 12.24, "learning_rate": 0.00015106542056074766, "loss": 0.0238, "step": 1310 }, { "epoch": 12.24, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.347579002380371, "eval_runtime": 3.048, "eval_samples_per_second": 62.008, "eval_steps_per_second": 7.874, "step": 1310 }, { "epoch": 12.34, "learning_rate": 0.00015069158878504676, "loss": 0.0196, "step": 1320 }, { "epoch": 12.34, "eval_accuracy": 0.708994708994709, "eval_loss": 1.40040123462677, "eval_runtime": 3.1832, "eval_samples_per_second": 59.375, "eval_steps_per_second": 7.54, "step": 1320 }, { "epoch": 12.43, "learning_rate": 0.0001503177570093458, "loss": 0.009, "step": 1330 }, { "epoch": 12.43, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.4277156591415405, "eval_runtime": 3.2679, "eval_samples_per_second": 57.836, "eval_steps_per_second": 7.344, "step": 1330 }, { "epoch": 12.52, "learning_rate": 0.00014994392523364486, "loss": 0.0238, "step": 1340 }, { "epoch": 12.52, "eval_accuracy": 0.7407407407407407, "eval_loss": 1.4086812734603882, "eval_runtime": 3.2187, "eval_samples_per_second": 58.719, "eval_steps_per_second": 7.456, "step": 1340 }, { "epoch": 12.62, "learning_rate": 0.00014957009345794393, "loss": 0.0468, "step": 1350 }, { "epoch": 12.62, "eval_accuracy": 0.7195767195767195, "eval_loss": 1.3357652425765991, "eval_runtime": 3.1607, "eval_samples_per_second": 59.798, "eval_steps_per_second": 7.593, "step": 1350 }, { "epoch": 12.71, "learning_rate": 0.000149196261682243, "loss": 0.0207, "step": 1360 }, { "epoch": 12.71, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.4172828197479248, "eval_runtime": 3.0888, "eval_samples_per_second": 61.188, "eval_steps_per_second": 7.77, "step": 1360 }, { "epoch": 12.8, "learning_rate": 0.00014882242990654207, "loss": 0.0138, "step": 1370 }, { "epoch": 12.8, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.4307596683502197, "eval_runtime": 3.1887, "eval_samples_per_second": 59.271, "eval_steps_per_second": 7.526, "step": 1370 }, { "epoch": 12.9, "learning_rate": 0.00014844859813084113, "loss": 0.0241, "step": 1380 }, { "epoch": 12.9, "eval_accuracy": 0.6772486772486772, "eval_loss": 1.6381709575653076, "eval_runtime": 3.2282, "eval_samples_per_second": 58.546, "eval_steps_per_second": 7.434, "step": 1380 }, { "epoch": 12.99, "learning_rate": 0.0001480747663551402, "loss": 0.0224, "step": 1390 }, { "epoch": 12.99, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.513045072555542, "eval_runtime": 3.1555, "eval_samples_per_second": 59.896, "eval_steps_per_second": 7.606, "step": 1390 }, { "epoch": 13.08, "learning_rate": 0.00014770093457943924, "loss": 0.0367, "step": 1400 }, { "epoch": 13.08, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.5821019411087036, "eval_runtime": 3.1013, "eval_samples_per_second": 60.941, "eval_steps_per_second": 7.739, "step": 1400 }, { "epoch": 13.18, "learning_rate": 0.0001473271028037383, "loss": 0.0201, "step": 1410 }, { "epoch": 13.18, "eval_accuracy": 0.708994708994709, "eval_loss": 1.499505877494812, "eval_runtime": 3.3162, "eval_samples_per_second": 56.994, "eval_steps_per_second": 7.237, "step": 1410 }, { "epoch": 13.27, "learning_rate": 0.0001469532710280374, "loss": 0.0431, "step": 1420 }, { "epoch": 13.27, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.3571968078613281, "eval_runtime": 3.1517, "eval_samples_per_second": 59.968, "eval_steps_per_second": 7.615, "step": 1420 }, { "epoch": 13.36, "learning_rate": 0.00014657943925233647, "loss": 0.0137, "step": 1430 }, { "epoch": 13.36, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.3700077533721924, "eval_runtime": 3.0539, "eval_samples_per_second": 61.889, "eval_steps_per_second": 7.859, "step": 1430 }, { "epoch": 13.46, "learning_rate": 0.0001462056074766355, "loss": 0.0498, "step": 1440 }, { "epoch": 13.46, "eval_accuracy": 0.708994708994709, "eval_loss": 1.6434003114700317, "eval_runtime": 3.0715, "eval_samples_per_second": 61.534, "eval_steps_per_second": 7.814, "step": 1440 }, { "epoch": 13.55, "learning_rate": 0.00014583177570093458, "loss": 0.0175, "step": 1450 }, { "epoch": 13.55, "eval_accuracy": 0.708994708994709, "eval_loss": 1.7298402786254883, "eval_runtime": 3.1073, "eval_samples_per_second": 60.825, "eval_steps_per_second": 7.724, "step": 1450 }, { "epoch": 13.64, "learning_rate": 0.00014545794392523364, "loss": 0.0142, "step": 1460 }, { "epoch": 13.64, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.5783988237380981, "eval_runtime": 3.116, "eval_samples_per_second": 60.655, "eval_steps_per_second": 7.702, "step": 1460 }, { "epoch": 13.74, "learning_rate": 0.0001450841121495327, "loss": 0.0235, "step": 1470 }, { "epoch": 13.74, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.6561763286590576, "eval_runtime": 3.2007, "eval_samples_per_second": 59.049, "eval_steps_per_second": 7.498, "step": 1470 }, { "epoch": 13.83, "learning_rate": 0.00014471028037383178, "loss": 0.0524, "step": 1480 }, { "epoch": 13.83, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.831541657447815, "eval_runtime": 3.3046, "eval_samples_per_second": 57.193, "eval_steps_per_second": 7.263, "step": 1480 }, { "epoch": 13.93, "learning_rate": 0.00014433644859813085, "loss": 0.0506, "step": 1490 }, { "epoch": 13.93, "eval_accuracy": 0.6825396825396826, "eval_loss": 1.6628289222717285, "eval_runtime": 3.2548, "eval_samples_per_second": 58.068, "eval_steps_per_second": 7.374, "step": 1490 }, { "epoch": 14.02, "learning_rate": 0.0001439626168224299, "loss": 0.0566, "step": 1500 }, { "epoch": 14.02, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.6691248416900635, "eval_runtime": 3.3252, "eval_samples_per_second": 56.839, "eval_steps_per_second": 7.218, "step": 1500 }, { "epoch": 14.11, "learning_rate": 0.00014358878504672898, "loss": 0.0872, "step": 1510 }, { "epoch": 14.11, "eval_accuracy": 0.6084656084656085, "eval_loss": 2.196704387664795, "eval_runtime": 3.2014, "eval_samples_per_second": 59.037, "eval_steps_per_second": 7.497, "step": 1510 }, { "epoch": 14.21, "learning_rate": 0.00014321495327102805, "loss": 0.1338, "step": 1520 }, { "epoch": 14.21, "eval_accuracy": 0.6507936507936508, "eval_loss": 1.7845758199691772, "eval_runtime": 3.176, "eval_samples_per_second": 59.51, "eval_steps_per_second": 7.557, "step": 1520 }, { "epoch": 14.3, "learning_rate": 0.00014284112149532711, "loss": 0.0222, "step": 1530 }, { "epoch": 14.3, "eval_accuracy": 0.6772486772486772, "eval_loss": 1.6833900213241577, "eval_runtime": 3.1793, "eval_samples_per_second": 59.447, "eval_steps_per_second": 7.549, "step": 1530 }, { "epoch": 14.39, "learning_rate": 0.00014246728971962618, "loss": 0.0254, "step": 1540 }, { "epoch": 14.39, "eval_accuracy": 0.656084656084656, "eval_loss": 1.9036774635314941, "eval_runtime": 3.1611, "eval_samples_per_second": 59.789, "eval_steps_per_second": 7.592, "step": 1540 }, { "epoch": 14.49, "learning_rate": 0.00014209345794392522, "loss": 0.0244, "step": 1550 }, { "epoch": 14.49, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.5309627056121826, "eval_runtime": 3.1613, "eval_samples_per_second": 59.786, "eval_steps_per_second": 7.592, "step": 1550 }, { "epoch": 14.58, "learning_rate": 0.0001417196261682243, "loss": 0.0178, "step": 1560 }, { "epoch": 14.58, "eval_accuracy": 0.6825396825396826, "eval_loss": 1.53221595287323, "eval_runtime": 3.144, "eval_samples_per_second": 60.114, "eval_steps_per_second": 7.634, "step": 1560 }, { "epoch": 14.67, "learning_rate": 0.00014134579439252338, "loss": 0.0045, "step": 1570 }, { "epoch": 14.67, "eval_accuracy": 0.7301587301587301, "eval_loss": 1.3083724975585938, "eval_runtime": 3.1864, "eval_samples_per_second": 59.314, "eval_steps_per_second": 7.532, "step": 1570 }, { "epoch": 14.77, "learning_rate": 0.00014097196261682245, "loss": 0.0485, "step": 1580 }, { "epoch": 14.77, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.2855416536331177, "eval_runtime": 3.2166, "eval_samples_per_second": 58.758, "eval_steps_per_second": 7.461, "step": 1580 }, { "epoch": 14.86, "learning_rate": 0.0001405981308411215, "loss": 0.0575, "step": 1590 }, { "epoch": 14.86, "eval_accuracy": 0.7301587301587301, "eval_loss": 1.3779939413070679, "eval_runtime": 3.2389, "eval_samples_per_second": 58.353, "eval_steps_per_second": 7.41, "step": 1590 }, { "epoch": 14.95, "learning_rate": 0.00014022429906542056, "loss": 0.0131, "step": 1600 }, { "epoch": 14.95, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.6261014938354492, "eval_runtime": 3.1879, "eval_samples_per_second": 59.286, "eval_steps_per_second": 7.528, "step": 1600 }, { "epoch": 15.05, "learning_rate": 0.00013985046728971963, "loss": 0.0059, "step": 1610 }, { "epoch": 15.05, "eval_accuracy": 0.6931216931216931, "eval_loss": 1.6843873262405396, "eval_runtime": 3.2196, "eval_samples_per_second": 58.702, "eval_steps_per_second": 7.454, "step": 1610 }, { "epoch": 15.14, "learning_rate": 0.0001394766355140187, "loss": 0.0113, "step": 1620 }, { "epoch": 15.14, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.4520890712738037, "eval_runtime": 3.1975, "eval_samples_per_second": 59.108, "eval_steps_per_second": 7.506, "step": 1620 }, { "epoch": 15.23, "learning_rate": 0.00013910280373831776, "loss": 0.0264, "step": 1630 }, { "epoch": 15.23, "eval_accuracy": 0.708994708994709, "eval_loss": 1.5394195318222046, "eval_runtime": 3.2412, "eval_samples_per_second": 58.312, "eval_steps_per_second": 7.405, "step": 1630 }, { "epoch": 15.33, "learning_rate": 0.00013872897196261683, "loss": 0.0131, "step": 1640 }, { "epoch": 15.33, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.521421194076538, "eval_runtime": 3.2643, "eval_samples_per_second": 57.899, "eval_steps_per_second": 7.352, "step": 1640 }, { "epoch": 15.42, "learning_rate": 0.0001383551401869159, "loss": 0.0067, "step": 1650 }, { "epoch": 15.42, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.8741341829299927, "eval_runtime": 3.2542, "eval_samples_per_second": 58.079, "eval_steps_per_second": 7.375, "step": 1650 }, { "epoch": 15.51, "learning_rate": 0.00013798130841121496, "loss": 0.1502, "step": 1660 }, { "epoch": 15.51, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.5996290445327759, "eval_runtime": 3.2543, "eval_samples_per_second": 58.076, "eval_steps_per_second": 7.375, "step": 1660 }, { "epoch": 15.61, "learning_rate": 0.00013760747663551403, "loss": 0.0505, "step": 1670 }, { "epoch": 15.61, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.5197315216064453, "eval_runtime": 3.3082, "eval_samples_per_second": 57.131, "eval_steps_per_second": 7.255, "step": 1670 }, { "epoch": 15.7, "learning_rate": 0.0001372336448598131, "loss": 0.0096, "step": 1680 }, { "epoch": 15.7, "eval_accuracy": 0.708994708994709, "eval_loss": 1.562042236328125, "eval_runtime": 3.2413, "eval_samples_per_second": 58.31, "eval_steps_per_second": 7.404, "step": 1680 }, { "epoch": 15.79, "learning_rate": 0.00013685981308411216, "loss": 0.0623, "step": 1690 }, { "epoch": 15.79, "eval_accuracy": 0.7301587301587301, "eval_loss": 1.5186712741851807, "eval_runtime": 3.3016, "eval_samples_per_second": 57.244, "eval_steps_per_second": 7.269, "step": 1690 }, { "epoch": 15.89, "learning_rate": 0.0001364859813084112, "loss": 0.0732, "step": 1700 }, { "epoch": 15.89, "eval_accuracy": 0.6825396825396826, "eval_loss": 1.5817829370498657, "eval_runtime": 3.2211, "eval_samples_per_second": 58.675, "eval_steps_per_second": 7.451, "step": 1700 }, { "epoch": 15.98, "learning_rate": 0.00013611214953271027, "loss": 0.0587, "step": 1710 }, { "epoch": 15.98, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.331645131111145, "eval_runtime": 3.254, "eval_samples_per_second": 58.083, "eval_steps_per_second": 7.376, "step": 1710 }, { "epoch": 16.07, "learning_rate": 0.00013573831775700936, "loss": 0.0135, "step": 1720 }, { "epoch": 16.07, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.5572881698608398, "eval_runtime": 3.1348, "eval_samples_per_second": 60.292, "eval_steps_per_second": 7.656, "step": 1720 }, { "epoch": 16.17, "learning_rate": 0.00013536448598130843, "loss": 0.0405, "step": 1730 }, { "epoch": 16.17, "eval_accuracy": 0.7195767195767195, "eval_loss": 1.5583974123001099, "eval_runtime": 3.1747, "eval_samples_per_second": 59.534, "eval_steps_per_second": 7.56, "step": 1730 }, { "epoch": 16.26, "learning_rate": 0.00013499065420560747, "loss": 0.0379, "step": 1740 }, { "epoch": 16.26, "eval_accuracy": 0.6613756613756614, "eval_loss": 1.8542115688323975, "eval_runtime": 3.4991, "eval_samples_per_second": 54.014, "eval_steps_per_second": 6.859, "step": 1740 }, { "epoch": 16.36, "learning_rate": 0.00013461682242990654, "loss": 0.0778, "step": 1750 }, { "epoch": 16.36, "eval_accuracy": 0.6772486772486772, "eval_loss": 1.81163489818573, "eval_runtime": 3.2013, "eval_samples_per_second": 59.039, "eval_steps_per_second": 7.497, "step": 1750 }, { "epoch": 16.45, "learning_rate": 0.00013424299065420563, "loss": 0.0178, "step": 1760 }, { "epoch": 16.45, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.6405593156814575, "eval_runtime": 3.1824, "eval_samples_per_second": 59.389, "eval_steps_per_second": 7.541, "step": 1760 }, { "epoch": 16.54, "learning_rate": 0.00013386915887850467, "loss": 0.0252, "step": 1770 }, { "epoch": 16.54, "eval_accuracy": 0.6772486772486772, "eval_loss": 1.6421043872833252, "eval_runtime": 3.2328, "eval_samples_per_second": 58.464, "eval_steps_per_second": 7.424, "step": 1770 }, { "epoch": 16.64, "learning_rate": 0.00013349532710280374, "loss": 0.0638, "step": 1780 }, { "epoch": 16.64, "eval_accuracy": 0.746031746031746, "eval_loss": 1.4504343271255493, "eval_runtime": 3.1566, "eval_samples_per_second": 59.875, "eval_steps_per_second": 7.603, "step": 1780 }, { "epoch": 16.73, "learning_rate": 0.0001331214953271028, "loss": 0.0138, "step": 1790 }, { "epoch": 16.73, "eval_accuracy": 0.708994708994709, "eval_loss": 1.5848379135131836, "eval_runtime": 3.2276, "eval_samples_per_second": 58.557, "eval_steps_per_second": 7.436, "step": 1790 }, { "epoch": 16.82, "learning_rate": 0.00013274766355140188, "loss": 0.1027, "step": 1800 }, { "epoch": 16.82, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.5205998420715332, "eval_runtime": 3.2514, "eval_samples_per_second": 58.128, "eval_steps_per_second": 7.381, "step": 1800 }, { "epoch": 16.92, "learning_rate": 0.00013237383177570094, "loss": 0.086, "step": 1810 }, { "epoch": 16.92, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.3391714096069336, "eval_runtime": 3.2275, "eval_samples_per_second": 58.56, "eval_steps_per_second": 7.436, "step": 1810 }, { "epoch": 17.01, "learning_rate": 0.000132, "loss": 0.1029, "step": 1820 }, { "epoch": 17.01, "eval_accuracy": 0.708994708994709, "eval_loss": 1.4613063335418701, "eval_runtime": 3.268, "eval_samples_per_second": 57.834, "eval_steps_per_second": 7.344, "step": 1820 }, { "epoch": 17.1, "learning_rate": 0.00013162616822429908, "loss": 0.115, "step": 1830 }, { "epoch": 17.1, "eval_accuracy": 0.6931216931216931, "eval_loss": 1.7344552278518677, "eval_runtime": 3.2414, "eval_samples_per_second": 58.308, "eval_steps_per_second": 7.404, "step": 1830 }, { "epoch": 17.2, "learning_rate": 0.00013125233644859814, "loss": 0.0286, "step": 1840 }, { "epoch": 17.2, "eval_accuracy": 0.7301587301587301, "eval_loss": 1.5347076654434204, "eval_runtime": 3.2891, "eval_samples_per_second": 57.462, "eval_steps_per_second": 7.297, "step": 1840 }, { "epoch": 17.29, "learning_rate": 0.00013087850467289718, "loss": 0.0608, "step": 1850 }, { "epoch": 17.29, "eval_accuracy": 0.6931216931216931, "eval_loss": 1.5781868696212769, "eval_runtime": 3.2271, "eval_samples_per_second": 58.566, "eval_steps_per_second": 7.437, "step": 1850 }, { "epoch": 17.38, "learning_rate": 0.00013050467289719628, "loss": 0.1215, "step": 1860 }, { "epoch": 17.38, "eval_accuracy": 0.6931216931216931, "eval_loss": 1.5484486818313599, "eval_runtime": 3.2944, "eval_samples_per_second": 57.37, "eval_steps_per_second": 7.285, "step": 1860 }, { "epoch": 17.48, "learning_rate": 0.00013013084112149535, "loss": 0.095, "step": 1870 }, { "epoch": 17.48, "eval_accuracy": 0.708994708994709, "eval_loss": 1.5258464813232422, "eval_runtime": 3.265, "eval_samples_per_second": 57.887, "eval_steps_per_second": 7.351, "step": 1870 }, { "epoch": 17.57, "learning_rate": 0.0001297570093457944, "loss": 0.0176, "step": 1880 }, { "epoch": 17.57, "eval_accuracy": 0.708994708994709, "eval_loss": 1.5887885093688965, "eval_runtime": 3.1883, "eval_samples_per_second": 59.278, "eval_steps_per_second": 7.527, "step": 1880 }, { "epoch": 17.66, "learning_rate": 0.00012938317757009345, "loss": 0.0208, "step": 1890 }, { "epoch": 17.66, "eval_accuracy": 0.6878306878306878, "eval_loss": 2.017876625061035, "eval_runtime": 3.1617, "eval_samples_per_second": 59.778, "eval_steps_per_second": 7.591, "step": 1890 }, { "epoch": 17.76, "learning_rate": 0.00012900934579439252, "loss": 0.0752, "step": 1900 }, { "epoch": 17.76, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.8983193635940552, "eval_runtime": 3.3023, "eval_samples_per_second": 57.233, "eval_steps_per_second": 7.268, "step": 1900 }, { "epoch": 17.85, "learning_rate": 0.00012863551401869162, "loss": 0.0609, "step": 1910 }, { "epoch": 17.85, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.6523683071136475, "eval_runtime": 3.2531, "eval_samples_per_second": 58.099, "eval_steps_per_second": 7.378, "step": 1910 }, { "epoch": 17.94, "learning_rate": 0.00012826168224299066, "loss": 0.0059, "step": 1920 }, { "epoch": 17.94, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.6338155269622803, "eval_runtime": 3.2413, "eval_samples_per_second": 58.309, "eval_steps_per_second": 7.404, "step": 1920 }, { "epoch": 18.04, "learning_rate": 0.00012788785046728972, "loss": 0.0264, "step": 1930 }, { "epoch": 18.04, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.6208415031433105, "eval_runtime": 3.4796, "eval_samples_per_second": 54.317, "eval_steps_per_second": 6.897, "step": 1930 }, { "epoch": 18.13, "learning_rate": 0.0001275140186915888, "loss": 0.0128, "step": 1940 }, { "epoch": 18.13, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.521356225013733, "eval_runtime": 3.1889, "eval_samples_per_second": 59.268, "eval_steps_per_second": 7.526, "step": 1940 }, { "epoch": 18.22, "learning_rate": 0.00012714018691588786, "loss": 0.0053, "step": 1950 }, { "epoch": 18.22, "eval_accuracy": 0.7195767195767195, "eval_loss": 1.5842546224594116, "eval_runtime": 3.1478, "eval_samples_per_second": 60.042, "eval_steps_per_second": 7.624, "step": 1950 }, { "epoch": 18.32, "learning_rate": 0.00012676635514018692, "loss": 0.0129, "step": 1960 }, { "epoch": 18.32, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.5440059900283813, "eval_runtime": 4.1754, "eval_samples_per_second": 45.265, "eval_steps_per_second": 5.748, "step": 1960 }, { "epoch": 18.41, "learning_rate": 0.000126392523364486, "loss": 0.016, "step": 1970 }, { "epoch": 18.41, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.4988614320755005, "eval_runtime": 3.1898, "eval_samples_per_second": 59.252, "eval_steps_per_second": 7.524, "step": 1970 }, { "epoch": 18.5, "learning_rate": 0.00012601869158878506, "loss": 0.0696, "step": 1980 }, { "epoch": 18.5, "eval_accuracy": 0.7407407407407407, "eval_loss": 1.4819732904434204, "eval_runtime": 3.1615, "eval_samples_per_second": 59.781, "eval_steps_per_second": 7.591, "step": 1980 }, { "epoch": 18.6, "learning_rate": 0.00012564485981308413, "loss": 0.0217, "step": 1990 }, { "epoch": 18.6, "eval_accuracy": 0.7195767195767195, "eval_loss": 1.4832117557525635, "eval_runtime": 3.179, "eval_samples_per_second": 59.452, "eval_steps_per_second": 7.549, "step": 1990 }, { "epoch": 18.69, "learning_rate": 0.00012527102803738317, "loss": 0.006, "step": 2000 }, { "epoch": 18.69, "eval_accuracy": 0.6772486772486772, "eval_loss": 1.9052395820617676, "eval_runtime": 3.1938, "eval_samples_per_second": 59.176, "eval_steps_per_second": 7.514, "step": 2000 }, { "epoch": 18.79, "learning_rate": 0.00012489719626168226, "loss": 0.0357, "step": 2010 }, { "epoch": 18.79, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.73564612865448, "eval_runtime": 3.3743, "eval_samples_per_second": 56.012, "eval_steps_per_second": 7.113, "step": 2010 }, { "epoch": 18.88, "learning_rate": 0.00012452336448598133, "loss": 0.0197, "step": 2020 }, { "epoch": 18.88, "eval_accuracy": 0.7301587301587301, "eval_loss": 1.617836594581604, "eval_runtime": 3.185, "eval_samples_per_second": 59.34, "eval_steps_per_second": 7.535, "step": 2020 }, { "epoch": 18.97, "learning_rate": 0.00012414953271028037, "loss": 0.0331, "step": 2030 }, { "epoch": 18.97, "eval_accuracy": 0.7407407407407407, "eval_loss": 1.5125271081924438, "eval_runtime": 3.228, "eval_samples_per_second": 58.55, "eval_steps_per_second": 7.435, "step": 2030 }, { "epoch": 19.07, "learning_rate": 0.00012381308411214953, "loss": 0.2593, "step": 2040 }, { "epoch": 19.07, "eval_accuracy": 0.708994708994709, "eval_loss": 1.6546989679336548, "eval_runtime": 3.1974, "eval_samples_per_second": 59.111, "eval_steps_per_second": 7.506, "step": 2040 }, { "epoch": 19.16, "learning_rate": 0.0001234392523364486, "loss": 0.0134, "step": 2050 }, { "epoch": 19.16, "eval_accuracy": 0.6931216931216931, "eval_loss": 1.6934614181518555, "eval_runtime": 3.2069, "eval_samples_per_second": 58.936, "eval_steps_per_second": 7.484, "step": 2050 }, { "epoch": 19.25, "learning_rate": 0.0001230654205607477, "loss": 0.0069, "step": 2060 }, { "epoch": 19.25, "eval_accuracy": 0.7407407407407407, "eval_loss": 1.5514878034591675, "eval_runtime": 3.2273, "eval_samples_per_second": 58.563, "eval_steps_per_second": 7.437, "step": 2060 }, { "epoch": 19.35, "learning_rate": 0.00012269158878504673, "loss": 0.0083, "step": 2070 }, { "epoch": 19.35, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.576420545578003, "eval_runtime": 3.1752, "eval_samples_per_second": 59.525, "eval_steps_per_second": 7.559, "step": 2070 }, { "epoch": 19.44, "learning_rate": 0.0001223177570093458, "loss": 0.0299, "step": 2080 }, { "epoch": 19.44, "eval_accuracy": 0.7301587301587301, "eval_loss": 1.5843783617019653, "eval_runtime": 3.1803, "eval_samples_per_second": 59.428, "eval_steps_per_second": 7.546, "step": 2080 }, { "epoch": 19.53, "learning_rate": 0.00012194392523364486, "loss": 0.002, "step": 2090 }, { "epoch": 19.53, "eval_accuracy": 0.746031746031746, "eval_loss": 1.5910844802856445, "eval_runtime": 3.1724, "eval_samples_per_second": 59.576, "eval_steps_per_second": 7.565, "step": 2090 }, { "epoch": 19.63, "learning_rate": 0.00012157009345794393, "loss": 0.0022, "step": 2100 }, { "epoch": 19.63, "eval_accuracy": 0.7513227513227513, "eval_loss": 1.5894769430160522, "eval_runtime": 3.2193, "eval_samples_per_second": 58.708, "eval_steps_per_second": 7.455, "step": 2100 }, { "epoch": 19.72, "learning_rate": 0.00012119626168224301, "loss": 0.0024, "step": 2110 }, { "epoch": 19.72, "eval_accuracy": 0.7513227513227513, "eval_loss": 1.586985468864441, "eval_runtime": 3.1897, "eval_samples_per_second": 59.254, "eval_steps_per_second": 7.524, "step": 2110 }, { "epoch": 19.81, "learning_rate": 0.00012082242990654206, "loss": 0.0082, "step": 2120 }, { "epoch": 19.81, "eval_accuracy": 0.7407407407407407, "eval_loss": 1.582448124885559, "eval_runtime": 3.1594, "eval_samples_per_second": 59.821, "eval_steps_per_second": 7.596, "step": 2120 }, { "epoch": 19.91, "learning_rate": 0.00012044859813084113, "loss": 0.0022, "step": 2130 }, { "epoch": 19.91, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.5609190464019775, "eval_runtime": 3.3172, "eval_samples_per_second": 56.976, "eval_steps_per_second": 7.235, "step": 2130 }, { "epoch": 20.0, "learning_rate": 0.00012007476635514018, "loss": 0.02, "step": 2140 }, { "epoch": 20.0, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.6138461828231812, "eval_runtime": 3.3238, "eval_samples_per_second": 56.863, "eval_steps_per_second": 7.221, "step": 2140 }, { "epoch": 20.09, "learning_rate": 0.00011970093457943925, "loss": 0.0063, "step": 2150 }, { "epoch": 20.09, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.7616651058197021, "eval_runtime": 3.2361, "eval_samples_per_second": 58.403, "eval_steps_per_second": 7.416, "step": 2150 }, { "epoch": 20.19, "learning_rate": 0.00011932710280373833, "loss": 0.0021, "step": 2160 }, { "epoch": 20.19, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.7941789627075195, "eval_runtime": 5.8284, "eval_samples_per_second": 32.428, "eval_steps_per_second": 4.118, "step": 2160 }, { "epoch": 20.28, "learning_rate": 0.0001189532710280374, "loss": 0.0068, "step": 2170 }, { "epoch": 20.28, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.7489572763442993, "eval_runtime": 3.085, "eval_samples_per_second": 61.265, "eval_steps_per_second": 7.78, "step": 2170 }, { "epoch": 20.37, "learning_rate": 0.00011857943925233645, "loss": 0.0078, "step": 2180 }, { "epoch": 20.37, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.6500416994094849, "eval_runtime": 3.0679, "eval_samples_per_second": 61.606, "eval_steps_per_second": 7.823, "step": 2180 }, { "epoch": 20.47, "learning_rate": 0.00011820560747663552, "loss": 0.0019, "step": 2190 }, { "epoch": 20.47, "eval_accuracy": 0.7513227513227513, "eval_loss": 1.5972764492034912, "eval_runtime": 3.3043, "eval_samples_per_second": 57.198, "eval_steps_per_second": 7.263, "step": 2190 }, { "epoch": 20.56, "learning_rate": 0.00011783177570093457, "loss": 0.0052, "step": 2200 }, { "epoch": 20.56, "eval_accuracy": 0.7301587301587301, "eval_loss": 1.64747154712677, "eval_runtime": 3.2063, "eval_samples_per_second": 58.947, "eval_steps_per_second": 7.485, "step": 2200 }, { "epoch": 20.65, "learning_rate": 0.00011745794392523365, "loss": 0.0169, "step": 2210 }, { "epoch": 20.65, "eval_accuracy": 0.7195767195767195, "eval_loss": 1.6964852809906006, "eval_runtime": 3.2678, "eval_samples_per_second": 57.837, "eval_steps_per_second": 7.344, "step": 2210 }, { "epoch": 20.75, "learning_rate": 0.00011708411214953272, "loss": 0.011, "step": 2220 }, { "epoch": 20.75, "eval_accuracy": 0.708994708994709, "eval_loss": 1.7380739450454712, "eval_runtime": 3.1484, "eval_samples_per_second": 60.03, "eval_steps_per_second": 7.623, "step": 2220 }, { "epoch": 20.84, "learning_rate": 0.00011671028037383178, "loss": 0.0141, "step": 2230 }, { "epoch": 20.84, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.6947896480560303, "eval_runtime": 3.3613, "eval_samples_per_second": 56.228, "eval_steps_per_second": 7.14, "step": 2230 }, { "epoch": 20.93, "learning_rate": 0.00011633644859813084, "loss": 0.026, "step": 2240 }, { "epoch": 20.93, "eval_accuracy": 0.7037037037037037, "eval_loss": 1.7711342573165894, "eval_runtime": 3.2033, "eval_samples_per_second": 59.001, "eval_steps_per_second": 7.492, "step": 2240 }, { "epoch": 21.03, "learning_rate": 0.00011596261682242991, "loss": 0.0019, "step": 2250 }, { "epoch": 21.03, "eval_accuracy": 0.6825396825396826, "eval_loss": 1.8577070236206055, "eval_runtime": 3.1485, "eval_samples_per_second": 60.03, "eval_steps_per_second": 7.623, "step": 2250 }, { "epoch": 21.12, "learning_rate": 0.00011558878504672899, "loss": 0.0523, "step": 2260 }, { "epoch": 21.12, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.837315320968628, "eval_runtime": 3.1213, "eval_samples_per_second": 60.552, "eval_steps_per_second": 7.689, "step": 2260 }, { "epoch": 21.21, "learning_rate": 0.00011521495327102804, "loss": 0.0234, "step": 2270 }, { "epoch": 21.21, "eval_accuracy": 0.7248677248677249, "eval_loss": 1.7204312086105347, "eval_runtime": 3.3214, "eval_samples_per_second": 56.904, "eval_steps_per_second": 7.226, "step": 2270 }, { "epoch": 21.31, "learning_rate": 0.00011484112149532711, "loss": 0.0105, "step": 2280 }, { "epoch": 21.31, "eval_accuracy": 0.7195767195767195, "eval_loss": 1.6235790252685547, "eval_runtime": 3.1507, "eval_samples_per_second": 59.986, "eval_steps_per_second": 7.617, "step": 2280 }, { "epoch": 21.4, "learning_rate": 0.00011446728971962617, "loss": 0.048, "step": 2290 }, { "epoch": 21.4, "eval_accuracy": 0.6984126984126984, "eval_loss": 1.8592135906219482, "eval_runtime": 3.1589, "eval_samples_per_second": 59.83, "eval_steps_per_second": 7.598, "step": 2290 }, { "epoch": 21.5, "learning_rate": 0.00011409345794392523, "loss": 0.0456, "step": 2300 }, { "epoch": 21.5, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.7222115993499756, "eval_runtime": 3.1346, "eval_samples_per_second": 60.296, "eval_steps_per_second": 7.657, "step": 2300 }, { "epoch": 21.59, "learning_rate": 0.00011371962616822431, "loss": 0.0129, "step": 2310 }, { "epoch": 21.59, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.6260654926300049, "eval_runtime": 3.1239, "eval_samples_per_second": 60.501, "eval_steps_per_second": 7.683, "step": 2310 }, { "epoch": 21.68, "learning_rate": 0.00011334579439252338, "loss": 0.0048, "step": 2320 }, { "epoch": 21.68, "eval_accuracy": 0.7142857142857143, "eval_loss": 1.5359770059585571, "eval_runtime": 3.254, "eval_samples_per_second": 58.083, "eval_steps_per_second": 7.376, "step": 2320 }, { "epoch": 21.78, "learning_rate": 0.00011297196261682243, "loss": 0.0217, "step": 2330 }, { "epoch": 21.78, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.4278719425201416, "eval_runtime": 3.2536, "eval_samples_per_second": 58.09, "eval_steps_per_second": 7.376, "step": 2330 }, { "epoch": 21.87, "learning_rate": 0.0001125981308411215, "loss": 0.011, "step": 2340 }, { "epoch": 21.87, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.369935393333435, "eval_runtime": 3.0331, "eval_samples_per_second": 62.312, "eval_steps_per_second": 7.913, "step": 2340 }, { "epoch": 21.96, "learning_rate": 0.00011222429906542056, "loss": 0.0104, "step": 2350 }, { "epoch": 21.96, "eval_accuracy": 0.6878306878306878, "eval_loss": 1.9012395143508911, "eval_runtime": 3.1254, "eval_samples_per_second": 60.472, "eval_steps_per_second": 7.679, "step": 2350 }, { "epoch": 22.06, "learning_rate": 0.00011185046728971964, "loss": 0.0204, "step": 2360 }, { "epoch": 22.06, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.5529383420944214, "eval_runtime": 3.1087, "eval_samples_per_second": 60.796, "eval_steps_per_second": 7.72, "step": 2360 }, { "epoch": 22.15, "learning_rate": 0.0001114766355140187, "loss": 0.02, "step": 2370 }, { "epoch": 22.15, "eval_accuracy": 0.6931216931216931, "eval_loss": 1.7242515087127686, "eval_runtime": 3.0296, "eval_samples_per_second": 62.384, "eval_steps_per_second": 7.922, "step": 2370 }, { "epoch": 22.24, "learning_rate": 0.00011110280373831776, "loss": 0.0394, "step": 2380 }, { "epoch": 22.24, "eval_accuracy": 0.6931216931216931, "eval_loss": 1.8429406881332397, "eval_runtime": 3.2178, "eval_samples_per_second": 58.735, "eval_steps_per_second": 7.458, "step": 2380 }, { "epoch": 22.34, "learning_rate": 0.00011072897196261682, "loss": 0.0217, "step": 2390 }, { "epoch": 22.34, "eval_accuracy": 0.7407407407407407, "eval_loss": 1.6551986932754517, "eval_runtime": 3.0937, "eval_samples_per_second": 61.092, "eval_steps_per_second": 7.758, "step": 2390 }, { "epoch": 22.43, "learning_rate": 0.00011035514018691588, "loss": 0.0407, "step": 2400 }, { "epoch": 22.43, "eval_accuracy": 0.7407407407407407, "eval_loss": 1.584214687347412, "eval_runtime": 3.0876, "eval_samples_per_second": 61.213, "eval_steps_per_second": 7.773, "step": 2400 }, { "epoch": 22.52, "learning_rate": 0.00010998130841121497, "loss": 0.0075, "step": 2410 }, { "epoch": 22.52, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.5881952047348022, "eval_runtime": 3.1026, "eval_samples_per_second": 60.916, "eval_steps_per_second": 7.735, "step": 2410 }, { "epoch": 22.62, "learning_rate": 0.00010960747663551403, "loss": 0.0144, "step": 2420 }, { "epoch": 22.62, "eval_accuracy": 0.7301587301587301, "eval_loss": 1.6610509157180786, "eval_runtime": 3.2448, "eval_samples_per_second": 58.248, "eval_steps_per_second": 7.397, "step": 2420 }, { "epoch": 22.71, "learning_rate": 0.00010923364485981309, "loss": 0.0021, "step": 2430 }, { "epoch": 22.71, "eval_accuracy": 0.746031746031746, "eval_loss": 1.7075546979904175, "eval_runtime": 3.273, "eval_samples_per_second": 57.746, "eval_steps_per_second": 7.333, "step": 2430 }, { "epoch": 22.8, "learning_rate": 0.00010885981308411215, "loss": 0.0019, "step": 2440 }, { "epoch": 22.8, "eval_accuracy": 0.7354497354497355, "eval_loss": 1.616317629814148, "eval_runtime": 3.3746, "eval_samples_per_second": 56.006, "eval_steps_per_second": 7.112, "step": 2440 }, { "epoch": 22.9, "learning_rate": 0.00010848598130841121, "loss": 0.0074, "step": 2450 }, { "epoch": 22.9, "eval_accuracy": 0.746031746031746, "eval_loss": 1.5530917644500732, "eval_runtime": 3.2179, "eval_samples_per_second": 58.735, "eval_steps_per_second": 7.458, "step": 2450 }, { "epoch": 22.99, "learning_rate": 0.0001081121495327103, "loss": 0.0195, "step": 2460 }, { "epoch": 22.99, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5417741537094116, "eval_runtime": 3.3609, "eval_samples_per_second": 56.235, "eval_steps_per_second": 7.141, "step": 2460 }, { "epoch": 23.08, "learning_rate": 0.00010773831775700935, "loss": 0.0085, "step": 2470 }, { "epoch": 23.08, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.532561182975769, "eval_runtime": 3.2217, "eval_samples_per_second": 58.664, "eval_steps_per_second": 7.449, "step": 2470 }, { "epoch": 23.18, "learning_rate": 0.00010736448598130842, "loss": 0.0015, "step": 2480 }, { "epoch": 23.18, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.530474305152893, "eval_runtime": 3.1097, "eval_samples_per_second": 60.778, "eval_steps_per_second": 7.718, "step": 2480 }, { "epoch": 23.27, "learning_rate": 0.00010699065420560748, "loss": 0.0015, "step": 2490 }, { "epoch": 23.27, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.530909776687622, "eval_runtime": 3.3395, "eval_samples_per_second": 56.594, "eval_steps_per_second": 7.187, "step": 2490 }, { "epoch": 23.36, "learning_rate": 0.00010661682242990654, "loss": 0.0058, "step": 2500 }, { "epoch": 23.36, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5259517431259155, "eval_runtime": 3.1744, "eval_samples_per_second": 59.539, "eval_steps_per_second": 7.56, "step": 2500 }, { "epoch": 23.46, "learning_rate": 0.00010624299065420562, "loss": 0.0015, "step": 2510 }, { "epoch": 23.46, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5237162113189697, "eval_runtime": 3.3526, "eval_samples_per_second": 56.374, "eval_steps_per_second": 7.159, "step": 2510 }, { "epoch": 23.55, "learning_rate": 0.00010586915887850468, "loss": 0.0083, "step": 2520 }, { "epoch": 23.55, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.5256061553955078, "eval_runtime": 3.4146, "eval_samples_per_second": 55.351, "eval_steps_per_second": 7.029, "step": 2520 }, { "epoch": 23.64, "learning_rate": 0.00010549532710280374, "loss": 0.0102, "step": 2530 }, { "epoch": 23.64, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5389765501022339, "eval_runtime": 3.0883, "eval_samples_per_second": 61.198, "eval_steps_per_second": 7.771, "step": 2530 }, { "epoch": 23.74, "learning_rate": 0.0001051214953271028, "loss": 0.0014, "step": 2540 }, { "epoch": 23.74, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.551665186882019, "eval_runtime": 3.2146, "eval_samples_per_second": 58.794, "eval_steps_per_second": 7.466, "step": 2540 }, { "epoch": 23.83, "learning_rate": 0.00010474766355140186, "loss": 0.0137, "step": 2550 }, { "epoch": 23.83, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.5628893375396729, "eval_runtime": 3.2259, "eval_samples_per_second": 58.589, "eval_steps_per_second": 7.44, "step": 2550 }, { "epoch": 23.93, "learning_rate": 0.00010437383177570095, "loss": 0.0211, "step": 2560 }, { "epoch": 23.93, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5249507427215576, "eval_runtime": 3.1967, "eval_samples_per_second": 59.123, "eval_steps_per_second": 7.508, "step": 2560 }, { "epoch": 24.02, "learning_rate": 0.00010400000000000001, "loss": 0.0014, "step": 2570 }, { "epoch": 24.02, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5051255226135254, "eval_runtime": 3.3479, "eval_samples_per_second": 56.454, "eval_steps_per_second": 7.169, "step": 2570 }, { "epoch": 24.11, "learning_rate": 0.00010362616822429907, "loss": 0.0102, "step": 2580 }, { "epoch": 24.11, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.4923993349075317, "eval_runtime": 3.3214, "eval_samples_per_second": 56.904, "eval_steps_per_second": 7.226, "step": 2580 }, { "epoch": 24.21, "learning_rate": 0.00010325233644859813, "loss": 0.0032, "step": 2590 }, { "epoch": 24.21, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.4887175559997559, "eval_runtime": 3.2614, "eval_samples_per_second": 57.95, "eval_steps_per_second": 7.359, "step": 2590 }, { "epoch": 24.3, "learning_rate": 0.0001028785046728972, "loss": 0.0012, "step": 2600 }, { "epoch": 24.3, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.4899401664733887, "eval_runtime": 3.1875, "eval_samples_per_second": 59.295, "eval_steps_per_second": 7.53, "step": 2600 }, { "epoch": 24.39, "learning_rate": 0.00010250467289719628, "loss": 0.0013, "step": 2610 }, { "epoch": 24.39, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.4907020330429077, "eval_runtime": 3.2459, "eval_samples_per_second": 58.227, "eval_steps_per_second": 7.394, "step": 2610 }, { "epoch": 24.49, "learning_rate": 0.00010213084112149533, "loss": 0.0013, "step": 2620 }, { "epoch": 24.49, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.4912313222885132, "eval_runtime": 3.1875, "eval_samples_per_second": 59.294, "eval_steps_per_second": 7.529, "step": 2620 }, { "epoch": 24.58, "learning_rate": 0.0001017570093457944, "loss": 0.0012, "step": 2630 }, { "epoch": 24.58, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.4922280311584473, "eval_runtime": 3.428, "eval_samples_per_second": 55.134, "eval_steps_per_second": 7.001, "step": 2630 }, { "epoch": 24.67, "learning_rate": 0.00010138317757009345, "loss": 0.0131, "step": 2640 }, { "epoch": 24.67, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.501592755317688, "eval_runtime": 3.3214, "eval_samples_per_second": 56.904, "eval_steps_per_second": 7.226, "step": 2640 }, { "epoch": 24.77, "learning_rate": 0.00010100934579439252, "loss": 0.0266, "step": 2650 }, { "epoch": 24.77, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.517897129058838, "eval_runtime": 3.3613, "eval_samples_per_second": 56.229, "eval_steps_per_second": 7.14, "step": 2650 }, { "epoch": 24.86, "learning_rate": 0.0001006355140186916, "loss": 0.0072, "step": 2660 }, { "epoch": 24.86, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5467013120651245, "eval_runtime": 3.2995, "eval_samples_per_second": 57.281, "eval_steps_per_second": 7.274, "step": 2660 }, { "epoch": 24.95, "learning_rate": 0.00010026168224299067, "loss": 0.0084, "step": 2670 }, { "epoch": 24.95, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.5633758306503296, "eval_runtime": 3.3204, "eval_samples_per_second": 56.921, "eval_steps_per_second": 7.228, "step": 2670 }, { "epoch": 25.05, "learning_rate": 9.988785046728972e-05, "loss": 0.0141, "step": 2680 }, { "epoch": 25.05, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.549333095550537, "eval_runtime": 3.3387, "eval_samples_per_second": 56.609, "eval_steps_per_second": 7.188, "step": 2680 }, { "epoch": 25.14, "learning_rate": 9.95140186915888e-05, "loss": 0.006, "step": 2690 }, { "epoch": 25.14, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5422124862670898, "eval_runtime": 3.2678, "eval_samples_per_second": 57.837, "eval_steps_per_second": 7.344, "step": 2690 }, { "epoch": 25.23, "learning_rate": 9.914018691588785e-05, "loss": 0.0049, "step": 2700 }, { "epoch": 25.23, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5276082754135132, "eval_runtime": 3.4271, "eval_samples_per_second": 55.149, "eval_steps_per_second": 7.003, "step": 2700 }, { "epoch": 25.33, "learning_rate": 9.876635514018692e-05, "loss": 0.0012, "step": 2710 }, { "epoch": 25.33, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5173016786575317, "eval_runtime": 3.3173, "eval_samples_per_second": 56.973, "eval_steps_per_second": 7.235, "step": 2710 }, { "epoch": 25.42, "learning_rate": 9.839252336448599e-05, "loss": 0.0012, "step": 2720 }, { "epoch": 25.42, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5145915746688843, "eval_runtime": 3.3338, "eval_samples_per_second": 56.692, "eval_steps_per_second": 7.199, "step": 2720 }, { "epoch": 25.51, "learning_rate": 9.801869158878506e-05, "loss": 0.0104, "step": 2730 }, { "epoch": 25.51, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5092376470565796, "eval_runtime": 3.3686, "eval_samples_per_second": 56.107, "eval_steps_per_second": 7.125, "step": 2730 }, { "epoch": 25.61, "learning_rate": 9.764485981308412e-05, "loss": 0.0011, "step": 2740 }, { "epoch": 25.61, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5066778659820557, "eval_runtime": 3.2919, "eval_samples_per_second": 57.415, "eval_steps_per_second": 7.291, "step": 2740 }, { "epoch": 25.7, "learning_rate": 9.727102803738318e-05, "loss": 0.01, "step": 2750 }, { "epoch": 25.7, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5135998725891113, "eval_runtime": 3.3065, "eval_samples_per_second": 57.16, "eval_steps_per_second": 7.258, "step": 2750 }, { "epoch": 25.79, "learning_rate": 9.689719626168224e-05, "loss": 0.0012, "step": 2760 }, { "epoch": 25.79, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5182033777236938, "eval_runtime": 3.1889, "eval_samples_per_second": 59.268, "eval_steps_per_second": 7.526, "step": 2760 }, { "epoch": 25.89, "learning_rate": 9.652336448598131e-05, "loss": 0.0011, "step": 2770 }, { "epoch": 25.89, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5205999612808228, "eval_runtime": 3.2413, "eval_samples_per_second": 58.311, "eval_steps_per_second": 7.405, "step": 2770 }, { "epoch": 25.98, "learning_rate": 9.614953271028038e-05, "loss": 0.0199, "step": 2780 }, { "epoch": 25.98, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5461922883987427, "eval_runtime": 3.1874, "eval_samples_per_second": 59.295, "eval_steps_per_second": 7.53, "step": 2780 }, { "epoch": 26.07, "learning_rate": 9.577570093457945e-05, "loss": 0.0105, "step": 2790 }, { "epoch": 26.07, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.560713291168213, "eval_runtime": 3.3195, "eval_samples_per_second": 56.936, "eval_steps_per_second": 7.23, "step": 2790 }, { "epoch": 26.17, "learning_rate": 9.540186915887851e-05, "loss": 0.0011, "step": 2800 }, { "epoch": 26.17, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5657832622528076, "eval_runtime": 3.108, "eval_samples_per_second": 60.811, "eval_steps_per_second": 7.722, "step": 2800 }, { "epoch": 26.26, "learning_rate": 9.502803738317757e-05, "loss": 0.004, "step": 2810 }, { "epoch": 26.26, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5667065382003784, "eval_runtime": 3.1078, "eval_samples_per_second": 60.815, "eval_steps_per_second": 7.723, "step": 2810 }, { "epoch": 26.36, "learning_rate": 9.465420560747665e-05, "loss": 0.0011, "step": 2820 }, { "epoch": 26.36, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.565108060836792, "eval_runtime": 3.1483, "eval_samples_per_second": 60.032, "eval_steps_per_second": 7.623, "step": 2820 }, { "epoch": 26.45, "learning_rate": 9.42803738317757e-05, "loss": 0.0129, "step": 2830 }, { "epoch": 26.45, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5749871730804443, "eval_runtime": 3.086, "eval_samples_per_second": 61.245, "eval_steps_per_second": 7.777, "step": 2830 }, { "epoch": 26.54, "learning_rate": 9.390654205607478e-05, "loss": 0.0102, "step": 2840 }, { "epoch": 26.54, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.574375867843628, "eval_runtime": 3.141, "eval_samples_per_second": 60.171, "eval_steps_per_second": 7.641, "step": 2840 }, { "epoch": 26.64, "learning_rate": 9.353271028037384e-05, "loss": 0.0041, "step": 2850 }, { "epoch": 26.64, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5728485584259033, "eval_runtime": 3.0982, "eval_samples_per_second": 61.002, "eval_steps_per_second": 7.746, "step": 2850 }, { "epoch": 26.73, "learning_rate": 9.31588785046729e-05, "loss": 0.0082, "step": 2860 }, { "epoch": 26.73, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5666193962097168, "eval_runtime": 3.0518, "eval_samples_per_second": 61.93, "eval_steps_per_second": 7.864, "step": 2860 }, { "epoch": 26.82, "learning_rate": 9.278504672897197e-05, "loss": 0.0069, "step": 2870 }, { "epoch": 26.82, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5484100580215454, "eval_runtime": 3.1504, "eval_samples_per_second": 59.992, "eval_steps_per_second": 7.618, "step": 2870 }, { "epoch": 26.92, "learning_rate": 9.241121495327104e-05, "loss": 0.0058, "step": 2880 }, { "epoch": 26.92, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5442478656768799, "eval_runtime": 3.2682, "eval_samples_per_second": 57.831, "eval_steps_per_second": 7.344, "step": 2880 }, { "epoch": 27.01, "learning_rate": 9.20373831775701e-05, "loss": 0.0011, "step": 2890 }, { "epoch": 27.01, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5461711883544922, "eval_runtime": 3.0931, "eval_samples_per_second": 61.105, "eval_steps_per_second": 7.759, "step": 2890 }, { "epoch": 27.1, "learning_rate": 9.166355140186916e-05, "loss": 0.0058, "step": 2900 }, { "epoch": 27.1, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5431169271469116, "eval_runtime": 3.0975, "eval_samples_per_second": 61.016, "eval_steps_per_second": 7.748, "step": 2900 }, { "epoch": 27.2, "learning_rate": 9.128971962616823e-05, "loss": 0.0054, "step": 2910 }, { "epoch": 27.2, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5305052995681763, "eval_runtime": 3.1126, "eval_samples_per_second": 60.722, "eval_steps_per_second": 7.711, "step": 2910 }, { "epoch": 27.29, "learning_rate": 9.091588785046729e-05, "loss": 0.0064, "step": 2920 }, { "epoch": 27.29, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5352741479873657, "eval_runtime": 3.1506, "eval_samples_per_second": 59.988, "eval_steps_per_second": 7.618, "step": 2920 }, { "epoch": 27.38, "learning_rate": 9.054205607476636e-05, "loss": 0.0116, "step": 2930 }, { "epoch": 27.38, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.546917200088501, "eval_runtime": 3.3601, "eval_samples_per_second": 56.248, "eval_steps_per_second": 7.143, "step": 2930 }, { "epoch": 27.48, "learning_rate": 9.016822429906543e-05, "loss": 0.001, "step": 2940 }, { "epoch": 27.48, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5545085668563843, "eval_runtime": 3.1606, "eval_samples_per_second": 59.798, "eval_steps_per_second": 7.593, "step": 2940 }, { "epoch": 27.57, "learning_rate": 8.97943925233645e-05, "loss": 0.0058, "step": 2950 }, { "epoch": 27.57, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.554548978805542, "eval_runtime": 3.2008, "eval_samples_per_second": 59.047, "eval_steps_per_second": 7.498, "step": 2950 }, { "epoch": 27.66, "learning_rate": 8.942056074766355e-05, "loss": 0.0056, "step": 2960 }, { "epoch": 27.66, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.546671748161316, "eval_runtime": 3.4523, "eval_samples_per_second": 54.747, "eval_steps_per_second": 6.952, "step": 2960 }, { "epoch": 27.76, "learning_rate": 8.904672897196263e-05, "loss": 0.001, "step": 2970 }, { "epoch": 27.76, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5371477603912354, "eval_runtime": 3.2841, "eval_samples_per_second": 57.551, "eval_steps_per_second": 7.308, "step": 2970 }, { "epoch": 27.85, "learning_rate": 8.867289719626168e-05, "loss": 0.0153, "step": 2980 }, { "epoch": 27.85, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5416946411132812, "eval_runtime": 3.3871, "eval_samples_per_second": 55.799, "eval_steps_per_second": 7.086, "step": 2980 }, { "epoch": 27.94, "learning_rate": 8.829906542056075e-05, "loss": 0.0063, "step": 2990 }, { "epoch": 27.94, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.556403636932373, "eval_runtime": 3.2332, "eval_samples_per_second": 58.456, "eval_steps_per_second": 7.423, "step": 2990 }, { "epoch": 28.04, "learning_rate": 8.792523364485982e-05, "loss": 0.001, "step": 3000 }, { "epoch": 28.04, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5654370784759521, "eval_runtime": 3.2405, "eval_samples_per_second": 58.325, "eval_steps_per_second": 7.406, "step": 3000 }, { "epoch": 28.13, "learning_rate": 8.755140186915888e-05, "loss": 0.001, "step": 3010 }, { "epoch": 28.13, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5694721937179565, "eval_runtime": 3.2113, "eval_samples_per_second": 58.854, "eval_steps_per_second": 7.474, "step": 3010 }, { "epoch": 28.22, "learning_rate": 8.717757009345795e-05, "loss": 0.001, "step": 3020 }, { "epoch": 28.22, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5711843967437744, "eval_runtime": 3.1919, "eval_samples_per_second": 59.213, "eval_steps_per_second": 7.519, "step": 3020 }, { "epoch": 28.32, "learning_rate": 8.6803738317757e-05, "loss": 0.0043, "step": 3030 }, { "epoch": 28.32, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.573392391204834, "eval_runtime": 3.0318, "eval_samples_per_second": 62.338, "eval_steps_per_second": 7.916, "step": 3030 }, { "epoch": 28.41, "learning_rate": 8.642990654205609e-05, "loss": 0.0043, "step": 3040 }, { "epoch": 28.41, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5829499959945679, "eval_runtime": 3.2944, "eval_samples_per_second": 57.37, "eval_steps_per_second": 7.285, "step": 3040 }, { "epoch": 28.5, "learning_rate": 8.605607476635514e-05, "loss": 0.0009, "step": 3050 }, { "epoch": 28.5, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5897167921066284, "eval_runtime": 3.1612, "eval_samples_per_second": 59.787, "eval_steps_per_second": 7.592, "step": 3050 }, { "epoch": 28.6, "learning_rate": 8.56822429906542e-05, "loss": 0.0009, "step": 3060 }, { "epoch": 28.6, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5928257703781128, "eval_runtime": 3.2548, "eval_samples_per_second": 58.068, "eval_steps_per_second": 7.374, "step": 3060 }, { "epoch": 28.69, "learning_rate": 8.530841121495327e-05, "loss": 0.0136, "step": 3070 }, { "epoch": 28.69, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5987612009048462, "eval_runtime": 3.0819, "eval_samples_per_second": 61.325, "eval_steps_per_second": 7.787, "step": 3070 }, { "epoch": 28.79, "learning_rate": 8.493457943925234e-05, "loss": 0.0106, "step": 3080 }, { "epoch": 28.79, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5925683975219727, "eval_runtime": 3.4371, "eval_samples_per_second": 54.988, "eval_steps_per_second": 6.983, "step": 3080 }, { "epoch": 28.88, "learning_rate": 8.456074766355141e-05, "loss": 0.0169, "step": 3090 }, { "epoch": 28.88, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5758073329925537, "eval_runtime": 3.1208, "eval_samples_per_second": 60.562, "eval_steps_per_second": 7.69, "step": 3090 }, { "epoch": 28.97, "learning_rate": 8.418691588785048e-05, "loss": 0.0058, "step": 3100 }, { "epoch": 28.97, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5587332248687744, "eval_runtime": 3.2941, "eval_samples_per_second": 57.375, "eval_steps_per_second": 7.286, "step": 3100 }, { "epoch": 29.07, "learning_rate": 8.381308411214953e-05, "loss": 0.0008, "step": 3110 }, { "epoch": 29.07, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5488831996917725, "eval_runtime": 3.104, "eval_samples_per_second": 60.89, "eval_steps_per_second": 7.732, "step": 3110 }, { "epoch": 29.16, "learning_rate": 8.343925233644861e-05, "loss": 0.0106, "step": 3120 }, { "epoch": 29.16, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5524382591247559, "eval_runtime": 3.1857, "eval_samples_per_second": 59.327, "eval_steps_per_second": 7.534, "step": 3120 }, { "epoch": 29.25, "learning_rate": 8.306542056074766e-05, "loss": 0.0066, "step": 3130 }, { "epoch": 29.25, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5529749393463135, "eval_runtime": 3.2947, "eval_samples_per_second": 57.365, "eval_steps_per_second": 7.284, "step": 3130 }, { "epoch": 29.35, "learning_rate": 8.269158878504673e-05, "loss": 0.0047, "step": 3140 }, { "epoch": 29.35, "eval_accuracy": 0.7724867724867724, "eval_loss": 1.561766266822815, "eval_runtime": 3.0618, "eval_samples_per_second": 61.729, "eval_steps_per_second": 7.839, "step": 3140 }, { "epoch": 29.44, "learning_rate": 8.23177570093458e-05, "loss": 0.0009, "step": 3150 }, { "epoch": 29.44, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.562992811203003, "eval_runtime": 4.4425, "eval_samples_per_second": 42.543, "eval_steps_per_second": 5.402, "step": 3150 }, { "epoch": 29.53, "learning_rate": 8.194392523364487e-05, "loss": 0.01, "step": 3160 }, { "epoch": 29.53, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5655356645584106, "eval_runtime": 3.1679, "eval_samples_per_second": 59.661, "eval_steps_per_second": 7.576, "step": 3160 }, { "epoch": 29.63, "learning_rate": 8.157009345794393e-05, "loss": 0.0131, "step": 3170 }, { "epoch": 29.63, "eval_accuracy": 0.7724867724867724, "eval_loss": 1.568717360496521, "eval_runtime": 4.272, "eval_samples_per_second": 44.242, "eval_steps_per_second": 5.618, "step": 3170 }, { "epoch": 29.72, "learning_rate": 8.119626168224299e-05, "loss": 0.0065, "step": 3180 }, { "epoch": 29.72, "eval_accuracy": 0.7724867724867724, "eval_loss": 1.5695167779922485, "eval_runtime": 3.2729, "eval_samples_per_second": 57.748, "eval_steps_per_second": 7.333, "step": 3180 }, { "epoch": 29.81, "learning_rate": 8.082242990654207e-05, "loss": 0.0049, "step": 3190 }, { "epoch": 29.81, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5673458576202393, "eval_runtime": 3.2283, "eval_samples_per_second": 58.545, "eval_steps_per_second": 7.434, "step": 3190 }, { "epoch": 29.91, "learning_rate": 8.044859813084112e-05, "loss": 0.0008, "step": 3200 }, { "epoch": 29.91, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5677809715270996, "eval_runtime": 3.2238, "eval_samples_per_second": 58.627, "eval_steps_per_second": 7.445, "step": 3200 }, { "epoch": 30.0, "learning_rate": 8.00747663551402e-05, "loss": 0.0009, "step": 3210 }, { "epoch": 30.0, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.569243311882019, "eval_runtime": 3.2344, "eval_samples_per_second": 58.434, "eval_steps_per_second": 7.42, "step": 3210 }, { "epoch": 30.09, "learning_rate": 7.970093457943925e-05, "loss": 0.0008, "step": 3220 }, { "epoch": 30.09, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.569542407989502, "eval_runtime": 3.1865, "eval_samples_per_second": 59.312, "eval_steps_per_second": 7.532, "step": 3220 }, { "epoch": 30.19, "learning_rate": 7.932710280373832e-05, "loss": 0.0008, "step": 3230 }, { "epoch": 30.19, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5703235864639282, "eval_runtime": 3.1705, "eval_samples_per_second": 59.612, "eval_steps_per_second": 7.57, "step": 3230 }, { "epoch": 30.28, "learning_rate": 7.895327102803739e-05, "loss": 0.0047, "step": 3240 }, { "epoch": 30.28, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5737032890319824, "eval_runtime": 3.2402, "eval_samples_per_second": 58.33, "eval_steps_per_second": 7.407, "step": 3240 }, { "epoch": 30.37, "learning_rate": 7.857943925233646e-05, "loss": 0.0008, "step": 3250 }, { "epoch": 30.37, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5757778882980347, "eval_runtime": 3.1947, "eval_samples_per_second": 59.16, "eval_steps_per_second": 7.512, "step": 3250 }, { "epoch": 30.47, "learning_rate": 7.820560747663552e-05, "loss": 0.0059, "step": 3260 }, { "epoch": 30.47, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5871896743774414, "eval_runtime": 3.2182, "eval_samples_per_second": 58.729, "eval_steps_per_second": 7.458, "step": 3260 }, { "epoch": 30.56, "learning_rate": 7.783177570093458e-05, "loss": 0.0091, "step": 3270 }, { "epoch": 30.56, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.608903169631958, "eval_runtime": 3.1183, "eval_samples_per_second": 60.61, "eval_steps_per_second": 7.696, "step": 3270 }, { "epoch": 30.65, "learning_rate": 7.745794392523364e-05, "loss": 0.0033, "step": 3280 }, { "epoch": 30.65, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.623267650604248, "eval_runtime": 3.1768, "eval_samples_per_second": 59.494, "eval_steps_per_second": 7.555, "step": 3280 }, { "epoch": 30.75, "learning_rate": 7.708411214953271e-05, "loss": 0.0165, "step": 3290 }, { "epoch": 30.75, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.611799955368042, "eval_runtime": 3.266, "eval_samples_per_second": 57.869, "eval_steps_per_second": 7.348, "step": 3290 }, { "epoch": 30.84, "learning_rate": 7.671028037383178e-05, "loss": 0.015, "step": 3300 }, { "epoch": 30.84, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5901665687561035, "eval_runtime": 3.1817, "eval_samples_per_second": 59.402, "eval_steps_per_second": 7.543, "step": 3300 }, { "epoch": 30.93, "learning_rate": 7.633644859813085e-05, "loss": 0.0049, "step": 3310 }, { "epoch": 30.93, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5764192342758179, "eval_runtime": 3.4445, "eval_samples_per_second": 54.87, "eval_steps_per_second": 6.968, "step": 3310 }, { "epoch": 31.03, "learning_rate": 7.596261682242991e-05, "loss": 0.0008, "step": 3320 }, { "epoch": 31.03, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5679030418395996, "eval_runtime": 3.18, "eval_samples_per_second": 59.433, "eval_steps_per_second": 7.547, "step": 3320 }, { "epoch": 31.12, "learning_rate": 7.558878504672897e-05, "loss": 0.0071, "step": 3330 }, { "epoch": 31.12, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5708253383636475, "eval_runtime": 3.2342, "eval_samples_per_second": 58.438, "eval_steps_per_second": 7.421, "step": 3330 }, { "epoch": 31.21, "learning_rate": 7.521495327102805e-05, "loss": 0.0068, "step": 3340 }, { "epoch": 31.21, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5782489776611328, "eval_runtime": 3.0933, "eval_samples_per_second": 61.099, "eval_steps_per_second": 7.759, "step": 3340 }, { "epoch": 31.31, "learning_rate": 7.48411214953271e-05, "loss": 0.0058, "step": 3350 }, { "epoch": 31.31, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5822737216949463, "eval_runtime": 3.2696, "eval_samples_per_second": 57.806, "eval_steps_per_second": 7.34, "step": 3350 }, { "epoch": 31.4, "learning_rate": 7.446728971962618e-05, "loss": 0.0008, "step": 3360 }, { "epoch": 31.4, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5795854330062866, "eval_runtime": 3.2032, "eval_samples_per_second": 59.004, "eval_steps_per_second": 7.493, "step": 3360 }, { "epoch": 31.5, "learning_rate": 7.409345794392524e-05, "loss": 0.0008, "step": 3370 }, { "epoch": 31.5, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5795681476593018, "eval_runtime": 3.2182, "eval_samples_per_second": 58.729, "eval_steps_per_second": 7.458, "step": 3370 }, { "epoch": 31.59, "learning_rate": 7.37196261682243e-05, "loss": 0.0084, "step": 3380 }, { "epoch": 31.59, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5760174989700317, "eval_runtime": 3.1968, "eval_samples_per_second": 59.122, "eval_steps_per_second": 7.508, "step": 3380 }, { "epoch": 31.68, "learning_rate": 7.334579439252337e-05, "loss": 0.0074, "step": 3390 }, { "epoch": 31.68, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5738554000854492, "eval_runtime": 3.2685, "eval_samples_per_second": 57.825, "eval_steps_per_second": 7.343, "step": 3390 }, { "epoch": 31.78, "learning_rate": 7.297196261682244e-05, "loss": 0.0039, "step": 3400 }, { "epoch": 31.78, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5700509548187256, "eval_runtime": 3.1115, "eval_samples_per_second": 60.742, "eval_steps_per_second": 7.713, "step": 3400 }, { "epoch": 31.87, "learning_rate": 7.25981308411215e-05, "loss": 0.0144, "step": 3410 }, { "epoch": 31.87, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5835039615631104, "eval_runtime": 3.3036, "eval_samples_per_second": 57.211, "eval_steps_per_second": 7.265, "step": 3410 }, { "epoch": 31.96, "learning_rate": 7.222429906542056e-05, "loss": 0.0051, "step": 3420 }, { "epoch": 31.96, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5920816659927368, "eval_runtime": 3.4411, "eval_samples_per_second": 54.924, "eval_steps_per_second": 6.975, "step": 3420 }, { "epoch": 32.06, "learning_rate": 7.185046728971963e-05, "loss": 0.0054, "step": 3430 }, { "epoch": 32.06, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5909570455551147, "eval_runtime": 3.0254, "eval_samples_per_second": 62.471, "eval_steps_per_second": 7.933, "step": 3430 }, { "epoch": 32.15, "learning_rate": 7.14766355140187e-05, "loss": 0.0047, "step": 3440 }, { "epoch": 32.15, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5870330333709717, "eval_runtime": 3.3063, "eval_samples_per_second": 57.163, "eval_steps_per_second": 7.259, "step": 3440 }, { "epoch": 32.24, "learning_rate": 7.110280373831776e-05, "loss": 0.0125, "step": 3450 }, { "epoch": 32.24, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.5907707214355469, "eval_runtime": 3.0785, "eval_samples_per_second": 61.394, "eval_steps_per_second": 7.796, "step": 3450 }, { "epoch": 32.34, "learning_rate": 7.072897196261683e-05, "loss": 0.0057, "step": 3460 }, { "epoch": 32.34, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5986977815628052, "eval_runtime": 3.0752, "eval_samples_per_second": 61.459, "eval_steps_per_second": 7.804, "step": 3460 }, { "epoch": 32.43, "learning_rate": 7.03551401869159e-05, "loss": 0.0007, "step": 3470 }, { "epoch": 32.43, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.6025718450546265, "eval_runtime": 3.1619, "eval_samples_per_second": 59.775, "eval_steps_per_second": 7.59, "step": 3470 }, { "epoch": 32.52, "learning_rate": 6.998130841121495e-05, "loss": 0.006, "step": 3480 }, { "epoch": 32.52, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.5968295335769653, "eval_runtime": 3.0263, "eval_samples_per_second": 62.452, "eval_steps_per_second": 7.93, "step": 3480 }, { "epoch": 32.62, "learning_rate": 6.960747663551403e-05, "loss": 0.0007, "step": 3490 }, { "epoch": 32.62, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.594142198562622, "eval_runtime": 3.2795, "eval_samples_per_second": 57.631, "eval_steps_per_second": 7.318, "step": 3490 }, { "epoch": 32.71, "learning_rate": 6.923364485981308e-05, "loss": 0.0007, "step": 3500 }, { "epoch": 32.71, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.59373140335083, "eval_runtime": 3.0541, "eval_samples_per_second": 61.884, "eval_steps_per_second": 7.858, "step": 3500 }, { "epoch": 32.8, "learning_rate": 6.885981308411215e-05, "loss": 0.0097, "step": 3510 }, { "epoch": 32.8, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.600218415260315, "eval_runtime": 3.105, "eval_samples_per_second": 60.87, "eval_steps_per_second": 7.73, "step": 3510 }, { "epoch": 32.9, "learning_rate": 6.848598130841122e-05, "loss": 0.0105, "step": 3520 }, { "epoch": 32.9, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6051002740859985, "eval_runtime": 3.4012, "eval_samples_per_second": 55.569, "eval_steps_per_second": 7.056, "step": 3520 }, { "epoch": 32.99, "learning_rate": 6.811214953271028e-05, "loss": 0.0007, "step": 3530 }, { "epoch": 32.99, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6050375699996948, "eval_runtime": 3.0547, "eval_samples_per_second": 61.873, "eval_steps_per_second": 7.857, "step": 3530 }, { "epoch": 33.08, "learning_rate": 6.773831775700935e-05, "loss": 0.0006, "step": 3540 }, { "epoch": 33.08, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6053138971328735, "eval_runtime": 3.1748, "eval_samples_per_second": 59.531, "eval_steps_per_second": 7.559, "step": 3540 }, { "epoch": 33.18, "learning_rate": 6.73644859813084e-05, "loss": 0.0008, "step": 3550 }, { "epoch": 33.18, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6058577299118042, "eval_runtime": 3.1448, "eval_samples_per_second": 60.099, "eval_steps_per_second": 7.632, "step": 3550 }, { "epoch": 33.27, "learning_rate": 6.699065420560749e-05, "loss": 0.0061, "step": 3560 }, { "epoch": 33.27, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.605916976928711, "eval_runtime": 3.094, "eval_samples_per_second": 61.086, "eval_steps_per_second": 7.757, "step": 3560 }, { "epoch": 33.36, "learning_rate": 6.661682242990654e-05, "loss": 0.0098, "step": 3570 }, { "epoch": 33.36, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6077380180358887, "eval_runtime": 3.1529, "eval_samples_per_second": 59.944, "eval_steps_per_second": 7.612, "step": 3570 }, { "epoch": 33.46, "learning_rate": 6.624299065420561e-05, "loss": 0.005, "step": 3580 }, { "epoch": 33.46, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6049844026565552, "eval_runtime": 3.2167, "eval_samples_per_second": 58.755, "eval_steps_per_second": 7.461, "step": 3580 }, { "epoch": 33.55, "learning_rate": 6.586915887850467e-05, "loss": 0.0007, "step": 3590 }, { "epoch": 33.55, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6010342836380005, "eval_runtime": 3.1643, "eval_samples_per_second": 59.729, "eval_steps_per_second": 7.585, "step": 3590 }, { "epoch": 33.64, "learning_rate": 6.549532710280374e-05, "loss": 0.0065, "step": 3600 }, { "epoch": 33.64, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6033494472503662, "eval_runtime": 3.3617, "eval_samples_per_second": 56.222, "eval_steps_per_second": 7.139, "step": 3600 }, { "epoch": 33.74, "learning_rate": 6.512149532710281e-05, "loss": 0.0047, "step": 3610 }, { "epoch": 33.74, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.602766513824463, "eval_runtime": 3.347, "eval_samples_per_second": 56.469, "eval_steps_per_second": 7.171, "step": 3610 }, { "epoch": 33.83, "learning_rate": 6.474766355140188e-05, "loss": 0.0174, "step": 3620 }, { "epoch": 33.83, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.6135550737380981, "eval_runtime": 3.3024, "eval_samples_per_second": 57.231, "eval_steps_per_second": 7.267, "step": 3620 }, { "epoch": 33.93, "learning_rate": 6.437383177570093e-05, "loss": 0.0057, "step": 3630 }, { "epoch": 33.93, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.6196566820144653, "eval_runtime": 3.1607, "eval_samples_per_second": 59.797, "eval_steps_per_second": 7.593, "step": 3630 }, { "epoch": 34.02, "learning_rate": 6.400000000000001e-05, "loss": 0.0007, "step": 3640 }, { "epoch": 34.02, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.6192022562026978, "eval_runtime": 3.2012, "eval_samples_per_second": 59.04, "eval_steps_per_second": 7.497, "step": 3640 }, { "epoch": 34.11, "learning_rate": 6.362616822429906e-05, "loss": 0.01, "step": 3650 }, { "epoch": 34.11, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.616557240486145, "eval_runtime": 3.0372, "eval_samples_per_second": 62.228, "eval_steps_per_second": 7.902, "step": 3650 }, { "epoch": 34.21, "learning_rate": 6.325233644859813e-05, "loss": 0.0144, "step": 3660 }, { "epoch": 34.21, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.6212772130966187, "eval_runtime": 3.1343, "eval_samples_per_second": 60.3, "eval_steps_per_second": 7.657, "step": 3660 }, { "epoch": 34.3, "learning_rate": 6.28785046728972e-05, "loss": 0.0007, "step": 3670 }, { "epoch": 34.3, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.6278337240219116, "eval_runtime": 3.1421, "eval_samples_per_second": 60.15, "eval_steps_per_second": 7.638, "step": 3670 }, { "epoch": 34.39, "learning_rate": 6.250467289719625e-05, "loss": 0.0095, "step": 3680 }, { "epoch": 34.39, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6321161985397339, "eval_runtime": 3.249, "eval_samples_per_second": 58.171, "eval_steps_per_second": 7.387, "step": 3680 }, { "epoch": 34.49, "learning_rate": 6.213084112149533e-05, "loss": 0.0006, "step": 3690 }, { "epoch": 34.49, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6336792707443237, "eval_runtime": 3.1451, "eval_samples_per_second": 60.093, "eval_steps_per_second": 7.631, "step": 3690 }, { "epoch": 34.58, "learning_rate": 6.175700934579439e-05, "loss": 0.0007, "step": 3700 }, { "epoch": 34.58, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6350607872009277, "eval_runtime": 3.1678, "eval_samples_per_second": 59.662, "eval_steps_per_second": 7.576, "step": 3700 }, { "epoch": 34.67, "learning_rate": 6.138317757009347e-05, "loss": 0.0006, "step": 3710 }, { "epoch": 34.67, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6363701820373535, "eval_runtime": 3.2215, "eval_samples_per_second": 58.669, "eval_steps_per_second": 7.45, "step": 3710 }, { "epoch": 34.77, "learning_rate": 6.100934579439253e-05, "loss": 0.0063, "step": 3720 }, { "epoch": 34.77, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6366699934005737, "eval_runtime": 3.2135, "eval_samples_per_second": 58.815, "eval_steps_per_second": 7.469, "step": 3720 }, { "epoch": 34.86, "learning_rate": 6.0635514018691595e-05, "loss": 0.0062, "step": 3730 }, { "epoch": 34.86, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6348670721054077, "eval_runtime": 3.0917, "eval_samples_per_second": 61.132, "eval_steps_per_second": 7.763, "step": 3730 }, { "epoch": 34.95, "learning_rate": 6.0261682242990656e-05, "loss": 0.0064, "step": 3740 }, { "epoch": 34.95, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6241209506988525, "eval_runtime": 3.38, "eval_samples_per_second": 55.917, "eval_steps_per_second": 7.101, "step": 3740 }, { "epoch": 35.05, "learning_rate": 5.9887850467289716e-05, "loss": 0.0006, "step": 3750 }, { "epoch": 35.05, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6208295822143555, "eval_runtime": 3.2066, "eval_samples_per_second": 58.942, "eval_steps_per_second": 7.485, "step": 3750 }, { "epoch": 35.14, "learning_rate": 5.951401869158879e-05, "loss": 0.0006, "step": 3760 }, { "epoch": 35.14, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6204679012298584, "eval_runtime": 3.2685, "eval_samples_per_second": 57.825, "eval_steps_per_second": 7.343, "step": 3760 }, { "epoch": 35.23, "learning_rate": 5.914018691588785e-05, "loss": 0.0137, "step": 3770 }, { "epoch": 35.23, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6377967596054077, "eval_runtime": 3.1739, "eval_samples_per_second": 59.549, "eval_steps_per_second": 7.562, "step": 3770 }, { "epoch": 35.33, "learning_rate": 5.8766355140186925e-05, "loss": 0.0007, "step": 3780 }, { "epoch": 35.33, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6510041952133179, "eval_runtime": 3.0141, "eval_samples_per_second": 62.706, "eval_steps_per_second": 7.963, "step": 3780 }, { "epoch": 35.42, "learning_rate": 5.8392523364485985e-05, "loss": 0.0113, "step": 3790 }, { "epoch": 35.42, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6442958116531372, "eval_runtime": 3.0277, "eval_samples_per_second": 62.424, "eval_steps_per_second": 7.927, "step": 3790 }, { "epoch": 35.51, "learning_rate": 5.8018691588785046e-05, "loss": 0.0112, "step": 3800 }, { "epoch": 35.51, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6433544158935547, "eval_runtime": 3.1874, "eval_samples_per_second": 59.295, "eval_steps_per_second": 7.53, "step": 3800 }, { "epoch": 35.61, "learning_rate": 5.764485981308412e-05, "loss": 0.0006, "step": 3810 }, { "epoch": 35.61, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6435635089874268, "eval_runtime": 3.281, "eval_samples_per_second": 57.604, "eval_steps_per_second": 7.315, "step": 3810 }, { "epoch": 35.7, "learning_rate": 5.727102803738318e-05, "loss": 0.0006, "step": 3820 }, { "epoch": 35.7, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6441317796707153, "eval_runtime": 3.1827, "eval_samples_per_second": 59.384, "eval_steps_per_second": 7.541, "step": 3820 }, { "epoch": 35.79, "learning_rate": 5.6897196261682254e-05, "loss": 0.0145, "step": 3830 }, { "epoch": 35.79, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6258851289749146, "eval_runtime": 3.2346, "eval_samples_per_second": 58.431, "eval_steps_per_second": 7.42, "step": 3830 }, { "epoch": 35.89, "learning_rate": 5.6523364485981315e-05, "loss": 0.0006, "step": 3840 }, { "epoch": 35.89, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6195772886276245, "eval_runtime": 3.2109, "eval_samples_per_second": 58.862, "eval_steps_per_second": 7.475, "step": 3840 }, { "epoch": 35.98, "learning_rate": 5.6149532710280375e-05, "loss": 0.0047, "step": 3850 }, { "epoch": 35.98, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6145455837249756, "eval_runtime": 3.1308, "eval_samples_per_second": 60.367, "eval_steps_per_second": 7.666, "step": 3850 }, { "epoch": 36.07, "learning_rate": 5.577570093457944e-05, "loss": 0.0096, "step": 3860 }, { "epoch": 36.07, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6121631860733032, "eval_runtime": 3.1516, "eval_samples_per_second": 59.97, "eval_steps_per_second": 7.615, "step": 3860 }, { "epoch": 36.17, "learning_rate": 5.54018691588785e-05, "loss": 0.0006, "step": 3870 }, { "epoch": 36.17, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6133732795715332, "eval_runtime": 3.2412, "eval_samples_per_second": 58.312, "eval_steps_per_second": 7.405, "step": 3870 }, { "epoch": 36.26, "learning_rate": 5.502803738317758e-05, "loss": 0.0063, "step": 3880 }, { "epoch": 36.26, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6175814867019653, "eval_runtime": 3.1207, "eval_samples_per_second": 60.563, "eval_steps_per_second": 7.691, "step": 3880 }, { "epoch": 36.36, "learning_rate": 5.465420560747664e-05, "loss": 0.0049, "step": 3890 }, { "epoch": 36.36, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6226632595062256, "eval_runtime": 3.2034, "eval_samples_per_second": 59.0, "eval_steps_per_second": 7.492, "step": 3890 }, { "epoch": 36.45, "learning_rate": 5.42803738317757e-05, "loss": 0.0006, "step": 3900 }, { "epoch": 36.45, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6221052408218384, "eval_runtime": 3.2707, "eval_samples_per_second": 57.786, "eval_steps_per_second": 7.338, "step": 3900 }, { "epoch": 36.54, "learning_rate": 5.390654205607477e-05, "loss": 0.0057, "step": 3910 }, { "epoch": 36.54, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.626526951789856, "eval_runtime": 3.2153, "eval_samples_per_second": 58.781, "eval_steps_per_second": 7.464, "step": 3910 }, { "epoch": 36.64, "learning_rate": 5.353271028037383e-05, "loss": 0.0006, "step": 3920 }, { "epoch": 36.64, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6315840482711792, "eval_runtime": 3.2376, "eval_samples_per_second": 58.377, "eval_steps_per_second": 7.413, "step": 3920 }, { "epoch": 36.73, "learning_rate": 5.3158878504672906e-05, "loss": 0.0095, "step": 3930 }, { "epoch": 36.73, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6349341869354248, "eval_runtime": 3.1421, "eval_samples_per_second": 60.15, "eval_steps_per_second": 7.638, "step": 3930 }, { "epoch": 36.82, "learning_rate": 5.2785046728971966e-05, "loss": 0.0062, "step": 3940 }, { "epoch": 36.82, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6396187543869019, "eval_runtime": 3.1897, "eval_samples_per_second": 59.254, "eval_steps_per_second": 7.524, "step": 3940 }, { "epoch": 36.92, "learning_rate": 5.241121495327103e-05, "loss": 0.0062, "step": 3950 }, { "epoch": 36.92, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6347572803497314, "eval_runtime": 3.3171, "eval_samples_per_second": 56.978, "eval_steps_per_second": 7.235, "step": 3950 }, { "epoch": 37.01, "learning_rate": 5.20373831775701e-05, "loss": 0.0052, "step": 3960 }, { "epoch": 37.01, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6283901929855347, "eval_runtime": 3.2006, "eval_samples_per_second": 59.051, "eval_steps_per_second": 7.499, "step": 3960 }, { "epoch": 37.1, "learning_rate": 5.166355140186916e-05, "loss": 0.0054, "step": 3970 }, { "epoch": 37.1, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6296182870864868, "eval_runtime": 3.2544, "eval_samples_per_second": 58.075, "eval_steps_per_second": 7.375, "step": 3970 }, { "epoch": 37.2, "learning_rate": 5.128971962616823e-05, "loss": 0.0142, "step": 3980 }, { "epoch": 37.2, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6342829465866089, "eval_runtime": 3.2879, "eval_samples_per_second": 57.483, "eval_steps_per_second": 7.299, "step": 3980 }, { "epoch": 37.29, "learning_rate": 5.091588785046729e-05, "loss": 0.0006, "step": 3990 }, { "epoch": 37.29, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6389538049697876, "eval_runtime": 3.1746, "eval_samples_per_second": 59.535, "eval_steps_per_second": 7.56, "step": 3990 }, { "epoch": 37.38, "learning_rate": 5.0542056074766356e-05, "loss": 0.0056, "step": 4000 }, { "epoch": 37.38, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.63650643825531, "eval_runtime": 3.2374, "eval_samples_per_second": 58.38, "eval_steps_per_second": 7.413, "step": 4000 }, { "epoch": 37.48, "learning_rate": 5.0168224299065423e-05, "loss": 0.0006, "step": 4010 }, { "epoch": 37.48, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6352988481521606, "eval_runtime": 3.2389, "eval_samples_per_second": 58.353, "eval_steps_per_second": 7.41, "step": 4010 }, { "epoch": 37.57, "learning_rate": 4.9794392523364484e-05, "loss": 0.0006, "step": 4020 }, { "epoch": 37.57, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6355526447296143, "eval_runtime": 3.3214, "eval_samples_per_second": 56.904, "eval_steps_per_second": 7.226, "step": 4020 }, { "epoch": 37.66, "learning_rate": 4.942056074766355e-05, "loss": 0.0006, "step": 4030 }, { "epoch": 37.66, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6356136798858643, "eval_runtime": 3.3612, "eval_samples_per_second": 56.23, "eval_steps_per_second": 7.14, "step": 4030 }, { "epoch": 37.76, "learning_rate": 4.904672897196262e-05, "loss": 0.0006, "step": 4040 }, { "epoch": 37.76, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6366937160491943, "eval_runtime": 3.2144, "eval_samples_per_second": 58.797, "eval_steps_per_second": 7.466, "step": 4040 }, { "epoch": 37.85, "learning_rate": 4.8672897196261686e-05, "loss": 0.01, "step": 4050 }, { "epoch": 37.85, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.631759762763977, "eval_runtime": 3.29, "eval_samples_per_second": 57.446, "eval_steps_per_second": 7.295, "step": 4050 }, { "epoch": 37.94, "learning_rate": 4.829906542056075e-05, "loss": 0.0151, "step": 4060 }, { "epoch": 37.94, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6293249130249023, "eval_runtime": 3.1536, "eval_samples_per_second": 59.931, "eval_steps_per_second": 7.61, "step": 4060 }, { "epoch": 38.04, "learning_rate": 4.792523364485981e-05, "loss": 0.006, "step": 4070 }, { "epoch": 38.04, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6346244812011719, "eval_runtime": 3.2143, "eval_samples_per_second": 58.801, "eval_steps_per_second": 7.467, "step": 4070 }, { "epoch": 38.13, "learning_rate": 4.755140186915888e-05, "loss": 0.0006, "step": 4080 }, { "epoch": 38.13, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6401140689849854, "eval_runtime": 3.257, "eval_samples_per_second": 58.028, "eval_steps_per_second": 7.369, "step": 4080 }, { "epoch": 38.22, "learning_rate": 4.717757009345795e-05, "loss": 0.0006, "step": 4090 }, { "epoch": 38.22, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.642866849899292, "eval_runtime": 3.1879, "eval_samples_per_second": 59.286, "eval_steps_per_second": 7.528, "step": 4090 }, { "epoch": 38.32, "learning_rate": 4.6803738317757015e-05, "loss": 0.0093, "step": 4100 }, { "epoch": 38.32, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.635596513748169, "eval_runtime": 3.2354, "eval_samples_per_second": 58.417, "eval_steps_per_second": 7.418, "step": 4100 }, { "epoch": 38.41, "learning_rate": 4.6429906542056075e-05, "loss": 0.0046, "step": 4110 }, { "epoch": 38.41, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6306812763214111, "eval_runtime": 3.2449, "eval_samples_per_second": 58.246, "eval_steps_per_second": 7.396, "step": 4110 }, { "epoch": 38.5, "learning_rate": 4.605607476635514e-05, "loss": 0.0111, "step": 4120 }, { "epoch": 38.5, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.6402584314346313, "eval_runtime": 3.1872, "eval_samples_per_second": 59.299, "eval_steps_per_second": 7.53, "step": 4120 }, { "epoch": 38.6, "learning_rate": 4.56822429906542e-05, "loss": 0.0005, "step": 4130 }, { "epoch": 38.6, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.6475365161895752, "eval_runtime": 3.5346, "eval_samples_per_second": 53.472, "eval_steps_per_second": 6.79, "step": 4130 }, { "epoch": 38.69, "learning_rate": 4.530841121495327e-05, "loss": 0.0006, "step": 4140 }, { "epoch": 38.69, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.6509393453598022, "eval_runtime": 3.062, "eval_samples_per_second": 61.724, "eval_steps_per_second": 7.838, "step": 4140 }, { "epoch": 38.79, "learning_rate": 4.493457943925234e-05, "loss": 0.0006, "step": 4150 }, { "epoch": 38.79, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.652215600013733, "eval_runtime": 3.5079, "eval_samples_per_second": 53.878, "eval_steps_per_second": 6.842, "step": 4150 }, { "epoch": 38.88, "learning_rate": 4.4560747663551405e-05, "loss": 0.0075, "step": 4160 }, { "epoch": 38.88, "eval_accuracy": 0.7566137566137566, "eval_loss": 1.653160810470581, "eval_runtime": 3.2762, "eval_samples_per_second": 57.688, "eval_steps_per_second": 7.325, "step": 4160 }, { "epoch": 38.97, "learning_rate": 4.418691588785047e-05, "loss": 0.0097, "step": 4170 }, { "epoch": 38.97, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6547893285751343, "eval_runtime": 3.2144, "eval_samples_per_second": 58.797, "eval_steps_per_second": 7.466, "step": 4170 }, { "epoch": 39.07, "learning_rate": 4.381308411214953e-05, "loss": 0.0058, "step": 4180 }, { "epoch": 39.07, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6471433639526367, "eval_runtime": 3.2814, "eval_samples_per_second": 57.598, "eval_steps_per_second": 7.314, "step": 4180 }, { "epoch": 39.16, "learning_rate": 4.34392523364486e-05, "loss": 0.0049, "step": 4190 }, { "epoch": 39.16, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6409401893615723, "eval_runtime": 3.1609, "eval_samples_per_second": 59.793, "eval_steps_per_second": 7.593, "step": 4190 }, { "epoch": 39.25, "learning_rate": 4.306542056074767e-05, "loss": 0.0111, "step": 4200 }, { "epoch": 39.25, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6414356231689453, "eval_runtime": 3.2279, "eval_samples_per_second": 58.552, "eval_steps_per_second": 7.435, "step": 4200 }, { "epoch": 39.35, "learning_rate": 4.2691588785046734e-05, "loss": 0.0052, "step": 4210 }, { "epoch": 39.35, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.652433156967163, "eval_runtime": 3.1373, "eval_samples_per_second": 60.242, "eval_steps_per_second": 7.65, "step": 4210 }, { "epoch": 39.44, "learning_rate": 4.23177570093458e-05, "loss": 0.0005, "step": 4220 }, { "epoch": 39.44, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.657133936882019, "eval_runtime": 3.2697, "eval_samples_per_second": 57.804, "eval_steps_per_second": 7.34, "step": 4220 }, { "epoch": 39.53, "learning_rate": 4.194392523364486e-05, "loss": 0.0052, "step": 4230 }, { "epoch": 39.53, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6594574451446533, "eval_runtime": 3.2345, "eval_samples_per_second": 58.432, "eval_steps_per_second": 7.42, "step": 4230 }, { "epoch": 39.63, "learning_rate": 4.157009345794393e-05, "loss": 0.0061, "step": 4240 }, { "epoch": 39.63, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6557565927505493, "eval_runtime": 3.268, "eval_samples_per_second": 57.833, "eval_steps_per_second": 7.344, "step": 4240 }, { "epoch": 39.72, "learning_rate": 4.119626168224299e-05, "loss": 0.0056, "step": 4250 }, { "epoch": 39.72, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.649285912513733, "eval_runtime": 3.2248, "eval_samples_per_second": 58.608, "eval_steps_per_second": 7.442, "step": 4250 }, { "epoch": 39.81, "learning_rate": 4.082242990654206e-05, "loss": 0.0006, "step": 4260 }, { "epoch": 39.81, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6455833911895752, "eval_runtime": 3.1611, "eval_samples_per_second": 59.789, "eval_steps_per_second": 7.592, "step": 4260 }, { "epoch": 39.91, "learning_rate": 4.0448598130841124e-05, "loss": 0.011, "step": 4270 }, { "epoch": 39.91, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6422607898712158, "eval_runtime": 3.2656, "eval_samples_per_second": 57.877, "eval_steps_per_second": 7.349, "step": 4270 }, { "epoch": 40.0, "learning_rate": 4.0074766355140184e-05, "loss": 0.006, "step": 4280 }, { "epoch": 40.0, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6446949243545532, "eval_runtime": 3.3462, "eval_samples_per_second": 56.481, "eval_steps_per_second": 7.172, "step": 4280 }, { "epoch": 40.09, "learning_rate": 3.970093457943925e-05, "loss": 0.0043, "step": 4290 }, { "epoch": 40.09, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6466460227966309, "eval_runtime": 3.3213, "eval_samples_per_second": 56.906, "eval_steps_per_second": 7.226, "step": 4290 }, { "epoch": 40.19, "learning_rate": 3.932710280373832e-05, "loss": 0.0098, "step": 4300 }, { "epoch": 40.19, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6464401483535767, "eval_runtime": 3.1753, "eval_samples_per_second": 59.521, "eval_steps_per_second": 7.558, "step": 4300 }, { "epoch": 40.28, "learning_rate": 3.8953271028037386e-05, "loss": 0.006, "step": 4310 }, { "epoch": 40.28, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6519055366516113, "eval_runtime": 3.2608, "eval_samples_per_second": 57.961, "eval_steps_per_second": 7.36, "step": 4310 }, { "epoch": 40.37, "learning_rate": 3.857943925233645e-05, "loss": 0.0053, "step": 4320 }, { "epoch": 40.37, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6609773635864258, "eval_runtime": 3.2634, "eval_samples_per_second": 57.916, "eval_steps_per_second": 7.354, "step": 4320 }, { "epoch": 40.47, "learning_rate": 3.8205607476635514e-05, "loss": 0.0005, "step": 4330 }, { "epoch": 40.47, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6645921468734741, "eval_runtime": 3.2004, "eval_samples_per_second": 59.054, "eval_steps_per_second": 7.499, "step": 4330 }, { "epoch": 40.56, "learning_rate": 3.783177570093458e-05, "loss": 0.0006, "step": 4340 }, { "epoch": 40.56, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6660394668579102, "eval_runtime": 3.415, "eval_samples_per_second": 55.344, "eval_steps_per_second": 7.028, "step": 4340 }, { "epoch": 40.65, "learning_rate": 3.745794392523365e-05, "loss": 0.0082, "step": 4350 }, { "epoch": 40.65, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.669595718383789, "eval_runtime": 3.3898, "eval_samples_per_second": 55.756, "eval_steps_per_second": 7.08, "step": 4350 }, { "epoch": 40.75, "learning_rate": 3.7084112149532715e-05, "loss": 0.0064, "step": 4360 }, { "epoch": 40.75, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.672002911567688, "eval_runtime": 3.3575, "eval_samples_per_second": 56.291, "eval_steps_per_second": 7.148, "step": 4360 }, { "epoch": 40.84, "learning_rate": 3.6710280373831776e-05, "loss": 0.0005, "step": 4370 }, { "epoch": 40.84, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6732321977615356, "eval_runtime": 2.9775, "eval_samples_per_second": 63.477, "eval_steps_per_second": 8.061, "step": 4370 }, { "epoch": 40.93, "learning_rate": 3.633644859813084e-05, "loss": 0.0065, "step": 4380 }, { "epoch": 40.93, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6676148176193237, "eval_runtime": 3.245, "eval_samples_per_second": 58.243, "eval_steps_per_second": 7.396, "step": 4380 }, { "epoch": 41.03, "learning_rate": 3.5962616822429904e-05, "loss": 0.006, "step": 4390 }, { "epoch": 41.03, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6597942113876343, "eval_runtime": 3.4525, "eval_samples_per_second": 54.742, "eval_steps_per_second": 6.951, "step": 4390 }, { "epoch": 41.12, "learning_rate": 3.558878504672897e-05, "loss": 0.0047, "step": 4400 }, { "epoch": 41.12, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6621626615524292, "eval_runtime": 3.1882, "eval_samples_per_second": 59.28, "eval_steps_per_second": 7.528, "step": 4400 }, { "epoch": 41.21, "learning_rate": 3.521495327102804e-05, "loss": 0.0055, "step": 4410 }, { "epoch": 41.21, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6600810289382935, "eval_runtime": 3.1527, "eval_samples_per_second": 59.949, "eval_steps_per_second": 7.613, "step": 4410 }, { "epoch": 41.31, "learning_rate": 3.4841121495327105e-05, "loss": 0.0049, "step": 4420 }, { "epoch": 41.31, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6604359149932861, "eval_runtime": 3.0872, "eval_samples_per_second": 61.22, "eval_steps_per_second": 7.774, "step": 4420 }, { "epoch": 41.4, "learning_rate": 3.446728971962617e-05, "loss": 0.0005, "step": 4430 }, { "epoch": 41.4, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.664872646331787, "eval_runtime": 3.2713, "eval_samples_per_second": 57.776, "eval_steps_per_second": 7.337, "step": 4430 }, { "epoch": 41.5, "learning_rate": 3.409345794392523e-05, "loss": 0.005, "step": 4440 }, { "epoch": 41.5, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6663637161254883, "eval_runtime": 3.1479, "eval_samples_per_second": 60.04, "eval_steps_per_second": 7.624, "step": 4440 }, { "epoch": 41.59, "learning_rate": 3.37196261682243e-05, "loss": 0.0098, "step": 4450 }, { "epoch": 41.59, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6684166193008423, "eval_runtime": 3.1477, "eval_samples_per_second": 60.043, "eval_steps_per_second": 7.625, "step": 4450 }, { "epoch": 41.68, "learning_rate": 3.334579439252337e-05, "loss": 0.0005, "step": 4460 }, { "epoch": 41.68, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.672025203704834, "eval_runtime": 3.1099, "eval_samples_per_second": 60.773, "eval_steps_per_second": 7.717, "step": 4460 }, { "epoch": 41.78, "learning_rate": 3.2971962616822435e-05, "loss": 0.0148, "step": 4470 }, { "epoch": 41.78, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6690597534179688, "eval_runtime": 3.17, "eval_samples_per_second": 59.622, "eval_steps_per_second": 7.571, "step": 4470 }, { "epoch": 41.87, "learning_rate": 3.25981308411215e-05, "loss": 0.0005, "step": 4480 }, { "epoch": 41.87, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6645516157150269, "eval_runtime": 3.0775, "eval_samples_per_second": 61.414, "eval_steps_per_second": 7.799, "step": 4480 }, { "epoch": 41.96, "learning_rate": 3.222429906542056e-05, "loss": 0.0052, "step": 4490 }, { "epoch": 41.96, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6626436710357666, "eval_runtime": 3.1486, "eval_samples_per_second": 60.027, "eval_steps_per_second": 7.622, "step": 4490 }, { "epoch": 42.06, "learning_rate": 3.185046728971963e-05, "loss": 0.0052, "step": 4500 }, { "epoch": 42.06, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6587588787078857, "eval_runtime": 3.3613, "eval_samples_per_second": 56.229, "eval_steps_per_second": 7.14, "step": 4500 }, { "epoch": 42.15, "learning_rate": 3.147663551401869e-05, "loss": 0.0044, "step": 4510 }, { "epoch": 42.15, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6567378044128418, "eval_runtime": 3.1117, "eval_samples_per_second": 60.738, "eval_steps_per_second": 7.713, "step": 4510 }, { "epoch": 42.24, "learning_rate": 3.110280373831776e-05, "loss": 0.0059, "step": 4520 }, { "epoch": 42.24, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6559375524520874, "eval_runtime": 3.1594, "eval_samples_per_second": 59.821, "eval_steps_per_second": 7.596, "step": 4520 }, { "epoch": 42.34, "learning_rate": 3.0728971962616824e-05, "loss": 0.0005, "step": 4530 }, { "epoch": 42.34, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6584206819534302, "eval_runtime": 3.215, "eval_samples_per_second": 58.786, "eval_steps_per_second": 7.465, "step": 4530 }, { "epoch": 42.43, "learning_rate": 3.0355140186915888e-05, "loss": 0.0046, "step": 4540 }, { "epoch": 42.43, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.658771276473999, "eval_runtime": 3.081, "eval_samples_per_second": 61.344, "eval_steps_per_second": 7.79, "step": 4540 }, { "epoch": 42.52, "learning_rate": 2.9981308411214952e-05, "loss": 0.0005, "step": 4550 }, { "epoch": 42.52, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6556823253631592, "eval_runtime": 3.7215, "eval_samples_per_second": 50.786, "eval_steps_per_second": 6.449, "step": 4550 }, { "epoch": 42.62, "learning_rate": 2.960747663551402e-05, "loss": 0.0005, "step": 4560 }, { "epoch": 42.62, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6551687717437744, "eval_runtime": 3.2069, "eval_samples_per_second": 58.935, "eval_steps_per_second": 7.484, "step": 4560 }, { "epoch": 42.71, "learning_rate": 2.9233644859813087e-05, "loss": 0.0005, "step": 4570 }, { "epoch": 42.71, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.655452013015747, "eval_runtime": 3.2606, "eval_samples_per_second": 57.964, "eval_steps_per_second": 7.361, "step": 4570 }, { "epoch": 42.8, "learning_rate": 2.8859813084112154e-05, "loss": 0.0107, "step": 4580 }, { "epoch": 42.8, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6596006155014038, "eval_runtime": 3.241, "eval_samples_per_second": 58.315, "eval_steps_per_second": 7.405, "step": 4580 }, { "epoch": 42.9, "learning_rate": 2.8485981308411214e-05, "loss": 0.015, "step": 4590 }, { "epoch": 42.9, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6657310724258423, "eval_runtime": 3.1747, "eval_samples_per_second": 59.534, "eval_steps_per_second": 7.56, "step": 4590 }, { "epoch": 42.99, "learning_rate": 2.811214953271028e-05, "loss": 0.0052, "step": 4600 }, { "epoch": 42.99, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6676826477050781, "eval_runtime": 3.254, "eval_samples_per_second": 58.083, "eval_steps_per_second": 7.376, "step": 4600 }, { "epoch": 43.08, "learning_rate": 2.7738317757009345e-05, "loss": 0.0051, "step": 4610 }, { "epoch": 43.08, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.668189525604248, "eval_runtime": 3.1876, "eval_samples_per_second": 59.293, "eval_steps_per_second": 7.529, "step": 4610 }, { "epoch": 43.18, "learning_rate": 2.7364485981308413e-05, "loss": 0.0051, "step": 4620 }, { "epoch": 43.18, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6663062572479248, "eval_runtime": 3.3928, "eval_samples_per_second": 55.707, "eval_steps_per_second": 7.074, "step": 4620 }, { "epoch": 43.27, "learning_rate": 2.699065420560748e-05, "loss": 0.0005, "step": 4630 }, { "epoch": 43.27, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.664025068283081, "eval_runtime": 3.2939, "eval_samples_per_second": 57.378, "eval_steps_per_second": 7.286, "step": 4630 }, { "epoch": 43.36, "learning_rate": 2.6616822429906547e-05, "loss": 0.0088, "step": 4640 }, { "epoch": 43.36, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6622815132141113, "eval_runtime": 3.135, "eval_samples_per_second": 60.287, "eval_steps_per_second": 7.655, "step": 4640 }, { "epoch": 43.46, "learning_rate": 2.6242990654205607e-05, "loss": 0.0053, "step": 4650 }, { "epoch": 43.46, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.664080262184143, "eval_runtime": 3.2274, "eval_samples_per_second": 58.56, "eval_steps_per_second": 7.436, "step": 4650 }, { "epoch": 43.55, "learning_rate": 2.5869158878504675e-05, "loss": 0.0064, "step": 4660 }, { "epoch": 43.55, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.665493130683899, "eval_runtime": 3.1518, "eval_samples_per_second": 59.966, "eval_steps_per_second": 7.615, "step": 4660 }, { "epoch": 43.64, "learning_rate": 2.549532710280374e-05, "loss": 0.0005, "step": 4670 }, { "epoch": 43.64, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6664865016937256, "eval_runtime": 3.2866, "eval_samples_per_second": 57.506, "eval_steps_per_second": 7.302, "step": 4670 }, { "epoch": 43.74, "learning_rate": 2.5121495327102806e-05, "loss": 0.005, "step": 4680 }, { "epoch": 43.74, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6677204370498657, "eval_runtime": 3.2047, "eval_samples_per_second": 58.976, "eval_steps_per_second": 7.489, "step": 4680 }, { "epoch": 43.83, "learning_rate": 2.474766355140187e-05, "loss": 0.0049, "step": 4690 }, { "epoch": 43.83, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6758949756622314, "eval_runtime": 3.4806, "eval_samples_per_second": 54.301, "eval_steps_per_second": 6.895, "step": 4690 }, { "epoch": 43.93, "learning_rate": 2.4373831775700937e-05, "loss": 0.0055, "step": 4700 }, { "epoch": 43.93, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6764609813690186, "eval_runtime": 3.2004, "eval_samples_per_second": 59.055, "eval_steps_per_second": 7.499, "step": 4700 }, { "epoch": 44.02, "learning_rate": 2.4e-05, "loss": 0.0144, "step": 4710 }, { "epoch": 44.02, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6740403175354004, "eval_runtime": 3.2944, "eval_samples_per_second": 57.37, "eval_steps_per_second": 7.285, "step": 4710 }, { "epoch": 44.11, "learning_rate": 2.3626168224299068e-05, "loss": 0.0005, "step": 4720 }, { "epoch": 44.11, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6708952188491821, "eval_runtime": 3.0279, "eval_samples_per_second": 62.42, "eval_steps_per_second": 7.926, "step": 4720 }, { "epoch": 44.21, "learning_rate": 2.325233644859813e-05, "loss": 0.0051, "step": 4730 }, { "epoch": 44.21, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6715834140777588, "eval_runtime": 3.2347, "eval_samples_per_second": 58.428, "eval_steps_per_second": 7.419, "step": 4730 }, { "epoch": 44.3, "learning_rate": 2.2878504672897196e-05, "loss": 0.005, "step": 4740 }, { "epoch": 44.3, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6689761877059937, "eval_runtime": 3.1331, "eval_samples_per_second": 60.324, "eval_steps_per_second": 7.66, "step": 4740 }, { "epoch": 44.39, "learning_rate": 2.2504672897196263e-05, "loss": 0.0005, "step": 4750 }, { "epoch": 44.39, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6669028997421265, "eval_runtime": 2.9298, "eval_samples_per_second": 64.509, "eval_steps_per_second": 8.192, "step": 4750 }, { "epoch": 44.49, "learning_rate": 2.2130841121495327e-05, "loss": 0.005, "step": 4760 }, { "epoch": 44.49, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6714545488357544, "eval_runtime": 3.0674, "eval_samples_per_second": 61.615, "eval_steps_per_second": 7.824, "step": 4760 }, { "epoch": 44.58, "learning_rate": 2.1757009345794394e-05, "loss": 0.0005, "step": 4770 }, { "epoch": 44.58, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6736085414886475, "eval_runtime": 2.9894, "eval_samples_per_second": 63.222, "eval_steps_per_second": 8.028, "step": 4770 }, { "epoch": 44.67, "learning_rate": 2.138317757009346e-05, "loss": 0.0046, "step": 4780 }, { "epoch": 44.67, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.676229476928711, "eval_runtime": 2.9735, "eval_samples_per_second": 63.562, "eval_steps_per_second": 8.071, "step": 4780 }, { "epoch": 44.77, "learning_rate": 2.1009345794392525e-05, "loss": 0.0055, "step": 4790 }, { "epoch": 44.77, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6757352352142334, "eval_runtime": 3.1208, "eval_samples_per_second": 60.561, "eval_steps_per_second": 7.69, "step": 4790 }, { "epoch": 44.86, "learning_rate": 2.063551401869159e-05, "loss": 0.0098, "step": 4800 }, { "epoch": 44.86, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.670250654220581, "eval_runtime": 3.1041, "eval_samples_per_second": 60.887, "eval_steps_per_second": 7.732, "step": 4800 }, { "epoch": 44.95, "learning_rate": 2.0261682242990653e-05, "loss": 0.005, "step": 4810 }, { "epoch": 44.95, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6712110042572021, "eval_runtime": 3.374, "eval_samples_per_second": 56.016, "eval_steps_per_second": 7.113, "step": 4810 }, { "epoch": 45.05, "learning_rate": 1.988785046728972e-05, "loss": 0.005, "step": 4820 }, { "epoch": 45.05, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6744420528411865, "eval_runtime": 3.1947, "eval_samples_per_second": 59.161, "eval_steps_per_second": 7.513, "step": 4820 }, { "epoch": 45.14, "learning_rate": 1.9514018691588787e-05, "loss": 0.0005, "step": 4830 }, { "epoch": 45.14, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6774510145187378, "eval_runtime": 3.126, "eval_samples_per_second": 60.46, "eval_steps_per_second": 7.677, "step": 4830 }, { "epoch": 45.23, "learning_rate": 1.914018691588785e-05, "loss": 0.0005, "step": 4840 }, { "epoch": 45.23, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6790132522583008, "eval_runtime": 2.9821, "eval_samples_per_second": 63.379, "eval_steps_per_second": 8.048, "step": 4840 }, { "epoch": 45.33, "learning_rate": 1.8766355140186918e-05, "loss": 0.0005, "step": 4850 }, { "epoch": 45.33, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6794980764389038, "eval_runtime": 3.139, "eval_samples_per_second": 60.21, "eval_steps_per_second": 7.646, "step": 4850 }, { "epoch": 45.42, "learning_rate": 1.8392523364485982e-05, "loss": 0.0049, "step": 4860 }, { "epoch": 45.42, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6801530122756958, "eval_runtime": 2.945, "eval_samples_per_second": 64.176, "eval_steps_per_second": 8.149, "step": 4860 }, { "epoch": 45.51, "learning_rate": 1.8018691588785046e-05, "loss": 0.0051, "step": 4870 }, { "epoch": 45.51, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6776082515716553, "eval_runtime": 2.9852, "eval_samples_per_second": 63.313, "eval_steps_per_second": 8.04, "step": 4870 }, { "epoch": 45.61, "learning_rate": 1.7644859813084113e-05, "loss": 0.0049, "step": 4880 }, { "epoch": 45.61, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6780359745025635, "eval_runtime": 3.0255, "eval_samples_per_second": 62.469, "eval_steps_per_second": 7.933, "step": 4880 }, { "epoch": 45.7, "learning_rate": 1.7271028037383177e-05, "loss": 0.0091, "step": 4890 }, { "epoch": 45.7, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6785778999328613, "eval_runtime": 2.9136, "eval_samples_per_second": 64.868, "eval_steps_per_second": 8.237, "step": 4890 }, { "epoch": 45.79, "learning_rate": 1.6897196261682244e-05, "loss": 0.0046, "step": 4900 }, { "epoch": 45.79, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6758513450622559, "eval_runtime": 2.9184, "eval_samples_per_second": 64.761, "eval_steps_per_second": 8.224, "step": 4900 }, { "epoch": 45.89, "learning_rate": 1.652336448598131e-05, "loss": 0.0056, "step": 4910 }, { "epoch": 45.89, "eval_accuracy": 0.7724867724867724, "eval_loss": 1.6727031469345093, "eval_runtime": 2.9001, "eval_samples_per_second": 65.169, "eval_steps_per_second": 8.275, "step": 4910 }, { "epoch": 45.98, "learning_rate": 1.6149532710280375e-05, "loss": 0.011, "step": 4920 }, { "epoch": 45.98, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6746748685836792, "eval_runtime": 2.9034, "eval_samples_per_second": 65.095, "eval_steps_per_second": 8.266, "step": 4920 }, { "epoch": 46.07, "learning_rate": 1.577570093457944e-05, "loss": 0.0093, "step": 4930 }, { "epoch": 46.07, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.674180507659912, "eval_runtime": 2.9574, "eval_samples_per_second": 63.907, "eval_steps_per_second": 8.115, "step": 4930 }, { "epoch": 46.17, "learning_rate": 1.5401869158878503e-05, "loss": 0.0047, "step": 4940 }, { "epoch": 46.17, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6757071018218994, "eval_runtime": 2.8997, "eval_samples_per_second": 65.179, "eval_steps_per_second": 8.277, "step": 4940 }, { "epoch": 46.26, "learning_rate": 1.502803738317757e-05, "loss": 0.0089, "step": 4950 }, { "epoch": 46.26, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6735711097717285, "eval_runtime": 3.1476, "eval_samples_per_second": 60.046, "eval_steps_per_second": 7.625, "step": 4950 }, { "epoch": 46.36, "learning_rate": 1.4654205607476637e-05, "loss": 0.0005, "step": 4960 }, { "epoch": 46.36, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6719815731048584, "eval_runtime": 3.192, "eval_samples_per_second": 59.21, "eval_steps_per_second": 7.519, "step": 4960 }, { "epoch": 46.45, "learning_rate": 1.4280373831775701e-05, "loss": 0.0005, "step": 4970 }, { "epoch": 46.45, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6716129779815674, "eval_runtime": 2.9471, "eval_samples_per_second": 64.131, "eval_steps_per_second": 8.144, "step": 4970 }, { "epoch": 46.54, "learning_rate": 1.3906542056074767e-05, "loss": 0.0097, "step": 4980 }, { "epoch": 46.54, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.672421932220459, "eval_runtime": 2.9663, "eval_samples_per_second": 63.716, "eval_steps_per_second": 8.091, "step": 4980 }, { "epoch": 46.64, "learning_rate": 1.3532710280373834e-05, "loss": 0.0005, "step": 4990 }, { "epoch": 46.64, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.671976089477539, "eval_runtime": 3.0014, "eval_samples_per_second": 62.971, "eval_steps_per_second": 7.996, "step": 4990 }, { "epoch": 46.73, "learning_rate": 1.3158878504672898e-05, "loss": 0.0005, "step": 5000 }, { "epoch": 46.73, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6718426942825317, "eval_runtime": 3.162, "eval_samples_per_second": 59.772, "eval_steps_per_second": 7.59, "step": 5000 }, { "epoch": 46.82, "learning_rate": 1.2785046728971963e-05, "loss": 0.0004, "step": 5010 }, { "epoch": 46.82, "eval_accuracy": 0.7671957671957672, "eval_loss": 1.6721214056015015, "eval_runtime": 2.9348, "eval_samples_per_second": 64.4, "eval_steps_per_second": 8.178, "step": 5010 }, { "epoch": 46.92, "learning_rate": 1.2411214953271029e-05, "loss": 0.0107, "step": 5020 }, { "epoch": 46.92, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6742489337921143, "eval_runtime": 2.9616, "eval_samples_per_second": 63.816, "eval_steps_per_second": 8.104, "step": 5020 }, { "epoch": 47.01, "learning_rate": 1.2037383177570094e-05, "loss": 0.0051, "step": 5030 }, { "epoch": 47.01, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6763916015625, "eval_runtime": 3.3324, "eval_samples_per_second": 56.716, "eval_steps_per_second": 7.202, "step": 5030 }, { "epoch": 47.1, "learning_rate": 1.166355140186916e-05, "loss": 0.0004, "step": 5040 }, { "epoch": 47.1, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6787925958633423, "eval_runtime": 2.994, "eval_samples_per_second": 63.127, "eval_steps_per_second": 8.016, "step": 5040 }, { "epoch": 47.2, "learning_rate": 1.1289719626168224e-05, "loss": 0.0048, "step": 5050 }, { "epoch": 47.2, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.67880380153656, "eval_runtime": 2.9739, "eval_samples_per_second": 63.554, "eval_steps_per_second": 8.07, "step": 5050 }, { "epoch": 47.29, "learning_rate": 1.0915887850467291e-05, "loss": 0.0005, "step": 5060 }, { "epoch": 47.29, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6779260635375977, "eval_runtime": 2.9453, "eval_samples_per_second": 64.171, "eval_steps_per_second": 8.149, "step": 5060 }, { "epoch": 47.38, "learning_rate": 1.0542056074766356e-05, "loss": 0.0048, "step": 5070 }, { "epoch": 47.38, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6772257089614868, "eval_runtime": 3.0548, "eval_samples_per_second": 61.87, "eval_steps_per_second": 7.856, "step": 5070 }, { "epoch": 47.48, "learning_rate": 1.016822429906542e-05, "loss": 0.0044, "step": 5080 }, { "epoch": 47.48, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.677033543586731, "eval_runtime": 3.0759, "eval_samples_per_second": 61.446, "eval_steps_per_second": 7.803, "step": 5080 }, { "epoch": 47.57, "learning_rate": 9.794392523364486e-06, "loss": 0.0004, "step": 5090 }, { "epoch": 47.57, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6759369373321533, "eval_runtime": 3.134, "eval_samples_per_second": 60.306, "eval_steps_per_second": 7.658, "step": 5090 }, { "epoch": 47.66, "learning_rate": 9.420560747663553e-06, "loss": 0.0053, "step": 5100 }, { "epoch": 47.66, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6769102811813354, "eval_runtime": 3.2383, "eval_samples_per_second": 58.363, "eval_steps_per_second": 7.411, "step": 5100 }, { "epoch": 47.76, "learning_rate": 9.046728971962617e-06, "loss": 0.0093, "step": 5110 }, { "epoch": 47.76, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6791408061981201, "eval_runtime": 3.1946, "eval_samples_per_second": 59.163, "eval_steps_per_second": 7.513, "step": 5110 }, { "epoch": 47.85, "learning_rate": 8.672897196261682e-06, "loss": 0.0046, "step": 5120 }, { "epoch": 47.85, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6781762838363647, "eval_runtime": 3.0007, "eval_samples_per_second": 62.986, "eval_steps_per_second": 7.998, "step": 5120 }, { "epoch": 47.94, "learning_rate": 8.299065420560748e-06, "loss": 0.0101, "step": 5130 }, { "epoch": 47.94, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.679875135421753, "eval_runtime": 2.8961, "eval_samples_per_second": 65.259, "eval_steps_per_second": 8.287, "step": 5130 }, { "epoch": 48.04, "learning_rate": 7.925233644859813e-06, "loss": 0.0055, "step": 5140 }, { "epoch": 48.04, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6814370155334473, "eval_runtime": 2.99, "eval_samples_per_second": 63.21, "eval_steps_per_second": 8.027, "step": 5140 }, { "epoch": 48.13, "learning_rate": 7.551401869158879e-06, "loss": 0.0004, "step": 5150 }, { "epoch": 48.13, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6820155382156372, "eval_runtime": 3.0809, "eval_samples_per_second": 61.346, "eval_steps_per_second": 7.79, "step": 5150 }, { "epoch": 48.22, "learning_rate": 7.1775700934579445e-06, "loss": 0.0005, "step": 5160 }, { "epoch": 48.22, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6822861433029175, "eval_runtime": 2.9449, "eval_samples_per_second": 64.179, "eval_steps_per_second": 8.15, "step": 5160 }, { "epoch": 48.32, "learning_rate": 6.803738317757009e-06, "loss": 0.005, "step": 5170 }, { "epoch": 48.32, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.682709813117981, "eval_runtime": 3.0319, "eval_samples_per_second": 62.337, "eval_steps_per_second": 7.916, "step": 5170 }, { "epoch": 48.41, "learning_rate": 6.429906542056075e-06, "loss": 0.0093, "step": 5180 }, { "epoch": 48.41, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6838692426681519, "eval_runtime": 3.0335, "eval_samples_per_second": 62.304, "eval_steps_per_second": 7.912, "step": 5180 }, { "epoch": 48.5, "learning_rate": 6.05607476635514e-06, "loss": 0.0048, "step": 5190 }, { "epoch": 48.5, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6845488548278809, "eval_runtime": 3.0676, "eval_samples_per_second": 61.612, "eval_steps_per_second": 7.824, "step": 5190 }, { "epoch": 48.6, "learning_rate": 5.682242990654206e-06, "loss": 0.0005, "step": 5200 }, { "epoch": 48.6, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6849009990692139, "eval_runtime": 3.008, "eval_samples_per_second": 62.832, "eval_steps_per_second": 7.979, "step": 5200 }, { "epoch": 48.69, "learning_rate": 5.308411214953271e-06, "loss": 0.0005, "step": 5210 }, { "epoch": 48.69, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6851400136947632, "eval_runtime": 2.9174, "eval_samples_per_second": 64.784, "eval_steps_per_second": 8.227, "step": 5210 }, { "epoch": 48.79, "learning_rate": 4.934579439252337e-06, "loss": 0.0136, "step": 5220 }, { "epoch": 48.79, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.686295747756958, "eval_runtime": 2.9681, "eval_samples_per_second": 63.678, "eval_steps_per_second": 8.086, "step": 5220 }, { "epoch": 48.88, "learning_rate": 4.560747663551402e-06, "loss": 0.005, "step": 5230 }, { "epoch": 48.88, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6866832971572876, "eval_runtime": 2.9518, "eval_samples_per_second": 64.029, "eval_steps_per_second": 8.131, "step": 5230 }, { "epoch": 48.97, "learning_rate": 4.186915887850468e-06, "loss": 0.0096, "step": 5240 }, { "epoch": 48.97, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.685899257659912, "eval_runtime": 3.0006, "eval_samples_per_second": 62.987, "eval_steps_per_second": 7.998, "step": 5240 }, { "epoch": 49.07, "learning_rate": 3.813084112149533e-06, "loss": 0.0048, "step": 5250 }, { "epoch": 49.07, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6844896078109741, "eval_runtime": 2.9876, "eval_samples_per_second": 63.261, "eval_steps_per_second": 8.033, "step": 5250 }, { "epoch": 49.16, "learning_rate": 3.4392523364485985e-06, "loss": 0.0048, "step": 5260 }, { "epoch": 49.16, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6853784322738647, "eval_runtime": 2.9873, "eval_samples_per_second": 63.267, "eval_steps_per_second": 8.034, "step": 5260 }, { "epoch": 49.25, "learning_rate": 3.0654205607476637e-06, "loss": 0.0093, "step": 5270 }, { "epoch": 49.25, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6857768297195435, "eval_runtime": 2.9803, "eval_samples_per_second": 63.416, "eval_steps_per_second": 8.053, "step": 5270 }, { "epoch": 49.35, "learning_rate": 2.691588785046729e-06, "loss": 0.0004, "step": 5280 }, { "epoch": 49.35, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6857463121414185, "eval_runtime": 3.0292, "eval_samples_per_second": 62.393, "eval_steps_per_second": 7.923, "step": 5280 }, { "epoch": 49.44, "learning_rate": 2.3177570093457947e-06, "loss": 0.0095, "step": 5290 }, { "epoch": 49.44, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.685395359992981, "eval_runtime": 2.9645, "eval_samples_per_second": 63.755, "eval_steps_per_second": 8.096, "step": 5290 }, { "epoch": 49.53, "learning_rate": 1.94392523364486e-06, "loss": 0.0005, "step": 5300 }, { "epoch": 49.53, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6847246885299683, "eval_runtime": 3.0054, "eval_samples_per_second": 62.887, "eval_steps_per_second": 7.986, "step": 5300 }, { "epoch": 49.63, "learning_rate": 1.5700934579439254e-06, "loss": 0.0005, "step": 5310 }, { "epoch": 49.63, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6844700574874878, "eval_runtime": 2.9223, "eval_samples_per_second": 64.676, "eval_steps_per_second": 8.213, "step": 5310 }, { "epoch": 49.72, "learning_rate": 1.1962616822429907e-06, "loss": 0.0092, "step": 5320 }, { "epoch": 49.72, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6847366094589233, "eval_runtime": 3.0496, "eval_samples_per_second": 61.975, "eval_steps_per_second": 7.87, "step": 5320 }, { "epoch": 49.81, "learning_rate": 8.224299065420561e-07, "loss": 0.0005, "step": 5330 }, { "epoch": 49.81, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6845016479492188, "eval_runtime": 3.1606, "eval_samples_per_second": 59.798, "eval_steps_per_second": 7.593, "step": 5330 }, { "epoch": 49.91, "learning_rate": 4.4859813084112153e-07, "loss": 0.0092, "step": 5340 }, { "epoch": 49.91, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.684584140777588, "eval_runtime": 2.921, "eval_samples_per_second": 64.704, "eval_steps_per_second": 8.216, "step": 5340 }, { "epoch": 50.0, "learning_rate": 7.476635514018692e-08, "loss": 0.0005, "step": 5350 }, { "epoch": 50.0, "eval_accuracy": 0.7619047619047619, "eval_loss": 1.6846909523010254, "eval_runtime": 2.9853, "eval_samples_per_second": 63.309, "eval_steps_per_second": 8.039, "step": 5350 }, { "epoch": 50.0, "step": 5350, "total_flos": 6.575784632757043e+18, "train_loss": 0.14066274270554568, "train_runtime": 4434.2749, "train_samples_per_second": 19.135, "train_steps_per_second": 1.207 } ], "logging_steps": 10, "max_steps": 5350, "num_train_epochs": 50, "save_steps": 10, "total_flos": 6.575784632757043e+18, "trial_name": null, "trial_params": null }