vit-eGTZANplus / trainer_state.json
ghermoso's picture
End of training
0aa2858 verified
{
"best_metric": 0.8358047604560852,
"best_model_checkpoint": "./vit-eGTZANplus\\checkpoint-480",
"epoch": 50.0,
"eval_steps": 10,
"global_step": 5350,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.09,
"learning_rate": 0.00019962616822429908,
"loss": 2.4098,
"step": 10
},
{
"epoch": 0.09,
"eval_accuracy": 0.14285714285714285,
"eval_loss": 2.3848845958709717,
"eval_runtime": 3.3561,
"eval_samples_per_second": 56.316,
"eval_steps_per_second": 7.151,
"step": 10
},
{
"epoch": 0.19,
"learning_rate": 0.00019925233644859814,
"loss": 2.3376,
"step": 20
},
{
"epoch": 0.19,
"eval_accuracy": 0.21164021164021163,
"eval_loss": 2.257709264755249,
"eval_runtime": 3.2453,
"eval_samples_per_second": 58.238,
"eval_steps_per_second": 7.395,
"step": 20
},
{
"epoch": 0.28,
"learning_rate": 0.0001988785046728972,
"loss": 2.2047,
"step": 30
},
{
"epoch": 0.28,
"eval_accuracy": 0.2962962962962963,
"eval_loss": 2.158203125,
"eval_runtime": 2.9918,
"eval_samples_per_second": 63.172,
"eval_steps_per_second": 8.022,
"step": 30
},
{
"epoch": 0.37,
"learning_rate": 0.00019850467289719628,
"loss": 2.1267,
"step": 40
},
{
"epoch": 0.37,
"eval_accuracy": 0.3862433862433862,
"eval_loss": 1.942036747932434,
"eval_runtime": 3.0218,
"eval_samples_per_second": 62.546,
"eval_steps_per_second": 7.942,
"step": 40
},
{
"epoch": 0.47,
"learning_rate": 0.00019813084112149535,
"loss": 2.0365,
"step": 50
},
{
"epoch": 0.47,
"eval_accuracy": 0.36507936507936506,
"eval_loss": 1.9487409591674805,
"eval_runtime": 2.938,
"eval_samples_per_second": 64.329,
"eval_steps_per_second": 8.169,
"step": 50
},
{
"epoch": 0.56,
"learning_rate": 0.00019775700934579439,
"loss": 1.9884,
"step": 60
},
{
"epoch": 0.56,
"eval_accuracy": 0.4973544973544973,
"eval_loss": 1.7399966716766357,
"eval_runtime": 3.2957,
"eval_samples_per_second": 57.348,
"eval_steps_per_second": 7.282,
"step": 60
},
{
"epoch": 0.65,
"learning_rate": 0.00019738317757009345,
"loss": 1.8822,
"step": 70
},
{
"epoch": 0.65,
"eval_accuracy": 0.455026455026455,
"eval_loss": 1.6512662172317505,
"eval_runtime": 3.1742,
"eval_samples_per_second": 59.542,
"eval_steps_per_second": 7.561,
"step": 70
},
{
"epoch": 0.75,
"learning_rate": 0.00019700934579439255,
"loss": 1.7083,
"step": 80
},
{
"epoch": 0.75,
"eval_accuracy": 0.4603174603174603,
"eval_loss": 1.560472011566162,
"eval_runtime": 3.0275,
"eval_samples_per_second": 62.429,
"eval_steps_per_second": 7.927,
"step": 80
},
{
"epoch": 0.84,
"learning_rate": 0.00019663551401869161,
"loss": 1.6416,
"step": 90
},
{
"epoch": 0.84,
"eval_accuracy": 0.49206349206349204,
"eval_loss": 1.5726529359817505,
"eval_runtime": 3.0636,
"eval_samples_per_second": 61.691,
"eval_steps_per_second": 7.834,
"step": 90
},
{
"epoch": 0.93,
"learning_rate": 0.00019626168224299065,
"loss": 1.6473,
"step": 100
},
{
"epoch": 0.93,
"eval_accuracy": 0.4656084656084656,
"eval_loss": 1.648504614830017,
"eval_runtime": 3.2766,
"eval_samples_per_second": 57.682,
"eval_steps_per_second": 7.325,
"step": 100
},
{
"epoch": 1.03,
"learning_rate": 0.0001959252336448598,
"loss": 1.3355,
"step": 110
},
{
"epoch": 1.03,
"eval_accuracy": 0.5343915343915344,
"eval_loss": 1.4000248908996582,
"eval_runtime": 3.1074,
"eval_samples_per_second": 60.823,
"eval_steps_per_second": 7.724,
"step": 110
},
{
"epoch": 1.12,
"learning_rate": 0.00019555140186915888,
"loss": 1.4677,
"step": 120
},
{
"epoch": 1.12,
"eval_accuracy": 0.544973544973545,
"eval_loss": 1.3445547819137573,
"eval_runtime": 3.4486,
"eval_samples_per_second": 54.805,
"eval_steps_per_second": 6.959,
"step": 120
},
{
"epoch": 1.21,
"learning_rate": 0.00019517757009345797,
"loss": 1.3832,
"step": 130
},
{
"epoch": 1.21,
"eval_accuracy": 0.5555555555555556,
"eval_loss": 1.3656994104385376,
"eval_runtime": 3.0673,
"eval_samples_per_second": 61.618,
"eval_steps_per_second": 7.824,
"step": 130
},
{
"epoch": 1.31,
"learning_rate": 0.00019480373831775701,
"loss": 1.3364,
"step": 140
},
{
"epoch": 1.31,
"eval_accuracy": 0.582010582010582,
"eval_loss": 1.255820631980896,
"eval_runtime": 3.1137,
"eval_samples_per_second": 60.7,
"eval_steps_per_second": 7.708,
"step": 140
},
{
"epoch": 1.4,
"learning_rate": 0.00019442990654205608,
"loss": 1.3741,
"step": 150
},
{
"epoch": 1.4,
"eval_accuracy": 0.5343915343915344,
"eval_loss": 1.4308785200119019,
"eval_runtime": 3.1126,
"eval_samples_per_second": 60.721,
"eval_steps_per_second": 7.711,
"step": 150
},
{
"epoch": 1.5,
"learning_rate": 0.00019405607476635515,
"loss": 1.3806,
"step": 160
},
{
"epoch": 1.5,
"eval_accuracy": 0.5873015873015873,
"eval_loss": 1.3130360841751099,
"eval_runtime": 3.1851,
"eval_samples_per_second": 59.339,
"eval_steps_per_second": 7.535,
"step": 160
},
{
"epoch": 1.59,
"learning_rate": 0.00019368224299065422,
"loss": 1.096,
"step": 170
},
{
"epoch": 1.59,
"eval_accuracy": 0.6084656084656085,
"eval_loss": 1.2385209798812866,
"eval_runtime": 3.165,
"eval_samples_per_second": 59.716,
"eval_steps_per_second": 7.583,
"step": 170
},
{
"epoch": 1.68,
"learning_rate": 0.00019330841121495328,
"loss": 1.2139,
"step": 180
},
{
"epoch": 1.68,
"eval_accuracy": 0.5925925925925926,
"eval_loss": 1.2447173595428467,
"eval_runtime": 3.2024,
"eval_samples_per_second": 59.019,
"eval_steps_per_second": 7.494,
"step": 180
},
{
"epoch": 1.78,
"learning_rate": 0.00019293457943925235,
"loss": 1.1646,
"step": 190
},
{
"epoch": 1.78,
"eval_accuracy": 0.6243386243386243,
"eval_loss": 1.1505087614059448,
"eval_runtime": 3.3803,
"eval_samples_per_second": 55.912,
"eval_steps_per_second": 7.1,
"step": 190
},
{
"epoch": 1.87,
"learning_rate": 0.00019256074766355142,
"loss": 1.1851,
"step": 200
},
{
"epoch": 1.87,
"eval_accuracy": 0.5555555555555556,
"eval_loss": 1.292531967163086,
"eval_runtime": 3.3243,
"eval_samples_per_second": 56.854,
"eval_steps_per_second": 7.22,
"step": 200
},
{
"epoch": 1.96,
"learning_rate": 0.00019218691588785048,
"loss": 1.0773,
"step": 210
},
{
"epoch": 1.96,
"eval_accuracy": 0.6825396825396826,
"eval_loss": 1.025804042816162,
"eval_runtime": 3.2438,
"eval_samples_per_second": 58.265,
"eval_steps_per_second": 7.399,
"step": 210
},
{
"epoch": 2.06,
"learning_rate": 0.00019181308411214952,
"loss": 1.2694,
"step": 220
},
{
"epoch": 2.06,
"eval_accuracy": 0.5978835978835979,
"eval_loss": 1.1972746849060059,
"eval_runtime": 3.0541,
"eval_samples_per_second": 61.884,
"eval_steps_per_second": 7.858,
"step": 220
},
{
"epoch": 2.15,
"learning_rate": 0.00019143925233644862,
"loss": 0.8254,
"step": 230
},
{
"epoch": 2.15,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 0.9814253449440002,
"eval_runtime": 3.1455,
"eval_samples_per_second": 60.086,
"eval_steps_per_second": 7.63,
"step": 230
},
{
"epoch": 2.24,
"learning_rate": 0.0001910654205607477,
"loss": 0.8614,
"step": 240
},
{
"epoch": 2.24,
"eval_accuracy": 0.656084656084656,
"eval_loss": 1.1236768960952759,
"eval_runtime": 3.1907,
"eval_samples_per_second": 59.235,
"eval_steps_per_second": 7.522,
"step": 240
},
{
"epoch": 2.34,
"learning_rate": 0.00019069158878504673,
"loss": 0.961,
"step": 250
},
{
"epoch": 2.34,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.0817521810531616,
"eval_runtime": 3.2851,
"eval_samples_per_second": 57.533,
"eval_steps_per_second": 7.306,
"step": 250
},
{
"epoch": 2.43,
"learning_rate": 0.0001903177570093458,
"loss": 0.8305,
"step": 260
},
{
"epoch": 2.43,
"eval_accuracy": 0.6613756613756614,
"eval_loss": 1.0328330993652344,
"eval_runtime": 3.1629,
"eval_samples_per_second": 59.756,
"eval_steps_per_second": 7.588,
"step": 260
},
{
"epoch": 2.52,
"learning_rate": 0.00018994392523364486,
"loss": 0.882,
"step": 270
},
{
"epoch": 2.52,
"eval_accuracy": 0.6349206349206349,
"eval_loss": 1.1382650136947632,
"eval_runtime": 3.1128,
"eval_samples_per_second": 60.718,
"eval_steps_per_second": 7.71,
"step": 270
},
{
"epoch": 2.62,
"learning_rate": 0.00018957009345794396,
"loss": 0.9153,
"step": 280
},
{
"epoch": 2.62,
"eval_accuracy": 0.6507936507936508,
"eval_loss": 1.0411267280578613,
"eval_runtime": 3.1356,
"eval_samples_per_second": 60.275,
"eval_steps_per_second": 7.654,
"step": 280
},
{
"epoch": 2.71,
"learning_rate": 0.000189196261682243,
"loss": 0.8855,
"step": 290
},
{
"epoch": 2.71,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 0.9475411772727966,
"eval_runtime": 3.2668,
"eval_samples_per_second": 57.854,
"eval_steps_per_second": 7.347,
"step": 290
},
{
"epoch": 2.8,
"learning_rate": 0.00018882242990654206,
"loss": 0.8792,
"step": 300
},
{
"epoch": 2.8,
"eval_accuracy": 0.5978835978835979,
"eval_loss": 1.1673120260238647,
"eval_runtime": 3.2828,
"eval_samples_per_second": 57.574,
"eval_steps_per_second": 7.311,
"step": 300
},
{
"epoch": 2.9,
"learning_rate": 0.00018844859813084113,
"loss": 0.8555,
"step": 310
},
{
"epoch": 2.9,
"eval_accuracy": 0.7407407407407407,
"eval_loss": 0.8777327537536621,
"eval_runtime": 3.2563,
"eval_samples_per_second": 58.041,
"eval_steps_per_second": 7.37,
"step": 310
},
{
"epoch": 2.99,
"learning_rate": 0.0001880747663551402,
"loss": 0.8841,
"step": 320
},
{
"epoch": 2.99,
"eval_accuracy": 0.671957671957672,
"eval_loss": 1.0181235074996948,
"eval_runtime": 3.2414,
"eval_samples_per_second": 58.308,
"eval_steps_per_second": 7.404,
"step": 320
},
{
"epoch": 3.08,
"learning_rate": 0.00018770093457943926,
"loss": 0.5579,
"step": 330
},
{
"epoch": 3.08,
"eval_accuracy": 0.6613756613756614,
"eval_loss": 1.052778959274292,
"eval_runtime": 3.3555,
"eval_samples_per_second": 56.326,
"eval_steps_per_second": 7.153,
"step": 330
},
{
"epoch": 3.18,
"learning_rate": 0.00018732710280373833,
"loss": 0.5953,
"step": 340
},
{
"epoch": 3.18,
"eval_accuracy": 0.6455026455026455,
"eval_loss": 1.095900058746338,
"eval_runtime": 3.2945,
"eval_samples_per_second": 57.369,
"eval_steps_per_second": 7.285,
"step": 340
},
{
"epoch": 3.27,
"learning_rate": 0.0001869532710280374,
"loss": 0.62,
"step": 350
},
{
"epoch": 3.27,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 0.9120954275131226,
"eval_runtime": 3.6012,
"eval_samples_per_second": 52.483,
"eval_steps_per_second": 6.664,
"step": 350
},
{
"epoch": 3.36,
"learning_rate": 0.00018657943925233644,
"loss": 0.7633,
"step": 360
},
{
"epoch": 3.36,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 0.926105797290802,
"eval_runtime": 3.4368,
"eval_samples_per_second": 54.992,
"eval_steps_per_second": 6.983,
"step": 360
},
{
"epoch": 3.46,
"learning_rate": 0.0001862056074766355,
"loss": 0.7009,
"step": 370
},
{
"epoch": 3.46,
"eval_accuracy": 0.6296296296296297,
"eval_loss": 1.1945137977600098,
"eval_runtime": 3.788,
"eval_samples_per_second": 49.895,
"eval_steps_per_second": 6.336,
"step": 370
},
{
"epoch": 3.55,
"learning_rate": 0.0001858317757009346,
"loss": 0.8007,
"step": 380
},
{
"epoch": 3.55,
"eval_accuracy": 0.6296296296296297,
"eval_loss": 1.0851304531097412,
"eval_runtime": 3.287,
"eval_samples_per_second": 57.499,
"eval_steps_per_second": 7.301,
"step": 380
},
{
"epoch": 3.64,
"learning_rate": 0.00018545794392523367,
"loss": 0.7921,
"step": 390
},
{
"epoch": 3.64,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 0.935813307762146,
"eval_runtime": 3.1749,
"eval_samples_per_second": 59.529,
"eval_steps_per_second": 7.559,
"step": 390
},
{
"epoch": 3.74,
"learning_rate": 0.0001850841121495327,
"loss": 0.5837,
"step": 400
},
{
"epoch": 3.74,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 0.987457811832428,
"eval_runtime": 3.1206,
"eval_samples_per_second": 60.565,
"eval_steps_per_second": 7.691,
"step": 400
},
{
"epoch": 3.83,
"learning_rate": 0.00018471028037383178,
"loss": 0.6557,
"step": 410
},
{
"epoch": 3.83,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 0.9543613791465759,
"eval_runtime": 3.2455,
"eval_samples_per_second": 58.235,
"eval_steps_per_second": 7.395,
"step": 410
},
{
"epoch": 3.93,
"learning_rate": 0.00018433644859813084,
"loss": 0.8081,
"step": 420
},
{
"epoch": 3.93,
"eval_accuracy": 0.656084656084656,
"eval_loss": 1.087867259979248,
"eval_runtime": 3.0993,
"eval_samples_per_second": 60.981,
"eval_steps_per_second": 7.744,
"step": 420
},
{
"epoch": 4.02,
"learning_rate": 0.0001839626168224299,
"loss": 0.7486,
"step": 430
},
{
"epoch": 4.02,
"eval_accuracy": 0.6190476190476191,
"eval_loss": 1.266100287437439,
"eval_runtime": 3.1838,
"eval_samples_per_second": 59.363,
"eval_steps_per_second": 7.538,
"step": 430
},
{
"epoch": 4.11,
"learning_rate": 0.00018358878504672898,
"loss": 0.5166,
"step": 440
},
{
"epoch": 4.11,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 0.9324920773506165,
"eval_runtime": 3.1917,
"eval_samples_per_second": 59.216,
"eval_steps_per_second": 7.52,
"step": 440
},
{
"epoch": 4.21,
"learning_rate": 0.00018321495327102804,
"loss": 0.4375,
"step": 450
},
{
"epoch": 4.21,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 0.8819901943206787,
"eval_runtime": 3.1479,
"eval_samples_per_second": 60.04,
"eval_steps_per_second": 7.624,
"step": 450
},
{
"epoch": 4.3,
"learning_rate": 0.0001828411214953271,
"loss": 0.4839,
"step": 460
},
{
"epoch": 4.3,
"eval_accuracy": 0.6825396825396826,
"eval_loss": 1.0533747673034668,
"eval_runtime": 3.1426,
"eval_samples_per_second": 60.141,
"eval_steps_per_second": 7.637,
"step": 460
},
{
"epoch": 4.39,
"learning_rate": 0.00018246728971962618,
"loss": 0.3932,
"step": 470
},
{
"epoch": 4.39,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.0015952587127686,
"eval_runtime": 3.1739,
"eval_samples_per_second": 59.548,
"eval_steps_per_second": 7.562,
"step": 470
},
{
"epoch": 4.49,
"learning_rate": 0.00018209345794392525,
"loss": 0.4672,
"step": 480
},
{
"epoch": 4.49,
"eval_accuracy": 0.746031746031746,
"eval_loss": 0.8358047604560852,
"eval_runtime": 3.1086,
"eval_samples_per_second": 60.8,
"eval_steps_per_second": 7.721,
"step": 480
},
{
"epoch": 4.58,
"learning_rate": 0.0001817196261682243,
"loss": 0.4839,
"step": 490
},
{
"epoch": 4.58,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.0010868310928345,
"eval_runtime": 3.436,
"eval_samples_per_second": 55.006,
"eval_steps_per_second": 6.985,
"step": 490
},
{
"epoch": 4.67,
"learning_rate": 0.00018134579439252338,
"loss": 0.4536,
"step": 500
},
{
"epoch": 4.67,
"eval_accuracy": 0.5925925925925926,
"eval_loss": 1.4390077590942383,
"eval_runtime": 3.2086,
"eval_samples_per_second": 58.905,
"eval_steps_per_second": 7.48,
"step": 500
},
{
"epoch": 4.77,
"learning_rate": 0.00018097196261682242,
"loss": 0.5925,
"step": 510
},
{
"epoch": 4.77,
"eval_accuracy": 0.6455026455026455,
"eval_loss": 1.0699883699417114,
"eval_runtime": 3.2145,
"eval_samples_per_second": 58.796,
"eval_steps_per_second": 7.466,
"step": 510
},
{
"epoch": 4.86,
"learning_rate": 0.0001805981308411215,
"loss": 0.4905,
"step": 520
},
{
"epoch": 4.86,
"eval_accuracy": 0.6772486772486772,
"eval_loss": 1.0934034585952759,
"eval_runtime": 3.1633,
"eval_samples_per_second": 59.748,
"eval_steps_per_second": 7.587,
"step": 520
},
{
"epoch": 4.95,
"learning_rate": 0.00018022429906542058,
"loss": 0.6323,
"step": 530
},
{
"epoch": 4.95,
"eval_accuracy": 0.656084656084656,
"eval_loss": 1.074127197265625,
"eval_runtime": 3.0611,
"eval_samples_per_second": 61.742,
"eval_steps_per_second": 7.84,
"step": 530
},
{
"epoch": 5.05,
"learning_rate": 0.00017985046728971965,
"loss": 0.3686,
"step": 540
},
{
"epoch": 5.05,
"eval_accuracy": 0.6613756613756614,
"eval_loss": 1.17642080783844,
"eval_runtime": 3.1625,
"eval_samples_per_second": 59.764,
"eval_steps_per_second": 7.589,
"step": 540
},
{
"epoch": 5.14,
"learning_rate": 0.0001794766355140187,
"loss": 0.3233,
"step": 550
},
{
"epoch": 5.14,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 0.9969061017036438,
"eval_runtime": 3.1186,
"eval_samples_per_second": 60.604,
"eval_steps_per_second": 7.696,
"step": 550
},
{
"epoch": 5.23,
"learning_rate": 0.00017910280373831776,
"loss": 0.2791,
"step": 560
},
{
"epoch": 5.23,
"eval_accuracy": 0.6613756613756614,
"eval_loss": 1.20182466506958,
"eval_runtime": 3.2809,
"eval_samples_per_second": 57.607,
"eval_steps_per_second": 7.315,
"step": 560
},
{
"epoch": 5.33,
"learning_rate": 0.00017872897196261682,
"loss": 0.2798,
"step": 570
},
{
"epoch": 5.33,
"eval_accuracy": 0.6772486772486772,
"eval_loss": 1.0335559844970703,
"eval_runtime": 3.1419,
"eval_samples_per_second": 60.155,
"eval_steps_per_second": 7.639,
"step": 570
},
{
"epoch": 5.42,
"learning_rate": 0.0001783551401869159,
"loss": 0.2892,
"step": 580
},
{
"epoch": 5.42,
"eval_accuracy": 0.6296296296296297,
"eval_loss": 1.3315926790237427,
"eval_runtime": 3.2144,
"eval_samples_per_second": 58.797,
"eval_steps_per_second": 7.466,
"step": 580
},
{
"epoch": 5.51,
"learning_rate": 0.00017798130841121496,
"loss": 0.317,
"step": 590
},
{
"epoch": 5.51,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.0159733295440674,
"eval_runtime": 3.1281,
"eval_samples_per_second": 60.42,
"eval_steps_per_second": 7.672,
"step": 590
},
{
"epoch": 5.61,
"learning_rate": 0.00017760747663551403,
"loss": 0.3673,
"step": 600
},
{
"epoch": 5.61,
"eval_accuracy": 0.6190476190476191,
"eval_loss": 1.3200335502624512,
"eval_runtime": 2.9997,
"eval_samples_per_second": 63.006,
"eval_steps_per_second": 8.001,
"step": 600
},
{
"epoch": 5.7,
"learning_rate": 0.0001772336448598131,
"loss": 0.4733,
"step": 610
},
{
"epoch": 5.7,
"eval_accuracy": 0.6455026455026455,
"eval_loss": 1.2423778772354126,
"eval_runtime": 3.2132,
"eval_samples_per_second": 58.819,
"eval_steps_per_second": 7.469,
"step": 610
},
{
"epoch": 5.79,
"learning_rate": 0.00017685981308411216,
"loss": 0.4683,
"step": 620
},
{
"epoch": 5.79,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 0.8893383145332336,
"eval_runtime": 3.1811,
"eval_samples_per_second": 59.413,
"eval_steps_per_second": 7.545,
"step": 620
},
{
"epoch": 5.89,
"learning_rate": 0.00017648598130841123,
"loss": 0.4179,
"step": 630
},
{
"epoch": 5.89,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.0192750692367554,
"eval_runtime": 3.1066,
"eval_samples_per_second": 60.839,
"eval_steps_per_second": 7.726,
"step": 630
},
{
"epoch": 5.98,
"learning_rate": 0.0001761121495327103,
"loss": 0.3667,
"step": 640
},
{
"epoch": 5.98,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 0.9950593113899231,
"eval_runtime": 3.1029,
"eval_samples_per_second": 60.911,
"eval_steps_per_second": 7.735,
"step": 640
},
{
"epoch": 6.07,
"learning_rate": 0.00017573831775700936,
"loss": 0.2212,
"step": 650
},
{
"epoch": 6.07,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 0.9019126296043396,
"eval_runtime": 3.0407,
"eval_samples_per_second": 62.156,
"eval_steps_per_second": 7.893,
"step": 650
},
{
"epoch": 6.17,
"learning_rate": 0.0001753644859813084,
"loss": 0.1881,
"step": 660
},
{
"epoch": 6.17,
"eval_accuracy": 0.656084656084656,
"eval_loss": 1.1512494087219238,
"eval_runtime": 3.0574,
"eval_samples_per_second": 61.818,
"eval_steps_per_second": 7.85,
"step": 660
},
{
"epoch": 6.26,
"learning_rate": 0.0001749906542056075,
"loss": 0.2403,
"step": 670
},
{
"epoch": 6.26,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.0092432498931885,
"eval_runtime": 3.1346,
"eval_samples_per_second": 60.295,
"eval_steps_per_second": 7.657,
"step": 670
},
{
"epoch": 6.36,
"learning_rate": 0.00017461682242990656,
"loss": 0.2597,
"step": 680
},
{
"epoch": 6.36,
"eval_accuracy": 0.6825396825396826,
"eval_loss": 1.1817060708999634,
"eval_runtime": 3.0592,
"eval_samples_per_second": 61.781,
"eval_steps_per_second": 7.845,
"step": 680
},
{
"epoch": 6.45,
"learning_rate": 0.00017424299065420563,
"loss": 0.2644,
"step": 690
},
{
"epoch": 6.45,
"eval_accuracy": 0.6507936507936508,
"eval_loss": 1.258557677268982,
"eval_runtime": 3.1473,
"eval_samples_per_second": 60.051,
"eval_steps_per_second": 7.626,
"step": 690
},
{
"epoch": 6.54,
"learning_rate": 0.00017386915887850467,
"loss": 0.2562,
"step": 700
},
{
"epoch": 6.54,
"eval_accuracy": 0.7195767195767195,
"eval_loss": 1.0005096197128296,
"eval_runtime": 3.1123,
"eval_samples_per_second": 60.726,
"eval_steps_per_second": 7.711,
"step": 700
},
{
"epoch": 6.64,
"learning_rate": 0.00017349532710280374,
"loss": 0.1487,
"step": 710
},
{
"epoch": 6.64,
"eval_accuracy": 0.6507936507936508,
"eval_loss": 1.2767467498779297,
"eval_runtime": 3.1275,
"eval_samples_per_second": 60.433,
"eval_steps_per_second": 7.674,
"step": 710
},
{
"epoch": 6.73,
"learning_rate": 0.00017312149532710283,
"loss": 0.2953,
"step": 720
},
{
"epoch": 6.73,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.253779649734497,
"eval_runtime": 3.2123,
"eval_samples_per_second": 58.836,
"eval_steps_per_second": 7.471,
"step": 720
},
{
"epoch": 6.82,
"learning_rate": 0.00017274766355140187,
"loss": 0.1725,
"step": 730
},
{
"epoch": 6.82,
"eval_accuracy": 0.656084656084656,
"eval_loss": 1.2238225936889648,
"eval_runtime": 3.1844,
"eval_samples_per_second": 59.353,
"eval_steps_per_second": 7.537,
"step": 730
},
{
"epoch": 6.92,
"learning_rate": 0.00017237383177570094,
"loss": 0.2037,
"step": 740
},
{
"epoch": 6.92,
"eval_accuracy": 0.6084656084656085,
"eval_loss": 1.468080759048462,
"eval_runtime": 3.1309,
"eval_samples_per_second": 60.366,
"eval_steps_per_second": 7.666,
"step": 740
},
{
"epoch": 7.01,
"learning_rate": 0.000172,
"loss": 0.2592,
"step": 750
},
{
"epoch": 7.01,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.2074000835418701,
"eval_runtime": 3.2681,
"eval_samples_per_second": 57.831,
"eval_steps_per_second": 7.344,
"step": 750
},
{
"epoch": 7.1,
"learning_rate": 0.00017162616822429907,
"loss": 0.1851,
"step": 760
},
{
"epoch": 7.1,
"eval_accuracy": 0.6825396825396826,
"eval_loss": 1.1313153505325317,
"eval_runtime": 3.1857,
"eval_samples_per_second": 59.328,
"eval_steps_per_second": 7.534,
"step": 760
},
{
"epoch": 7.2,
"learning_rate": 0.00017125233644859814,
"loss": 0.0958,
"step": 770
},
{
"epoch": 7.2,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 1.222944974899292,
"eval_runtime": 3.1213,
"eval_samples_per_second": 60.551,
"eval_steps_per_second": 7.689,
"step": 770
},
{
"epoch": 7.29,
"learning_rate": 0.0001708785046728972,
"loss": 0.0947,
"step": 780
},
{
"epoch": 7.29,
"eval_accuracy": 0.6613756613756614,
"eval_loss": 1.3873189687728882,
"eval_runtime": 3.0939,
"eval_samples_per_second": 61.087,
"eval_steps_per_second": 7.757,
"step": 780
},
{
"epoch": 7.38,
"learning_rate": 0.00017050467289719628,
"loss": 0.2053,
"step": 790
},
{
"epoch": 7.38,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.4111433029174805,
"eval_runtime": 3.0678,
"eval_samples_per_second": 61.607,
"eval_steps_per_second": 7.823,
"step": 790
},
{
"epoch": 7.48,
"learning_rate": 0.00017013084112149534,
"loss": 0.2165,
"step": 800
},
{
"epoch": 7.48,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.3087962865829468,
"eval_runtime": 3.1862,
"eval_samples_per_second": 59.319,
"eval_steps_per_second": 7.533,
"step": 800
},
{
"epoch": 7.57,
"learning_rate": 0.00016975700934579438,
"loss": 0.2425,
"step": 810
},
{
"epoch": 7.57,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.3991620540618896,
"eval_runtime": 3.2153,
"eval_samples_per_second": 58.781,
"eval_steps_per_second": 7.464,
"step": 810
},
{
"epoch": 7.66,
"learning_rate": 0.00016938317757009348,
"loss": 0.1644,
"step": 820
},
{
"epoch": 7.66,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.080649733543396,
"eval_runtime": 3.2137,
"eval_samples_per_second": 58.811,
"eval_steps_per_second": 7.468,
"step": 820
},
{
"epoch": 7.76,
"learning_rate": 0.00016900934579439254,
"loss": 0.3093,
"step": 830
},
{
"epoch": 7.76,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.214190125465393,
"eval_runtime": 3.1342,
"eval_samples_per_second": 60.302,
"eval_steps_per_second": 7.657,
"step": 830
},
{
"epoch": 7.85,
"learning_rate": 0.0001686355140186916,
"loss": 0.2525,
"step": 840
},
{
"epoch": 7.85,
"eval_accuracy": 0.656084656084656,
"eval_loss": 1.3408259153366089,
"eval_runtime": 3.0897,
"eval_samples_per_second": 61.171,
"eval_steps_per_second": 7.768,
"step": 840
},
{
"epoch": 7.94,
"learning_rate": 0.00016826168224299065,
"loss": 0.3339,
"step": 850
},
{
"epoch": 7.94,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.1808757781982422,
"eval_runtime": 3.2528,
"eval_samples_per_second": 58.104,
"eval_steps_per_second": 7.378,
"step": 850
},
{
"epoch": 8.04,
"learning_rate": 0.00016788785046728972,
"loss": 0.1044,
"step": 860
},
{
"epoch": 8.04,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.2688654661178589,
"eval_runtime": 3.09,
"eval_samples_per_second": 61.165,
"eval_steps_per_second": 7.767,
"step": 860
},
{
"epoch": 8.13,
"learning_rate": 0.0001675140186915888,
"loss": 0.093,
"step": 870
},
{
"epoch": 8.13,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.209309697151184,
"eval_runtime": 3.1281,
"eval_samples_per_second": 60.421,
"eval_steps_per_second": 7.672,
"step": 870
},
{
"epoch": 8.22,
"learning_rate": 0.00016714018691588785,
"loss": 0.2934,
"step": 880
},
{
"epoch": 8.22,
"eval_accuracy": 0.671957671957672,
"eval_loss": 1.1540151834487915,
"eval_runtime": 3.27,
"eval_samples_per_second": 57.798,
"eval_steps_per_second": 7.339,
"step": 880
},
{
"epoch": 8.32,
"learning_rate": 0.00016676635514018692,
"loss": 0.2133,
"step": 890
},
{
"epoch": 8.32,
"eval_accuracy": 0.6349206349206349,
"eval_loss": 1.5835676193237305,
"eval_runtime": 3.2878,
"eval_samples_per_second": 57.485,
"eval_steps_per_second": 7.3,
"step": 890
},
{
"epoch": 8.41,
"learning_rate": 0.000166392523364486,
"loss": 0.2045,
"step": 900
},
{
"epoch": 8.41,
"eval_accuracy": 0.6507936507936508,
"eval_loss": 1.2564616203308105,
"eval_runtime": 3.1746,
"eval_samples_per_second": 59.536,
"eval_steps_per_second": 7.56,
"step": 900
},
{
"epoch": 8.5,
"learning_rate": 0.00016601869158878506,
"loss": 0.2886,
"step": 910
},
{
"epoch": 8.5,
"eval_accuracy": 0.6613756613756614,
"eval_loss": 1.3674818277359009,
"eval_runtime": 3.2413,
"eval_samples_per_second": 58.311,
"eval_steps_per_second": 7.405,
"step": 910
},
{
"epoch": 8.6,
"learning_rate": 0.00016564485981308412,
"loss": 0.128,
"step": 920
},
{
"epoch": 8.6,
"eval_accuracy": 0.671957671957672,
"eval_loss": 1.2137342691421509,
"eval_runtime": 3.222,
"eval_samples_per_second": 58.658,
"eval_steps_per_second": 7.449,
"step": 920
},
{
"epoch": 8.69,
"learning_rate": 0.0001652710280373832,
"loss": 0.0785,
"step": 930
},
{
"epoch": 8.69,
"eval_accuracy": 0.6825396825396826,
"eval_loss": 1.3206517696380615,
"eval_runtime": 3.21,
"eval_samples_per_second": 58.878,
"eval_steps_per_second": 7.477,
"step": 930
},
{
"epoch": 8.79,
"learning_rate": 0.00016489719626168226,
"loss": 0.1409,
"step": 940
},
{
"epoch": 8.79,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.2279229164123535,
"eval_runtime": 3.1127,
"eval_samples_per_second": 60.719,
"eval_steps_per_second": 7.71,
"step": 940
},
{
"epoch": 8.88,
"learning_rate": 0.00016452336448598132,
"loss": 0.1221,
"step": 950
},
{
"epoch": 8.88,
"eval_accuracy": 0.7513227513227513,
"eval_loss": 0.9318807125091553,
"eval_runtime": 3.2281,
"eval_samples_per_second": 58.549,
"eval_steps_per_second": 7.435,
"step": 950
},
{
"epoch": 8.97,
"learning_rate": 0.00016414953271028036,
"loss": 0.112,
"step": 960
},
{
"epoch": 8.97,
"eval_accuracy": 0.671957671957672,
"eval_loss": 1.267318606376648,
"eval_runtime": 3.1888,
"eval_samples_per_second": 59.27,
"eval_steps_per_second": 7.526,
"step": 960
},
{
"epoch": 9.07,
"learning_rate": 0.00016377570093457946,
"loss": 0.0863,
"step": 970
},
{
"epoch": 9.07,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 1.3446311950683594,
"eval_runtime": 3.2112,
"eval_samples_per_second": 58.857,
"eval_steps_per_second": 7.474,
"step": 970
},
{
"epoch": 9.16,
"learning_rate": 0.00016340186915887853,
"loss": 0.0915,
"step": 980
},
{
"epoch": 9.16,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.1720484495162964,
"eval_runtime": 3.1413,
"eval_samples_per_second": 60.167,
"eval_steps_per_second": 7.64,
"step": 980
},
{
"epoch": 9.25,
"learning_rate": 0.00016302803738317757,
"loss": 0.0911,
"step": 990
},
{
"epoch": 9.25,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.2161829471588135,
"eval_runtime": 3.2839,
"eval_samples_per_second": 57.553,
"eval_steps_per_second": 7.308,
"step": 990
},
{
"epoch": 9.35,
"learning_rate": 0.00016265420560747663,
"loss": 0.0763,
"step": 1000
},
{
"epoch": 9.35,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.1872042417526245,
"eval_runtime": 3.1612,
"eval_samples_per_second": 59.787,
"eval_steps_per_second": 7.592,
"step": 1000
},
{
"epoch": 9.44,
"learning_rate": 0.0001622803738317757,
"loss": 0.0524,
"step": 1010
},
{
"epoch": 9.44,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.3205093145370483,
"eval_runtime": 3.1614,
"eval_samples_per_second": 59.784,
"eval_steps_per_second": 7.592,
"step": 1010
},
{
"epoch": 9.53,
"learning_rate": 0.0001619065420560748,
"loss": 0.1508,
"step": 1020
},
{
"epoch": 9.53,
"eval_accuracy": 0.6507936507936508,
"eval_loss": 1.5726176500320435,
"eval_runtime": 3.161,
"eval_samples_per_second": 59.791,
"eval_steps_per_second": 7.593,
"step": 1020
},
{
"epoch": 9.63,
"learning_rate": 0.00016153271028037383,
"loss": 0.0994,
"step": 1030
},
{
"epoch": 9.63,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 1.3505302667617798,
"eval_runtime": 3.1505,
"eval_samples_per_second": 59.99,
"eval_steps_per_second": 7.618,
"step": 1030
},
{
"epoch": 9.72,
"learning_rate": 0.0001611588785046729,
"loss": 0.223,
"step": 1040
},
{
"epoch": 9.72,
"eval_accuracy": 0.671957671957672,
"eval_loss": 1.3896968364715576,
"eval_runtime": 3.1746,
"eval_samples_per_second": 59.536,
"eval_steps_per_second": 7.56,
"step": 1040
},
{
"epoch": 9.81,
"learning_rate": 0.00016078504672897197,
"loss": 0.1115,
"step": 1050
},
{
"epoch": 9.81,
"eval_accuracy": 0.6772486772486772,
"eval_loss": 1.3965896368026733,
"eval_runtime": 3.2706,
"eval_samples_per_second": 57.787,
"eval_steps_per_second": 7.338,
"step": 1050
},
{
"epoch": 9.91,
"learning_rate": 0.00016041121495327104,
"loss": 0.1485,
"step": 1060
},
{
"epoch": 9.91,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.4686475992202759,
"eval_runtime": 3.1507,
"eval_samples_per_second": 59.987,
"eval_steps_per_second": 7.617,
"step": 1060
},
{
"epoch": 10.0,
"learning_rate": 0.0001600373831775701,
"loss": 0.18,
"step": 1070
},
{
"epoch": 10.0,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.2233675718307495,
"eval_runtime": 3.1543,
"eval_samples_per_second": 59.919,
"eval_steps_per_second": 7.609,
"step": 1070
},
{
"epoch": 10.09,
"learning_rate": 0.00015966355140186917,
"loss": 0.1366,
"step": 1080
},
{
"epoch": 10.09,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.4836784601211548,
"eval_runtime": 3.2142,
"eval_samples_per_second": 58.802,
"eval_steps_per_second": 7.467,
"step": 1080
},
{
"epoch": 10.19,
"learning_rate": 0.00015928971962616824,
"loss": 0.149,
"step": 1090
},
{
"epoch": 10.19,
"eval_accuracy": 0.671957671957672,
"eval_loss": 1.4587175846099854,
"eval_runtime": 3.2536,
"eval_samples_per_second": 58.09,
"eval_steps_per_second": 7.377,
"step": 1090
},
{
"epoch": 10.28,
"learning_rate": 0.0001589158878504673,
"loss": 0.1618,
"step": 1100
},
{
"epoch": 10.28,
"eval_accuracy": 0.6666666666666666,
"eval_loss": 1.3593031167984009,
"eval_runtime": 3.1414,
"eval_samples_per_second": 60.165,
"eval_steps_per_second": 7.64,
"step": 1100
},
{
"epoch": 10.37,
"learning_rate": 0.00015854205607476635,
"loss": 0.1302,
"step": 1110
},
{
"epoch": 10.37,
"eval_accuracy": 0.6349206349206349,
"eval_loss": 1.5082346200942993,
"eval_runtime": 3.1372,
"eval_samples_per_second": 60.244,
"eval_steps_per_second": 7.65,
"step": 1110
},
{
"epoch": 10.47,
"learning_rate": 0.00015816822429906544,
"loss": 0.0208,
"step": 1120
},
{
"epoch": 10.47,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.4162836074829102,
"eval_runtime": 3.2255,
"eval_samples_per_second": 58.596,
"eval_steps_per_second": 7.441,
"step": 1120
},
{
"epoch": 10.56,
"learning_rate": 0.0001577943925233645,
"loss": 0.0314,
"step": 1130
},
{
"epoch": 10.56,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.2450639009475708,
"eval_runtime": 3.2013,
"eval_samples_per_second": 59.038,
"eval_steps_per_second": 7.497,
"step": 1130
},
{
"epoch": 10.65,
"learning_rate": 0.00015742056074766355,
"loss": 0.0355,
"step": 1140
},
{
"epoch": 10.65,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.3142927885055542,
"eval_runtime": 3.2738,
"eval_samples_per_second": 57.732,
"eval_steps_per_second": 7.331,
"step": 1140
},
{
"epoch": 10.75,
"learning_rate": 0.00015704672897196261,
"loss": 0.1024,
"step": 1150
},
{
"epoch": 10.75,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.4214942455291748,
"eval_runtime": 3.2147,
"eval_samples_per_second": 58.792,
"eval_steps_per_second": 7.466,
"step": 1150
},
{
"epoch": 10.84,
"learning_rate": 0.00015667289719626168,
"loss": 0.0733,
"step": 1160
},
{
"epoch": 10.84,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.2427018880844116,
"eval_runtime": 3.1456,
"eval_samples_per_second": 60.085,
"eval_steps_per_second": 7.63,
"step": 1160
},
{
"epoch": 10.93,
"learning_rate": 0.00015629906542056078,
"loss": 0.0542,
"step": 1170
},
{
"epoch": 10.93,
"eval_accuracy": 0.6825396825396826,
"eval_loss": 1.5809307098388672,
"eval_runtime": 3.2372,
"eval_samples_per_second": 58.384,
"eval_steps_per_second": 7.414,
"step": 1170
},
{
"epoch": 11.03,
"learning_rate": 0.00015592523364485982,
"loss": 0.0995,
"step": 1180
},
{
"epoch": 11.03,
"eval_accuracy": 0.6613756613756614,
"eval_loss": 1.5994837284088135,
"eval_runtime": 3.153,
"eval_samples_per_second": 59.943,
"eval_steps_per_second": 7.612,
"step": 1180
},
{
"epoch": 11.12,
"learning_rate": 0.00015555140186915888,
"loss": 0.0653,
"step": 1190
},
{
"epoch": 11.12,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.3932106494903564,
"eval_runtime": 3.2168,
"eval_samples_per_second": 58.753,
"eval_steps_per_second": 7.461,
"step": 1190
},
{
"epoch": 11.21,
"learning_rate": 0.00015517757009345795,
"loss": 0.0339,
"step": 1200
},
{
"epoch": 11.21,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.2856649160385132,
"eval_runtime": 3.0564,
"eval_samples_per_second": 61.837,
"eval_steps_per_second": 7.852,
"step": 1200
},
{
"epoch": 11.31,
"learning_rate": 0.00015480373831775702,
"loss": 0.1038,
"step": 1210
},
{
"epoch": 11.31,
"eval_accuracy": 0.7407407407407407,
"eval_loss": 1.2895965576171875,
"eval_runtime": 3.1971,
"eval_samples_per_second": 59.115,
"eval_steps_per_second": 7.507,
"step": 1210
},
{
"epoch": 11.4,
"learning_rate": 0.00015442990654205608,
"loss": 0.0415,
"step": 1220
},
{
"epoch": 11.4,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.2908622026443481,
"eval_runtime": 3.2149,
"eval_samples_per_second": 58.789,
"eval_steps_per_second": 7.465,
"step": 1220
},
{
"epoch": 11.5,
"learning_rate": 0.00015405607476635515,
"loss": 0.0629,
"step": 1230
},
{
"epoch": 11.5,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.3047831058502197,
"eval_runtime": 3.228,
"eval_samples_per_second": 58.55,
"eval_steps_per_second": 7.435,
"step": 1230
},
{
"epoch": 11.59,
"learning_rate": 0.00015368224299065422,
"loss": 0.0137,
"step": 1240
},
{
"epoch": 11.59,
"eval_accuracy": 0.7195767195767195,
"eval_loss": 1.3543046712875366,
"eval_runtime": 3.1086,
"eval_samples_per_second": 60.8,
"eval_steps_per_second": 7.721,
"step": 1240
},
{
"epoch": 11.68,
"learning_rate": 0.0001533084112149533,
"loss": 0.035,
"step": 1250
},
{
"epoch": 11.68,
"eval_accuracy": 0.7301587301587301,
"eval_loss": 1.2130463123321533,
"eval_runtime": 3.4203,
"eval_samples_per_second": 55.258,
"eval_steps_per_second": 7.017,
"step": 1250
},
{
"epoch": 11.78,
"learning_rate": 0.00015293457943925233,
"loss": 0.0102,
"step": 1260
},
{
"epoch": 11.78,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.2087303400039673,
"eval_runtime": 3.3746,
"eval_samples_per_second": 56.006,
"eval_steps_per_second": 7.112,
"step": 1260
},
{
"epoch": 11.87,
"learning_rate": 0.00015256074766355142,
"loss": 0.0409,
"step": 1270
},
{
"epoch": 11.87,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.2329652309417725,
"eval_runtime": 3.1081,
"eval_samples_per_second": 60.809,
"eval_steps_per_second": 7.722,
"step": 1270
},
{
"epoch": 11.96,
"learning_rate": 0.0001521869158878505,
"loss": 0.0659,
"step": 1280
},
{
"epoch": 11.96,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.3084936141967773,
"eval_runtime": 3.3561,
"eval_samples_per_second": 56.315,
"eval_steps_per_second": 7.151,
"step": 1280
},
{
"epoch": 12.06,
"learning_rate": 0.00015181308411214953,
"loss": 0.035,
"step": 1290
},
{
"epoch": 12.06,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.405351161956787,
"eval_runtime": 3.4254,
"eval_samples_per_second": 55.175,
"eval_steps_per_second": 7.006,
"step": 1290
},
{
"epoch": 12.15,
"learning_rate": 0.0001514392523364486,
"loss": 0.103,
"step": 1300
},
{
"epoch": 12.15,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.464030146598816,
"eval_runtime": 3.2557,
"eval_samples_per_second": 58.053,
"eval_steps_per_second": 7.372,
"step": 1300
},
{
"epoch": 12.24,
"learning_rate": 0.00015106542056074766,
"loss": 0.0238,
"step": 1310
},
{
"epoch": 12.24,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.347579002380371,
"eval_runtime": 3.048,
"eval_samples_per_second": 62.008,
"eval_steps_per_second": 7.874,
"step": 1310
},
{
"epoch": 12.34,
"learning_rate": 0.00015069158878504676,
"loss": 0.0196,
"step": 1320
},
{
"epoch": 12.34,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.40040123462677,
"eval_runtime": 3.1832,
"eval_samples_per_second": 59.375,
"eval_steps_per_second": 7.54,
"step": 1320
},
{
"epoch": 12.43,
"learning_rate": 0.0001503177570093458,
"loss": 0.009,
"step": 1330
},
{
"epoch": 12.43,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.4277156591415405,
"eval_runtime": 3.2679,
"eval_samples_per_second": 57.836,
"eval_steps_per_second": 7.344,
"step": 1330
},
{
"epoch": 12.52,
"learning_rate": 0.00014994392523364486,
"loss": 0.0238,
"step": 1340
},
{
"epoch": 12.52,
"eval_accuracy": 0.7407407407407407,
"eval_loss": 1.4086812734603882,
"eval_runtime": 3.2187,
"eval_samples_per_second": 58.719,
"eval_steps_per_second": 7.456,
"step": 1340
},
{
"epoch": 12.62,
"learning_rate": 0.00014957009345794393,
"loss": 0.0468,
"step": 1350
},
{
"epoch": 12.62,
"eval_accuracy": 0.7195767195767195,
"eval_loss": 1.3357652425765991,
"eval_runtime": 3.1607,
"eval_samples_per_second": 59.798,
"eval_steps_per_second": 7.593,
"step": 1350
},
{
"epoch": 12.71,
"learning_rate": 0.000149196261682243,
"loss": 0.0207,
"step": 1360
},
{
"epoch": 12.71,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.4172828197479248,
"eval_runtime": 3.0888,
"eval_samples_per_second": 61.188,
"eval_steps_per_second": 7.77,
"step": 1360
},
{
"epoch": 12.8,
"learning_rate": 0.00014882242990654207,
"loss": 0.0138,
"step": 1370
},
{
"epoch": 12.8,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.4307596683502197,
"eval_runtime": 3.1887,
"eval_samples_per_second": 59.271,
"eval_steps_per_second": 7.526,
"step": 1370
},
{
"epoch": 12.9,
"learning_rate": 0.00014844859813084113,
"loss": 0.0241,
"step": 1380
},
{
"epoch": 12.9,
"eval_accuracy": 0.6772486772486772,
"eval_loss": 1.6381709575653076,
"eval_runtime": 3.2282,
"eval_samples_per_second": 58.546,
"eval_steps_per_second": 7.434,
"step": 1380
},
{
"epoch": 12.99,
"learning_rate": 0.0001480747663551402,
"loss": 0.0224,
"step": 1390
},
{
"epoch": 12.99,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.513045072555542,
"eval_runtime": 3.1555,
"eval_samples_per_second": 59.896,
"eval_steps_per_second": 7.606,
"step": 1390
},
{
"epoch": 13.08,
"learning_rate": 0.00014770093457943924,
"loss": 0.0367,
"step": 1400
},
{
"epoch": 13.08,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.5821019411087036,
"eval_runtime": 3.1013,
"eval_samples_per_second": 60.941,
"eval_steps_per_second": 7.739,
"step": 1400
},
{
"epoch": 13.18,
"learning_rate": 0.0001473271028037383,
"loss": 0.0201,
"step": 1410
},
{
"epoch": 13.18,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.499505877494812,
"eval_runtime": 3.3162,
"eval_samples_per_second": 56.994,
"eval_steps_per_second": 7.237,
"step": 1410
},
{
"epoch": 13.27,
"learning_rate": 0.0001469532710280374,
"loss": 0.0431,
"step": 1420
},
{
"epoch": 13.27,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.3571968078613281,
"eval_runtime": 3.1517,
"eval_samples_per_second": 59.968,
"eval_steps_per_second": 7.615,
"step": 1420
},
{
"epoch": 13.36,
"learning_rate": 0.00014657943925233647,
"loss": 0.0137,
"step": 1430
},
{
"epoch": 13.36,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.3700077533721924,
"eval_runtime": 3.0539,
"eval_samples_per_second": 61.889,
"eval_steps_per_second": 7.859,
"step": 1430
},
{
"epoch": 13.46,
"learning_rate": 0.0001462056074766355,
"loss": 0.0498,
"step": 1440
},
{
"epoch": 13.46,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.6434003114700317,
"eval_runtime": 3.0715,
"eval_samples_per_second": 61.534,
"eval_steps_per_second": 7.814,
"step": 1440
},
{
"epoch": 13.55,
"learning_rate": 0.00014583177570093458,
"loss": 0.0175,
"step": 1450
},
{
"epoch": 13.55,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.7298402786254883,
"eval_runtime": 3.1073,
"eval_samples_per_second": 60.825,
"eval_steps_per_second": 7.724,
"step": 1450
},
{
"epoch": 13.64,
"learning_rate": 0.00014545794392523364,
"loss": 0.0142,
"step": 1460
},
{
"epoch": 13.64,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.5783988237380981,
"eval_runtime": 3.116,
"eval_samples_per_second": 60.655,
"eval_steps_per_second": 7.702,
"step": 1460
},
{
"epoch": 13.74,
"learning_rate": 0.0001450841121495327,
"loss": 0.0235,
"step": 1470
},
{
"epoch": 13.74,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.6561763286590576,
"eval_runtime": 3.2007,
"eval_samples_per_second": 59.049,
"eval_steps_per_second": 7.498,
"step": 1470
},
{
"epoch": 13.83,
"learning_rate": 0.00014471028037383178,
"loss": 0.0524,
"step": 1480
},
{
"epoch": 13.83,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.831541657447815,
"eval_runtime": 3.3046,
"eval_samples_per_second": 57.193,
"eval_steps_per_second": 7.263,
"step": 1480
},
{
"epoch": 13.93,
"learning_rate": 0.00014433644859813085,
"loss": 0.0506,
"step": 1490
},
{
"epoch": 13.93,
"eval_accuracy": 0.6825396825396826,
"eval_loss": 1.6628289222717285,
"eval_runtime": 3.2548,
"eval_samples_per_second": 58.068,
"eval_steps_per_second": 7.374,
"step": 1490
},
{
"epoch": 14.02,
"learning_rate": 0.0001439626168224299,
"loss": 0.0566,
"step": 1500
},
{
"epoch": 14.02,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.6691248416900635,
"eval_runtime": 3.3252,
"eval_samples_per_second": 56.839,
"eval_steps_per_second": 7.218,
"step": 1500
},
{
"epoch": 14.11,
"learning_rate": 0.00014358878504672898,
"loss": 0.0872,
"step": 1510
},
{
"epoch": 14.11,
"eval_accuracy": 0.6084656084656085,
"eval_loss": 2.196704387664795,
"eval_runtime": 3.2014,
"eval_samples_per_second": 59.037,
"eval_steps_per_second": 7.497,
"step": 1510
},
{
"epoch": 14.21,
"learning_rate": 0.00014321495327102805,
"loss": 0.1338,
"step": 1520
},
{
"epoch": 14.21,
"eval_accuracy": 0.6507936507936508,
"eval_loss": 1.7845758199691772,
"eval_runtime": 3.176,
"eval_samples_per_second": 59.51,
"eval_steps_per_second": 7.557,
"step": 1520
},
{
"epoch": 14.3,
"learning_rate": 0.00014284112149532711,
"loss": 0.0222,
"step": 1530
},
{
"epoch": 14.3,
"eval_accuracy": 0.6772486772486772,
"eval_loss": 1.6833900213241577,
"eval_runtime": 3.1793,
"eval_samples_per_second": 59.447,
"eval_steps_per_second": 7.549,
"step": 1530
},
{
"epoch": 14.39,
"learning_rate": 0.00014246728971962618,
"loss": 0.0254,
"step": 1540
},
{
"epoch": 14.39,
"eval_accuracy": 0.656084656084656,
"eval_loss": 1.9036774635314941,
"eval_runtime": 3.1611,
"eval_samples_per_second": 59.789,
"eval_steps_per_second": 7.592,
"step": 1540
},
{
"epoch": 14.49,
"learning_rate": 0.00014209345794392522,
"loss": 0.0244,
"step": 1550
},
{
"epoch": 14.49,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.5309627056121826,
"eval_runtime": 3.1613,
"eval_samples_per_second": 59.786,
"eval_steps_per_second": 7.592,
"step": 1550
},
{
"epoch": 14.58,
"learning_rate": 0.0001417196261682243,
"loss": 0.0178,
"step": 1560
},
{
"epoch": 14.58,
"eval_accuracy": 0.6825396825396826,
"eval_loss": 1.53221595287323,
"eval_runtime": 3.144,
"eval_samples_per_second": 60.114,
"eval_steps_per_second": 7.634,
"step": 1560
},
{
"epoch": 14.67,
"learning_rate": 0.00014134579439252338,
"loss": 0.0045,
"step": 1570
},
{
"epoch": 14.67,
"eval_accuracy": 0.7301587301587301,
"eval_loss": 1.3083724975585938,
"eval_runtime": 3.1864,
"eval_samples_per_second": 59.314,
"eval_steps_per_second": 7.532,
"step": 1570
},
{
"epoch": 14.77,
"learning_rate": 0.00014097196261682245,
"loss": 0.0485,
"step": 1580
},
{
"epoch": 14.77,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.2855416536331177,
"eval_runtime": 3.2166,
"eval_samples_per_second": 58.758,
"eval_steps_per_second": 7.461,
"step": 1580
},
{
"epoch": 14.86,
"learning_rate": 0.0001405981308411215,
"loss": 0.0575,
"step": 1590
},
{
"epoch": 14.86,
"eval_accuracy": 0.7301587301587301,
"eval_loss": 1.3779939413070679,
"eval_runtime": 3.2389,
"eval_samples_per_second": 58.353,
"eval_steps_per_second": 7.41,
"step": 1590
},
{
"epoch": 14.95,
"learning_rate": 0.00014022429906542056,
"loss": 0.0131,
"step": 1600
},
{
"epoch": 14.95,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.6261014938354492,
"eval_runtime": 3.1879,
"eval_samples_per_second": 59.286,
"eval_steps_per_second": 7.528,
"step": 1600
},
{
"epoch": 15.05,
"learning_rate": 0.00013985046728971963,
"loss": 0.0059,
"step": 1610
},
{
"epoch": 15.05,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 1.6843873262405396,
"eval_runtime": 3.2196,
"eval_samples_per_second": 58.702,
"eval_steps_per_second": 7.454,
"step": 1610
},
{
"epoch": 15.14,
"learning_rate": 0.0001394766355140187,
"loss": 0.0113,
"step": 1620
},
{
"epoch": 15.14,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.4520890712738037,
"eval_runtime": 3.1975,
"eval_samples_per_second": 59.108,
"eval_steps_per_second": 7.506,
"step": 1620
},
{
"epoch": 15.23,
"learning_rate": 0.00013910280373831776,
"loss": 0.0264,
"step": 1630
},
{
"epoch": 15.23,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.5394195318222046,
"eval_runtime": 3.2412,
"eval_samples_per_second": 58.312,
"eval_steps_per_second": 7.405,
"step": 1630
},
{
"epoch": 15.33,
"learning_rate": 0.00013872897196261683,
"loss": 0.0131,
"step": 1640
},
{
"epoch": 15.33,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.521421194076538,
"eval_runtime": 3.2643,
"eval_samples_per_second": 57.899,
"eval_steps_per_second": 7.352,
"step": 1640
},
{
"epoch": 15.42,
"learning_rate": 0.0001383551401869159,
"loss": 0.0067,
"step": 1650
},
{
"epoch": 15.42,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.8741341829299927,
"eval_runtime": 3.2542,
"eval_samples_per_second": 58.079,
"eval_steps_per_second": 7.375,
"step": 1650
},
{
"epoch": 15.51,
"learning_rate": 0.00013798130841121496,
"loss": 0.1502,
"step": 1660
},
{
"epoch": 15.51,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.5996290445327759,
"eval_runtime": 3.2543,
"eval_samples_per_second": 58.076,
"eval_steps_per_second": 7.375,
"step": 1660
},
{
"epoch": 15.61,
"learning_rate": 0.00013760747663551403,
"loss": 0.0505,
"step": 1670
},
{
"epoch": 15.61,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.5197315216064453,
"eval_runtime": 3.3082,
"eval_samples_per_second": 57.131,
"eval_steps_per_second": 7.255,
"step": 1670
},
{
"epoch": 15.7,
"learning_rate": 0.0001372336448598131,
"loss": 0.0096,
"step": 1680
},
{
"epoch": 15.7,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.562042236328125,
"eval_runtime": 3.2413,
"eval_samples_per_second": 58.31,
"eval_steps_per_second": 7.404,
"step": 1680
},
{
"epoch": 15.79,
"learning_rate": 0.00013685981308411216,
"loss": 0.0623,
"step": 1690
},
{
"epoch": 15.79,
"eval_accuracy": 0.7301587301587301,
"eval_loss": 1.5186712741851807,
"eval_runtime": 3.3016,
"eval_samples_per_second": 57.244,
"eval_steps_per_second": 7.269,
"step": 1690
},
{
"epoch": 15.89,
"learning_rate": 0.0001364859813084112,
"loss": 0.0732,
"step": 1700
},
{
"epoch": 15.89,
"eval_accuracy": 0.6825396825396826,
"eval_loss": 1.5817829370498657,
"eval_runtime": 3.2211,
"eval_samples_per_second": 58.675,
"eval_steps_per_second": 7.451,
"step": 1700
},
{
"epoch": 15.98,
"learning_rate": 0.00013611214953271027,
"loss": 0.0587,
"step": 1710
},
{
"epoch": 15.98,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.331645131111145,
"eval_runtime": 3.254,
"eval_samples_per_second": 58.083,
"eval_steps_per_second": 7.376,
"step": 1710
},
{
"epoch": 16.07,
"learning_rate": 0.00013573831775700936,
"loss": 0.0135,
"step": 1720
},
{
"epoch": 16.07,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.5572881698608398,
"eval_runtime": 3.1348,
"eval_samples_per_second": 60.292,
"eval_steps_per_second": 7.656,
"step": 1720
},
{
"epoch": 16.17,
"learning_rate": 0.00013536448598130843,
"loss": 0.0405,
"step": 1730
},
{
"epoch": 16.17,
"eval_accuracy": 0.7195767195767195,
"eval_loss": 1.5583974123001099,
"eval_runtime": 3.1747,
"eval_samples_per_second": 59.534,
"eval_steps_per_second": 7.56,
"step": 1730
},
{
"epoch": 16.26,
"learning_rate": 0.00013499065420560747,
"loss": 0.0379,
"step": 1740
},
{
"epoch": 16.26,
"eval_accuracy": 0.6613756613756614,
"eval_loss": 1.8542115688323975,
"eval_runtime": 3.4991,
"eval_samples_per_second": 54.014,
"eval_steps_per_second": 6.859,
"step": 1740
},
{
"epoch": 16.36,
"learning_rate": 0.00013461682242990654,
"loss": 0.0778,
"step": 1750
},
{
"epoch": 16.36,
"eval_accuracy": 0.6772486772486772,
"eval_loss": 1.81163489818573,
"eval_runtime": 3.2013,
"eval_samples_per_second": 59.039,
"eval_steps_per_second": 7.497,
"step": 1750
},
{
"epoch": 16.45,
"learning_rate": 0.00013424299065420563,
"loss": 0.0178,
"step": 1760
},
{
"epoch": 16.45,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.6405593156814575,
"eval_runtime": 3.1824,
"eval_samples_per_second": 59.389,
"eval_steps_per_second": 7.541,
"step": 1760
},
{
"epoch": 16.54,
"learning_rate": 0.00013386915887850467,
"loss": 0.0252,
"step": 1770
},
{
"epoch": 16.54,
"eval_accuracy": 0.6772486772486772,
"eval_loss": 1.6421043872833252,
"eval_runtime": 3.2328,
"eval_samples_per_second": 58.464,
"eval_steps_per_second": 7.424,
"step": 1770
},
{
"epoch": 16.64,
"learning_rate": 0.00013349532710280374,
"loss": 0.0638,
"step": 1780
},
{
"epoch": 16.64,
"eval_accuracy": 0.746031746031746,
"eval_loss": 1.4504343271255493,
"eval_runtime": 3.1566,
"eval_samples_per_second": 59.875,
"eval_steps_per_second": 7.603,
"step": 1780
},
{
"epoch": 16.73,
"learning_rate": 0.0001331214953271028,
"loss": 0.0138,
"step": 1790
},
{
"epoch": 16.73,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.5848379135131836,
"eval_runtime": 3.2276,
"eval_samples_per_second": 58.557,
"eval_steps_per_second": 7.436,
"step": 1790
},
{
"epoch": 16.82,
"learning_rate": 0.00013274766355140188,
"loss": 0.1027,
"step": 1800
},
{
"epoch": 16.82,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.5205998420715332,
"eval_runtime": 3.2514,
"eval_samples_per_second": 58.128,
"eval_steps_per_second": 7.381,
"step": 1800
},
{
"epoch": 16.92,
"learning_rate": 0.00013237383177570094,
"loss": 0.086,
"step": 1810
},
{
"epoch": 16.92,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.3391714096069336,
"eval_runtime": 3.2275,
"eval_samples_per_second": 58.56,
"eval_steps_per_second": 7.436,
"step": 1810
},
{
"epoch": 17.01,
"learning_rate": 0.000132,
"loss": 0.1029,
"step": 1820
},
{
"epoch": 17.01,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.4613063335418701,
"eval_runtime": 3.268,
"eval_samples_per_second": 57.834,
"eval_steps_per_second": 7.344,
"step": 1820
},
{
"epoch": 17.1,
"learning_rate": 0.00013162616822429908,
"loss": 0.115,
"step": 1830
},
{
"epoch": 17.1,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 1.7344552278518677,
"eval_runtime": 3.2414,
"eval_samples_per_second": 58.308,
"eval_steps_per_second": 7.404,
"step": 1830
},
{
"epoch": 17.2,
"learning_rate": 0.00013125233644859814,
"loss": 0.0286,
"step": 1840
},
{
"epoch": 17.2,
"eval_accuracy": 0.7301587301587301,
"eval_loss": 1.5347076654434204,
"eval_runtime": 3.2891,
"eval_samples_per_second": 57.462,
"eval_steps_per_second": 7.297,
"step": 1840
},
{
"epoch": 17.29,
"learning_rate": 0.00013087850467289718,
"loss": 0.0608,
"step": 1850
},
{
"epoch": 17.29,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 1.5781868696212769,
"eval_runtime": 3.2271,
"eval_samples_per_second": 58.566,
"eval_steps_per_second": 7.437,
"step": 1850
},
{
"epoch": 17.38,
"learning_rate": 0.00013050467289719628,
"loss": 0.1215,
"step": 1860
},
{
"epoch": 17.38,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 1.5484486818313599,
"eval_runtime": 3.2944,
"eval_samples_per_second": 57.37,
"eval_steps_per_second": 7.285,
"step": 1860
},
{
"epoch": 17.48,
"learning_rate": 0.00013013084112149535,
"loss": 0.095,
"step": 1870
},
{
"epoch": 17.48,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.5258464813232422,
"eval_runtime": 3.265,
"eval_samples_per_second": 57.887,
"eval_steps_per_second": 7.351,
"step": 1870
},
{
"epoch": 17.57,
"learning_rate": 0.0001297570093457944,
"loss": 0.0176,
"step": 1880
},
{
"epoch": 17.57,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.5887885093688965,
"eval_runtime": 3.1883,
"eval_samples_per_second": 59.278,
"eval_steps_per_second": 7.527,
"step": 1880
},
{
"epoch": 17.66,
"learning_rate": 0.00012938317757009345,
"loss": 0.0208,
"step": 1890
},
{
"epoch": 17.66,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 2.017876625061035,
"eval_runtime": 3.1617,
"eval_samples_per_second": 59.778,
"eval_steps_per_second": 7.591,
"step": 1890
},
{
"epoch": 17.76,
"learning_rate": 0.00012900934579439252,
"loss": 0.0752,
"step": 1900
},
{
"epoch": 17.76,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.8983193635940552,
"eval_runtime": 3.3023,
"eval_samples_per_second": 57.233,
"eval_steps_per_second": 7.268,
"step": 1900
},
{
"epoch": 17.85,
"learning_rate": 0.00012863551401869162,
"loss": 0.0609,
"step": 1910
},
{
"epoch": 17.85,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.6523683071136475,
"eval_runtime": 3.2531,
"eval_samples_per_second": 58.099,
"eval_steps_per_second": 7.378,
"step": 1910
},
{
"epoch": 17.94,
"learning_rate": 0.00012826168224299066,
"loss": 0.0059,
"step": 1920
},
{
"epoch": 17.94,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.6338155269622803,
"eval_runtime": 3.2413,
"eval_samples_per_second": 58.309,
"eval_steps_per_second": 7.404,
"step": 1920
},
{
"epoch": 18.04,
"learning_rate": 0.00012788785046728972,
"loss": 0.0264,
"step": 1930
},
{
"epoch": 18.04,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.6208415031433105,
"eval_runtime": 3.4796,
"eval_samples_per_second": 54.317,
"eval_steps_per_second": 6.897,
"step": 1930
},
{
"epoch": 18.13,
"learning_rate": 0.0001275140186915888,
"loss": 0.0128,
"step": 1940
},
{
"epoch": 18.13,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.521356225013733,
"eval_runtime": 3.1889,
"eval_samples_per_second": 59.268,
"eval_steps_per_second": 7.526,
"step": 1940
},
{
"epoch": 18.22,
"learning_rate": 0.00012714018691588786,
"loss": 0.0053,
"step": 1950
},
{
"epoch": 18.22,
"eval_accuracy": 0.7195767195767195,
"eval_loss": 1.5842546224594116,
"eval_runtime": 3.1478,
"eval_samples_per_second": 60.042,
"eval_steps_per_second": 7.624,
"step": 1950
},
{
"epoch": 18.32,
"learning_rate": 0.00012676635514018692,
"loss": 0.0129,
"step": 1960
},
{
"epoch": 18.32,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.5440059900283813,
"eval_runtime": 4.1754,
"eval_samples_per_second": 45.265,
"eval_steps_per_second": 5.748,
"step": 1960
},
{
"epoch": 18.41,
"learning_rate": 0.000126392523364486,
"loss": 0.016,
"step": 1970
},
{
"epoch": 18.41,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.4988614320755005,
"eval_runtime": 3.1898,
"eval_samples_per_second": 59.252,
"eval_steps_per_second": 7.524,
"step": 1970
},
{
"epoch": 18.5,
"learning_rate": 0.00012601869158878506,
"loss": 0.0696,
"step": 1980
},
{
"epoch": 18.5,
"eval_accuracy": 0.7407407407407407,
"eval_loss": 1.4819732904434204,
"eval_runtime": 3.1615,
"eval_samples_per_second": 59.781,
"eval_steps_per_second": 7.591,
"step": 1980
},
{
"epoch": 18.6,
"learning_rate": 0.00012564485981308413,
"loss": 0.0217,
"step": 1990
},
{
"epoch": 18.6,
"eval_accuracy": 0.7195767195767195,
"eval_loss": 1.4832117557525635,
"eval_runtime": 3.179,
"eval_samples_per_second": 59.452,
"eval_steps_per_second": 7.549,
"step": 1990
},
{
"epoch": 18.69,
"learning_rate": 0.00012527102803738317,
"loss": 0.006,
"step": 2000
},
{
"epoch": 18.69,
"eval_accuracy": 0.6772486772486772,
"eval_loss": 1.9052395820617676,
"eval_runtime": 3.1938,
"eval_samples_per_second": 59.176,
"eval_steps_per_second": 7.514,
"step": 2000
},
{
"epoch": 18.79,
"learning_rate": 0.00012489719626168226,
"loss": 0.0357,
"step": 2010
},
{
"epoch": 18.79,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.73564612865448,
"eval_runtime": 3.3743,
"eval_samples_per_second": 56.012,
"eval_steps_per_second": 7.113,
"step": 2010
},
{
"epoch": 18.88,
"learning_rate": 0.00012452336448598133,
"loss": 0.0197,
"step": 2020
},
{
"epoch": 18.88,
"eval_accuracy": 0.7301587301587301,
"eval_loss": 1.617836594581604,
"eval_runtime": 3.185,
"eval_samples_per_second": 59.34,
"eval_steps_per_second": 7.535,
"step": 2020
},
{
"epoch": 18.97,
"learning_rate": 0.00012414953271028037,
"loss": 0.0331,
"step": 2030
},
{
"epoch": 18.97,
"eval_accuracy": 0.7407407407407407,
"eval_loss": 1.5125271081924438,
"eval_runtime": 3.228,
"eval_samples_per_second": 58.55,
"eval_steps_per_second": 7.435,
"step": 2030
},
{
"epoch": 19.07,
"learning_rate": 0.00012381308411214953,
"loss": 0.2593,
"step": 2040
},
{
"epoch": 19.07,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.6546989679336548,
"eval_runtime": 3.1974,
"eval_samples_per_second": 59.111,
"eval_steps_per_second": 7.506,
"step": 2040
},
{
"epoch": 19.16,
"learning_rate": 0.0001234392523364486,
"loss": 0.0134,
"step": 2050
},
{
"epoch": 19.16,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 1.6934614181518555,
"eval_runtime": 3.2069,
"eval_samples_per_second": 58.936,
"eval_steps_per_second": 7.484,
"step": 2050
},
{
"epoch": 19.25,
"learning_rate": 0.0001230654205607477,
"loss": 0.0069,
"step": 2060
},
{
"epoch": 19.25,
"eval_accuracy": 0.7407407407407407,
"eval_loss": 1.5514878034591675,
"eval_runtime": 3.2273,
"eval_samples_per_second": 58.563,
"eval_steps_per_second": 7.437,
"step": 2060
},
{
"epoch": 19.35,
"learning_rate": 0.00012269158878504673,
"loss": 0.0083,
"step": 2070
},
{
"epoch": 19.35,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.576420545578003,
"eval_runtime": 3.1752,
"eval_samples_per_second": 59.525,
"eval_steps_per_second": 7.559,
"step": 2070
},
{
"epoch": 19.44,
"learning_rate": 0.0001223177570093458,
"loss": 0.0299,
"step": 2080
},
{
"epoch": 19.44,
"eval_accuracy": 0.7301587301587301,
"eval_loss": 1.5843783617019653,
"eval_runtime": 3.1803,
"eval_samples_per_second": 59.428,
"eval_steps_per_second": 7.546,
"step": 2080
},
{
"epoch": 19.53,
"learning_rate": 0.00012194392523364486,
"loss": 0.002,
"step": 2090
},
{
"epoch": 19.53,
"eval_accuracy": 0.746031746031746,
"eval_loss": 1.5910844802856445,
"eval_runtime": 3.1724,
"eval_samples_per_second": 59.576,
"eval_steps_per_second": 7.565,
"step": 2090
},
{
"epoch": 19.63,
"learning_rate": 0.00012157009345794393,
"loss": 0.0022,
"step": 2100
},
{
"epoch": 19.63,
"eval_accuracy": 0.7513227513227513,
"eval_loss": 1.5894769430160522,
"eval_runtime": 3.2193,
"eval_samples_per_second": 58.708,
"eval_steps_per_second": 7.455,
"step": 2100
},
{
"epoch": 19.72,
"learning_rate": 0.00012119626168224301,
"loss": 0.0024,
"step": 2110
},
{
"epoch": 19.72,
"eval_accuracy": 0.7513227513227513,
"eval_loss": 1.586985468864441,
"eval_runtime": 3.1897,
"eval_samples_per_second": 59.254,
"eval_steps_per_second": 7.524,
"step": 2110
},
{
"epoch": 19.81,
"learning_rate": 0.00012082242990654206,
"loss": 0.0082,
"step": 2120
},
{
"epoch": 19.81,
"eval_accuracy": 0.7407407407407407,
"eval_loss": 1.582448124885559,
"eval_runtime": 3.1594,
"eval_samples_per_second": 59.821,
"eval_steps_per_second": 7.596,
"step": 2120
},
{
"epoch": 19.91,
"learning_rate": 0.00012044859813084113,
"loss": 0.0022,
"step": 2130
},
{
"epoch": 19.91,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.5609190464019775,
"eval_runtime": 3.3172,
"eval_samples_per_second": 56.976,
"eval_steps_per_second": 7.235,
"step": 2130
},
{
"epoch": 20.0,
"learning_rate": 0.00012007476635514018,
"loss": 0.02,
"step": 2140
},
{
"epoch": 20.0,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.6138461828231812,
"eval_runtime": 3.3238,
"eval_samples_per_second": 56.863,
"eval_steps_per_second": 7.221,
"step": 2140
},
{
"epoch": 20.09,
"learning_rate": 0.00011970093457943925,
"loss": 0.0063,
"step": 2150
},
{
"epoch": 20.09,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.7616651058197021,
"eval_runtime": 3.2361,
"eval_samples_per_second": 58.403,
"eval_steps_per_second": 7.416,
"step": 2150
},
{
"epoch": 20.19,
"learning_rate": 0.00011932710280373833,
"loss": 0.0021,
"step": 2160
},
{
"epoch": 20.19,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.7941789627075195,
"eval_runtime": 5.8284,
"eval_samples_per_second": 32.428,
"eval_steps_per_second": 4.118,
"step": 2160
},
{
"epoch": 20.28,
"learning_rate": 0.0001189532710280374,
"loss": 0.0068,
"step": 2170
},
{
"epoch": 20.28,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.7489572763442993,
"eval_runtime": 3.085,
"eval_samples_per_second": 61.265,
"eval_steps_per_second": 7.78,
"step": 2170
},
{
"epoch": 20.37,
"learning_rate": 0.00011857943925233645,
"loss": 0.0078,
"step": 2180
},
{
"epoch": 20.37,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.6500416994094849,
"eval_runtime": 3.0679,
"eval_samples_per_second": 61.606,
"eval_steps_per_second": 7.823,
"step": 2180
},
{
"epoch": 20.47,
"learning_rate": 0.00011820560747663552,
"loss": 0.0019,
"step": 2190
},
{
"epoch": 20.47,
"eval_accuracy": 0.7513227513227513,
"eval_loss": 1.5972764492034912,
"eval_runtime": 3.3043,
"eval_samples_per_second": 57.198,
"eval_steps_per_second": 7.263,
"step": 2190
},
{
"epoch": 20.56,
"learning_rate": 0.00011783177570093457,
"loss": 0.0052,
"step": 2200
},
{
"epoch": 20.56,
"eval_accuracy": 0.7301587301587301,
"eval_loss": 1.64747154712677,
"eval_runtime": 3.2063,
"eval_samples_per_second": 58.947,
"eval_steps_per_second": 7.485,
"step": 2200
},
{
"epoch": 20.65,
"learning_rate": 0.00011745794392523365,
"loss": 0.0169,
"step": 2210
},
{
"epoch": 20.65,
"eval_accuracy": 0.7195767195767195,
"eval_loss": 1.6964852809906006,
"eval_runtime": 3.2678,
"eval_samples_per_second": 57.837,
"eval_steps_per_second": 7.344,
"step": 2210
},
{
"epoch": 20.75,
"learning_rate": 0.00011708411214953272,
"loss": 0.011,
"step": 2220
},
{
"epoch": 20.75,
"eval_accuracy": 0.708994708994709,
"eval_loss": 1.7380739450454712,
"eval_runtime": 3.1484,
"eval_samples_per_second": 60.03,
"eval_steps_per_second": 7.623,
"step": 2220
},
{
"epoch": 20.84,
"learning_rate": 0.00011671028037383178,
"loss": 0.0141,
"step": 2230
},
{
"epoch": 20.84,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.6947896480560303,
"eval_runtime": 3.3613,
"eval_samples_per_second": 56.228,
"eval_steps_per_second": 7.14,
"step": 2230
},
{
"epoch": 20.93,
"learning_rate": 0.00011633644859813084,
"loss": 0.026,
"step": 2240
},
{
"epoch": 20.93,
"eval_accuracy": 0.7037037037037037,
"eval_loss": 1.7711342573165894,
"eval_runtime": 3.2033,
"eval_samples_per_second": 59.001,
"eval_steps_per_second": 7.492,
"step": 2240
},
{
"epoch": 21.03,
"learning_rate": 0.00011596261682242991,
"loss": 0.0019,
"step": 2250
},
{
"epoch": 21.03,
"eval_accuracy": 0.6825396825396826,
"eval_loss": 1.8577070236206055,
"eval_runtime": 3.1485,
"eval_samples_per_second": 60.03,
"eval_steps_per_second": 7.623,
"step": 2250
},
{
"epoch": 21.12,
"learning_rate": 0.00011558878504672899,
"loss": 0.0523,
"step": 2260
},
{
"epoch": 21.12,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.837315320968628,
"eval_runtime": 3.1213,
"eval_samples_per_second": 60.552,
"eval_steps_per_second": 7.689,
"step": 2260
},
{
"epoch": 21.21,
"learning_rate": 0.00011521495327102804,
"loss": 0.0234,
"step": 2270
},
{
"epoch": 21.21,
"eval_accuracy": 0.7248677248677249,
"eval_loss": 1.7204312086105347,
"eval_runtime": 3.3214,
"eval_samples_per_second": 56.904,
"eval_steps_per_second": 7.226,
"step": 2270
},
{
"epoch": 21.31,
"learning_rate": 0.00011484112149532711,
"loss": 0.0105,
"step": 2280
},
{
"epoch": 21.31,
"eval_accuracy": 0.7195767195767195,
"eval_loss": 1.6235790252685547,
"eval_runtime": 3.1507,
"eval_samples_per_second": 59.986,
"eval_steps_per_second": 7.617,
"step": 2280
},
{
"epoch": 21.4,
"learning_rate": 0.00011446728971962617,
"loss": 0.048,
"step": 2290
},
{
"epoch": 21.4,
"eval_accuracy": 0.6984126984126984,
"eval_loss": 1.8592135906219482,
"eval_runtime": 3.1589,
"eval_samples_per_second": 59.83,
"eval_steps_per_second": 7.598,
"step": 2290
},
{
"epoch": 21.5,
"learning_rate": 0.00011409345794392523,
"loss": 0.0456,
"step": 2300
},
{
"epoch": 21.5,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.7222115993499756,
"eval_runtime": 3.1346,
"eval_samples_per_second": 60.296,
"eval_steps_per_second": 7.657,
"step": 2300
},
{
"epoch": 21.59,
"learning_rate": 0.00011371962616822431,
"loss": 0.0129,
"step": 2310
},
{
"epoch": 21.59,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.6260654926300049,
"eval_runtime": 3.1239,
"eval_samples_per_second": 60.501,
"eval_steps_per_second": 7.683,
"step": 2310
},
{
"epoch": 21.68,
"learning_rate": 0.00011334579439252338,
"loss": 0.0048,
"step": 2320
},
{
"epoch": 21.68,
"eval_accuracy": 0.7142857142857143,
"eval_loss": 1.5359770059585571,
"eval_runtime": 3.254,
"eval_samples_per_second": 58.083,
"eval_steps_per_second": 7.376,
"step": 2320
},
{
"epoch": 21.78,
"learning_rate": 0.00011297196261682243,
"loss": 0.0217,
"step": 2330
},
{
"epoch": 21.78,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.4278719425201416,
"eval_runtime": 3.2536,
"eval_samples_per_second": 58.09,
"eval_steps_per_second": 7.376,
"step": 2330
},
{
"epoch": 21.87,
"learning_rate": 0.0001125981308411215,
"loss": 0.011,
"step": 2340
},
{
"epoch": 21.87,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.369935393333435,
"eval_runtime": 3.0331,
"eval_samples_per_second": 62.312,
"eval_steps_per_second": 7.913,
"step": 2340
},
{
"epoch": 21.96,
"learning_rate": 0.00011222429906542056,
"loss": 0.0104,
"step": 2350
},
{
"epoch": 21.96,
"eval_accuracy": 0.6878306878306878,
"eval_loss": 1.9012395143508911,
"eval_runtime": 3.1254,
"eval_samples_per_second": 60.472,
"eval_steps_per_second": 7.679,
"step": 2350
},
{
"epoch": 22.06,
"learning_rate": 0.00011185046728971964,
"loss": 0.0204,
"step": 2360
},
{
"epoch": 22.06,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.5529383420944214,
"eval_runtime": 3.1087,
"eval_samples_per_second": 60.796,
"eval_steps_per_second": 7.72,
"step": 2360
},
{
"epoch": 22.15,
"learning_rate": 0.0001114766355140187,
"loss": 0.02,
"step": 2370
},
{
"epoch": 22.15,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 1.7242515087127686,
"eval_runtime": 3.0296,
"eval_samples_per_second": 62.384,
"eval_steps_per_second": 7.922,
"step": 2370
},
{
"epoch": 22.24,
"learning_rate": 0.00011110280373831776,
"loss": 0.0394,
"step": 2380
},
{
"epoch": 22.24,
"eval_accuracy": 0.6931216931216931,
"eval_loss": 1.8429406881332397,
"eval_runtime": 3.2178,
"eval_samples_per_second": 58.735,
"eval_steps_per_second": 7.458,
"step": 2380
},
{
"epoch": 22.34,
"learning_rate": 0.00011072897196261682,
"loss": 0.0217,
"step": 2390
},
{
"epoch": 22.34,
"eval_accuracy": 0.7407407407407407,
"eval_loss": 1.6551986932754517,
"eval_runtime": 3.0937,
"eval_samples_per_second": 61.092,
"eval_steps_per_second": 7.758,
"step": 2390
},
{
"epoch": 22.43,
"learning_rate": 0.00011035514018691588,
"loss": 0.0407,
"step": 2400
},
{
"epoch": 22.43,
"eval_accuracy": 0.7407407407407407,
"eval_loss": 1.584214687347412,
"eval_runtime": 3.0876,
"eval_samples_per_second": 61.213,
"eval_steps_per_second": 7.773,
"step": 2400
},
{
"epoch": 22.52,
"learning_rate": 0.00010998130841121497,
"loss": 0.0075,
"step": 2410
},
{
"epoch": 22.52,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.5881952047348022,
"eval_runtime": 3.1026,
"eval_samples_per_second": 60.916,
"eval_steps_per_second": 7.735,
"step": 2410
},
{
"epoch": 22.62,
"learning_rate": 0.00010960747663551403,
"loss": 0.0144,
"step": 2420
},
{
"epoch": 22.62,
"eval_accuracy": 0.7301587301587301,
"eval_loss": 1.6610509157180786,
"eval_runtime": 3.2448,
"eval_samples_per_second": 58.248,
"eval_steps_per_second": 7.397,
"step": 2420
},
{
"epoch": 22.71,
"learning_rate": 0.00010923364485981309,
"loss": 0.0021,
"step": 2430
},
{
"epoch": 22.71,
"eval_accuracy": 0.746031746031746,
"eval_loss": 1.7075546979904175,
"eval_runtime": 3.273,
"eval_samples_per_second": 57.746,
"eval_steps_per_second": 7.333,
"step": 2430
},
{
"epoch": 22.8,
"learning_rate": 0.00010885981308411215,
"loss": 0.0019,
"step": 2440
},
{
"epoch": 22.8,
"eval_accuracy": 0.7354497354497355,
"eval_loss": 1.616317629814148,
"eval_runtime": 3.3746,
"eval_samples_per_second": 56.006,
"eval_steps_per_second": 7.112,
"step": 2440
},
{
"epoch": 22.9,
"learning_rate": 0.00010848598130841121,
"loss": 0.0074,
"step": 2450
},
{
"epoch": 22.9,
"eval_accuracy": 0.746031746031746,
"eval_loss": 1.5530917644500732,
"eval_runtime": 3.2179,
"eval_samples_per_second": 58.735,
"eval_steps_per_second": 7.458,
"step": 2450
},
{
"epoch": 22.99,
"learning_rate": 0.0001081121495327103,
"loss": 0.0195,
"step": 2460
},
{
"epoch": 22.99,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5417741537094116,
"eval_runtime": 3.3609,
"eval_samples_per_second": 56.235,
"eval_steps_per_second": 7.141,
"step": 2460
},
{
"epoch": 23.08,
"learning_rate": 0.00010773831775700935,
"loss": 0.0085,
"step": 2470
},
{
"epoch": 23.08,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.532561182975769,
"eval_runtime": 3.2217,
"eval_samples_per_second": 58.664,
"eval_steps_per_second": 7.449,
"step": 2470
},
{
"epoch": 23.18,
"learning_rate": 0.00010736448598130842,
"loss": 0.0015,
"step": 2480
},
{
"epoch": 23.18,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.530474305152893,
"eval_runtime": 3.1097,
"eval_samples_per_second": 60.778,
"eval_steps_per_second": 7.718,
"step": 2480
},
{
"epoch": 23.27,
"learning_rate": 0.00010699065420560748,
"loss": 0.0015,
"step": 2490
},
{
"epoch": 23.27,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.530909776687622,
"eval_runtime": 3.3395,
"eval_samples_per_second": 56.594,
"eval_steps_per_second": 7.187,
"step": 2490
},
{
"epoch": 23.36,
"learning_rate": 0.00010661682242990654,
"loss": 0.0058,
"step": 2500
},
{
"epoch": 23.36,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5259517431259155,
"eval_runtime": 3.1744,
"eval_samples_per_second": 59.539,
"eval_steps_per_second": 7.56,
"step": 2500
},
{
"epoch": 23.46,
"learning_rate": 0.00010624299065420562,
"loss": 0.0015,
"step": 2510
},
{
"epoch": 23.46,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5237162113189697,
"eval_runtime": 3.3526,
"eval_samples_per_second": 56.374,
"eval_steps_per_second": 7.159,
"step": 2510
},
{
"epoch": 23.55,
"learning_rate": 0.00010586915887850468,
"loss": 0.0083,
"step": 2520
},
{
"epoch": 23.55,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.5256061553955078,
"eval_runtime": 3.4146,
"eval_samples_per_second": 55.351,
"eval_steps_per_second": 7.029,
"step": 2520
},
{
"epoch": 23.64,
"learning_rate": 0.00010549532710280374,
"loss": 0.0102,
"step": 2530
},
{
"epoch": 23.64,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5389765501022339,
"eval_runtime": 3.0883,
"eval_samples_per_second": 61.198,
"eval_steps_per_second": 7.771,
"step": 2530
},
{
"epoch": 23.74,
"learning_rate": 0.0001051214953271028,
"loss": 0.0014,
"step": 2540
},
{
"epoch": 23.74,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.551665186882019,
"eval_runtime": 3.2146,
"eval_samples_per_second": 58.794,
"eval_steps_per_second": 7.466,
"step": 2540
},
{
"epoch": 23.83,
"learning_rate": 0.00010474766355140186,
"loss": 0.0137,
"step": 2550
},
{
"epoch": 23.83,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.5628893375396729,
"eval_runtime": 3.2259,
"eval_samples_per_second": 58.589,
"eval_steps_per_second": 7.44,
"step": 2550
},
{
"epoch": 23.93,
"learning_rate": 0.00010437383177570095,
"loss": 0.0211,
"step": 2560
},
{
"epoch": 23.93,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5249507427215576,
"eval_runtime": 3.1967,
"eval_samples_per_second": 59.123,
"eval_steps_per_second": 7.508,
"step": 2560
},
{
"epoch": 24.02,
"learning_rate": 0.00010400000000000001,
"loss": 0.0014,
"step": 2570
},
{
"epoch": 24.02,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5051255226135254,
"eval_runtime": 3.3479,
"eval_samples_per_second": 56.454,
"eval_steps_per_second": 7.169,
"step": 2570
},
{
"epoch": 24.11,
"learning_rate": 0.00010362616822429907,
"loss": 0.0102,
"step": 2580
},
{
"epoch": 24.11,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.4923993349075317,
"eval_runtime": 3.3214,
"eval_samples_per_second": 56.904,
"eval_steps_per_second": 7.226,
"step": 2580
},
{
"epoch": 24.21,
"learning_rate": 0.00010325233644859813,
"loss": 0.0032,
"step": 2590
},
{
"epoch": 24.21,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.4887175559997559,
"eval_runtime": 3.2614,
"eval_samples_per_second": 57.95,
"eval_steps_per_second": 7.359,
"step": 2590
},
{
"epoch": 24.3,
"learning_rate": 0.0001028785046728972,
"loss": 0.0012,
"step": 2600
},
{
"epoch": 24.3,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.4899401664733887,
"eval_runtime": 3.1875,
"eval_samples_per_second": 59.295,
"eval_steps_per_second": 7.53,
"step": 2600
},
{
"epoch": 24.39,
"learning_rate": 0.00010250467289719628,
"loss": 0.0013,
"step": 2610
},
{
"epoch": 24.39,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.4907020330429077,
"eval_runtime": 3.2459,
"eval_samples_per_second": 58.227,
"eval_steps_per_second": 7.394,
"step": 2610
},
{
"epoch": 24.49,
"learning_rate": 0.00010213084112149533,
"loss": 0.0013,
"step": 2620
},
{
"epoch": 24.49,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.4912313222885132,
"eval_runtime": 3.1875,
"eval_samples_per_second": 59.294,
"eval_steps_per_second": 7.529,
"step": 2620
},
{
"epoch": 24.58,
"learning_rate": 0.0001017570093457944,
"loss": 0.0012,
"step": 2630
},
{
"epoch": 24.58,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.4922280311584473,
"eval_runtime": 3.428,
"eval_samples_per_second": 55.134,
"eval_steps_per_second": 7.001,
"step": 2630
},
{
"epoch": 24.67,
"learning_rate": 0.00010138317757009345,
"loss": 0.0131,
"step": 2640
},
{
"epoch": 24.67,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.501592755317688,
"eval_runtime": 3.3214,
"eval_samples_per_second": 56.904,
"eval_steps_per_second": 7.226,
"step": 2640
},
{
"epoch": 24.77,
"learning_rate": 0.00010100934579439252,
"loss": 0.0266,
"step": 2650
},
{
"epoch": 24.77,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.517897129058838,
"eval_runtime": 3.3613,
"eval_samples_per_second": 56.229,
"eval_steps_per_second": 7.14,
"step": 2650
},
{
"epoch": 24.86,
"learning_rate": 0.0001006355140186916,
"loss": 0.0072,
"step": 2660
},
{
"epoch": 24.86,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5467013120651245,
"eval_runtime": 3.2995,
"eval_samples_per_second": 57.281,
"eval_steps_per_second": 7.274,
"step": 2660
},
{
"epoch": 24.95,
"learning_rate": 0.00010026168224299067,
"loss": 0.0084,
"step": 2670
},
{
"epoch": 24.95,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.5633758306503296,
"eval_runtime": 3.3204,
"eval_samples_per_second": 56.921,
"eval_steps_per_second": 7.228,
"step": 2670
},
{
"epoch": 25.05,
"learning_rate": 9.988785046728972e-05,
"loss": 0.0141,
"step": 2680
},
{
"epoch": 25.05,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.549333095550537,
"eval_runtime": 3.3387,
"eval_samples_per_second": 56.609,
"eval_steps_per_second": 7.188,
"step": 2680
},
{
"epoch": 25.14,
"learning_rate": 9.95140186915888e-05,
"loss": 0.006,
"step": 2690
},
{
"epoch": 25.14,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5422124862670898,
"eval_runtime": 3.2678,
"eval_samples_per_second": 57.837,
"eval_steps_per_second": 7.344,
"step": 2690
},
{
"epoch": 25.23,
"learning_rate": 9.914018691588785e-05,
"loss": 0.0049,
"step": 2700
},
{
"epoch": 25.23,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5276082754135132,
"eval_runtime": 3.4271,
"eval_samples_per_second": 55.149,
"eval_steps_per_second": 7.003,
"step": 2700
},
{
"epoch": 25.33,
"learning_rate": 9.876635514018692e-05,
"loss": 0.0012,
"step": 2710
},
{
"epoch": 25.33,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5173016786575317,
"eval_runtime": 3.3173,
"eval_samples_per_second": 56.973,
"eval_steps_per_second": 7.235,
"step": 2710
},
{
"epoch": 25.42,
"learning_rate": 9.839252336448599e-05,
"loss": 0.0012,
"step": 2720
},
{
"epoch": 25.42,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5145915746688843,
"eval_runtime": 3.3338,
"eval_samples_per_second": 56.692,
"eval_steps_per_second": 7.199,
"step": 2720
},
{
"epoch": 25.51,
"learning_rate": 9.801869158878506e-05,
"loss": 0.0104,
"step": 2730
},
{
"epoch": 25.51,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5092376470565796,
"eval_runtime": 3.3686,
"eval_samples_per_second": 56.107,
"eval_steps_per_second": 7.125,
"step": 2730
},
{
"epoch": 25.61,
"learning_rate": 9.764485981308412e-05,
"loss": 0.0011,
"step": 2740
},
{
"epoch": 25.61,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5066778659820557,
"eval_runtime": 3.2919,
"eval_samples_per_second": 57.415,
"eval_steps_per_second": 7.291,
"step": 2740
},
{
"epoch": 25.7,
"learning_rate": 9.727102803738318e-05,
"loss": 0.01,
"step": 2750
},
{
"epoch": 25.7,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5135998725891113,
"eval_runtime": 3.3065,
"eval_samples_per_second": 57.16,
"eval_steps_per_second": 7.258,
"step": 2750
},
{
"epoch": 25.79,
"learning_rate": 9.689719626168224e-05,
"loss": 0.0012,
"step": 2760
},
{
"epoch": 25.79,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5182033777236938,
"eval_runtime": 3.1889,
"eval_samples_per_second": 59.268,
"eval_steps_per_second": 7.526,
"step": 2760
},
{
"epoch": 25.89,
"learning_rate": 9.652336448598131e-05,
"loss": 0.0011,
"step": 2770
},
{
"epoch": 25.89,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5205999612808228,
"eval_runtime": 3.2413,
"eval_samples_per_second": 58.311,
"eval_steps_per_second": 7.405,
"step": 2770
},
{
"epoch": 25.98,
"learning_rate": 9.614953271028038e-05,
"loss": 0.0199,
"step": 2780
},
{
"epoch": 25.98,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5461922883987427,
"eval_runtime": 3.1874,
"eval_samples_per_second": 59.295,
"eval_steps_per_second": 7.53,
"step": 2780
},
{
"epoch": 26.07,
"learning_rate": 9.577570093457945e-05,
"loss": 0.0105,
"step": 2790
},
{
"epoch": 26.07,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.560713291168213,
"eval_runtime": 3.3195,
"eval_samples_per_second": 56.936,
"eval_steps_per_second": 7.23,
"step": 2790
},
{
"epoch": 26.17,
"learning_rate": 9.540186915887851e-05,
"loss": 0.0011,
"step": 2800
},
{
"epoch": 26.17,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5657832622528076,
"eval_runtime": 3.108,
"eval_samples_per_second": 60.811,
"eval_steps_per_second": 7.722,
"step": 2800
},
{
"epoch": 26.26,
"learning_rate": 9.502803738317757e-05,
"loss": 0.004,
"step": 2810
},
{
"epoch": 26.26,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5667065382003784,
"eval_runtime": 3.1078,
"eval_samples_per_second": 60.815,
"eval_steps_per_second": 7.723,
"step": 2810
},
{
"epoch": 26.36,
"learning_rate": 9.465420560747665e-05,
"loss": 0.0011,
"step": 2820
},
{
"epoch": 26.36,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.565108060836792,
"eval_runtime": 3.1483,
"eval_samples_per_second": 60.032,
"eval_steps_per_second": 7.623,
"step": 2820
},
{
"epoch": 26.45,
"learning_rate": 9.42803738317757e-05,
"loss": 0.0129,
"step": 2830
},
{
"epoch": 26.45,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5749871730804443,
"eval_runtime": 3.086,
"eval_samples_per_second": 61.245,
"eval_steps_per_second": 7.777,
"step": 2830
},
{
"epoch": 26.54,
"learning_rate": 9.390654205607478e-05,
"loss": 0.0102,
"step": 2840
},
{
"epoch": 26.54,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.574375867843628,
"eval_runtime": 3.141,
"eval_samples_per_second": 60.171,
"eval_steps_per_second": 7.641,
"step": 2840
},
{
"epoch": 26.64,
"learning_rate": 9.353271028037384e-05,
"loss": 0.0041,
"step": 2850
},
{
"epoch": 26.64,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5728485584259033,
"eval_runtime": 3.0982,
"eval_samples_per_second": 61.002,
"eval_steps_per_second": 7.746,
"step": 2850
},
{
"epoch": 26.73,
"learning_rate": 9.31588785046729e-05,
"loss": 0.0082,
"step": 2860
},
{
"epoch": 26.73,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5666193962097168,
"eval_runtime": 3.0518,
"eval_samples_per_second": 61.93,
"eval_steps_per_second": 7.864,
"step": 2860
},
{
"epoch": 26.82,
"learning_rate": 9.278504672897197e-05,
"loss": 0.0069,
"step": 2870
},
{
"epoch": 26.82,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5484100580215454,
"eval_runtime": 3.1504,
"eval_samples_per_second": 59.992,
"eval_steps_per_second": 7.618,
"step": 2870
},
{
"epoch": 26.92,
"learning_rate": 9.241121495327104e-05,
"loss": 0.0058,
"step": 2880
},
{
"epoch": 26.92,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5442478656768799,
"eval_runtime": 3.2682,
"eval_samples_per_second": 57.831,
"eval_steps_per_second": 7.344,
"step": 2880
},
{
"epoch": 27.01,
"learning_rate": 9.20373831775701e-05,
"loss": 0.0011,
"step": 2890
},
{
"epoch": 27.01,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5461711883544922,
"eval_runtime": 3.0931,
"eval_samples_per_second": 61.105,
"eval_steps_per_second": 7.759,
"step": 2890
},
{
"epoch": 27.1,
"learning_rate": 9.166355140186916e-05,
"loss": 0.0058,
"step": 2900
},
{
"epoch": 27.1,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5431169271469116,
"eval_runtime": 3.0975,
"eval_samples_per_second": 61.016,
"eval_steps_per_second": 7.748,
"step": 2900
},
{
"epoch": 27.2,
"learning_rate": 9.128971962616823e-05,
"loss": 0.0054,
"step": 2910
},
{
"epoch": 27.2,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5305052995681763,
"eval_runtime": 3.1126,
"eval_samples_per_second": 60.722,
"eval_steps_per_second": 7.711,
"step": 2910
},
{
"epoch": 27.29,
"learning_rate": 9.091588785046729e-05,
"loss": 0.0064,
"step": 2920
},
{
"epoch": 27.29,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5352741479873657,
"eval_runtime": 3.1506,
"eval_samples_per_second": 59.988,
"eval_steps_per_second": 7.618,
"step": 2920
},
{
"epoch": 27.38,
"learning_rate": 9.054205607476636e-05,
"loss": 0.0116,
"step": 2930
},
{
"epoch": 27.38,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.546917200088501,
"eval_runtime": 3.3601,
"eval_samples_per_second": 56.248,
"eval_steps_per_second": 7.143,
"step": 2930
},
{
"epoch": 27.48,
"learning_rate": 9.016822429906543e-05,
"loss": 0.001,
"step": 2940
},
{
"epoch": 27.48,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5545085668563843,
"eval_runtime": 3.1606,
"eval_samples_per_second": 59.798,
"eval_steps_per_second": 7.593,
"step": 2940
},
{
"epoch": 27.57,
"learning_rate": 8.97943925233645e-05,
"loss": 0.0058,
"step": 2950
},
{
"epoch": 27.57,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.554548978805542,
"eval_runtime": 3.2008,
"eval_samples_per_second": 59.047,
"eval_steps_per_second": 7.498,
"step": 2950
},
{
"epoch": 27.66,
"learning_rate": 8.942056074766355e-05,
"loss": 0.0056,
"step": 2960
},
{
"epoch": 27.66,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.546671748161316,
"eval_runtime": 3.4523,
"eval_samples_per_second": 54.747,
"eval_steps_per_second": 6.952,
"step": 2960
},
{
"epoch": 27.76,
"learning_rate": 8.904672897196263e-05,
"loss": 0.001,
"step": 2970
},
{
"epoch": 27.76,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5371477603912354,
"eval_runtime": 3.2841,
"eval_samples_per_second": 57.551,
"eval_steps_per_second": 7.308,
"step": 2970
},
{
"epoch": 27.85,
"learning_rate": 8.867289719626168e-05,
"loss": 0.0153,
"step": 2980
},
{
"epoch": 27.85,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5416946411132812,
"eval_runtime": 3.3871,
"eval_samples_per_second": 55.799,
"eval_steps_per_second": 7.086,
"step": 2980
},
{
"epoch": 27.94,
"learning_rate": 8.829906542056075e-05,
"loss": 0.0063,
"step": 2990
},
{
"epoch": 27.94,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.556403636932373,
"eval_runtime": 3.2332,
"eval_samples_per_second": 58.456,
"eval_steps_per_second": 7.423,
"step": 2990
},
{
"epoch": 28.04,
"learning_rate": 8.792523364485982e-05,
"loss": 0.001,
"step": 3000
},
{
"epoch": 28.04,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5654370784759521,
"eval_runtime": 3.2405,
"eval_samples_per_second": 58.325,
"eval_steps_per_second": 7.406,
"step": 3000
},
{
"epoch": 28.13,
"learning_rate": 8.755140186915888e-05,
"loss": 0.001,
"step": 3010
},
{
"epoch": 28.13,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5694721937179565,
"eval_runtime": 3.2113,
"eval_samples_per_second": 58.854,
"eval_steps_per_second": 7.474,
"step": 3010
},
{
"epoch": 28.22,
"learning_rate": 8.717757009345795e-05,
"loss": 0.001,
"step": 3020
},
{
"epoch": 28.22,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5711843967437744,
"eval_runtime": 3.1919,
"eval_samples_per_second": 59.213,
"eval_steps_per_second": 7.519,
"step": 3020
},
{
"epoch": 28.32,
"learning_rate": 8.6803738317757e-05,
"loss": 0.0043,
"step": 3030
},
{
"epoch": 28.32,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.573392391204834,
"eval_runtime": 3.0318,
"eval_samples_per_second": 62.338,
"eval_steps_per_second": 7.916,
"step": 3030
},
{
"epoch": 28.41,
"learning_rate": 8.642990654205609e-05,
"loss": 0.0043,
"step": 3040
},
{
"epoch": 28.41,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5829499959945679,
"eval_runtime": 3.2944,
"eval_samples_per_second": 57.37,
"eval_steps_per_second": 7.285,
"step": 3040
},
{
"epoch": 28.5,
"learning_rate": 8.605607476635514e-05,
"loss": 0.0009,
"step": 3050
},
{
"epoch": 28.5,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5897167921066284,
"eval_runtime": 3.1612,
"eval_samples_per_second": 59.787,
"eval_steps_per_second": 7.592,
"step": 3050
},
{
"epoch": 28.6,
"learning_rate": 8.56822429906542e-05,
"loss": 0.0009,
"step": 3060
},
{
"epoch": 28.6,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5928257703781128,
"eval_runtime": 3.2548,
"eval_samples_per_second": 58.068,
"eval_steps_per_second": 7.374,
"step": 3060
},
{
"epoch": 28.69,
"learning_rate": 8.530841121495327e-05,
"loss": 0.0136,
"step": 3070
},
{
"epoch": 28.69,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5987612009048462,
"eval_runtime": 3.0819,
"eval_samples_per_second": 61.325,
"eval_steps_per_second": 7.787,
"step": 3070
},
{
"epoch": 28.79,
"learning_rate": 8.493457943925234e-05,
"loss": 0.0106,
"step": 3080
},
{
"epoch": 28.79,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5925683975219727,
"eval_runtime": 3.4371,
"eval_samples_per_second": 54.988,
"eval_steps_per_second": 6.983,
"step": 3080
},
{
"epoch": 28.88,
"learning_rate": 8.456074766355141e-05,
"loss": 0.0169,
"step": 3090
},
{
"epoch": 28.88,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5758073329925537,
"eval_runtime": 3.1208,
"eval_samples_per_second": 60.562,
"eval_steps_per_second": 7.69,
"step": 3090
},
{
"epoch": 28.97,
"learning_rate": 8.418691588785048e-05,
"loss": 0.0058,
"step": 3100
},
{
"epoch": 28.97,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5587332248687744,
"eval_runtime": 3.2941,
"eval_samples_per_second": 57.375,
"eval_steps_per_second": 7.286,
"step": 3100
},
{
"epoch": 29.07,
"learning_rate": 8.381308411214953e-05,
"loss": 0.0008,
"step": 3110
},
{
"epoch": 29.07,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5488831996917725,
"eval_runtime": 3.104,
"eval_samples_per_second": 60.89,
"eval_steps_per_second": 7.732,
"step": 3110
},
{
"epoch": 29.16,
"learning_rate": 8.343925233644861e-05,
"loss": 0.0106,
"step": 3120
},
{
"epoch": 29.16,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5524382591247559,
"eval_runtime": 3.1857,
"eval_samples_per_second": 59.327,
"eval_steps_per_second": 7.534,
"step": 3120
},
{
"epoch": 29.25,
"learning_rate": 8.306542056074766e-05,
"loss": 0.0066,
"step": 3130
},
{
"epoch": 29.25,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5529749393463135,
"eval_runtime": 3.2947,
"eval_samples_per_second": 57.365,
"eval_steps_per_second": 7.284,
"step": 3130
},
{
"epoch": 29.35,
"learning_rate": 8.269158878504673e-05,
"loss": 0.0047,
"step": 3140
},
{
"epoch": 29.35,
"eval_accuracy": 0.7724867724867724,
"eval_loss": 1.561766266822815,
"eval_runtime": 3.0618,
"eval_samples_per_second": 61.729,
"eval_steps_per_second": 7.839,
"step": 3140
},
{
"epoch": 29.44,
"learning_rate": 8.23177570093458e-05,
"loss": 0.0009,
"step": 3150
},
{
"epoch": 29.44,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.562992811203003,
"eval_runtime": 4.4425,
"eval_samples_per_second": 42.543,
"eval_steps_per_second": 5.402,
"step": 3150
},
{
"epoch": 29.53,
"learning_rate": 8.194392523364487e-05,
"loss": 0.01,
"step": 3160
},
{
"epoch": 29.53,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5655356645584106,
"eval_runtime": 3.1679,
"eval_samples_per_second": 59.661,
"eval_steps_per_second": 7.576,
"step": 3160
},
{
"epoch": 29.63,
"learning_rate": 8.157009345794393e-05,
"loss": 0.0131,
"step": 3170
},
{
"epoch": 29.63,
"eval_accuracy": 0.7724867724867724,
"eval_loss": 1.568717360496521,
"eval_runtime": 4.272,
"eval_samples_per_second": 44.242,
"eval_steps_per_second": 5.618,
"step": 3170
},
{
"epoch": 29.72,
"learning_rate": 8.119626168224299e-05,
"loss": 0.0065,
"step": 3180
},
{
"epoch": 29.72,
"eval_accuracy": 0.7724867724867724,
"eval_loss": 1.5695167779922485,
"eval_runtime": 3.2729,
"eval_samples_per_second": 57.748,
"eval_steps_per_second": 7.333,
"step": 3180
},
{
"epoch": 29.81,
"learning_rate": 8.082242990654207e-05,
"loss": 0.0049,
"step": 3190
},
{
"epoch": 29.81,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5673458576202393,
"eval_runtime": 3.2283,
"eval_samples_per_second": 58.545,
"eval_steps_per_second": 7.434,
"step": 3190
},
{
"epoch": 29.91,
"learning_rate": 8.044859813084112e-05,
"loss": 0.0008,
"step": 3200
},
{
"epoch": 29.91,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5677809715270996,
"eval_runtime": 3.2238,
"eval_samples_per_second": 58.627,
"eval_steps_per_second": 7.445,
"step": 3200
},
{
"epoch": 30.0,
"learning_rate": 8.00747663551402e-05,
"loss": 0.0009,
"step": 3210
},
{
"epoch": 30.0,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.569243311882019,
"eval_runtime": 3.2344,
"eval_samples_per_second": 58.434,
"eval_steps_per_second": 7.42,
"step": 3210
},
{
"epoch": 30.09,
"learning_rate": 7.970093457943925e-05,
"loss": 0.0008,
"step": 3220
},
{
"epoch": 30.09,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.569542407989502,
"eval_runtime": 3.1865,
"eval_samples_per_second": 59.312,
"eval_steps_per_second": 7.532,
"step": 3220
},
{
"epoch": 30.19,
"learning_rate": 7.932710280373832e-05,
"loss": 0.0008,
"step": 3230
},
{
"epoch": 30.19,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5703235864639282,
"eval_runtime": 3.1705,
"eval_samples_per_second": 59.612,
"eval_steps_per_second": 7.57,
"step": 3230
},
{
"epoch": 30.28,
"learning_rate": 7.895327102803739e-05,
"loss": 0.0047,
"step": 3240
},
{
"epoch": 30.28,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5737032890319824,
"eval_runtime": 3.2402,
"eval_samples_per_second": 58.33,
"eval_steps_per_second": 7.407,
"step": 3240
},
{
"epoch": 30.37,
"learning_rate": 7.857943925233646e-05,
"loss": 0.0008,
"step": 3250
},
{
"epoch": 30.37,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5757778882980347,
"eval_runtime": 3.1947,
"eval_samples_per_second": 59.16,
"eval_steps_per_second": 7.512,
"step": 3250
},
{
"epoch": 30.47,
"learning_rate": 7.820560747663552e-05,
"loss": 0.0059,
"step": 3260
},
{
"epoch": 30.47,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5871896743774414,
"eval_runtime": 3.2182,
"eval_samples_per_second": 58.729,
"eval_steps_per_second": 7.458,
"step": 3260
},
{
"epoch": 30.56,
"learning_rate": 7.783177570093458e-05,
"loss": 0.0091,
"step": 3270
},
{
"epoch": 30.56,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.608903169631958,
"eval_runtime": 3.1183,
"eval_samples_per_second": 60.61,
"eval_steps_per_second": 7.696,
"step": 3270
},
{
"epoch": 30.65,
"learning_rate": 7.745794392523364e-05,
"loss": 0.0033,
"step": 3280
},
{
"epoch": 30.65,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.623267650604248,
"eval_runtime": 3.1768,
"eval_samples_per_second": 59.494,
"eval_steps_per_second": 7.555,
"step": 3280
},
{
"epoch": 30.75,
"learning_rate": 7.708411214953271e-05,
"loss": 0.0165,
"step": 3290
},
{
"epoch": 30.75,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.611799955368042,
"eval_runtime": 3.266,
"eval_samples_per_second": 57.869,
"eval_steps_per_second": 7.348,
"step": 3290
},
{
"epoch": 30.84,
"learning_rate": 7.671028037383178e-05,
"loss": 0.015,
"step": 3300
},
{
"epoch": 30.84,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5901665687561035,
"eval_runtime": 3.1817,
"eval_samples_per_second": 59.402,
"eval_steps_per_second": 7.543,
"step": 3300
},
{
"epoch": 30.93,
"learning_rate": 7.633644859813085e-05,
"loss": 0.0049,
"step": 3310
},
{
"epoch": 30.93,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5764192342758179,
"eval_runtime": 3.4445,
"eval_samples_per_second": 54.87,
"eval_steps_per_second": 6.968,
"step": 3310
},
{
"epoch": 31.03,
"learning_rate": 7.596261682242991e-05,
"loss": 0.0008,
"step": 3320
},
{
"epoch": 31.03,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5679030418395996,
"eval_runtime": 3.18,
"eval_samples_per_second": 59.433,
"eval_steps_per_second": 7.547,
"step": 3320
},
{
"epoch": 31.12,
"learning_rate": 7.558878504672897e-05,
"loss": 0.0071,
"step": 3330
},
{
"epoch": 31.12,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5708253383636475,
"eval_runtime": 3.2342,
"eval_samples_per_second": 58.438,
"eval_steps_per_second": 7.421,
"step": 3330
},
{
"epoch": 31.21,
"learning_rate": 7.521495327102805e-05,
"loss": 0.0068,
"step": 3340
},
{
"epoch": 31.21,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5782489776611328,
"eval_runtime": 3.0933,
"eval_samples_per_second": 61.099,
"eval_steps_per_second": 7.759,
"step": 3340
},
{
"epoch": 31.31,
"learning_rate": 7.48411214953271e-05,
"loss": 0.0058,
"step": 3350
},
{
"epoch": 31.31,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5822737216949463,
"eval_runtime": 3.2696,
"eval_samples_per_second": 57.806,
"eval_steps_per_second": 7.34,
"step": 3350
},
{
"epoch": 31.4,
"learning_rate": 7.446728971962618e-05,
"loss": 0.0008,
"step": 3360
},
{
"epoch": 31.4,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5795854330062866,
"eval_runtime": 3.2032,
"eval_samples_per_second": 59.004,
"eval_steps_per_second": 7.493,
"step": 3360
},
{
"epoch": 31.5,
"learning_rate": 7.409345794392524e-05,
"loss": 0.0008,
"step": 3370
},
{
"epoch": 31.5,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5795681476593018,
"eval_runtime": 3.2182,
"eval_samples_per_second": 58.729,
"eval_steps_per_second": 7.458,
"step": 3370
},
{
"epoch": 31.59,
"learning_rate": 7.37196261682243e-05,
"loss": 0.0084,
"step": 3380
},
{
"epoch": 31.59,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5760174989700317,
"eval_runtime": 3.1968,
"eval_samples_per_second": 59.122,
"eval_steps_per_second": 7.508,
"step": 3380
},
{
"epoch": 31.68,
"learning_rate": 7.334579439252337e-05,
"loss": 0.0074,
"step": 3390
},
{
"epoch": 31.68,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5738554000854492,
"eval_runtime": 3.2685,
"eval_samples_per_second": 57.825,
"eval_steps_per_second": 7.343,
"step": 3390
},
{
"epoch": 31.78,
"learning_rate": 7.297196261682244e-05,
"loss": 0.0039,
"step": 3400
},
{
"epoch": 31.78,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5700509548187256,
"eval_runtime": 3.1115,
"eval_samples_per_second": 60.742,
"eval_steps_per_second": 7.713,
"step": 3400
},
{
"epoch": 31.87,
"learning_rate": 7.25981308411215e-05,
"loss": 0.0144,
"step": 3410
},
{
"epoch": 31.87,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5835039615631104,
"eval_runtime": 3.3036,
"eval_samples_per_second": 57.211,
"eval_steps_per_second": 7.265,
"step": 3410
},
{
"epoch": 31.96,
"learning_rate": 7.222429906542056e-05,
"loss": 0.0051,
"step": 3420
},
{
"epoch": 31.96,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5920816659927368,
"eval_runtime": 3.4411,
"eval_samples_per_second": 54.924,
"eval_steps_per_second": 6.975,
"step": 3420
},
{
"epoch": 32.06,
"learning_rate": 7.185046728971963e-05,
"loss": 0.0054,
"step": 3430
},
{
"epoch": 32.06,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5909570455551147,
"eval_runtime": 3.0254,
"eval_samples_per_second": 62.471,
"eval_steps_per_second": 7.933,
"step": 3430
},
{
"epoch": 32.15,
"learning_rate": 7.14766355140187e-05,
"loss": 0.0047,
"step": 3440
},
{
"epoch": 32.15,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5870330333709717,
"eval_runtime": 3.3063,
"eval_samples_per_second": 57.163,
"eval_steps_per_second": 7.259,
"step": 3440
},
{
"epoch": 32.24,
"learning_rate": 7.110280373831776e-05,
"loss": 0.0125,
"step": 3450
},
{
"epoch": 32.24,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.5907707214355469,
"eval_runtime": 3.0785,
"eval_samples_per_second": 61.394,
"eval_steps_per_second": 7.796,
"step": 3450
},
{
"epoch": 32.34,
"learning_rate": 7.072897196261683e-05,
"loss": 0.0057,
"step": 3460
},
{
"epoch": 32.34,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5986977815628052,
"eval_runtime": 3.0752,
"eval_samples_per_second": 61.459,
"eval_steps_per_second": 7.804,
"step": 3460
},
{
"epoch": 32.43,
"learning_rate": 7.03551401869159e-05,
"loss": 0.0007,
"step": 3470
},
{
"epoch": 32.43,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.6025718450546265,
"eval_runtime": 3.1619,
"eval_samples_per_second": 59.775,
"eval_steps_per_second": 7.59,
"step": 3470
},
{
"epoch": 32.52,
"learning_rate": 6.998130841121495e-05,
"loss": 0.006,
"step": 3480
},
{
"epoch": 32.52,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.5968295335769653,
"eval_runtime": 3.0263,
"eval_samples_per_second": 62.452,
"eval_steps_per_second": 7.93,
"step": 3480
},
{
"epoch": 32.62,
"learning_rate": 6.960747663551403e-05,
"loss": 0.0007,
"step": 3490
},
{
"epoch": 32.62,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.594142198562622,
"eval_runtime": 3.2795,
"eval_samples_per_second": 57.631,
"eval_steps_per_second": 7.318,
"step": 3490
},
{
"epoch": 32.71,
"learning_rate": 6.923364485981308e-05,
"loss": 0.0007,
"step": 3500
},
{
"epoch": 32.71,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.59373140335083,
"eval_runtime": 3.0541,
"eval_samples_per_second": 61.884,
"eval_steps_per_second": 7.858,
"step": 3500
},
{
"epoch": 32.8,
"learning_rate": 6.885981308411215e-05,
"loss": 0.0097,
"step": 3510
},
{
"epoch": 32.8,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.600218415260315,
"eval_runtime": 3.105,
"eval_samples_per_second": 60.87,
"eval_steps_per_second": 7.73,
"step": 3510
},
{
"epoch": 32.9,
"learning_rate": 6.848598130841122e-05,
"loss": 0.0105,
"step": 3520
},
{
"epoch": 32.9,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6051002740859985,
"eval_runtime": 3.4012,
"eval_samples_per_second": 55.569,
"eval_steps_per_second": 7.056,
"step": 3520
},
{
"epoch": 32.99,
"learning_rate": 6.811214953271028e-05,
"loss": 0.0007,
"step": 3530
},
{
"epoch": 32.99,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6050375699996948,
"eval_runtime": 3.0547,
"eval_samples_per_second": 61.873,
"eval_steps_per_second": 7.857,
"step": 3530
},
{
"epoch": 33.08,
"learning_rate": 6.773831775700935e-05,
"loss": 0.0006,
"step": 3540
},
{
"epoch": 33.08,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6053138971328735,
"eval_runtime": 3.1748,
"eval_samples_per_second": 59.531,
"eval_steps_per_second": 7.559,
"step": 3540
},
{
"epoch": 33.18,
"learning_rate": 6.73644859813084e-05,
"loss": 0.0008,
"step": 3550
},
{
"epoch": 33.18,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6058577299118042,
"eval_runtime": 3.1448,
"eval_samples_per_second": 60.099,
"eval_steps_per_second": 7.632,
"step": 3550
},
{
"epoch": 33.27,
"learning_rate": 6.699065420560749e-05,
"loss": 0.0061,
"step": 3560
},
{
"epoch": 33.27,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.605916976928711,
"eval_runtime": 3.094,
"eval_samples_per_second": 61.086,
"eval_steps_per_second": 7.757,
"step": 3560
},
{
"epoch": 33.36,
"learning_rate": 6.661682242990654e-05,
"loss": 0.0098,
"step": 3570
},
{
"epoch": 33.36,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6077380180358887,
"eval_runtime": 3.1529,
"eval_samples_per_second": 59.944,
"eval_steps_per_second": 7.612,
"step": 3570
},
{
"epoch": 33.46,
"learning_rate": 6.624299065420561e-05,
"loss": 0.005,
"step": 3580
},
{
"epoch": 33.46,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6049844026565552,
"eval_runtime": 3.2167,
"eval_samples_per_second": 58.755,
"eval_steps_per_second": 7.461,
"step": 3580
},
{
"epoch": 33.55,
"learning_rate": 6.586915887850467e-05,
"loss": 0.0007,
"step": 3590
},
{
"epoch": 33.55,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6010342836380005,
"eval_runtime": 3.1643,
"eval_samples_per_second": 59.729,
"eval_steps_per_second": 7.585,
"step": 3590
},
{
"epoch": 33.64,
"learning_rate": 6.549532710280374e-05,
"loss": 0.0065,
"step": 3600
},
{
"epoch": 33.64,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6033494472503662,
"eval_runtime": 3.3617,
"eval_samples_per_second": 56.222,
"eval_steps_per_second": 7.139,
"step": 3600
},
{
"epoch": 33.74,
"learning_rate": 6.512149532710281e-05,
"loss": 0.0047,
"step": 3610
},
{
"epoch": 33.74,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.602766513824463,
"eval_runtime": 3.347,
"eval_samples_per_second": 56.469,
"eval_steps_per_second": 7.171,
"step": 3610
},
{
"epoch": 33.83,
"learning_rate": 6.474766355140188e-05,
"loss": 0.0174,
"step": 3620
},
{
"epoch": 33.83,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.6135550737380981,
"eval_runtime": 3.3024,
"eval_samples_per_second": 57.231,
"eval_steps_per_second": 7.267,
"step": 3620
},
{
"epoch": 33.93,
"learning_rate": 6.437383177570093e-05,
"loss": 0.0057,
"step": 3630
},
{
"epoch": 33.93,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.6196566820144653,
"eval_runtime": 3.1607,
"eval_samples_per_second": 59.797,
"eval_steps_per_second": 7.593,
"step": 3630
},
{
"epoch": 34.02,
"learning_rate": 6.400000000000001e-05,
"loss": 0.0007,
"step": 3640
},
{
"epoch": 34.02,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.6192022562026978,
"eval_runtime": 3.2012,
"eval_samples_per_second": 59.04,
"eval_steps_per_second": 7.497,
"step": 3640
},
{
"epoch": 34.11,
"learning_rate": 6.362616822429906e-05,
"loss": 0.01,
"step": 3650
},
{
"epoch": 34.11,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.616557240486145,
"eval_runtime": 3.0372,
"eval_samples_per_second": 62.228,
"eval_steps_per_second": 7.902,
"step": 3650
},
{
"epoch": 34.21,
"learning_rate": 6.325233644859813e-05,
"loss": 0.0144,
"step": 3660
},
{
"epoch": 34.21,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.6212772130966187,
"eval_runtime": 3.1343,
"eval_samples_per_second": 60.3,
"eval_steps_per_second": 7.657,
"step": 3660
},
{
"epoch": 34.3,
"learning_rate": 6.28785046728972e-05,
"loss": 0.0007,
"step": 3670
},
{
"epoch": 34.3,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.6278337240219116,
"eval_runtime": 3.1421,
"eval_samples_per_second": 60.15,
"eval_steps_per_second": 7.638,
"step": 3670
},
{
"epoch": 34.39,
"learning_rate": 6.250467289719625e-05,
"loss": 0.0095,
"step": 3680
},
{
"epoch": 34.39,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6321161985397339,
"eval_runtime": 3.249,
"eval_samples_per_second": 58.171,
"eval_steps_per_second": 7.387,
"step": 3680
},
{
"epoch": 34.49,
"learning_rate": 6.213084112149533e-05,
"loss": 0.0006,
"step": 3690
},
{
"epoch": 34.49,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6336792707443237,
"eval_runtime": 3.1451,
"eval_samples_per_second": 60.093,
"eval_steps_per_second": 7.631,
"step": 3690
},
{
"epoch": 34.58,
"learning_rate": 6.175700934579439e-05,
"loss": 0.0007,
"step": 3700
},
{
"epoch": 34.58,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6350607872009277,
"eval_runtime": 3.1678,
"eval_samples_per_second": 59.662,
"eval_steps_per_second": 7.576,
"step": 3700
},
{
"epoch": 34.67,
"learning_rate": 6.138317757009347e-05,
"loss": 0.0006,
"step": 3710
},
{
"epoch": 34.67,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6363701820373535,
"eval_runtime": 3.2215,
"eval_samples_per_second": 58.669,
"eval_steps_per_second": 7.45,
"step": 3710
},
{
"epoch": 34.77,
"learning_rate": 6.100934579439253e-05,
"loss": 0.0063,
"step": 3720
},
{
"epoch": 34.77,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6366699934005737,
"eval_runtime": 3.2135,
"eval_samples_per_second": 58.815,
"eval_steps_per_second": 7.469,
"step": 3720
},
{
"epoch": 34.86,
"learning_rate": 6.0635514018691595e-05,
"loss": 0.0062,
"step": 3730
},
{
"epoch": 34.86,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6348670721054077,
"eval_runtime": 3.0917,
"eval_samples_per_second": 61.132,
"eval_steps_per_second": 7.763,
"step": 3730
},
{
"epoch": 34.95,
"learning_rate": 6.0261682242990656e-05,
"loss": 0.0064,
"step": 3740
},
{
"epoch": 34.95,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6241209506988525,
"eval_runtime": 3.38,
"eval_samples_per_second": 55.917,
"eval_steps_per_second": 7.101,
"step": 3740
},
{
"epoch": 35.05,
"learning_rate": 5.9887850467289716e-05,
"loss": 0.0006,
"step": 3750
},
{
"epoch": 35.05,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6208295822143555,
"eval_runtime": 3.2066,
"eval_samples_per_second": 58.942,
"eval_steps_per_second": 7.485,
"step": 3750
},
{
"epoch": 35.14,
"learning_rate": 5.951401869158879e-05,
"loss": 0.0006,
"step": 3760
},
{
"epoch": 35.14,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6204679012298584,
"eval_runtime": 3.2685,
"eval_samples_per_second": 57.825,
"eval_steps_per_second": 7.343,
"step": 3760
},
{
"epoch": 35.23,
"learning_rate": 5.914018691588785e-05,
"loss": 0.0137,
"step": 3770
},
{
"epoch": 35.23,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6377967596054077,
"eval_runtime": 3.1739,
"eval_samples_per_second": 59.549,
"eval_steps_per_second": 7.562,
"step": 3770
},
{
"epoch": 35.33,
"learning_rate": 5.8766355140186925e-05,
"loss": 0.0007,
"step": 3780
},
{
"epoch": 35.33,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6510041952133179,
"eval_runtime": 3.0141,
"eval_samples_per_second": 62.706,
"eval_steps_per_second": 7.963,
"step": 3780
},
{
"epoch": 35.42,
"learning_rate": 5.8392523364485985e-05,
"loss": 0.0113,
"step": 3790
},
{
"epoch": 35.42,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6442958116531372,
"eval_runtime": 3.0277,
"eval_samples_per_second": 62.424,
"eval_steps_per_second": 7.927,
"step": 3790
},
{
"epoch": 35.51,
"learning_rate": 5.8018691588785046e-05,
"loss": 0.0112,
"step": 3800
},
{
"epoch": 35.51,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6433544158935547,
"eval_runtime": 3.1874,
"eval_samples_per_second": 59.295,
"eval_steps_per_second": 7.53,
"step": 3800
},
{
"epoch": 35.61,
"learning_rate": 5.764485981308412e-05,
"loss": 0.0006,
"step": 3810
},
{
"epoch": 35.61,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6435635089874268,
"eval_runtime": 3.281,
"eval_samples_per_second": 57.604,
"eval_steps_per_second": 7.315,
"step": 3810
},
{
"epoch": 35.7,
"learning_rate": 5.727102803738318e-05,
"loss": 0.0006,
"step": 3820
},
{
"epoch": 35.7,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6441317796707153,
"eval_runtime": 3.1827,
"eval_samples_per_second": 59.384,
"eval_steps_per_second": 7.541,
"step": 3820
},
{
"epoch": 35.79,
"learning_rate": 5.6897196261682254e-05,
"loss": 0.0145,
"step": 3830
},
{
"epoch": 35.79,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6258851289749146,
"eval_runtime": 3.2346,
"eval_samples_per_second": 58.431,
"eval_steps_per_second": 7.42,
"step": 3830
},
{
"epoch": 35.89,
"learning_rate": 5.6523364485981315e-05,
"loss": 0.0006,
"step": 3840
},
{
"epoch": 35.89,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6195772886276245,
"eval_runtime": 3.2109,
"eval_samples_per_second": 58.862,
"eval_steps_per_second": 7.475,
"step": 3840
},
{
"epoch": 35.98,
"learning_rate": 5.6149532710280375e-05,
"loss": 0.0047,
"step": 3850
},
{
"epoch": 35.98,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6145455837249756,
"eval_runtime": 3.1308,
"eval_samples_per_second": 60.367,
"eval_steps_per_second": 7.666,
"step": 3850
},
{
"epoch": 36.07,
"learning_rate": 5.577570093457944e-05,
"loss": 0.0096,
"step": 3860
},
{
"epoch": 36.07,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6121631860733032,
"eval_runtime": 3.1516,
"eval_samples_per_second": 59.97,
"eval_steps_per_second": 7.615,
"step": 3860
},
{
"epoch": 36.17,
"learning_rate": 5.54018691588785e-05,
"loss": 0.0006,
"step": 3870
},
{
"epoch": 36.17,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6133732795715332,
"eval_runtime": 3.2412,
"eval_samples_per_second": 58.312,
"eval_steps_per_second": 7.405,
"step": 3870
},
{
"epoch": 36.26,
"learning_rate": 5.502803738317758e-05,
"loss": 0.0063,
"step": 3880
},
{
"epoch": 36.26,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6175814867019653,
"eval_runtime": 3.1207,
"eval_samples_per_second": 60.563,
"eval_steps_per_second": 7.691,
"step": 3880
},
{
"epoch": 36.36,
"learning_rate": 5.465420560747664e-05,
"loss": 0.0049,
"step": 3890
},
{
"epoch": 36.36,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6226632595062256,
"eval_runtime": 3.2034,
"eval_samples_per_second": 59.0,
"eval_steps_per_second": 7.492,
"step": 3890
},
{
"epoch": 36.45,
"learning_rate": 5.42803738317757e-05,
"loss": 0.0006,
"step": 3900
},
{
"epoch": 36.45,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6221052408218384,
"eval_runtime": 3.2707,
"eval_samples_per_second": 57.786,
"eval_steps_per_second": 7.338,
"step": 3900
},
{
"epoch": 36.54,
"learning_rate": 5.390654205607477e-05,
"loss": 0.0057,
"step": 3910
},
{
"epoch": 36.54,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.626526951789856,
"eval_runtime": 3.2153,
"eval_samples_per_second": 58.781,
"eval_steps_per_second": 7.464,
"step": 3910
},
{
"epoch": 36.64,
"learning_rate": 5.353271028037383e-05,
"loss": 0.0006,
"step": 3920
},
{
"epoch": 36.64,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6315840482711792,
"eval_runtime": 3.2376,
"eval_samples_per_second": 58.377,
"eval_steps_per_second": 7.413,
"step": 3920
},
{
"epoch": 36.73,
"learning_rate": 5.3158878504672906e-05,
"loss": 0.0095,
"step": 3930
},
{
"epoch": 36.73,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6349341869354248,
"eval_runtime": 3.1421,
"eval_samples_per_second": 60.15,
"eval_steps_per_second": 7.638,
"step": 3930
},
{
"epoch": 36.82,
"learning_rate": 5.2785046728971966e-05,
"loss": 0.0062,
"step": 3940
},
{
"epoch": 36.82,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6396187543869019,
"eval_runtime": 3.1897,
"eval_samples_per_second": 59.254,
"eval_steps_per_second": 7.524,
"step": 3940
},
{
"epoch": 36.92,
"learning_rate": 5.241121495327103e-05,
"loss": 0.0062,
"step": 3950
},
{
"epoch": 36.92,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6347572803497314,
"eval_runtime": 3.3171,
"eval_samples_per_second": 56.978,
"eval_steps_per_second": 7.235,
"step": 3950
},
{
"epoch": 37.01,
"learning_rate": 5.20373831775701e-05,
"loss": 0.0052,
"step": 3960
},
{
"epoch": 37.01,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6283901929855347,
"eval_runtime": 3.2006,
"eval_samples_per_second": 59.051,
"eval_steps_per_second": 7.499,
"step": 3960
},
{
"epoch": 37.1,
"learning_rate": 5.166355140186916e-05,
"loss": 0.0054,
"step": 3970
},
{
"epoch": 37.1,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6296182870864868,
"eval_runtime": 3.2544,
"eval_samples_per_second": 58.075,
"eval_steps_per_second": 7.375,
"step": 3970
},
{
"epoch": 37.2,
"learning_rate": 5.128971962616823e-05,
"loss": 0.0142,
"step": 3980
},
{
"epoch": 37.2,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6342829465866089,
"eval_runtime": 3.2879,
"eval_samples_per_second": 57.483,
"eval_steps_per_second": 7.299,
"step": 3980
},
{
"epoch": 37.29,
"learning_rate": 5.091588785046729e-05,
"loss": 0.0006,
"step": 3990
},
{
"epoch": 37.29,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6389538049697876,
"eval_runtime": 3.1746,
"eval_samples_per_second": 59.535,
"eval_steps_per_second": 7.56,
"step": 3990
},
{
"epoch": 37.38,
"learning_rate": 5.0542056074766356e-05,
"loss": 0.0056,
"step": 4000
},
{
"epoch": 37.38,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.63650643825531,
"eval_runtime": 3.2374,
"eval_samples_per_second": 58.38,
"eval_steps_per_second": 7.413,
"step": 4000
},
{
"epoch": 37.48,
"learning_rate": 5.0168224299065423e-05,
"loss": 0.0006,
"step": 4010
},
{
"epoch": 37.48,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6352988481521606,
"eval_runtime": 3.2389,
"eval_samples_per_second": 58.353,
"eval_steps_per_second": 7.41,
"step": 4010
},
{
"epoch": 37.57,
"learning_rate": 4.9794392523364484e-05,
"loss": 0.0006,
"step": 4020
},
{
"epoch": 37.57,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6355526447296143,
"eval_runtime": 3.3214,
"eval_samples_per_second": 56.904,
"eval_steps_per_second": 7.226,
"step": 4020
},
{
"epoch": 37.66,
"learning_rate": 4.942056074766355e-05,
"loss": 0.0006,
"step": 4030
},
{
"epoch": 37.66,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6356136798858643,
"eval_runtime": 3.3612,
"eval_samples_per_second": 56.23,
"eval_steps_per_second": 7.14,
"step": 4030
},
{
"epoch": 37.76,
"learning_rate": 4.904672897196262e-05,
"loss": 0.0006,
"step": 4040
},
{
"epoch": 37.76,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6366937160491943,
"eval_runtime": 3.2144,
"eval_samples_per_second": 58.797,
"eval_steps_per_second": 7.466,
"step": 4040
},
{
"epoch": 37.85,
"learning_rate": 4.8672897196261686e-05,
"loss": 0.01,
"step": 4050
},
{
"epoch": 37.85,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.631759762763977,
"eval_runtime": 3.29,
"eval_samples_per_second": 57.446,
"eval_steps_per_second": 7.295,
"step": 4050
},
{
"epoch": 37.94,
"learning_rate": 4.829906542056075e-05,
"loss": 0.0151,
"step": 4060
},
{
"epoch": 37.94,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6293249130249023,
"eval_runtime": 3.1536,
"eval_samples_per_second": 59.931,
"eval_steps_per_second": 7.61,
"step": 4060
},
{
"epoch": 38.04,
"learning_rate": 4.792523364485981e-05,
"loss": 0.006,
"step": 4070
},
{
"epoch": 38.04,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6346244812011719,
"eval_runtime": 3.2143,
"eval_samples_per_second": 58.801,
"eval_steps_per_second": 7.467,
"step": 4070
},
{
"epoch": 38.13,
"learning_rate": 4.755140186915888e-05,
"loss": 0.0006,
"step": 4080
},
{
"epoch": 38.13,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6401140689849854,
"eval_runtime": 3.257,
"eval_samples_per_second": 58.028,
"eval_steps_per_second": 7.369,
"step": 4080
},
{
"epoch": 38.22,
"learning_rate": 4.717757009345795e-05,
"loss": 0.0006,
"step": 4090
},
{
"epoch": 38.22,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.642866849899292,
"eval_runtime": 3.1879,
"eval_samples_per_second": 59.286,
"eval_steps_per_second": 7.528,
"step": 4090
},
{
"epoch": 38.32,
"learning_rate": 4.6803738317757015e-05,
"loss": 0.0093,
"step": 4100
},
{
"epoch": 38.32,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.635596513748169,
"eval_runtime": 3.2354,
"eval_samples_per_second": 58.417,
"eval_steps_per_second": 7.418,
"step": 4100
},
{
"epoch": 38.41,
"learning_rate": 4.6429906542056075e-05,
"loss": 0.0046,
"step": 4110
},
{
"epoch": 38.41,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6306812763214111,
"eval_runtime": 3.2449,
"eval_samples_per_second": 58.246,
"eval_steps_per_second": 7.396,
"step": 4110
},
{
"epoch": 38.5,
"learning_rate": 4.605607476635514e-05,
"loss": 0.0111,
"step": 4120
},
{
"epoch": 38.5,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.6402584314346313,
"eval_runtime": 3.1872,
"eval_samples_per_second": 59.299,
"eval_steps_per_second": 7.53,
"step": 4120
},
{
"epoch": 38.6,
"learning_rate": 4.56822429906542e-05,
"loss": 0.0005,
"step": 4130
},
{
"epoch": 38.6,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.6475365161895752,
"eval_runtime": 3.5346,
"eval_samples_per_second": 53.472,
"eval_steps_per_second": 6.79,
"step": 4130
},
{
"epoch": 38.69,
"learning_rate": 4.530841121495327e-05,
"loss": 0.0006,
"step": 4140
},
{
"epoch": 38.69,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.6509393453598022,
"eval_runtime": 3.062,
"eval_samples_per_second": 61.724,
"eval_steps_per_second": 7.838,
"step": 4140
},
{
"epoch": 38.79,
"learning_rate": 4.493457943925234e-05,
"loss": 0.0006,
"step": 4150
},
{
"epoch": 38.79,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.652215600013733,
"eval_runtime": 3.5079,
"eval_samples_per_second": 53.878,
"eval_steps_per_second": 6.842,
"step": 4150
},
{
"epoch": 38.88,
"learning_rate": 4.4560747663551405e-05,
"loss": 0.0075,
"step": 4160
},
{
"epoch": 38.88,
"eval_accuracy": 0.7566137566137566,
"eval_loss": 1.653160810470581,
"eval_runtime": 3.2762,
"eval_samples_per_second": 57.688,
"eval_steps_per_second": 7.325,
"step": 4160
},
{
"epoch": 38.97,
"learning_rate": 4.418691588785047e-05,
"loss": 0.0097,
"step": 4170
},
{
"epoch": 38.97,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6547893285751343,
"eval_runtime": 3.2144,
"eval_samples_per_second": 58.797,
"eval_steps_per_second": 7.466,
"step": 4170
},
{
"epoch": 39.07,
"learning_rate": 4.381308411214953e-05,
"loss": 0.0058,
"step": 4180
},
{
"epoch": 39.07,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6471433639526367,
"eval_runtime": 3.2814,
"eval_samples_per_second": 57.598,
"eval_steps_per_second": 7.314,
"step": 4180
},
{
"epoch": 39.16,
"learning_rate": 4.34392523364486e-05,
"loss": 0.0049,
"step": 4190
},
{
"epoch": 39.16,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6409401893615723,
"eval_runtime": 3.1609,
"eval_samples_per_second": 59.793,
"eval_steps_per_second": 7.593,
"step": 4190
},
{
"epoch": 39.25,
"learning_rate": 4.306542056074767e-05,
"loss": 0.0111,
"step": 4200
},
{
"epoch": 39.25,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6414356231689453,
"eval_runtime": 3.2279,
"eval_samples_per_second": 58.552,
"eval_steps_per_second": 7.435,
"step": 4200
},
{
"epoch": 39.35,
"learning_rate": 4.2691588785046734e-05,
"loss": 0.0052,
"step": 4210
},
{
"epoch": 39.35,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.652433156967163,
"eval_runtime": 3.1373,
"eval_samples_per_second": 60.242,
"eval_steps_per_second": 7.65,
"step": 4210
},
{
"epoch": 39.44,
"learning_rate": 4.23177570093458e-05,
"loss": 0.0005,
"step": 4220
},
{
"epoch": 39.44,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.657133936882019,
"eval_runtime": 3.2697,
"eval_samples_per_second": 57.804,
"eval_steps_per_second": 7.34,
"step": 4220
},
{
"epoch": 39.53,
"learning_rate": 4.194392523364486e-05,
"loss": 0.0052,
"step": 4230
},
{
"epoch": 39.53,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6594574451446533,
"eval_runtime": 3.2345,
"eval_samples_per_second": 58.432,
"eval_steps_per_second": 7.42,
"step": 4230
},
{
"epoch": 39.63,
"learning_rate": 4.157009345794393e-05,
"loss": 0.0061,
"step": 4240
},
{
"epoch": 39.63,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6557565927505493,
"eval_runtime": 3.268,
"eval_samples_per_second": 57.833,
"eval_steps_per_second": 7.344,
"step": 4240
},
{
"epoch": 39.72,
"learning_rate": 4.119626168224299e-05,
"loss": 0.0056,
"step": 4250
},
{
"epoch": 39.72,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.649285912513733,
"eval_runtime": 3.2248,
"eval_samples_per_second": 58.608,
"eval_steps_per_second": 7.442,
"step": 4250
},
{
"epoch": 39.81,
"learning_rate": 4.082242990654206e-05,
"loss": 0.0006,
"step": 4260
},
{
"epoch": 39.81,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6455833911895752,
"eval_runtime": 3.1611,
"eval_samples_per_second": 59.789,
"eval_steps_per_second": 7.592,
"step": 4260
},
{
"epoch": 39.91,
"learning_rate": 4.0448598130841124e-05,
"loss": 0.011,
"step": 4270
},
{
"epoch": 39.91,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6422607898712158,
"eval_runtime": 3.2656,
"eval_samples_per_second": 57.877,
"eval_steps_per_second": 7.349,
"step": 4270
},
{
"epoch": 40.0,
"learning_rate": 4.0074766355140184e-05,
"loss": 0.006,
"step": 4280
},
{
"epoch": 40.0,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6446949243545532,
"eval_runtime": 3.3462,
"eval_samples_per_second": 56.481,
"eval_steps_per_second": 7.172,
"step": 4280
},
{
"epoch": 40.09,
"learning_rate": 3.970093457943925e-05,
"loss": 0.0043,
"step": 4290
},
{
"epoch": 40.09,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6466460227966309,
"eval_runtime": 3.3213,
"eval_samples_per_second": 56.906,
"eval_steps_per_second": 7.226,
"step": 4290
},
{
"epoch": 40.19,
"learning_rate": 3.932710280373832e-05,
"loss": 0.0098,
"step": 4300
},
{
"epoch": 40.19,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6464401483535767,
"eval_runtime": 3.1753,
"eval_samples_per_second": 59.521,
"eval_steps_per_second": 7.558,
"step": 4300
},
{
"epoch": 40.28,
"learning_rate": 3.8953271028037386e-05,
"loss": 0.006,
"step": 4310
},
{
"epoch": 40.28,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6519055366516113,
"eval_runtime": 3.2608,
"eval_samples_per_second": 57.961,
"eval_steps_per_second": 7.36,
"step": 4310
},
{
"epoch": 40.37,
"learning_rate": 3.857943925233645e-05,
"loss": 0.0053,
"step": 4320
},
{
"epoch": 40.37,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6609773635864258,
"eval_runtime": 3.2634,
"eval_samples_per_second": 57.916,
"eval_steps_per_second": 7.354,
"step": 4320
},
{
"epoch": 40.47,
"learning_rate": 3.8205607476635514e-05,
"loss": 0.0005,
"step": 4330
},
{
"epoch": 40.47,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6645921468734741,
"eval_runtime": 3.2004,
"eval_samples_per_second": 59.054,
"eval_steps_per_second": 7.499,
"step": 4330
},
{
"epoch": 40.56,
"learning_rate": 3.783177570093458e-05,
"loss": 0.0006,
"step": 4340
},
{
"epoch": 40.56,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6660394668579102,
"eval_runtime": 3.415,
"eval_samples_per_second": 55.344,
"eval_steps_per_second": 7.028,
"step": 4340
},
{
"epoch": 40.65,
"learning_rate": 3.745794392523365e-05,
"loss": 0.0082,
"step": 4350
},
{
"epoch": 40.65,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.669595718383789,
"eval_runtime": 3.3898,
"eval_samples_per_second": 55.756,
"eval_steps_per_second": 7.08,
"step": 4350
},
{
"epoch": 40.75,
"learning_rate": 3.7084112149532715e-05,
"loss": 0.0064,
"step": 4360
},
{
"epoch": 40.75,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.672002911567688,
"eval_runtime": 3.3575,
"eval_samples_per_second": 56.291,
"eval_steps_per_second": 7.148,
"step": 4360
},
{
"epoch": 40.84,
"learning_rate": 3.6710280373831776e-05,
"loss": 0.0005,
"step": 4370
},
{
"epoch": 40.84,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6732321977615356,
"eval_runtime": 2.9775,
"eval_samples_per_second": 63.477,
"eval_steps_per_second": 8.061,
"step": 4370
},
{
"epoch": 40.93,
"learning_rate": 3.633644859813084e-05,
"loss": 0.0065,
"step": 4380
},
{
"epoch": 40.93,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6676148176193237,
"eval_runtime": 3.245,
"eval_samples_per_second": 58.243,
"eval_steps_per_second": 7.396,
"step": 4380
},
{
"epoch": 41.03,
"learning_rate": 3.5962616822429904e-05,
"loss": 0.006,
"step": 4390
},
{
"epoch": 41.03,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6597942113876343,
"eval_runtime": 3.4525,
"eval_samples_per_second": 54.742,
"eval_steps_per_second": 6.951,
"step": 4390
},
{
"epoch": 41.12,
"learning_rate": 3.558878504672897e-05,
"loss": 0.0047,
"step": 4400
},
{
"epoch": 41.12,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6621626615524292,
"eval_runtime": 3.1882,
"eval_samples_per_second": 59.28,
"eval_steps_per_second": 7.528,
"step": 4400
},
{
"epoch": 41.21,
"learning_rate": 3.521495327102804e-05,
"loss": 0.0055,
"step": 4410
},
{
"epoch": 41.21,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6600810289382935,
"eval_runtime": 3.1527,
"eval_samples_per_second": 59.949,
"eval_steps_per_second": 7.613,
"step": 4410
},
{
"epoch": 41.31,
"learning_rate": 3.4841121495327105e-05,
"loss": 0.0049,
"step": 4420
},
{
"epoch": 41.31,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6604359149932861,
"eval_runtime": 3.0872,
"eval_samples_per_second": 61.22,
"eval_steps_per_second": 7.774,
"step": 4420
},
{
"epoch": 41.4,
"learning_rate": 3.446728971962617e-05,
"loss": 0.0005,
"step": 4430
},
{
"epoch": 41.4,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.664872646331787,
"eval_runtime": 3.2713,
"eval_samples_per_second": 57.776,
"eval_steps_per_second": 7.337,
"step": 4430
},
{
"epoch": 41.5,
"learning_rate": 3.409345794392523e-05,
"loss": 0.005,
"step": 4440
},
{
"epoch": 41.5,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6663637161254883,
"eval_runtime": 3.1479,
"eval_samples_per_second": 60.04,
"eval_steps_per_second": 7.624,
"step": 4440
},
{
"epoch": 41.59,
"learning_rate": 3.37196261682243e-05,
"loss": 0.0098,
"step": 4450
},
{
"epoch": 41.59,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6684166193008423,
"eval_runtime": 3.1477,
"eval_samples_per_second": 60.043,
"eval_steps_per_second": 7.625,
"step": 4450
},
{
"epoch": 41.68,
"learning_rate": 3.334579439252337e-05,
"loss": 0.0005,
"step": 4460
},
{
"epoch": 41.68,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.672025203704834,
"eval_runtime": 3.1099,
"eval_samples_per_second": 60.773,
"eval_steps_per_second": 7.717,
"step": 4460
},
{
"epoch": 41.78,
"learning_rate": 3.2971962616822435e-05,
"loss": 0.0148,
"step": 4470
},
{
"epoch": 41.78,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6690597534179688,
"eval_runtime": 3.17,
"eval_samples_per_second": 59.622,
"eval_steps_per_second": 7.571,
"step": 4470
},
{
"epoch": 41.87,
"learning_rate": 3.25981308411215e-05,
"loss": 0.0005,
"step": 4480
},
{
"epoch": 41.87,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6645516157150269,
"eval_runtime": 3.0775,
"eval_samples_per_second": 61.414,
"eval_steps_per_second": 7.799,
"step": 4480
},
{
"epoch": 41.96,
"learning_rate": 3.222429906542056e-05,
"loss": 0.0052,
"step": 4490
},
{
"epoch": 41.96,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6626436710357666,
"eval_runtime": 3.1486,
"eval_samples_per_second": 60.027,
"eval_steps_per_second": 7.622,
"step": 4490
},
{
"epoch": 42.06,
"learning_rate": 3.185046728971963e-05,
"loss": 0.0052,
"step": 4500
},
{
"epoch": 42.06,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6587588787078857,
"eval_runtime": 3.3613,
"eval_samples_per_second": 56.229,
"eval_steps_per_second": 7.14,
"step": 4500
},
{
"epoch": 42.15,
"learning_rate": 3.147663551401869e-05,
"loss": 0.0044,
"step": 4510
},
{
"epoch": 42.15,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6567378044128418,
"eval_runtime": 3.1117,
"eval_samples_per_second": 60.738,
"eval_steps_per_second": 7.713,
"step": 4510
},
{
"epoch": 42.24,
"learning_rate": 3.110280373831776e-05,
"loss": 0.0059,
"step": 4520
},
{
"epoch": 42.24,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6559375524520874,
"eval_runtime": 3.1594,
"eval_samples_per_second": 59.821,
"eval_steps_per_second": 7.596,
"step": 4520
},
{
"epoch": 42.34,
"learning_rate": 3.0728971962616824e-05,
"loss": 0.0005,
"step": 4530
},
{
"epoch": 42.34,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6584206819534302,
"eval_runtime": 3.215,
"eval_samples_per_second": 58.786,
"eval_steps_per_second": 7.465,
"step": 4530
},
{
"epoch": 42.43,
"learning_rate": 3.0355140186915888e-05,
"loss": 0.0046,
"step": 4540
},
{
"epoch": 42.43,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.658771276473999,
"eval_runtime": 3.081,
"eval_samples_per_second": 61.344,
"eval_steps_per_second": 7.79,
"step": 4540
},
{
"epoch": 42.52,
"learning_rate": 2.9981308411214952e-05,
"loss": 0.0005,
"step": 4550
},
{
"epoch": 42.52,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6556823253631592,
"eval_runtime": 3.7215,
"eval_samples_per_second": 50.786,
"eval_steps_per_second": 6.449,
"step": 4550
},
{
"epoch": 42.62,
"learning_rate": 2.960747663551402e-05,
"loss": 0.0005,
"step": 4560
},
{
"epoch": 42.62,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6551687717437744,
"eval_runtime": 3.2069,
"eval_samples_per_second": 58.935,
"eval_steps_per_second": 7.484,
"step": 4560
},
{
"epoch": 42.71,
"learning_rate": 2.9233644859813087e-05,
"loss": 0.0005,
"step": 4570
},
{
"epoch": 42.71,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.655452013015747,
"eval_runtime": 3.2606,
"eval_samples_per_second": 57.964,
"eval_steps_per_second": 7.361,
"step": 4570
},
{
"epoch": 42.8,
"learning_rate": 2.8859813084112154e-05,
"loss": 0.0107,
"step": 4580
},
{
"epoch": 42.8,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6596006155014038,
"eval_runtime": 3.241,
"eval_samples_per_second": 58.315,
"eval_steps_per_second": 7.405,
"step": 4580
},
{
"epoch": 42.9,
"learning_rate": 2.8485981308411214e-05,
"loss": 0.015,
"step": 4590
},
{
"epoch": 42.9,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6657310724258423,
"eval_runtime": 3.1747,
"eval_samples_per_second": 59.534,
"eval_steps_per_second": 7.56,
"step": 4590
},
{
"epoch": 42.99,
"learning_rate": 2.811214953271028e-05,
"loss": 0.0052,
"step": 4600
},
{
"epoch": 42.99,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6676826477050781,
"eval_runtime": 3.254,
"eval_samples_per_second": 58.083,
"eval_steps_per_second": 7.376,
"step": 4600
},
{
"epoch": 43.08,
"learning_rate": 2.7738317757009345e-05,
"loss": 0.0051,
"step": 4610
},
{
"epoch": 43.08,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.668189525604248,
"eval_runtime": 3.1876,
"eval_samples_per_second": 59.293,
"eval_steps_per_second": 7.529,
"step": 4610
},
{
"epoch": 43.18,
"learning_rate": 2.7364485981308413e-05,
"loss": 0.0051,
"step": 4620
},
{
"epoch": 43.18,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6663062572479248,
"eval_runtime": 3.3928,
"eval_samples_per_second": 55.707,
"eval_steps_per_second": 7.074,
"step": 4620
},
{
"epoch": 43.27,
"learning_rate": 2.699065420560748e-05,
"loss": 0.0005,
"step": 4630
},
{
"epoch": 43.27,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.664025068283081,
"eval_runtime": 3.2939,
"eval_samples_per_second": 57.378,
"eval_steps_per_second": 7.286,
"step": 4630
},
{
"epoch": 43.36,
"learning_rate": 2.6616822429906547e-05,
"loss": 0.0088,
"step": 4640
},
{
"epoch": 43.36,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6622815132141113,
"eval_runtime": 3.135,
"eval_samples_per_second": 60.287,
"eval_steps_per_second": 7.655,
"step": 4640
},
{
"epoch": 43.46,
"learning_rate": 2.6242990654205607e-05,
"loss": 0.0053,
"step": 4650
},
{
"epoch": 43.46,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.664080262184143,
"eval_runtime": 3.2274,
"eval_samples_per_second": 58.56,
"eval_steps_per_second": 7.436,
"step": 4650
},
{
"epoch": 43.55,
"learning_rate": 2.5869158878504675e-05,
"loss": 0.0064,
"step": 4660
},
{
"epoch": 43.55,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.665493130683899,
"eval_runtime": 3.1518,
"eval_samples_per_second": 59.966,
"eval_steps_per_second": 7.615,
"step": 4660
},
{
"epoch": 43.64,
"learning_rate": 2.549532710280374e-05,
"loss": 0.0005,
"step": 4670
},
{
"epoch": 43.64,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6664865016937256,
"eval_runtime": 3.2866,
"eval_samples_per_second": 57.506,
"eval_steps_per_second": 7.302,
"step": 4670
},
{
"epoch": 43.74,
"learning_rate": 2.5121495327102806e-05,
"loss": 0.005,
"step": 4680
},
{
"epoch": 43.74,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6677204370498657,
"eval_runtime": 3.2047,
"eval_samples_per_second": 58.976,
"eval_steps_per_second": 7.489,
"step": 4680
},
{
"epoch": 43.83,
"learning_rate": 2.474766355140187e-05,
"loss": 0.0049,
"step": 4690
},
{
"epoch": 43.83,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6758949756622314,
"eval_runtime": 3.4806,
"eval_samples_per_second": 54.301,
"eval_steps_per_second": 6.895,
"step": 4690
},
{
"epoch": 43.93,
"learning_rate": 2.4373831775700937e-05,
"loss": 0.0055,
"step": 4700
},
{
"epoch": 43.93,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6764609813690186,
"eval_runtime": 3.2004,
"eval_samples_per_second": 59.055,
"eval_steps_per_second": 7.499,
"step": 4700
},
{
"epoch": 44.02,
"learning_rate": 2.4e-05,
"loss": 0.0144,
"step": 4710
},
{
"epoch": 44.02,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6740403175354004,
"eval_runtime": 3.2944,
"eval_samples_per_second": 57.37,
"eval_steps_per_second": 7.285,
"step": 4710
},
{
"epoch": 44.11,
"learning_rate": 2.3626168224299068e-05,
"loss": 0.0005,
"step": 4720
},
{
"epoch": 44.11,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6708952188491821,
"eval_runtime": 3.0279,
"eval_samples_per_second": 62.42,
"eval_steps_per_second": 7.926,
"step": 4720
},
{
"epoch": 44.21,
"learning_rate": 2.325233644859813e-05,
"loss": 0.0051,
"step": 4730
},
{
"epoch": 44.21,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6715834140777588,
"eval_runtime": 3.2347,
"eval_samples_per_second": 58.428,
"eval_steps_per_second": 7.419,
"step": 4730
},
{
"epoch": 44.3,
"learning_rate": 2.2878504672897196e-05,
"loss": 0.005,
"step": 4740
},
{
"epoch": 44.3,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6689761877059937,
"eval_runtime": 3.1331,
"eval_samples_per_second": 60.324,
"eval_steps_per_second": 7.66,
"step": 4740
},
{
"epoch": 44.39,
"learning_rate": 2.2504672897196263e-05,
"loss": 0.0005,
"step": 4750
},
{
"epoch": 44.39,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6669028997421265,
"eval_runtime": 2.9298,
"eval_samples_per_second": 64.509,
"eval_steps_per_second": 8.192,
"step": 4750
},
{
"epoch": 44.49,
"learning_rate": 2.2130841121495327e-05,
"loss": 0.005,
"step": 4760
},
{
"epoch": 44.49,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6714545488357544,
"eval_runtime": 3.0674,
"eval_samples_per_second": 61.615,
"eval_steps_per_second": 7.824,
"step": 4760
},
{
"epoch": 44.58,
"learning_rate": 2.1757009345794394e-05,
"loss": 0.0005,
"step": 4770
},
{
"epoch": 44.58,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6736085414886475,
"eval_runtime": 2.9894,
"eval_samples_per_second": 63.222,
"eval_steps_per_second": 8.028,
"step": 4770
},
{
"epoch": 44.67,
"learning_rate": 2.138317757009346e-05,
"loss": 0.0046,
"step": 4780
},
{
"epoch": 44.67,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.676229476928711,
"eval_runtime": 2.9735,
"eval_samples_per_second": 63.562,
"eval_steps_per_second": 8.071,
"step": 4780
},
{
"epoch": 44.77,
"learning_rate": 2.1009345794392525e-05,
"loss": 0.0055,
"step": 4790
},
{
"epoch": 44.77,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6757352352142334,
"eval_runtime": 3.1208,
"eval_samples_per_second": 60.561,
"eval_steps_per_second": 7.69,
"step": 4790
},
{
"epoch": 44.86,
"learning_rate": 2.063551401869159e-05,
"loss": 0.0098,
"step": 4800
},
{
"epoch": 44.86,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.670250654220581,
"eval_runtime": 3.1041,
"eval_samples_per_second": 60.887,
"eval_steps_per_second": 7.732,
"step": 4800
},
{
"epoch": 44.95,
"learning_rate": 2.0261682242990653e-05,
"loss": 0.005,
"step": 4810
},
{
"epoch": 44.95,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6712110042572021,
"eval_runtime": 3.374,
"eval_samples_per_second": 56.016,
"eval_steps_per_second": 7.113,
"step": 4810
},
{
"epoch": 45.05,
"learning_rate": 1.988785046728972e-05,
"loss": 0.005,
"step": 4820
},
{
"epoch": 45.05,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6744420528411865,
"eval_runtime": 3.1947,
"eval_samples_per_second": 59.161,
"eval_steps_per_second": 7.513,
"step": 4820
},
{
"epoch": 45.14,
"learning_rate": 1.9514018691588787e-05,
"loss": 0.0005,
"step": 4830
},
{
"epoch": 45.14,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6774510145187378,
"eval_runtime": 3.126,
"eval_samples_per_second": 60.46,
"eval_steps_per_second": 7.677,
"step": 4830
},
{
"epoch": 45.23,
"learning_rate": 1.914018691588785e-05,
"loss": 0.0005,
"step": 4840
},
{
"epoch": 45.23,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6790132522583008,
"eval_runtime": 2.9821,
"eval_samples_per_second": 63.379,
"eval_steps_per_second": 8.048,
"step": 4840
},
{
"epoch": 45.33,
"learning_rate": 1.8766355140186918e-05,
"loss": 0.0005,
"step": 4850
},
{
"epoch": 45.33,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6794980764389038,
"eval_runtime": 3.139,
"eval_samples_per_second": 60.21,
"eval_steps_per_second": 7.646,
"step": 4850
},
{
"epoch": 45.42,
"learning_rate": 1.8392523364485982e-05,
"loss": 0.0049,
"step": 4860
},
{
"epoch": 45.42,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6801530122756958,
"eval_runtime": 2.945,
"eval_samples_per_second": 64.176,
"eval_steps_per_second": 8.149,
"step": 4860
},
{
"epoch": 45.51,
"learning_rate": 1.8018691588785046e-05,
"loss": 0.0051,
"step": 4870
},
{
"epoch": 45.51,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6776082515716553,
"eval_runtime": 2.9852,
"eval_samples_per_second": 63.313,
"eval_steps_per_second": 8.04,
"step": 4870
},
{
"epoch": 45.61,
"learning_rate": 1.7644859813084113e-05,
"loss": 0.0049,
"step": 4880
},
{
"epoch": 45.61,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6780359745025635,
"eval_runtime": 3.0255,
"eval_samples_per_second": 62.469,
"eval_steps_per_second": 7.933,
"step": 4880
},
{
"epoch": 45.7,
"learning_rate": 1.7271028037383177e-05,
"loss": 0.0091,
"step": 4890
},
{
"epoch": 45.7,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6785778999328613,
"eval_runtime": 2.9136,
"eval_samples_per_second": 64.868,
"eval_steps_per_second": 8.237,
"step": 4890
},
{
"epoch": 45.79,
"learning_rate": 1.6897196261682244e-05,
"loss": 0.0046,
"step": 4900
},
{
"epoch": 45.79,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6758513450622559,
"eval_runtime": 2.9184,
"eval_samples_per_second": 64.761,
"eval_steps_per_second": 8.224,
"step": 4900
},
{
"epoch": 45.89,
"learning_rate": 1.652336448598131e-05,
"loss": 0.0056,
"step": 4910
},
{
"epoch": 45.89,
"eval_accuracy": 0.7724867724867724,
"eval_loss": 1.6727031469345093,
"eval_runtime": 2.9001,
"eval_samples_per_second": 65.169,
"eval_steps_per_second": 8.275,
"step": 4910
},
{
"epoch": 45.98,
"learning_rate": 1.6149532710280375e-05,
"loss": 0.011,
"step": 4920
},
{
"epoch": 45.98,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6746748685836792,
"eval_runtime": 2.9034,
"eval_samples_per_second": 65.095,
"eval_steps_per_second": 8.266,
"step": 4920
},
{
"epoch": 46.07,
"learning_rate": 1.577570093457944e-05,
"loss": 0.0093,
"step": 4930
},
{
"epoch": 46.07,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.674180507659912,
"eval_runtime": 2.9574,
"eval_samples_per_second": 63.907,
"eval_steps_per_second": 8.115,
"step": 4930
},
{
"epoch": 46.17,
"learning_rate": 1.5401869158878503e-05,
"loss": 0.0047,
"step": 4940
},
{
"epoch": 46.17,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6757071018218994,
"eval_runtime": 2.8997,
"eval_samples_per_second": 65.179,
"eval_steps_per_second": 8.277,
"step": 4940
},
{
"epoch": 46.26,
"learning_rate": 1.502803738317757e-05,
"loss": 0.0089,
"step": 4950
},
{
"epoch": 46.26,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6735711097717285,
"eval_runtime": 3.1476,
"eval_samples_per_second": 60.046,
"eval_steps_per_second": 7.625,
"step": 4950
},
{
"epoch": 46.36,
"learning_rate": 1.4654205607476637e-05,
"loss": 0.0005,
"step": 4960
},
{
"epoch": 46.36,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6719815731048584,
"eval_runtime": 3.192,
"eval_samples_per_second": 59.21,
"eval_steps_per_second": 7.519,
"step": 4960
},
{
"epoch": 46.45,
"learning_rate": 1.4280373831775701e-05,
"loss": 0.0005,
"step": 4970
},
{
"epoch": 46.45,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6716129779815674,
"eval_runtime": 2.9471,
"eval_samples_per_second": 64.131,
"eval_steps_per_second": 8.144,
"step": 4970
},
{
"epoch": 46.54,
"learning_rate": 1.3906542056074767e-05,
"loss": 0.0097,
"step": 4980
},
{
"epoch": 46.54,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.672421932220459,
"eval_runtime": 2.9663,
"eval_samples_per_second": 63.716,
"eval_steps_per_second": 8.091,
"step": 4980
},
{
"epoch": 46.64,
"learning_rate": 1.3532710280373834e-05,
"loss": 0.0005,
"step": 4990
},
{
"epoch": 46.64,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.671976089477539,
"eval_runtime": 3.0014,
"eval_samples_per_second": 62.971,
"eval_steps_per_second": 7.996,
"step": 4990
},
{
"epoch": 46.73,
"learning_rate": 1.3158878504672898e-05,
"loss": 0.0005,
"step": 5000
},
{
"epoch": 46.73,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6718426942825317,
"eval_runtime": 3.162,
"eval_samples_per_second": 59.772,
"eval_steps_per_second": 7.59,
"step": 5000
},
{
"epoch": 46.82,
"learning_rate": 1.2785046728971963e-05,
"loss": 0.0004,
"step": 5010
},
{
"epoch": 46.82,
"eval_accuracy": 0.7671957671957672,
"eval_loss": 1.6721214056015015,
"eval_runtime": 2.9348,
"eval_samples_per_second": 64.4,
"eval_steps_per_second": 8.178,
"step": 5010
},
{
"epoch": 46.92,
"learning_rate": 1.2411214953271029e-05,
"loss": 0.0107,
"step": 5020
},
{
"epoch": 46.92,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6742489337921143,
"eval_runtime": 2.9616,
"eval_samples_per_second": 63.816,
"eval_steps_per_second": 8.104,
"step": 5020
},
{
"epoch": 47.01,
"learning_rate": 1.2037383177570094e-05,
"loss": 0.0051,
"step": 5030
},
{
"epoch": 47.01,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6763916015625,
"eval_runtime": 3.3324,
"eval_samples_per_second": 56.716,
"eval_steps_per_second": 7.202,
"step": 5030
},
{
"epoch": 47.1,
"learning_rate": 1.166355140186916e-05,
"loss": 0.0004,
"step": 5040
},
{
"epoch": 47.1,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6787925958633423,
"eval_runtime": 2.994,
"eval_samples_per_second": 63.127,
"eval_steps_per_second": 8.016,
"step": 5040
},
{
"epoch": 47.2,
"learning_rate": 1.1289719626168224e-05,
"loss": 0.0048,
"step": 5050
},
{
"epoch": 47.2,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.67880380153656,
"eval_runtime": 2.9739,
"eval_samples_per_second": 63.554,
"eval_steps_per_second": 8.07,
"step": 5050
},
{
"epoch": 47.29,
"learning_rate": 1.0915887850467291e-05,
"loss": 0.0005,
"step": 5060
},
{
"epoch": 47.29,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6779260635375977,
"eval_runtime": 2.9453,
"eval_samples_per_second": 64.171,
"eval_steps_per_second": 8.149,
"step": 5060
},
{
"epoch": 47.38,
"learning_rate": 1.0542056074766356e-05,
"loss": 0.0048,
"step": 5070
},
{
"epoch": 47.38,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6772257089614868,
"eval_runtime": 3.0548,
"eval_samples_per_second": 61.87,
"eval_steps_per_second": 7.856,
"step": 5070
},
{
"epoch": 47.48,
"learning_rate": 1.016822429906542e-05,
"loss": 0.0044,
"step": 5080
},
{
"epoch": 47.48,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.677033543586731,
"eval_runtime": 3.0759,
"eval_samples_per_second": 61.446,
"eval_steps_per_second": 7.803,
"step": 5080
},
{
"epoch": 47.57,
"learning_rate": 9.794392523364486e-06,
"loss": 0.0004,
"step": 5090
},
{
"epoch": 47.57,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6759369373321533,
"eval_runtime": 3.134,
"eval_samples_per_second": 60.306,
"eval_steps_per_second": 7.658,
"step": 5090
},
{
"epoch": 47.66,
"learning_rate": 9.420560747663553e-06,
"loss": 0.0053,
"step": 5100
},
{
"epoch": 47.66,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6769102811813354,
"eval_runtime": 3.2383,
"eval_samples_per_second": 58.363,
"eval_steps_per_second": 7.411,
"step": 5100
},
{
"epoch": 47.76,
"learning_rate": 9.046728971962617e-06,
"loss": 0.0093,
"step": 5110
},
{
"epoch": 47.76,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6791408061981201,
"eval_runtime": 3.1946,
"eval_samples_per_second": 59.163,
"eval_steps_per_second": 7.513,
"step": 5110
},
{
"epoch": 47.85,
"learning_rate": 8.672897196261682e-06,
"loss": 0.0046,
"step": 5120
},
{
"epoch": 47.85,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6781762838363647,
"eval_runtime": 3.0007,
"eval_samples_per_second": 62.986,
"eval_steps_per_second": 7.998,
"step": 5120
},
{
"epoch": 47.94,
"learning_rate": 8.299065420560748e-06,
"loss": 0.0101,
"step": 5130
},
{
"epoch": 47.94,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.679875135421753,
"eval_runtime": 2.8961,
"eval_samples_per_second": 65.259,
"eval_steps_per_second": 8.287,
"step": 5130
},
{
"epoch": 48.04,
"learning_rate": 7.925233644859813e-06,
"loss": 0.0055,
"step": 5140
},
{
"epoch": 48.04,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6814370155334473,
"eval_runtime": 2.99,
"eval_samples_per_second": 63.21,
"eval_steps_per_second": 8.027,
"step": 5140
},
{
"epoch": 48.13,
"learning_rate": 7.551401869158879e-06,
"loss": 0.0004,
"step": 5150
},
{
"epoch": 48.13,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6820155382156372,
"eval_runtime": 3.0809,
"eval_samples_per_second": 61.346,
"eval_steps_per_second": 7.79,
"step": 5150
},
{
"epoch": 48.22,
"learning_rate": 7.1775700934579445e-06,
"loss": 0.0005,
"step": 5160
},
{
"epoch": 48.22,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6822861433029175,
"eval_runtime": 2.9449,
"eval_samples_per_second": 64.179,
"eval_steps_per_second": 8.15,
"step": 5160
},
{
"epoch": 48.32,
"learning_rate": 6.803738317757009e-06,
"loss": 0.005,
"step": 5170
},
{
"epoch": 48.32,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.682709813117981,
"eval_runtime": 3.0319,
"eval_samples_per_second": 62.337,
"eval_steps_per_second": 7.916,
"step": 5170
},
{
"epoch": 48.41,
"learning_rate": 6.429906542056075e-06,
"loss": 0.0093,
"step": 5180
},
{
"epoch": 48.41,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6838692426681519,
"eval_runtime": 3.0335,
"eval_samples_per_second": 62.304,
"eval_steps_per_second": 7.912,
"step": 5180
},
{
"epoch": 48.5,
"learning_rate": 6.05607476635514e-06,
"loss": 0.0048,
"step": 5190
},
{
"epoch": 48.5,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6845488548278809,
"eval_runtime": 3.0676,
"eval_samples_per_second": 61.612,
"eval_steps_per_second": 7.824,
"step": 5190
},
{
"epoch": 48.6,
"learning_rate": 5.682242990654206e-06,
"loss": 0.0005,
"step": 5200
},
{
"epoch": 48.6,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6849009990692139,
"eval_runtime": 3.008,
"eval_samples_per_second": 62.832,
"eval_steps_per_second": 7.979,
"step": 5200
},
{
"epoch": 48.69,
"learning_rate": 5.308411214953271e-06,
"loss": 0.0005,
"step": 5210
},
{
"epoch": 48.69,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6851400136947632,
"eval_runtime": 2.9174,
"eval_samples_per_second": 64.784,
"eval_steps_per_second": 8.227,
"step": 5210
},
{
"epoch": 48.79,
"learning_rate": 4.934579439252337e-06,
"loss": 0.0136,
"step": 5220
},
{
"epoch": 48.79,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.686295747756958,
"eval_runtime": 2.9681,
"eval_samples_per_second": 63.678,
"eval_steps_per_second": 8.086,
"step": 5220
},
{
"epoch": 48.88,
"learning_rate": 4.560747663551402e-06,
"loss": 0.005,
"step": 5230
},
{
"epoch": 48.88,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6866832971572876,
"eval_runtime": 2.9518,
"eval_samples_per_second": 64.029,
"eval_steps_per_second": 8.131,
"step": 5230
},
{
"epoch": 48.97,
"learning_rate": 4.186915887850468e-06,
"loss": 0.0096,
"step": 5240
},
{
"epoch": 48.97,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.685899257659912,
"eval_runtime": 3.0006,
"eval_samples_per_second": 62.987,
"eval_steps_per_second": 7.998,
"step": 5240
},
{
"epoch": 49.07,
"learning_rate": 3.813084112149533e-06,
"loss": 0.0048,
"step": 5250
},
{
"epoch": 49.07,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6844896078109741,
"eval_runtime": 2.9876,
"eval_samples_per_second": 63.261,
"eval_steps_per_second": 8.033,
"step": 5250
},
{
"epoch": 49.16,
"learning_rate": 3.4392523364485985e-06,
"loss": 0.0048,
"step": 5260
},
{
"epoch": 49.16,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6853784322738647,
"eval_runtime": 2.9873,
"eval_samples_per_second": 63.267,
"eval_steps_per_second": 8.034,
"step": 5260
},
{
"epoch": 49.25,
"learning_rate": 3.0654205607476637e-06,
"loss": 0.0093,
"step": 5270
},
{
"epoch": 49.25,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6857768297195435,
"eval_runtime": 2.9803,
"eval_samples_per_second": 63.416,
"eval_steps_per_second": 8.053,
"step": 5270
},
{
"epoch": 49.35,
"learning_rate": 2.691588785046729e-06,
"loss": 0.0004,
"step": 5280
},
{
"epoch": 49.35,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6857463121414185,
"eval_runtime": 3.0292,
"eval_samples_per_second": 62.393,
"eval_steps_per_second": 7.923,
"step": 5280
},
{
"epoch": 49.44,
"learning_rate": 2.3177570093457947e-06,
"loss": 0.0095,
"step": 5290
},
{
"epoch": 49.44,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.685395359992981,
"eval_runtime": 2.9645,
"eval_samples_per_second": 63.755,
"eval_steps_per_second": 8.096,
"step": 5290
},
{
"epoch": 49.53,
"learning_rate": 1.94392523364486e-06,
"loss": 0.0005,
"step": 5300
},
{
"epoch": 49.53,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6847246885299683,
"eval_runtime": 3.0054,
"eval_samples_per_second": 62.887,
"eval_steps_per_second": 7.986,
"step": 5300
},
{
"epoch": 49.63,
"learning_rate": 1.5700934579439254e-06,
"loss": 0.0005,
"step": 5310
},
{
"epoch": 49.63,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6844700574874878,
"eval_runtime": 2.9223,
"eval_samples_per_second": 64.676,
"eval_steps_per_second": 8.213,
"step": 5310
},
{
"epoch": 49.72,
"learning_rate": 1.1962616822429907e-06,
"loss": 0.0092,
"step": 5320
},
{
"epoch": 49.72,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6847366094589233,
"eval_runtime": 3.0496,
"eval_samples_per_second": 61.975,
"eval_steps_per_second": 7.87,
"step": 5320
},
{
"epoch": 49.81,
"learning_rate": 8.224299065420561e-07,
"loss": 0.0005,
"step": 5330
},
{
"epoch": 49.81,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6845016479492188,
"eval_runtime": 3.1606,
"eval_samples_per_second": 59.798,
"eval_steps_per_second": 7.593,
"step": 5330
},
{
"epoch": 49.91,
"learning_rate": 4.4859813084112153e-07,
"loss": 0.0092,
"step": 5340
},
{
"epoch": 49.91,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.684584140777588,
"eval_runtime": 2.921,
"eval_samples_per_second": 64.704,
"eval_steps_per_second": 8.216,
"step": 5340
},
{
"epoch": 50.0,
"learning_rate": 7.476635514018692e-08,
"loss": 0.0005,
"step": 5350
},
{
"epoch": 50.0,
"eval_accuracy": 0.7619047619047619,
"eval_loss": 1.6846909523010254,
"eval_runtime": 2.9853,
"eval_samples_per_second": 63.309,
"eval_steps_per_second": 8.039,
"step": 5350
},
{
"epoch": 50.0,
"step": 5350,
"total_flos": 6.575784632757043e+18,
"train_loss": 0.14066274270554568,
"train_runtime": 4434.2749,
"train_samples_per_second": 19.135,
"train_steps_per_second": 1.207
}
],
"logging_steps": 10,
"max_steps": 5350,
"num_train_epochs": 50,
"save_steps": 10,
"total_flos": 6.575784632757043e+18,
"trial_name": null,
"trial_params": null
}