mnli / trainer_state.json
ShengdingHu's picture
Training in progress, step 200
84ce188
{
"best_metric": 86.3506916192026,
"best_model_checkpoint": "outputs/bitfit/t5-base/mnli/checkpoint-32800",
"epoch": 3.0,
"global_step": 36816,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02,
"eval_accuracy": 84.0520748576078,
"eval_average_metrics": 84.0520748576078,
"eval_loss": 0.19396202266216278,
"eval_runtime": 73.5315,
"eval_samples_per_second": 133.711,
"step": 200
},
{
"epoch": 0.03,
"eval_accuracy": 85.08950366151342,
"eval_average_metrics": 85.08950366151342,
"eval_loss": 0.1736125648021698,
"eval_runtime": 77.6953,
"eval_samples_per_second": 126.546,
"step": 400
},
{
"epoch": 0.04,
"learning_rate": 0.0002959256844850065,
"loss": 0.2786,
"step": 500
},
{
"epoch": 0.05,
"eval_accuracy": 85.25223759153783,
"eval_average_metrics": 85.25223759153783,
"eval_loss": 0.1704595386981964,
"eval_runtime": 71.3646,
"eval_samples_per_second": 137.771,
"step": 600
},
{
"epoch": 0.07,
"eval_accuracy": 85.120016273393,
"eval_average_metrics": 85.120016273393,
"eval_loss": 0.17760007083415985,
"eval_runtime": 70.572,
"eval_samples_per_second": 139.319,
"step": 800
},
{
"epoch": 0.08,
"learning_rate": 0.000291851368970013,
"loss": 0.1754,
"step": 1000
},
{
"epoch": 0.08,
"eval_accuracy": 85.81163547599675,
"eval_average_metrics": 85.81163547599675,
"eval_loss": 0.17395834624767303,
"eval_runtime": 68.4178,
"eval_samples_per_second": 143.705,
"step": 1000
},
{
"epoch": 0.1,
"eval_accuracy": 86.04556550040684,
"eval_average_metrics": 86.04556550040684,
"eval_loss": 0.16720984876155853,
"eval_runtime": 74.935,
"eval_samples_per_second": 131.207,
"step": 1200
},
{
"epoch": 0.11,
"eval_accuracy": 85.85231895850285,
"eval_average_metrics": 85.85231895850285,
"eval_loss": 0.1686050444841385,
"eval_runtime": 65.5334,
"eval_samples_per_second": 150.03,
"step": 1400
},
{
"epoch": 0.12,
"learning_rate": 0.00028777705345501956,
"loss": 0.17,
"step": 1500
},
{
"epoch": 0.13,
"eval_accuracy": 84.83523189585028,
"eval_average_metrics": 84.83523189585028,
"eval_loss": 0.17506256699562073,
"eval_runtime": 72.658,
"eval_samples_per_second": 135.319,
"step": 1600
},
{
"epoch": 0.15,
"eval_accuracy": 85.77095199349064,
"eval_average_metrics": 85.77095199349064,
"eval_loss": 0.16318167746067047,
"eval_runtime": 71.0929,
"eval_samples_per_second": 138.298,
"step": 1800
},
{
"epoch": 0.16,
"learning_rate": 0.000283702737940026,
"loss": 0.1593,
"step": 2000
},
{
"epoch": 0.16,
"eval_accuracy": 84.98779495524816,
"eval_average_metrics": 84.98779495524816,
"eval_loss": 0.17500561475753784,
"eval_runtime": 76.1036,
"eval_samples_per_second": 129.192,
"step": 2000
},
{
"epoch": 0.18,
"eval_accuracy": 85.22172497965825,
"eval_average_metrics": 85.22172497965825,
"eval_loss": 0.16974958777427673,
"eval_runtime": 73.5318,
"eval_samples_per_second": 133.711,
"step": 2200
},
{
"epoch": 0.2,
"eval_accuracy": 84.93694060211554,
"eval_average_metrics": 84.93694060211554,
"eval_loss": 0.16803883016109467,
"eval_runtime": 71.8068,
"eval_samples_per_second": 136.923,
"step": 2400
},
{
"epoch": 0.2,
"learning_rate": 0.00027962842242503253,
"loss": 0.1626,
"step": 2500
},
{
"epoch": 0.21,
"eval_accuracy": 85.64890154597234,
"eval_average_metrics": 85.64890154597234,
"eval_loss": 0.16620652377605438,
"eval_runtime": 70.4457,
"eval_samples_per_second": 139.569,
"step": 2600
},
{
"epoch": 0.23,
"eval_accuracy": 85.17087062652563,
"eval_average_metrics": 85.17087062652563,
"eval_loss": 0.16685815155506134,
"eval_runtime": 70.323,
"eval_samples_per_second": 139.812,
"step": 2800
},
{
"epoch": 0.24,
"learning_rate": 0.0002755541069100391,
"loss": 0.1637,
"step": 3000
},
{
"epoch": 0.24,
"eval_accuracy": 85.08950366151342,
"eval_average_metrics": 85.08950366151342,
"eval_loss": 0.1686829775571823,
"eval_runtime": 68.4934,
"eval_samples_per_second": 143.547,
"step": 3000
},
{
"epoch": 0.26,
"eval_accuracy": 84.67249796582588,
"eval_average_metrics": 84.67249796582588,
"eval_loss": 0.1832115650177002,
"eval_runtime": 69.9104,
"eval_samples_per_second": 140.637,
"step": 3200
},
{
"epoch": 0.28,
"eval_accuracy": 85.74043938161107,
"eval_average_metrics": 85.74043938161107,
"eval_loss": 0.15809670090675354,
"eval_runtime": 71.8214,
"eval_samples_per_second": 136.895,
"step": 3400
},
{
"epoch": 0.29,
"learning_rate": 0.0002714797913950456,
"loss": 0.1611,
"step": 3500
},
{
"epoch": 0.29,
"eval_accuracy": 85.82180634662328,
"eval_average_metrics": 85.82180634662328,
"eval_loss": 0.16679300367832184,
"eval_runtime": 68.4877,
"eval_samples_per_second": 143.559,
"step": 3600
},
{
"epoch": 0.31,
"eval_accuracy": 85.69975589910497,
"eval_average_metrics": 85.69975589910497,
"eval_loss": 0.1635247766971588,
"eval_runtime": 70.3174,
"eval_samples_per_second": 139.823,
"step": 3800
},
{
"epoch": 0.33,
"learning_rate": 0.0002674054758800521,
"loss": 0.1546,
"step": 4000
},
{
"epoch": 0.33,
"eval_accuracy": 85.8319772172498,
"eval_average_metrics": 85.8319772172498,
"eval_loss": 0.16676998138427734,
"eval_runtime": 73.2289,
"eval_samples_per_second": 134.264,
"step": 4000
},
{
"epoch": 0.34,
"eval_accuracy": 85.50650935720098,
"eval_average_metrics": 85.50650935720098,
"eval_loss": 0.17212657630443573,
"eval_runtime": 74.584,
"eval_samples_per_second": 131.824,
"step": 4200
},
{
"epoch": 0.36,
"eval_accuracy": 85.54719283970708,
"eval_average_metrics": 85.54719283970708,
"eval_loss": 0.17162065207958221,
"eval_runtime": 69.8754,
"eval_samples_per_second": 140.708,
"step": 4400
},
{
"epoch": 0.37,
"learning_rate": 0.00026333116036505864,
"loss": 0.1531,
"step": 4500
},
{
"epoch": 0.37,
"eval_accuracy": 85.96419853539463,
"eval_average_metrics": 85.96419853539463,
"eval_loss": 0.16573481261730194,
"eval_runtime": 67.7552,
"eval_samples_per_second": 145.111,
"step": 4600
},
{
"epoch": 0.39,
"eval_accuracy": 85.78112286411717,
"eval_average_metrics": 85.78112286411717,
"eval_loss": 0.16396570205688477,
"eval_runtime": 63.3281,
"eval_samples_per_second": 155.255,
"step": 4800
},
{
"epoch": 0.41,
"learning_rate": 0.0002592568448500652,
"loss": 0.1566,
"step": 5000
},
{
"epoch": 0.41,
"eval_accuracy": 85.73026851098454,
"eval_average_metrics": 85.73026851098454,
"eval_loss": 0.16679789125919342,
"eval_runtime": 68.8228,
"eval_samples_per_second": 142.86,
"step": 5000
},
{
"epoch": 0.42,
"eval_accuracy": 85.68958502847845,
"eval_average_metrics": 85.68958502847845,
"eval_loss": 0.16058295965194702,
"eval_runtime": 66.5592,
"eval_samples_per_second": 147.718,
"step": 5200
},
{
"epoch": 0.44,
"eval_accuracy": 85.66924328722538,
"eval_average_metrics": 85.66924328722538,
"eval_loss": 0.1740991473197937,
"eval_runtime": 66.4465,
"eval_samples_per_second": 147.969,
"step": 5400
},
{
"epoch": 0.45,
"learning_rate": 0.00025518252933507166,
"loss": 0.1514,
"step": 5500
},
{
"epoch": 0.46,
"eval_accuracy": 84.66232709519936,
"eval_average_metrics": 84.66232709519936,
"eval_loss": 0.18576639890670776,
"eval_runtime": 66.3297,
"eval_samples_per_second": 148.229,
"step": 5600
},
{
"epoch": 0.47,
"eval_accuracy": 85.88283157038242,
"eval_average_metrics": 85.88283157038242,
"eval_loss": 0.16450409591197968,
"eval_runtime": 64.7793,
"eval_samples_per_second": 151.777,
"step": 5800
},
{
"epoch": 0.49,
"learning_rate": 0.0002511082138200782,
"loss": 0.1531,
"step": 6000
},
{
"epoch": 0.49,
"eval_accuracy": 85.13018714401953,
"eval_average_metrics": 85.13018714401953,
"eval_loss": 0.17607340216636658,
"eval_runtime": 63.5814,
"eval_samples_per_second": 154.637,
"step": 6000
},
{
"epoch": 0.51,
"eval_accuracy": 85.35394629780309,
"eval_average_metrics": 85.35394629780309,
"eval_loss": 0.16797170042991638,
"eval_runtime": 63.9708,
"eval_samples_per_second": 153.695,
"step": 6200
},
{
"epoch": 0.52,
"eval_accuracy": 85.75061025223759,
"eval_average_metrics": 85.75061025223759,
"eval_loss": 0.16036862134933472,
"eval_runtime": 64.9464,
"eval_samples_per_second": 151.386,
"step": 6400
},
{
"epoch": 0.53,
"learning_rate": 0.00024703389830508474,
"loss": 0.1544,
"step": 6500
},
{
"epoch": 0.54,
"eval_accuracy": 85.64890154597234,
"eval_average_metrics": 85.64890154597234,
"eval_loss": 0.16572105884552002,
"eval_runtime": 67.1736,
"eval_samples_per_second": 146.367,
"step": 6600
},
{
"epoch": 0.55,
"eval_accuracy": 85.40480065093573,
"eval_average_metrics": 85.40480065093573,
"eval_loss": 0.16141638159751892,
"eval_runtime": 67.3069,
"eval_samples_per_second": 146.077,
"step": 6800
},
{
"epoch": 0.57,
"learning_rate": 0.00024295958279009125,
"loss": 0.1549,
"step": 7000
},
{
"epoch": 0.57,
"eval_accuracy": 85.84214808787632,
"eval_average_metrics": 85.84214808787632,
"eval_loss": 0.1581791192293167,
"eval_runtime": 70.2525,
"eval_samples_per_second": 139.952,
"step": 7000
},
{
"epoch": 0.59,
"eval_accuracy": 85.78112286411717,
"eval_average_metrics": 85.78112286411717,
"eval_loss": 0.1584727168083191,
"eval_runtime": 70.5829,
"eval_samples_per_second": 139.297,
"step": 7200
},
{
"epoch": 0.6,
"eval_accuracy": 85.84214808787632,
"eval_average_metrics": 85.84214808787632,
"eval_loss": 0.16103526949882507,
"eval_runtime": 66.5529,
"eval_samples_per_second": 147.732,
"step": 7400
},
{
"epoch": 0.61,
"learning_rate": 0.00023888526727509777,
"loss": 0.1572,
"step": 7500
},
{
"epoch": 0.62,
"eval_accuracy": 86.00488201790073,
"eval_average_metrics": 86.00488201790073,
"eval_loss": 0.17075441777706146,
"eval_runtime": 61.313,
"eval_samples_per_second": 160.358,
"step": 7600
},
{
"epoch": 0.64,
"eval_accuracy": 85.89300244100895,
"eval_average_metrics": 85.89300244100895,
"eval_loss": 0.1631649136543274,
"eval_runtime": 70.2006,
"eval_samples_per_second": 140.056,
"step": 7800
},
{
"epoch": 0.65,
"learning_rate": 0.0002348109517601043,
"loss": 0.1552,
"step": 8000
},
{
"epoch": 0.65,
"eval_accuracy": 86.00488201790073,
"eval_average_metrics": 86.00488201790073,
"eval_loss": 0.15884214639663696,
"eval_runtime": 70.0105,
"eval_samples_per_second": 140.436,
"step": 8000
},
{
"epoch": 0.67,
"eval_accuracy": 85.42514239218877,
"eval_average_metrics": 85.42514239218877,
"eval_loss": 0.16717489063739777,
"eval_runtime": 67.9572,
"eval_samples_per_second": 144.679,
"step": 8200
},
{
"epoch": 0.68,
"eval_accuracy": 85.66924328722538,
"eval_average_metrics": 85.66924328722538,
"eval_loss": 0.16362008452415466,
"eval_runtime": 71.9947,
"eval_samples_per_second": 136.566,
"step": 8400
},
{
"epoch": 0.69,
"learning_rate": 0.00023073663624511082,
"loss": 0.1518,
"step": 8500
},
{
"epoch": 0.7,
"eval_accuracy": 85.49633848657446,
"eval_average_metrics": 85.49633848657446,
"eval_loss": 0.16621538996696472,
"eval_runtime": 70.1174,
"eval_samples_per_second": 140.222,
"step": 8600
},
{
"epoch": 0.72,
"eval_accuracy": 85.98454027664768,
"eval_average_metrics": 85.98454027664768,
"eval_loss": 0.16499604284763336,
"eval_runtime": 68.6629,
"eval_samples_per_second": 143.192,
"step": 8800
},
{
"epoch": 0.73,
"learning_rate": 0.0002266623207301173,
"loss": 0.1514,
"step": 9000
},
{
"epoch": 0.73,
"eval_accuracy": 86.09641985353946,
"eval_average_metrics": 86.09641985353946,
"eval_loss": 0.1623799055814743,
"eval_runtime": 70.2156,
"eval_samples_per_second": 140.026,
"step": 9000
},
{
"epoch": 0.75,
"eval_accuracy": 85.54719283970708,
"eval_average_metrics": 85.54719283970708,
"eval_loss": 0.1693897545337677,
"eval_runtime": 64.4752,
"eval_samples_per_second": 152.493,
"step": 9200
},
{
"epoch": 0.77,
"eval_accuracy": 85.15052888527258,
"eval_average_metrics": 85.15052888527258,
"eval_loss": 0.16912253201007843,
"eval_runtime": 71.6316,
"eval_samples_per_second": 137.258,
"step": 9400
},
{
"epoch": 0.77,
"learning_rate": 0.00022258800521512384,
"loss": 0.1492,
"step": 9500
},
{
"epoch": 0.78,
"eval_accuracy": 85.81163547599675,
"eval_average_metrics": 85.81163547599675,
"eval_loss": 0.16445724666118622,
"eval_runtime": 70.0896,
"eval_samples_per_second": 140.278,
"step": 9600
},
{
"epoch": 0.8,
"eval_accuracy": 85.51668022782751,
"eval_average_metrics": 85.51668022782751,
"eval_loss": 0.171467587351799,
"eval_runtime": 67.0912,
"eval_samples_per_second": 146.547,
"step": 9800
},
{
"epoch": 0.81,
"learning_rate": 0.00021851368970013035,
"loss": 0.1465,
"step": 10000
},
{
"epoch": 0.81,
"eval_accuracy": 85.76078112286412,
"eval_average_metrics": 85.76078112286412,
"eval_loss": 0.16485248506069183,
"eval_runtime": 71.6667,
"eval_samples_per_second": 137.191,
"step": 10000
},
{
"epoch": 0.83,
"eval_accuracy": 85.54719283970708,
"eval_average_metrics": 85.54719283970708,
"eval_loss": 0.16628311574459076,
"eval_runtime": 69.3952,
"eval_samples_per_second": 141.681,
"step": 10200
},
{
"epoch": 0.85,
"eval_accuracy": 85.72009764035802,
"eval_average_metrics": 85.72009764035802,
"eval_loss": 0.1626047044992447,
"eval_runtime": 63.0097,
"eval_samples_per_second": 156.039,
"step": 10400
},
{
"epoch": 0.86,
"learning_rate": 0.0002144393741851369,
"loss": 0.1478,
"step": 10500
},
{
"epoch": 0.86,
"eval_accuracy": 85.64890154597234,
"eval_average_metrics": 85.64890154597234,
"eval_loss": 0.16279704868793488,
"eval_runtime": 72.6628,
"eval_samples_per_second": 135.31,
"step": 10600
},
{
"epoch": 0.88,
"eval_accuracy": 85.80146460537021,
"eval_average_metrics": 85.80146460537021,
"eval_loss": 0.1637255698442459,
"eval_runtime": 71.5577,
"eval_samples_per_second": 137.4,
"step": 10800
},
{
"epoch": 0.9,
"learning_rate": 0.0002103650586701434,
"loss": 0.1509,
"step": 11000
},
{
"epoch": 0.9,
"eval_accuracy": 85.79129373474369,
"eval_average_metrics": 85.79129373474369,
"eval_loss": 0.16114714741706848,
"eval_runtime": 72.3627,
"eval_samples_per_second": 135.871,
"step": 11000
},
{
"epoch": 0.91,
"eval_accuracy": 85.8319772172498,
"eval_average_metrics": 85.8319772172498,
"eval_loss": 0.15985067188739777,
"eval_runtime": 68.9168,
"eval_samples_per_second": 142.665,
"step": 11200
},
{
"epoch": 0.93,
"eval_accuracy": 85.40480065093573,
"eval_average_metrics": 85.40480065093573,
"eval_loss": 0.17174053192138672,
"eval_runtime": 68.3249,
"eval_samples_per_second": 143.901,
"step": 11400
},
{
"epoch": 0.94,
"learning_rate": 0.00020629074315514992,
"loss": 0.15,
"step": 11500
},
{
"epoch": 0.95,
"eval_accuracy": 85.97436940602115,
"eval_average_metrics": 85.97436940602115,
"eval_loss": 0.15932144224643707,
"eval_runtime": 68.7187,
"eval_samples_per_second": 143.076,
"step": 11600
},
{
"epoch": 0.96,
"eval_accuracy": 85.59804719283972,
"eval_average_metrics": 85.59804719283972,
"eval_loss": 0.16465091705322266,
"eval_runtime": 68.0374,
"eval_samples_per_second": 144.509,
"step": 11800
},
{
"epoch": 0.98,
"learning_rate": 0.00020221642764015643,
"loss": 0.1514,
"step": 12000
},
{
"epoch": 0.98,
"eval_accuracy": 85.88283157038242,
"eval_average_metrics": 85.88283157038242,
"eval_loss": 0.15999911725521088,
"eval_runtime": 67.678,
"eval_samples_per_second": 145.276,
"step": 12000
},
{
"epoch": 0.99,
"eval_accuracy": 86.0353946297803,
"eval_average_metrics": 86.0353946297803,
"eval_loss": 0.16025537252426147,
"eval_runtime": 63.6019,
"eval_samples_per_second": 154.587,
"step": 12200
},
{
"epoch": 1.01,
"eval_accuracy": 85.8726606997559,
"eval_average_metrics": 85.8726606997559,
"eval_loss": 0.1621241718530655,
"eval_runtime": 68.3301,
"eval_samples_per_second": 143.89,
"step": 12400
},
{
"epoch": 1.02,
"learning_rate": 0.00019814211212516294,
"loss": 0.147,
"step": 12500
},
{
"epoch": 1.03,
"eval_accuracy": 85.76078112286412,
"eval_average_metrics": 85.76078112286412,
"eval_loss": 0.17349866032600403,
"eval_runtime": 66.9259,
"eval_samples_per_second": 146.909,
"step": 12600
},
{
"epoch": 1.04,
"eval_accuracy": 85.78112286411717,
"eval_average_metrics": 85.78112286411717,
"eval_loss": 0.1615545153617859,
"eval_runtime": 69.1346,
"eval_samples_per_second": 142.215,
"step": 12800
},
{
"epoch": 1.06,
"learning_rate": 0.00019406779661016945,
"loss": 0.143,
"step": 13000
},
{
"epoch": 1.06,
"eval_accuracy": 85.84214808787632,
"eval_average_metrics": 85.84214808787632,
"eval_loss": 0.1661369502544403,
"eval_runtime": 62.8833,
"eval_samples_per_second": 156.353,
"step": 13000
},
{
"epoch": 1.08,
"eval_accuracy": 86.06590724165989,
"eval_average_metrics": 86.06590724165989,
"eval_loss": 0.16263821721076965,
"eval_runtime": 68.8917,
"eval_samples_per_second": 142.717,
"step": 13200
},
{
"epoch": 1.09,
"eval_accuracy": 85.69975589910497,
"eval_average_metrics": 85.69975589910497,
"eval_loss": 0.1634710133075714,
"eval_runtime": 62.7526,
"eval_samples_per_second": 156.679,
"step": 13400
},
{
"epoch": 1.1,
"learning_rate": 0.000189993481095176,
"loss": 0.1444,
"step": 13500
},
{
"epoch": 1.11,
"eval_accuracy": 85.78112286411717,
"eval_average_metrics": 85.78112286411717,
"eval_loss": 0.16405758261680603,
"eval_runtime": 66.2833,
"eval_samples_per_second": 148.333,
"step": 13600
},
{
"epoch": 1.12,
"eval_accuracy": 85.9540276647681,
"eval_average_metrics": 85.9540276647681,
"eval_loss": 0.16064594686031342,
"eval_runtime": 63.0321,
"eval_samples_per_second": 155.984,
"step": 13800
},
{
"epoch": 1.14,
"learning_rate": 0.0001859191655801825,
"loss": 0.1466,
"step": 14000
},
{
"epoch": 1.14,
"eval_accuracy": 86.31000813669651,
"eval_average_metrics": 86.31000813669651,
"eval_loss": 0.15932226181030273,
"eval_runtime": 66.0369,
"eval_samples_per_second": 148.886,
"step": 14000
},
{
"epoch": 1.16,
"eval_accuracy": 85.72009764035802,
"eval_average_metrics": 85.72009764035802,
"eval_loss": 0.16476964950561523,
"eval_runtime": 67.6592,
"eval_samples_per_second": 145.316,
"step": 14200
},
{
"epoch": 1.17,
"eval_accuracy": 85.36411716842962,
"eval_average_metrics": 85.36411716842962,
"eval_loss": 0.17415712773799896,
"eval_runtime": 69.9268,
"eval_samples_per_second": 140.604,
"step": 14400
},
{
"epoch": 1.18,
"learning_rate": 0.00018184485006518905,
"loss": 0.1493,
"step": 14500
},
{
"epoch": 1.19,
"eval_accuracy": 85.65907241659886,
"eval_average_metrics": 85.65907241659886,
"eval_loss": 0.1634403020143509,
"eval_runtime": 70.839,
"eval_samples_per_second": 138.794,
"step": 14600
},
{
"epoch": 1.21,
"eval_accuracy": 85.94385679414158,
"eval_average_metrics": 85.94385679414158,
"eval_loss": 0.16072088479995728,
"eval_runtime": 67.4524,
"eval_samples_per_second": 145.762,
"step": 14800
},
{
"epoch": 1.22,
"learning_rate": 0.00017777053455019556,
"loss": 0.1453,
"step": 15000
},
{
"epoch": 1.22,
"eval_accuracy": 85.97436940602115,
"eval_average_metrics": 85.97436940602115,
"eval_loss": 0.16354931890964508,
"eval_runtime": 71.4854,
"eval_samples_per_second": 137.539,
"step": 15000
},
{
"epoch": 1.24,
"eval_accuracy": 86.04556550040684,
"eval_average_metrics": 86.04556550040684,
"eval_loss": 0.16333648562431335,
"eval_runtime": 68.0001,
"eval_samples_per_second": 144.588,
"step": 15200
},
{
"epoch": 1.25,
"eval_accuracy": 86.10659072416598,
"eval_average_metrics": 86.10659072416598,
"eval_loss": 0.1655624806880951,
"eval_runtime": 70.1652,
"eval_samples_per_second": 140.126,
"step": 15400
},
{
"epoch": 1.26,
"learning_rate": 0.00017369621903520204,
"loss": 0.1409,
"step": 15500
},
{
"epoch": 1.27,
"eval_accuracy": 86.08624898291293,
"eval_average_metrics": 86.08624898291293,
"eval_loss": 0.16712406277656555,
"eval_runtime": 70.4214,
"eval_samples_per_second": 139.617,
"step": 15600
},
{
"epoch": 1.29,
"eval_accuracy": 85.65907241659886,
"eval_average_metrics": 85.65907241659886,
"eval_loss": 0.16410161554813385,
"eval_runtime": 69.5518,
"eval_samples_per_second": 141.362,
"step": 15800
},
{
"epoch": 1.3,
"learning_rate": 0.00016962190352020858,
"loss": 0.144,
"step": 16000
},
{
"epoch": 1.3,
"eval_accuracy": 86.0353946297803,
"eval_average_metrics": 86.0353946297803,
"eval_loss": 0.1600012332201004,
"eval_runtime": 69.4158,
"eval_samples_per_second": 141.639,
"step": 16000
},
{
"epoch": 1.32,
"eval_accuracy": 85.9947111472742,
"eval_average_metrics": 85.9947111472742,
"eval_loss": 0.1664758175611496,
"eval_runtime": 70.4437,
"eval_samples_per_second": 139.572,
"step": 16200
},
{
"epoch": 1.34,
"eval_accuracy": 86.10659072416598,
"eval_average_metrics": 86.10659072416598,
"eval_loss": 0.16372230648994446,
"eval_runtime": 70.0549,
"eval_samples_per_second": 140.347,
"step": 16400
},
{
"epoch": 1.34,
"learning_rate": 0.0001655475880052151,
"loss": 0.142,
"step": 16500
},
{
"epoch": 1.35,
"eval_accuracy": 86.01505288852725,
"eval_average_metrics": 86.01505288852725,
"eval_loss": 0.16394633054733276,
"eval_runtime": 73.1757,
"eval_samples_per_second": 134.362,
"step": 16600
},
{
"epoch": 1.37,
"eval_accuracy": 86.19812855980472,
"eval_average_metrics": 86.19812855980472,
"eval_loss": 0.16236965358257294,
"eval_runtime": 67.0918,
"eval_samples_per_second": 146.545,
"step": 16800
},
{
"epoch": 1.39,
"learning_rate": 0.00016147327249022163,
"loss": 0.1469,
"step": 17000
},
{
"epoch": 1.39,
"eval_accuracy": 86.06590724165989,
"eval_average_metrics": 86.06590724165989,
"eval_loss": 0.15920616686344147,
"eval_runtime": 73.7251,
"eval_samples_per_second": 133.36,
"step": 17000
},
{
"epoch": 1.4,
"eval_accuracy": 86.31000813669651,
"eval_average_metrics": 86.31000813669651,
"eval_loss": 0.16292713582515717,
"eval_runtime": 70.7554,
"eval_samples_per_second": 138.958,
"step": 17200
},
{
"epoch": 1.42,
"eval_accuracy": 85.86248982912937,
"eval_average_metrics": 85.86248982912937,
"eval_loss": 0.16360752284526825,
"eval_runtime": 72.9243,
"eval_samples_per_second": 134.825,
"step": 17400
},
{
"epoch": 1.43,
"learning_rate": 0.00015739895697522815,
"loss": 0.145,
"step": 17500
},
{
"epoch": 1.43,
"eval_accuracy": 85.88283157038242,
"eval_average_metrics": 85.88283157038242,
"eval_loss": 0.16182997822761536,
"eval_runtime": 71.4174,
"eval_samples_per_second": 137.67,
"step": 17600
},
{
"epoch": 1.45,
"eval_accuracy": 85.79129373474369,
"eval_average_metrics": 85.79129373474369,
"eval_loss": 0.16671514511108398,
"eval_runtime": 72.782,
"eval_samples_per_second": 135.088,
"step": 17800
},
{
"epoch": 1.47,
"learning_rate": 0.00015332464146023469,
"loss": 0.1416,
"step": 18000
},
{
"epoch": 1.47,
"eval_accuracy": 85.79129373474369,
"eval_average_metrics": 85.79129373474369,
"eval_loss": 0.16370686888694763,
"eval_runtime": 69.6237,
"eval_samples_per_second": 141.216,
"step": 18000
},
{
"epoch": 1.48,
"eval_accuracy": 85.77095199349064,
"eval_average_metrics": 85.77095199349064,
"eval_loss": 0.15921832621097565,
"eval_runtime": 69.2043,
"eval_samples_per_second": 142.072,
"step": 18200
},
{
"epoch": 1.5,
"eval_accuracy": 85.82180634662328,
"eval_average_metrics": 85.82180634662328,
"eval_loss": 0.1640625149011612,
"eval_runtime": 68.8973,
"eval_samples_per_second": 142.705,
"step": 18400
},
{
"epoch": 1.51,
"learning_rate": 0.00014925032594524117,
"loss": 0.1453,
"step": 18500
},
{
"epoch": 1.52,
"eval_accuracy": 85.36411716842962,
"eval_average_metrics": 85.36411716842962,
"eval_loss": 0.1784326434135437,
"eval_runtime": 73.5853,
"eval_samples_per_second": 133.614,
"step": 18600
},
{
"epoch": 1.53,
"eval_accuracy": 85.89300244100895,
"eval_average_metrics": 85.89300244100895,
"eval_loss": 0.16068434715270996,
"eval_runtime": 69.0604,
"eval_samples_per_second": 142.368,
"step": 18800
},
{
"epoch": 1.55,
"learning_rate": 0.0001451760104302477,
"loss": 0.1414,
"step": 19000
},
{
"epoch": 1.55,
"eval_accuracy": 85.75061025223759,
"eval_average_metrics": 85.75061025223759,
"eval_loss": 0.164332315325737,
"eval_runtime": 75.2094,
"eval_samples_per_second": 130.728,
"step": 19000
},
{
"epoch": 1.56,
"eval_accuracy": 85.66924328722538,
"eval_average_metrics": 85.66924328722538,
"eval_loss": 0.15945520997047424,
"eval_runtime": 70.4879,
"eval_samples_per_second": 139.485,
"step": 19200
},
{
"epoch": 1.58,
"eval_accuracy": 85.74043938161107,
"eval_average_metrics": 85.74043938161107,
"eval_loss": 0.15915806591510773,
"eval_runtime": 72.0792,
"eval_samples_per_second": 136.405,
"step": 19400
},
{
"epoch": 1.59,
"learning_rate": 0.00014110169491525422,
"loss": 0.1463,
"step": 19500
},
{
"epoch": 1.6,
"eval_accuracy": 86.10659072416598,
"eval_average_metrics": 86.10659072416598,
"eval_loss": 0.16554424166679382,
"eval_runtime": 69.3189,
"eval_samples_per_second": 141.837,
"step": 19600
},
{
"epoch": 1.61,
"eval_accuracy": 85.8726606997559,
"eval_average_metrics": 85.8726606997559,
"eval_loss": 0.1639343500137329,
"eval_runtime": 70.0392,
"eval_samples_per_second": 140.378,
"step": 19800
},
{
"epoch": 1.63,
"learning_rate": 0.00013702737940026073,
"loss": 0.1435,
"step": 20000
},
{
"epoch": 1.63,
"eval_accuracy": 85.79129373474369,
"eval_average_metrics": 85.79129373474369,
"eval_loss": 0.1651633232831955,
"eval_runtime": 72.4148,
"eval_samples_per_second": 135.773,
"step": 20000
},
{
"epoch": 1.65,
"eval_accuracy": 85.90317331163547,
"eval_average_metrics": 85.90317331163547,
"eval_loss": 0.163535937666893,
"eval_runtime": 73.7758,
"eval_samples_per_second": 133.269,
"step": 20200
},
{
"epoch": 1.66,
"eval_accuracy": 85.81163547599675,
"eval_average_metrics": 85.81163547599675,
"eval_loss": 0.16132992506027222,
"eval_runtime": 74.1683,
"eval_samples_per_second": 132.563,
"step": 20400
},
{
"epoch": 1.67,
"learning_rate": 0.00013295306388526727,
"loss": 0.1393,
"step": 20500
},
{
"epoch": 1.68,
"eval_accuracy": 85.86248982912937,
"eval_average_metrics": 85.86248982912937,
"eval_loss": 0.16424906253814697,
"eval_runtime": 75.4388,
"eval_samples_per_second": 130.331,
"step": 20600
},
{
"epoch": 1.69,
"eval_accuracy": 86.20829943043124,
"eval_average_metrics": 86.20829943043124,
"eval_loss": 0.16280879080295563,
"eval_runtime": 73.6216,
"eval_samples_per_second": 133.548,
"step": 20800
},
{
"epoch": 1.71,
"learning_rate": 0.00012887874837027379,
"loss": 0.1476,
"step": 21000
},
{
"epoch": 1.71,
"eval_accuracy": 86.32017900732303,
"eval_average_metrics": 86.32017900732303,
"eval_loss": 0.1631232500076294,
"eval_runtime": 75.2489,
"eval_samples_per_second": 130.66,
"step": 21000
},
{
"epoch": 1.73,
"eval_accuracy": 85.97436940602115,
"eval_average_metrics": 85.97436940602115,
"eval_loss": 0.16299067437648773,
"eval_runtime": 66.4642,
"eval_samples_per_second": 147.929,
"step": 21200
},
{
"epoch": 1.74,
"eval_accuracy": 86.14727420667208,
"eval_average_metrics": 86.14727420667208,
"eval_loss": 0.16605544090270996,
"eval_runtime": 72.8565,
"eval_samples_per_second": 134.95,
"step": 21400
},
{
"epoch": 1.75,
"learning_rate": 0.0001248044328552803,
"loss": 0.1434,
"step": 21500
},
{
"epoch": 1.76,
"eval_accuracy": 85.73026851098454,
"eval_average_metrics": 85.73026851098454,
"eval_loss": 0.16752640902996063,
"eval_runtime": 73.5899,
"eval_samples_per_second": 133.605,
"step": 21600
},
{
"epoch": 1.78,
"eval_accuracy": 86.05573637103336,
"eval_average_metrics": 86.05573637103336,
"eval_loss": 0.1640099287033081,
"eval_runtime": 70.9879,
"eval_samples_per_second": 138.503,
"step": 21800
},
{
"epoch": 1.79,
"learning_rate": 0.00012073011734028682,
"loss": 0.1425,
"step": 22000
},
{
"epoch": 1.79,
"eval_accuracy": 86.02522375915377,
"eval_average_metrics": 86.02522375915377,
"eval_loss": 0.1621551811695099,
"eval_runtime": 67.2101,
"eval_samples_per_second": 146.288,
"step": 22000
},
{
"epoch": 1.81,
"eval_accuracy": 86.01505288852725,
"eval_average_metrics": 86.01505288852725,
"eval_loss": 0.1614847183227539,
"eval_runtime": 68.053,
"eval_samples_per_second": 144.476,
"step": 22200
},
{
"epoch": 1.83,
"eval_accuracy": 85.84214808787632,
"eval_average_metrics": 85.84214808787632,
"eval_loss": 0.1623518317937851,
"eval_runtime": 67.5192,
"eval_samples_per_second": 145.618,
"step": 22400
},
{
"epoch": 1.83,
"learning_rate": 0.00011665580182529335,
"loss": 0.1441,
"step": 22500
},
{
"epoch": 1.84,
"eval_accuracy": 86.06590724165989,
"eval_average_metrics": 86.06590724165989,
"eval_loss": 0.1657322645187378,
"eval_runtime": 65.1547,
"eval_samples_per_second": 150.902,
"step": 22600
},
{
"epoch": 1.86,
"eval_accuracy": 86.00488201790073,
"eval_average_metrics": 86.00488201790073,
"eval_loss": 0.16235147416591644,
"eval_runtime": 57.9601,
"eval_samples_per_second": 169.634,
"step": 22800
},
{
"epoch": 1.87,
"learning_rate": 0.00011258148631029986,
"loss": 0.1391,
"step": 23000
},
{
"epoch": 1.87,
"eval_accuracy": 85.82180634662328,
"eval_average_metrics": 85.82180634662328,
"eval_loss": 0.15935710072517395,
"eval_runtime": 57.3417,
"eval_samples_per_second": 171.463,
"step": 23000
},
{
"epoch": 1.89,
"eval_accuracy": 85.81163547599675,
"eval_average_metrics": 85.81163547599675,
"eval_loss": 0.1635563224554062,
"eval_runtime": 56.1817,
"eval_samples_per_second": 175.003,
"step": 23200
},
{
"epoch": 1.91,
"eval_accuracy": 85.7099267697315,
"eval_average_metrics": 85.7099267697315,
"eval_loss": 0.16560596227645874,
"eval_runtime": 59.2499,
"eval_samples_per_second": 165.941,
"step": 23400
},
{
"epoch": 1.91,
"learning_rate": 0.00010850717079530637,
"loss": 0.1382,
"step": 23500
},
{
"epoch": 1.92,
"eval_accuracy": 86.02522375915377,
"eval_average_metrics": 86.02522375915377,
"eval_loss": 0.1604122817516327,
"eval_runtime": 59.8309,
"eval_samples_per_second": 164.33,
"step": 23600
},
{
"epoch": 1.94,
"eval_accuracy": 85.80146460537021,
"eval_average_metrics": 85.80146460537021,
"eval_loss": 0.16524049639701843,
"eval_runtime": 61.3749,
"eval_samples_per_second": 160.196,
"step": 23800
},
{
"epoch": 1.96,
"learning_rate": 0.0001044328552803129,
"loss": 0.1379,
"step": 24000
},
{
"epoch": 1.96,
"eval_accuracy": 85.73026851098454,
"eval_average_metrics": 85.73026851098454,
"eval_loss": 0.16642265021800995,
"eval_runtime": 61.5426,
"eval_samples_per_second": 159.759,
"step": 24000
},
{
"epoch": 1.97,
"eval_accuracy": 86.23881204231083,
"eval_average_metrics": 86.23881204231083,
"eval_loss": 0.1592371165752411,
"eval_runtime": 61.4113,
"eval_samples_per_second": 160.101,
"step": 24200
},
{
"epoch": 1.99,
"eval_accuracy": 86.23881204231083,
"eval_average_metrics": 86.23881204231083,
"eval_loss": 0.16144132614135742,
"eval_runtime": 60.7982,
"eval_samples_per_second": 161.715,
"step": 24400
},
{
"epoch": 2.0,
"learning_rate": 0.00010035853976531943,
"loss": 0.1454,
"step": 24500
},
{
"epoch": 2.0,
"eval_accuracy": 86.09641985353946,
"eval_average_metrics": 86.09641985353946,
"eval_loss": 0.1667686253786087,
"eval_runtime": 60.7535,
"eval_samples_per_second": 161.834,
"step": 24600
},
{
"epoch": 2.02,
"eval_accuracy": 86.06590724165989,
"eval_average_metrics": 86.06590724165989,
"eval_loss": 0.1680220663547516,
"eval_runtime": 58.6093,
"eval_samples_per_second": 167.755,
"step": 24800
},
{
"epoch": 2.04,
"learning_rate": 9.628422425032592e-05,
"loss": 0.138,
"step": 25000
},
{
"epoch": 2.04,
"eval_accuracy": 85.84214808787632,
"eval_average_metrics": 85.84214808787632,
"eval_loss": 0.16406849026679993,
"eval_runtime": 56.628,
"eval_samples_per_second": 173.624,
"step": 25000
},
{
"epoch": 2.05,
"eval_accuracy": 85.89300244100895,
"eval_average_metrics": 85.89300244100895,
"eval_loss": 0.16717499494552612,
"eval_runtime": 55.5415,
"eval_samples_per_second": 177.021,
"step": 25200
},
{
"epoch": 2.07,
"eval_accuracy": 86.18795768917819,
"eval_average_metrics": 86.18795768917819,
"eval_loss": 0.16641969978809357,
"eval_runtime": 52.5873,
"eval_samples_per_second": 186.965,
"step": 25400
},
{
"epoch": 2.08,
"learning_rate": 9.220990873533245e-05,
"loss": 0.1356,
"step": 25500
},
{
"epoch": 2.09,
"eval_accuracy": 86.0353946297803,
"eval_average_metrics": 86.0353946297803,
"eval_loss": 0.16517092287540436,
"eval_runtime": 54.8202,
"eval_samples_per_second": 179.35,
"step": 25600
},
{
"epoch": 2.1,
"eval_accuracy": 86.20829943043124,
"eval_average_metrics": 86.20829943043124,
"eval_loss": 0.1606525331735611,
"eval_runtime": 58.0473,
"eval_samples_per_second": 169.379,
"step": 25800
},
{
"epoch": 2.12,
"learning_rate": 8.813559322033898e-05,
"loss": 0.138,
"step": 26000
},
{
"epoch": 2.12,
"eval_accuracy": 85.75061025223759,
"eval_average_metrics": 85.75061025223759,
"eval_loss": 0.1638970524072647,
"eval_runtime": 60.2198,
"eval_samples_per_second": 163.268,
"step": 26000
},
{
"epoch": 2.13,
"eval_accuracy": 85.913344182262,
"eval_average_metrics": 85.913344182262,
"eval_loss": 0.16841016709804535,
"eval_runtime": 59.4106,
"eval_samples_per_second": 165.492,
"step": 26200
},
{
"epoch": 2.15,
"eval_accuracy": 86.11676159479251,
"eval_average_metrics": 86.11676159479251,
"eval_loss": 0.16477040946483612,
"eval_runtime": 59.3558,
"eval_samples_per_second": 165.645,
"step": 26400
},
{
"epoch": 2.16,
"learning_rate": 8.406127770534549e-05,
"loss": 0.1315,
"step": 26500
},
{
"epoch": 2.17,
"eval_accuracy": 85.56753458096013,
"eval_average_metrics": 85.56753458096013,
"eval_loss": 0.17194555699825287,
"eval_runtime": 59.1291,
"eval_samples_per_second": 166.28,
"step": 26600
},
{
"epoch": 2.18,
"eval_accuracy": 85.94385679414158,
"eval_average_metrics": 85.94385679414158,
"eval_loss": 0.16446976363658905,
"eval_runtime": 46.5169,
"eval_samples_per_second": 211.364,
"step": 26800
},
{
"epoch": 2.2,
"learning_rate": 7.998696219035201e-05,
"loss": 0.1332,
"step": 27000
},
{
"epoch": 2.2,
"eval_accuracy": 85.81163547599675,
"eval_average_metrics": 85.81163547599675,
"eval_loss": 0.1695818454027176,
"eval_runtime": 45.2415,
"eval_samples_per_second": 217.323,
"step": 27000
},
{
"epoch": 2.22,
"eval_accuracy": 85.93368592351506,
"eval_average_metrics": 85.93368592351506,
"eval_loss": 0.1703195720911026,
"eval_runtime": 45.7196,
"eval_samples_per_second": 215.05,
"step": 27200
},
{
"epoch": 2.23,
"eval_accuracy": 86.14727420667208,
"eval_average_metrics": 86.14727420667208,
"eval_loss": 0.1657610833644867,
"eval_runtime": 48.3877,
"eval_samples_per_second": 203.192,
"step": 27400
},
{
"epoch": 2.24,
"learning_rate": 7.591264667535854e-05,
"loss": 0.1354,
"step": 27500
},
{
"epoch": 2.25,
"eval_accuracy": 86.09641985353946,
"eval_average_metrics": 86.09641985353946,
"eval_loss": 0.1658048778772354,
"eval_runtime": 49.7336,
"eval_samples_per_second": 197.693,
"step": 27600
},
{
"epoch": 2.27,
"eval_accuracy": 85.9947111472742,
"eval_average_metrics": 85.9947111472742,
"eval_loss": 0.16919544339179993,
"eval_runtime": 56.1273,
"eval_samples_per_second": 175.173,
"step": 27800
},
{
"epoch": 2.28,
"learning_rate": 7.183833116036505e-05,
"loss": 0.1311,
"step": 28000
},
{
"epoch": 2.28,
"eval_accuracy": 86.14727420667208,
"eval_average_metrics": 86.14727420667208,
"eval_loss": 0.16675373911857605,
"eval_runtime": 48.3863,
"eval_samples_per_second": 203.198,
"step": 28000
},
{
"epoch": 2.3,
"eval_accuracy": 85.73026851098454,
"eval_average_metrics": 85.73026851098454,
"eval_loss": 0.16785795986652374,
"eval_runtime": 49.8131,
"eval_samples_per_second": 197.378,
"step": 28200
},
{
"epoch": 2.31,
"eval_accuracy": 85.62855980471929,
"eval_average_metrics": 85.62855980471929,
"eval_loss": 0.1680869162082672,
"eval_runtime": 49.9453,
"eval_samples_per_second": 196.855,
"step": 28400
},
{
"epoch": 2.32,
"learning_rate": 6.776401564537158e-05,
"loss": 0.1351,
"step": 28500
},
{
"epoch": 2.33,
"eval_accuracy": 85.86248982912937,
"eval_average_metrics": 85.86248982912937,
"eval_loss": 0.16826093196868896,
"eval_runtime": 47.9335,
"eval_samples_per_second": 205.117,
"step": 28600
},
{
"epoch": 2.35,
"eval_accuracy": 85.84214808787632,
"eval_average_metrics": 85.84214808787632,
"eval_loss": 0.16651229560375214,
"eval_runtime": 49.892,
"eval_samples_per_second": 197.066,
"step": 28800
},
{
"epoch": 2.36,
"learning_rate": 6.368970013037809e-05,
"loss": 0.1347,
"step": 29000
},
{
"epoch": 2.36,
"eval_accuracy": 86.01505288852725,
"eval_average_metrics": 86.01505288852725,
"eval_loss": 0.16772997379302979,
"eval_runtime": 49.8947,
"eval_samples_per_second": 197.055,
"step": 29000
},
{
"epoch": 2.38,
"eval_accuracy": 86.2693246541904,
"eval_average_metrics": 86.2693246541904,
"eval_loss": 0.16580338776111603,
"eval_runtime": 50.1072,
"eval_samples_per_second": 196.219,
"step": 29200
},
{
"epoch": 2.4,
"eval_accuracy": 86.20829943043124,
"eval_average_metrics": 86.20829943043124,
"eval_loss": 0.16437767446041107,
"eval_runtime": 52.1067,
"eval_samples_per_second": 188.69,
"step": 29400
},
{
"epoch": 2.4,
"learning_rate": 5.961538461538461e-05,
"loss": 0.1308,
"step": 29500
},
{
"epoch": 2.41,
"eval_accuracy": 86.10659072416598,
"eval_average_metrics": 86.10659072416598,
"eval_loss": 0.16909147799015045,
"eval_runtime": 48.4579,
"eval_samples_per_second": 202.898,
"step": 29600
},
{
"epoch": 2.43,
"eval_accuracy": 86.07607811228641,
"eval_average_metrics": 86.07607811228641,
"eval_loss": 0.16544800996780396,
"eval_runtime": 53.3838,
"eval_samples_per_second": 184.176,
"step": 29800
},
{
"epoch": 2.44,
"learning_rate": 5.554106910039113e-05,
"loss": 0.1301,
"step": 30000
},
{
"epoch": 2.44,
"eval_accuracy": 86.15744507729862,
"eval_average_metrics": 86.15744507729862,
"eval_loss": 0.16652615368366241,
"eval_runtime": 45.6168,
"eval_samples_per_second": 215.535,
"step": 30000
},
{
"epoch": 2.46,
"eval_accuracy": 86.00488201790073,
"eval_average_metrics": 86.00488201790073,
"eval_loss": 0.16784194111824036,
"eval_runtime": 46.0567,
"eval_samples_per_second": 213.476,
"step": 30200
},
{
"epoch": 2.48,
"eval_accuracy": 86.12693246541903,
"eval_average_metrics": 86.12693246541903,
"eval_loss": 0.16726775467395782,
"eval_runtime": 53.9156,
"eval_samples_per_second": 182.359,
"step": 30400
},
{
"epoch": 2.49,
"learning_rate": 5.146675358539765e-05,
"loss": 0.1324,
"step": 30500
},
{
"epoch": 2.49,
"eval_accuracy": 86.2693246541904,
"eval_average_metrics": 86.2693246541904,
"eval_loss": 0.16430824995040894,
"eval_runtime": 48.9757,
"eval_samples_per_second": 200.753,
"step": 30600
},
{
"epoch": 2.51,
"eval_accuracy": 85.88283157038242,
"eval_average_metrics": 85.88283157038242,
"eval_loss": 0.16779069602489471,
"eval_runtime": 45.747,
"eval_samples_per_second": 214.921,
"step": 30800
},
{
"epoch": 2.53,
"learning_rate": 4.7392438070404173e-05,
"loss": 0.1333,
"step": 31000
},
{
"epoch": 2.53,
"eval_accuracy": 86.12693246541903,
"eval_average_metrics": 86.12693246541903,
"eval_loss": 0.16358087956905365,
"eval_runtime": 45.8832,
"eval_samples_per_second": 214.283,
"step": 31000
},
{
"epoch": 2.54,
"eval_accuracy": 86.20829943043124,
"eval_average_metrics": 86.20829943043124,
"eval_loss": 0.16272908449172974,
"eval_runtime": 44.9382,
"eval_samples_per_second": 218.789,
"step": 31200
},
{
"epoch": 2.56,
"eval_accuracy": 86.21847030105776,
"eval_average_metrics": 86.21847030105776,
"eval_loss": 0.16233167052268982,
"eval_runtime": 45.3879,
"eval_samples_per_second": 216.621,
"step": 31400
},
{
"epoch": 2.57,
"learning_rate": 4.3318122555410686e-05,
"loss": 0.1366,
"step": 31500
},
{
"epoch": 2.57,
"eval_accuracy": 86.12693246541903,
"eval_average_metrics": 86.12693246541903,
"eval_loss": 0.16067072749137878,
"eval_runtime": 45.3275,
"eval_samples_per_second": 216.91,
"step": 31600
},
{
"epoch": 2.59,
"eval_accuracy": 86.00488201790073,
"eval_average_metrics": 86.00488201790073,
"eval_loss": 0.16438935697078705,
"eval_runtime": 45.069,
"eval_samples_per_second": 218.154,
"step": 31800
},
{
"epoch": 2.61,
"learning_rate": 3.924380704041721e-05,
"loss": 0.1401,
"step": 32000
},
{
"epoch": 2.61,
"eval_accuracy": 86.13710333604556,
"eval_average_metrics": 86.13710333604556,
"eval_loss": 0.15911179780960083,
"eval_runtime": 45.1936,
"eval_samples_per_second": 217.553,
"step": 32000
},
{
"epoch": 2.62,
"eval_accuracy": 86.0353946297803,
"eval_average_metrics": 86.0353946297803,
"eval_loss": 0.16360121965408325,
"eval_runtime": 46.2972,
"eval_samples_per_second": 212.367,
"step": 32200
},
{
"epoch": 2.64,
"eval_accuracy": 86.19812855980472,
"eval_average_metrics": 86.19812855980472,
"eval_loss": 0.1620582789182663,
"eval_runtime": 45.431,
"eval_samples_per_second": 216.416,
"step": 32400
},
{
"epoch": 2.65,
"learning_rate": 3.5169491525423724e-05,
"loss": 0.1343,
"step": 32500
},
{
"epoch": 2.66,
"eval_accuracy": 86.08624898291293,
"eval_average_metrics": 86.08624898291293,
"eval_loss": 0.16568879783153534,
"eval_runtime": 45.3557,
"eval_samples_per_second": 216.775,
"step": 32600
},
{
"epoch": 2.67,
"eval_accuracy": 86.3506916192026,
"eval_average_metrics": 86.3506916192026,
"eval_loss": 0.16203464567661285,
"eval_runtime": 44.9934,
"eval_samples_per_second": 218.521,
"step": 32800
},
{
"epoch": 2.69,
"learning_rate": 3.109517601043025e-05,
"loss": 0.1345,
"step": 33000
},
{
"epoch": 2.69,
"eval_accuracy": 86.2286411716843,
"eval_average_metrics": 86.2286411716843,
"eval_loss": 0.1651608943939209,
"eval_runtime": 45.6781,
"eval_samples_per_second": 215.246,
"step": 33000
},
{
"epoch": 2.71,
"eval_accuracy": 86.28966639544345,
"eval_average_metrics": 86.28966639544345,
"eval_loss": 0.16327986121177673,
"eval_runtime": 45.4189,
"eval_samples_per_second": 216.474,
"step": 33200
},
{
"epoch": 2.72,
"eval_accuracy": 86.32017900732303,
"eval_average_metrics": 86.32017900732303,
"eval_loss": 0.16431905329227448,
"eval_runtime": 44.9451,
"eval_samples_per_second": 218.756,
"step": 33400
},
{
"epoch": 2.73,
"learning_rate": 2.7020860495436762e-05,
"loss": 0.1321,
"step": 33500
},
{
"epoch": 2.74,
"eval_accuracy": 86.12693246541903,
"eval_average_metrics": 86.12693246541903,
"eval_loss": 0.16271112859249115,
"eval_runtime": 45.4697,
"eval_samples_per_second": 216.232,
"step": 33600
},
{
"epoch": 2.75,
"eval_accuracy": 86.27949552481692,
"eval_average_metrics": 86.27949552481692,
"eval_loss": 0.16375945508480072,
"eval_runtime": 45.4502,
"eval_samples_per_second": 216.325,
"step": 33800
},
{
"epoch": 2.77,
"learning_rate": 2.294654498044328e-05,
"loss": 0.1348,
"step": 34000
},
{
"epoch": 2.77,
"eval_accuracy": 86.12693246541903,
"eval_average_metrics": 86.12693246541903,
"eval_loss": 0.16407504677772522,
"eval_runtime": 45.1689,
"eval_samples_per_second": 217.672,
"step": 34000
},
{
"epoch": 2.79,
"eval_accuracy": 86.2693246541904,
"eval_average_metrics": 86.2693246541904,
"eval_loss": 0.16450707614421844,
"eval_runtime": 45.2377,
"eval_samples_per_second": 217.341,
"step": 34200
},
{
"epoch": 2.8,
"eval_accuracy": 86.10659072416598,
"eval_average_metrics": 86.10659072416598,
"eval_loss": 0.16434065997600555,
"eval_runtime": 45.1061,
"eval_samples_per_second": 217.975,
"step": 34400
},
{
"epoch": 2.81,
"learning_rate": 1.8872229465449803e-05,
"loss": 0.1361,
"step": 34500
},
{
"epoch": 2.82,
"eval_accuracy": 86.2286411716843,
"eval_average_metrics": 86.2286411716843,
"eval_loss": 0.16249413788318634,
"eval_runtime": 45.4837,
"eval_samples_per_second": 216.165,
"step": 34600
},
{
"epoch": 2.84,
"eval_accuracy": 86.14727420667208,
"eval_average_metrics": 86.14727420667208,
"eval_loss": 0.1645725518465042,
"eval_runtime": 45.3804,
"eval_samples_per_second": 216.657,
"step": 34800
},
{
"epoch": 2.85,
"learning_rate": 1.4797913950456322e-05,
"loss": 0.1335,
"step": 35000
},
{
"epoch": 2.85,
"eval_accuracy": 86.16761594792514,
"eval_average_metrics": 86.16761594792514,
"eval_loss": 0.16331711411476135,
"eval_runtime": 45.6878,
"eval_samples_per_second": 215.2,
"step": 35000
},
{
"epoch": 2.87,
"eval_accuracy": 86.12693246541903,
"eval_average_metrics": 86.12693246541903,
"eval_loss": 0.16130615770816803,
"eval_runtime": 45.0413,
"eval_samples_per_second": 218.289,
"step": 35200
},
{
"epoch": 2.88,
"eval_accuracy": 86.25915378356387,
"eval_average_metrics": 86.25915378356387,
"eval_loss": 0.16118405759334564,
"eval_runtime": 45.4606,
"eval_samples_per_second": 216.275,
"step": 35400
},
{
"epoch": 2.89,
"learning_rate": 1.0723598435462841e-05,
"loss": 0.1368,
"step": 35500
},
{
"epoch": 2.9,
"eval_accuracy": 86.20829943043124,
"eval_average_metrics": 86.20829943043124,
"eval_loss": 0.16242747008800507,
"eval_runtime": 45.4511,
"eval_samples_per_second": 216.32,
"step": 35600
},
{
"epoch": 2.92,
"eval_accuracy": 86.16761594792514,
"eval_average_metrics": 86.16761594792514,
"eval_loss": 0.16256776452064514,
"eval_runtime": 45.2281,
"eval_samples_per_second": 217.387,
"step": 35800
},
{
"epoch": 2.93,
"learning_rate": 6.649282920469361e-06,
"loss": 0.1323,
"step": 36000
},
{
"epoch": 2.93,
"eval_accuracy": 86.33034987794956,
"eval_average_metrics": 86.33034987794956,
"eval_loss": 0.1619912087917328,
"eval_runtime": 45.4939,
"eval_samples_per_second": 216.117,
"step": 36000
},
{
"epoch": 2.95,
"eval_accuracy": 86.20829943043124,
"eval_average_metrics": 86.20829943043124,
"eval_loss": 0.1618933379650116,
"eval_runtime": 45.7568,
"eval_samples_per_second": 214.875,
"step": 36200
},
{
"epoch": 2.97,
"eval_accuracy": 86.17778681855167,
"eval_average_metrics": 86.17778681855167,
"eval_loss": 0.1621612161397934,
"eval_runtime": 44.9098,
"eval_samples_per_second": 218.928,
"step": 36400
},
{
"epoch": 2.97,
"learning_rate": 2.5749674054758798e-06,
"loss": 0.1334,
"step": 36500
},
{
"epoch": 2.98,
"eval_accuracy": 86.21847030105776,
"eval_average_metrics": 86.21847030105776,
"eval_loss": 0.1621207445859909,
"eval_runtime": 45.2906,
"eval_samples_per_second": 217.087,
"step": 36600
},
{
"epoch": 3.0,
"eval_accuracy": 86.19812855980472,
"eval_average_metrics": 86.19812855980472,
"eval_loss": 0.16204114258289337,
"eval_runtime": 45.2396,
"eval_samples_per_second": 217.332,
"step": 36800
},
{
"epoch": 3.0,
"step": 36816,
"total_flos": 1.4734111386140467e+17,
"train_loss": 0.14615808979732375,
"train_runtime": 23668.4692,
"train_samples_per_second": 49.775,
"train_steps_per_second": 1.555
}
],
"max_steps": 36816,
"num_train_epochs": 3,
"total_flos": 1.4734111386140467e+17,
"trial_name": null,
"trial_params": null
}