{ "best_metric": 86.3506916192026, "best_model_checkpoint": "outputs/bitfit/t5-base/mnli/checkpoint-32800", "epoch": 3.0, "global_step": 36816, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "eval_accuracy": 84.0520748576078, "eval_average_metrics": 84.0520748576078, "eval_loss": 0.19396202266216278, "eval_runtime": 73.5315, "eval_samples_per_second": 133.711, "step": 200 }, { "epoch": 0.03, "eval_accuracy": 85.08950366151342, "eval_average_metrics": 85.08950366151342, "eval_loss": 0.1736125648021698, "eval_runtime": 77.6953, "eval_samples_per_second": 126.546, "step": 400 }, { "epoch": 0.04, "learning_rate": 0.0002959256844850065, "loss": 0.2786, "step": 500 }, { "epoch": 0.05, "eval_accuracy": 85.25223759153783, "eval_average_metrics": 85.25223759153783, "eval_loss": 0.1704595386981964, "eval_runtime": 71.3646, "eval_samples_per_second": 137.771, "step": 600 }, { "epoch": 0.07, "eval_accuracy": 85.120016273393, "eval_average_metrics": 85.120016273393, "eval_loss": 0.17760007083415985, "eval_runtime": 70.572, "eval_samples_per_second": 139.319, "step": 800 }, { "epoch": 0.08, "learning_rate": 0.000291851368970013, "loss": 0.1754, "step": 1000 }, { "epoch": 0.08, "eval_accuracy": 85.81163547599675, "eval_average_metrics": 85.81163547599675, "eval_loss": 0.17395834624767303, "eval_runtime": 68.4178, "eval_samples_per_second": 143.705, "step": 1000 }, { "epoch": 0.1, "eval_accuracy": 86.04556550040684, "eval_average_metrics": 86.04556550040684, "eval_loss": 0.16720984876155853, "eval_runtime": 74.935, "eval_samples_per_second": 131.207, "step": 1200 }, { "epoch": 0.11, "eval_accuracy": 85.85231895850285, "eval_average_metrics": 85.85231895850285, "eval_loss": 0.1686050444841385, "eval_runtime": 65.5334, "eval_samples_per_second": 150.03, "step": 1400 }, { "epoch": 0.12, "learning_rate": 0.00028777705345501956, "loss": 0.17, "step": 1500 }, { "epoch": 0.13, "eval_accuracy": 84.83523189585028, "eval_average_metrics": 84.83523189585028, "eval_loss": 0.17506256699562073, "eval_runtime": 72.658, "eval_samples_per_second": 135.319, "step": 1600 }, { "epoch": 0.15, "eval_accuracy": 85.77095199349064, "eval_average_metrics": 85.77095199349064, "eval_loss": 0.16318167746067047, "eval_runtime": 71.0929, "eval_samples_per_second": 138.298, "step": 1800 }, { "epoch": 0.16, "learning_rate": 0.000283702737940026, "loss": 0.1593, "step": 2000 }, { "epoch": 0.16, "eval_accuracy": 84.98779495524816, "eval_average_metrics": 84.98779495524816, "eval_loss": 0.17500561475753784, "eval_runtime": 76.1036, "eval_samples_per_second": 129.192, "step": 2000 }, { "epoch": 0.18, "eval_accuracy": 85.22172497965825, "eval_average_metrics": 85.22172497965825, "eval_loss": 0.16974958777427673, "eval_runtime": 73.5318, "eval_samples_per_second": 133.711, "step": 2200 }, { "epoch": 0.2, "eval_accuracy": 84.93694060211554, "eval_average_metrics": 84.93694060211554, "eval_loss": 0.16803883016109467, "eval_runtime": 71.8068, "eval_samples_per_second": 136.923, "step": 2400 }, { "epoch": 0.2, "learning_rate": 0.00027962842242503253, "loss": 0.1626, "step": 2500 }, { "epoch": 0.21, "eval_accuracy": 85.64890154597234, "eval_average_metrics": 85.64890154597234, "eval_loss": 0.16620652377605438, "eval_runtime": 70.4457, "eval_samples_per_second": 139.569, "step": 2600 }, { "epoch": 0.23, "eval_accuracy": 85.17087062652563, "eval_average_metrics": 85.17087062652563, "eval_loss": 0.16685815155506134, "eval_runtime": 70.323, "eval_samples_per_second": 139.812, "step": 2800 }, { "epoch": 0.24, "learning_rate": 0.0002755541069100391, "loss": 0.1637, "step": 3000 }, { "epoch": 0.24, "eval_accuracy": 85.08950366151342, "eval_average_metrics": 85.08950366151342, "eval_loss": 0.1686829775571823, "eval_runtime": 68.4934, "eval_samples_per_second": 143.547, "step": 3000 }, { "epoch": 0.26, "eval_accuracy": 84.67249796582588, "eval_average_metrics": 84.67249796582588, "eval_loss": 0.1832115650177002, "eval_runtime": 69.9104, "eval_samples_per_second": 140.637, "step": 3200 }, { "epoch": 0.28, "eval_accuracy": 85.74043938161107, "eval_average_metrics": 85.74043938161107, "eval_loss": 0.15809670090675354, "eval_runtime": 71.8214, "eval_samples_per_second": 136.895, "step": 3400 }, { "epoch": 0.29, "learning_rate": 0.0002714797913950456, "loss": 0.1611, "step": 3500 }, { "epoch": 0.29, "eval_accuracy": 85.82180634662328, "eval_average_metrics": 85.82180634662328, "eval_loss": 0.16679300367832184, "eval_runtime": 68.4877, "eval_samples_per_second": 143.559, "step": 3600 }, { "epoch": 0.31, "eval_accuracy": 85.69975589910497, "eval_average_metrics": 85.69975589910497, "eval_loss": 0.1635247766971588, "eval_runtime": 70.3174, "eval_samples_per_second": 139.823, "step": 3800 }, { "epoch": 0.33, "learning_rate": 0.0002674054758800521, "loss": 0.1546, "step": 4000 }, { "epoch": 0.33, "eval_accuracy": 85.8319772172498, "eval_average_metrics": 85.8319772172498, "eval_loss": 0.16676998138427734, "eval_runtime": 73.2289, "eval_samples_per_second": 134.264, "step": 4000 }, { "epoch": 0.34, "eval_accuracy": 85.50650935720098, "eval_average_metrics": 85.50650935720098, "eval_loss": 0.17212657630443573, "eval_runtime": 74.584, "eval_samples_per_second": 131.824, "step": 4200 }, { "epoch": 0.36, "eval_accuracy": 85.54719283970708, "eval_average_metrics": 85.54719283970708, "eval_loss": 0.17162065207958221, "eval_runtime": 69.8754, "eval_samples_per_second": 140.708, "step": 4400 }, { "epoch": 0.37, "learning_rate": 0.00026333116036505864, "loss": 0.1531, "step": 4500 }, { "epoch": 0.37, "eval_accuracy": 85.96419853539463, "eval_average_metrics": 85.96419853539463, "eval_loss": 0.16573481261730194, "eval_runtime": 67.7552, "eval_samples_per_second": 145.111, "step": 4600 }, { "epoch": 0.39, "eval_accuracy": 85.78112286411717, "eval_average_metrics": 85.78112286411717, "eval_loss": 0.16396570205688477, "eval_runtime": 63.3281, "eval_samples_per_second": 155.255, "step": 4800 }, { "epoch": 0.41, "learning_rate": 0.0002592568448500652, "loss": 0.1566, "step": 5000 }, { "epoch": 0.41, "eval_accuracy": 85.73026851098454, "eval_average_metrics": 85.73026851098454, "eval_loss": 0.16679789125919342, "eval_runtime": 68.8228, "eval_samples_per_second": 142.86, "step": 5000 }, { "epoch": 0.42, "eval_accuracy": 85.68958502847845, "eval_average_metrics": 85.68958502847845, "eval_loss": 0.16058295965194702, "eval_runtime": 66.5592, "eval_samples_per_second": 147.718, "step": 5200 }, { "epoch": 0.44, "eval_accuracy": 85.66924328722538, "eval_average_metrics": 85.66924328722538, "eval_loss": 0.1740991473197937, "eval_runtime": 66.4465, "eval_samples_per_second": 147.969, "step": 5400 }, { "epoch": 0.45, "learning_rate": 0.00025518252933507166, "loss": 0.1514, "step": 5500 }, { "epoch": 0.46, "eval_accuracy": 84.66232709519936, "eval_average_metrics": 84.66232709519936, "eval_loss": 0.18576639890670776, "eval_runtime": 66.3297, "eval_samples_per_second": 148.229, "step": 5600 }, { "epoch": 0.47, "eval_accuracy": 85.88283157038242, "eval_average_metrics": 85.88283157038242, "eval_loss": 0.16450409591197968, "eval_runtime": 64.7793, "eval_samples_per_second": 151.777, "step": 5800 }, { "epoch": 0.49, "learning_rate": 0.0002511082138200782, "loss": 0.1531, "step": 6000 }, { "epoch": 0.49, "eval_accuracy": 85.13018714401953, "eval_average_metrics": 85.13018714401953, "eval_loss": 0.17607340216636658, "eval_runtime": 63.5814, "eval_samples_per_second": 154.637, "step": 6000 }, { "epoch": 0.51, "eval_accuracy": 85.35394629780309, "eval_average_metrics": 85.35394629780309, "eval_loss": 0.16797170042991638, "eval_runtime": 63.9708, "eval_samples_per_second": 153.695, "step": 6200 }, { "epoch": 0.52, "eval_accuracy": 85.75061025223759, "eval_average_metrics": 85.75061025223759, "eval_loss": 0.16036862134933472, "eval_runtime": 64.9464, "eval_samples_per_second": 151.386, "step": 6400 }, { "epoch": 0.53, "learning_rate": 0.00024703389830508474, "loss": 0.1544, "step": 6500 }, { "epoch": 0.54, "eval_accuracy": 85.64890154597234, "eval_average_metrics": 85.64890154597234, "eval_loss": 0.16572105884552002, "eval_runtime": 67.1736, "eval_samples_per_second": 146.367, "step": 6600 }, { "epoch": 0.55, "eval_accuracy": 85.40480065093573, "eval_average_metrics": 85.40480065093573, "eval_loss": 0.16141638159751892, "eval_runtime": 67.3069, "eval_samples_per_second": 146.077, "step": 6800 }, { "epoch": 0.57, "learning_rate": 0.00024295958279009125, "loss": 0.1549, "step": 7000 }, { "epoch": 0.57, "eval_accuracy": 85.84214808787632, "eval_average_metrics": 85.84214808787632, "eval_loss": 0.1581791192293167, "eval_runtime": 70.2525, "eval_samples_per_second": 139.952, "step": 7000 }, { "epoch": 0.59, "eval_accuracy": 85.78112286411717, "eval_average_metrics": 85.78112286411717, "eval_loss": 0.1584727168083191, "eval_runtime": 70.5829, "eval_samples_per_second": 139.297, "step": 7200 }, { "epoch": 0.6, "eval_accuracy": 85.84214808787632, "eval_average_metrics": 85.84214808787632, "eval_loss": 0.16103526949882507, "eval_runtime": 66.5529, "eval_samples_per_second": 147.732, "step": 7400 }, { "epoch": 0.61, "learning_rate": 0.00023888526727509777, "loss": 0.1572, "step": 7500 }, { "epoch": 0.62, "eval_accuracy": 86.00488201790073, "eval_average_metrics": 86.00488201790073, "eval_loss": 0.17075441777706146, "eval_runtime": 61.313, "eval_samples_per_second": 160.358, "step": 7600 }, { "epoch": 0.64, "eval_accuracy": 85.89300244100895, "eval_average_metrics": 85.89300244100895, "eval_loss": 0.1631649136543274, "eval_runtime": 70.2006, "eval_samples_per_second": 140.056, "step": 7800 }, { "epoch": 0.65, "learning_rate": 0.0002348109517601043, "loss": 0.1552, "step": 8000 }, { "epoch": 0.65, "eval_accuracy": 86.00488201790073, "eval_average_metrics": 86.00488201790073, "eval_loss": 0.15884214639663696, "eval_runtime": 70.0105, "eval_samples_per_second": 140.436, "step": 8000 }, { "epoch": 0.67, "eval_accuracy": 85.42514239218877, "eval_average_metrics": 85.42514239218877, "eval_loss": 0.16717489063739777, "eval_runtime": 67.9572, "eval_samples_per_second": 144.679, "step": 8200 }, { "epoch": 0.68, "eval_accuracy": 85.66924328722538, "eval_average_metrics": 85.66924328722538, "eval_loss": 0.16362008452415466, "eval_runtime": 71.9947, "eval_samples_per_second": 136.566, "step": 8400 }, { "epoch": 0.69, "learning_rate": 0.00023073663624511082, "loss": 0.1518, "step": 8500 }, { "epoch": 0.7, "eval_accuracy": 85.49633848657446, "eval_average_metrics": 85.49633848657446, "eval_loss": 0.16621538996696472, "eval_runtime": 70.1174, "eval_samples_per_second": 140.222, "step": 8600 }, { "epoch": 0.72, "eval_accuracy": 85.98454027664768, "eval_average_metrics": 85.98454027664768, "eval_loss": 0.16499604284763336, "eval_runtime": 68.6629, "eval_samples_per_second": 143.192, "step": 8800 }, { "epoch": 0.73, "learning_rate": 0.0002266623207301173, "loss": 0.1514, "step": 9000 }, { "epoch": 0.73, "eval_accuracy": 86.09641985353946, "eval_average_metrics": 86.09641985353946, "eval_loss": 0.1623799055814743, "eval_runtime": 70.2156, "eval_samples_per_second": 140.026, "step": 9000 }, { "epoch": 0.75, "eval_accuracy": 85.54719283970708, "eval_average_metrics": 85.54719283970708, "eval_loss": 0.1693897545337677, "eval_runtime": 64.4752, "eval_samples_per_second": 152.493, "step": 9200 }, { "epoch": 0.77, "eval_accuracy": 85.15052888527258, "eval_average_metrics": 85.15052888527258, "eval_loss": 0.16912253201007843, "eval_runtime": 71.6316, "eval_samples_per_second": 137.258, "step": 9400 }, { "epoch": 0.77, "learning_rate": 0.00022258800521512384, "loss": 0.1492, "step": 9500 }, { "epoch": 0.78, "eval_accuracy": 85.81163547599675, "eval_average_metrics": 85.81163547599675, "eval_loss": 0.16445724666118622, "eval_runtime": 70.0896, "eval_samples_per_second": 140.278, "step": 9600 }, { "epoch": 0.8, "eval_accuracy": 85.51668022782751, "eval_average_metrics": 85.51668022782751, "eval_loss": 0.171467587351799, "eval_runtime": 67.0912, "eval_samples_per_second": 146.547, "step": 9800 }, { "epoch": 0.81, "learning_rate": 0.00021851368970013035, "loss": 0.1465, "step": 10000 }, { "epoch": 0.81, "eval_accuracy": 85.76078112286412, "eval_average_metrics": 85.76078112286412, "eval_loss": 0.16485248506069183, "eval_runtime": 71.6667, "eval_samples_per_second": 137.191, "step": 10000 }, { "epoch": 0.83, "eval_accuracy": 85.54719283970708, "eval_average_metrics": 85.54719283970708, "eval_loss": 0.16628311574459076, "eval_runtime": 69.3952, "eval_samples_per_second": 141.681, "step": 10200 }, { "epoch": 0.85, "eval_accuracy": 85.72009764035802, "eval_average_metrics": 85.72009764035802, "eval_loss": 0.1626047044992447, "eval_runtime": 63.0097, "eval_samples_per_second": 156.039, "step": 10400 }, { "epoch": 0.86, "learning_rate": 0.0002144393741851369, "loss": 0.1478, "step": 10500 }, { "epoch": 0.86, "eval_accuracy": 85.64890154597234, "eval_average_metrics": 85.64890154597234, "eval_loss": 0.16279704868793488, "eval_runtime": 72.6628, "eval_samples_per_second": 135.31, "step": 10600 }, { "epoch": 0.88, "eval_accuracy": 85.80146460537021, "eval_average_metrics": 85.80146460537021, "eval_loss": 0.1637255698442459, "eval_runtime": 71.5577, "eval_samples_per_second": 137.4, "step": 10800 }, { "epoch": 0.9, "learning_rate": 0.0002103650586701434, "loss": 0.1509, "step": 11000 }, { "epoch": 0.9, "eval_accuracy": 85.79129373474369, "eval_average_metrics": 85.79129373474369, "eval_loss": 0.16114714741706848, "eval_runtime": 72.3627, "eval_samples_per_second": 135.871, "step": 11000 }, { "epoch": 0.91, "eval_accuracy": 85.8319772172498, "eval_average_metrics": 85.8319772172498, "eval_loss": 0.15985067188739777, "eval_runtime": 68.9168, "eval_samples_per_second": 142.665, "step": 11200 }, { "epoch": 0.93, "eval_accuracy": 85.40480065093573, "eval_average_metrics": 85.40480065093573, "eval_loss": 0.17174053192138672, "eval_runtime": 68.3249, "eval_samples_per_second": 143.901, "step": 11400 }, { "epoch": 0.94, "learning_rate": 0.00020629074315514992, "loss": 0.15, "step": 11500 }, { "epoch": 0.95, "eval_accuracy": 85.97436940602115, "eval_average_metrics": 85.97436940602115, "eval_loss": 0.15932144224643707, "eval_runtime": 68.7187, "eval_samples_per_second": 143.076, "step": 11600 }, { "epoch": 0.96, "eval_accuracy": 85.59804719283972, "eval_average_metrics": 85.59804719283972, "eval_loss": 0.16465091705322266, "eval_runtime": 68.0374, "eval_samples_per_second": 144.509, "step": 11800 }, { "epoch": 0.98, "learning_rate": 0.00020221642764015643, "loss": 0.1514, "step": 12000 }, { "epoch": 0.98, "eval_accuracy": 85.88283157038242, "eval_average_metrics": 85.88283157038242, "eval_loss": 0.15999911725521088, "eval_runtime": 67.678, "eval_samples_per_second": 145.276, "step": 12000 }, { "epoch": 0.99, "eval_accuracy": 86.0353946297803, "eval_average_metrics": 86.0353946297803, "eval_loss": 0.16025537252426147, "eval_runtime": 63.6019, "eval_samples_per_second": 154.587, "step": 12200 }, { "epoch": 1.01, "eval_accuracy": 85.8726606997559, "eval_average_metrics": 85.8726606997559, "eval_loss": 0.1621241718530655, "eval_runtime": 68.3301, "eval_samples_per_second": 143.89, "step": 12400 }, { "epoch": 1.02, "learning_rate": 0.00019814211212516294, "loss": 0.147, "step": 12500 }, { "epoch": 1.03, "eval_accuracy": 85.76078112286412, "eval_average_metrics": 85.76078112286412, "eval_loss": 0.17349866032600403, "eval_runtime": 66.9259, "eval_samples_per_second": 146.909, "step": 12600 }, { "epoch": 1.04, "eval_accuracy": 85.78112286411717, "eval_average_metrics": 85.78112286411717, "eval_loss": 0.1615545153617859, "eval_runtime": 69.1346, "eval_samples_per_second": 142.215, "step": 12800 }, { "epoch": 1.06, "learning_rate": 0.00019406779661016945, "loss": 0.143, "step": 13000 }, { "epoch": 1.06, "eval_accuracy": 85.84214808787632, "eval_average_metrics": 85.84214808787632, "eval_loss": 0.1661369502544403, "eval_runtime": 62.8833, "eval_samples_per_second": 156.353, "step": 13000 }, { "epoch": 1.08, "eval_accuracy": 86.06590724165989, "eval_average_metrics": 86.06590724165989, "eval_loss": 0.16263821721076965, "eval_runtime": 68.8917, "eval_samples_per_second": 142.717, "step": 13200 }, { "epoch": 1.09, "eval_accuracy": 85.69975589910497, "eval_average_metrics": 85.69975589910497, "eval_loss": 0.1634710133075714, "eval_runtime": 62.7526, "eval_samples_per_second": 156.679, "step": 13400 }, { "epoch": 1.1, "learning_rate": 0.000189993481095176, "loss": 0.1444, "step": 13500 }, { "epoch": 1.11, "eval_accuracy": 85.78112286411717, "eval_average_metrics": 85.78112286411717, "eval_loss": 0.16405758261680603, "eval_runtime": 66.2833, "eval_samples_per_second": 148.333, "step": 13600 }, { "epoch": 1.12, "eval_accuracy": 85.9540276647681, "eval_average_metrics": 85.9540276647681, "eval_loss": 0.16064594686031342, "eval_runtime": 63.0321, "eval_samples_per_second": 155.984, "step": 13800 }, { "epoch": 1.14, "learning_rate": 0.0001859191655801825, "loss": 0.1466, "step": 14000 }, { "epoch": 1.14, "eval_accuracy": 86.31000813669651, "eval_average_metrics": 86.31000813669651, "eval_loss": 0.15932226181030273, "eval_runtime": 66.0369, "eval_samples_per_second": 148.886, "step": 14000 }, { "epoch": 1.16, "eval_accuracy": 85.72009764035802, "eval_average_metrics": 85.72009764035802, "eval_loss": 0.16476964950561523, "eval_runtime": 67.6592, "eval_samples_per_second": 145.316, "step": 14200 }, { "epoch": 1.17, "eval_accuracy": 85.36411716842962, "eval_average_metrics": 85.36411716842962, "eval_loss": 0.17415712773799896, "eval_runtime": 69.9268, "eval_samples_per_second": 140.604, "step": 14400 }, { "epoch": 1.18, "learning_rate": 0.00018184485006518905, "loss": 0.1493, "step": 14500 }, { "epoch": 1.19, "eval_accuracy": 85.65907241659886, "eval_average_metrics": 85.65907241659886, "eval_loss": 0.1634403020143509, "eval_runtime": 70.839, "eval_samples_per_second": 138.794, "step": 14600 }, { "epoch": 1.21, "eval_accuracy": 85.94385679414158, "eval_average_metrics": 85.94385679414158, "eval_loss": 0.16072088479995728, "eval_runtime": 67.4524, "eval_samples_per_second": 145.762, "step": 14800 }, { "epoch": 1.22, "learning_rate": 0.00017777053455019556, "loss": 0.1453, "step": 15000 }, { "epoch": 1.22, "eval_accuracy": 85.97436940602115, "eval_average_metrics": 85.97436940602115, "eval_loss": 0.16354931890964508, "eval_runtime": 71.4854, "eval_samples_per_second": 137.539, "step": 15000 }, { "epoch": 1.24, "eval_accuracy": 86.04556550040684, "eval_average_metrics": 86.04556550040684, "eval_loss": 0.16333648562431335, "eval_runtime": 68.0001, "eval_samples_per_second": 144.588, "step": 15200 }, { "epoch": 1.25, "eval_accuracy": 86.10659072416598, "eval_average_metrics": 86.10659072416598, "eval_loss": 0.1655624806880951, "eval_runtime": 70.1652, "eval_samples_per_second": 140.126, "step": 15400 }, { "epoch": 1.26, "learning_rate": 0.00017369621903520204, "loss": 0.1409, "step": 15500 }, { "epoch": 1.27, "eval_accuracy": 86.08624898291293, "eval_average_metrics": 86.08624898291293, "eval_loss": 0.16712406277656555, "eval_runtime": 70.4214, "eval_samples_per_second": 139.617, "step": 15600 }, { "epoch": 1.29, "eval_accuracy": 85.65907241659886, "eval_average_metrics": 85.65907241659886, "eval_loss": 0.16410161554813385, "eval_runtime": 69.5518, "eval_samples_per_second": 141.362, "step": 15800 }, { "epoch": 1.3, "learning_rate": 0.00016962190352020858, "loss": 0.144, "step": 16000 }, { "epoch": 1.3, "eval_accuracy": 86.0353946297803, "eval_average_metrics": 86.0353946297803, "eval_loss": 0.1600012332201004, "eval_runtime": 69.4158, "eval_samples_per_second": 141.639, "step": 16000 }, { "epoch": 1.32, "eval_accuracy": 85.9947111472742, "eval_average_metrics": 85.9947111472742, "eval_loss": 0.1664758175611496, "eval_runtime": 70.4437, "eval_samples_per_second": 139.572, "step": 16200 }, { "epoch": 1.34, "eval_accuracy": 86.10659072416598, "eval_average_metrics": 86.10659072416598, "eval_loss": 0.16372230648994446, "eval_runtime": 70.0549, "eval_samples_per_second": 140.347, "step": 16400 }, { "epoch": 1.34, "learning_rate": 0.0001655475880052151, "loss": 0.142, "step": 16500 }, { "epoch": 1.35, "eval_accuracy": 86.01505288852725, "eval_average_metrics": 86.01505288852725, "eval_loss": 0.16394633054733276, "eval_runtime": 73.1757, "eval_samples_per_second": 134.362, "step": 16600 }, { "epoch": 1.37, "eval_accuracy": 86.19812855980472, "eval_average_metrics": 86.19812855980472, "eval_loss": 0.16236965358257294, "eval_runtime": 67.0918, "eval_samples_per_second": 146.545, "step": 16800 }, { "epoch": 1.39, "learning_rate": 0.00016147327249022163, "loss": 0.1469, "step": 17000 }, { "epoch": 1.39, "eval_accuracy": 86.06590724165989, "eval_average_metrics": 86.06590724165989, "eval_loss": 0.15920616686344147, "eval_runtime": 73.7251, "eval_samples_per_second": 133.36, "step": 17000 }, { "epoch": 1.4, "eval_accuracy": 86.31000813669651, "eval_average_metrics": 86.31000813669651, "eval_loss": 0.16292713582515717, "eval_runtime": 70.7554, "eval_samples_per_second": 138.958, "step": 17200 }, { "epoch": 1.42, "eval_accuracy": 85.86248982912937, "eval_average_metrics": 85.86248982912937, "eval_loss": 0.16360752284526825, "eval_runtime": 72.9243, "eval_samples_per_second": 134.825, "step": 17400 }, { "epoch": 1.43, "learning_rate": 0.00015739895697522815, "loss": 0.145, "step": 17500 }, { "epoch": 1.43, "eval_accuracy": 85.88283157038242, "eval_average_metrics": 85.88283157038242, "eval_loss": 0.16182997822761536, "eval_runtime": 71.4174, "eval_samples_per_second": 137.67, "step": 17600 }, { "epoch": 1.45, "eval_accuracy": 85.79129373474369, "eval_average_metrics": 85.79129373474369, "eval_loss": 0.16671514511108398, "eval_runtime": 72.782, "eval_samples_per_second": 135.088, "step": 17800 }, { "epoch": 1.47, "learning_rate": 0.00015332464146023469, "loss": 0.1416, "step": 18000 }, { "epoch": 1.47, "eval_accuracy": 85.79129373474369, "eval_average_metrics": 85.79129373474369, "eval_loss": 0.16370686888694763, "eval_runtime": 69.6237, "eval_samples_per_second": 141.216, "step": 18000 }, { "epoch": 1.48, "eval_accuracy": 85.77095199349064, "eval_average_metrics": 85.77095199349064, "eval_loss": 0.15921832621097565, "eval_runtime": 69.2043, "eval_samples_per_second": 142.072, "step": 18200 }, { "epoch": 1.5, "eval_accuracy": 85.82180634662328, "eval_average_metrics": 85.82180634662328, "eval_loss": 0.1640625149011612, "eval_runtime": 68.8973, "eval_samples_per_second": 142.705, "step": 18400 }, { "epoch": 1.51, "learning_rate": 0.00014925032594524117, "loss": 0.1453, "step": 18500 }, { "epoch": 1.52, "eval_accuracy": 85.36411716842962, "eval_average_metrics": 85.36411716842962, "eval_loss": 0.1784326434135437, "eval_runtime": 73.5853, "eval_samples_per_second": 133.614, "step": 18600 }, { "epoch": 1.53, "eval_accuracy": 85.89300244100895, "eval_average_metrics": 85.89300244100895, "eval_loss": 0.16068434715270996, "eval_runtime": 69.0604, "eval_samples_per_second": 142.368, "step": 18800 }, { "epoch": 1.55, "learning_rate": 0.0001451760104302477, "loss": 0.1414, "step": 19000 }, { "epoch": 1.55, "eval_accuracy": 85.75061025223759, "eval_average_metrics": 85.75061025223759, "eval_loss": 0.164332315325737, "eval_runtime": 75.2094, "eval_samples_per_second": 130.728, "step": 19000 }, { "epoch": 1.56, "eval_accuracy": 85.66924328722538, "eval_average_metrics": 85.66924328722538, "eval_loss": 0.15945520997047424, "eval_runtime": 70.4879, "eval_samples_per_second": 139.485, "step": 19200 }, { "epoch": 1.58, "eval_accuracy": 85.74043938161107, "eval_average_metrics": 85.74043938161107, "eval_loss": 0.15915806591510773, "eval_runtime": 72.0792, "eval_samples_per_second": 136.405, "step": 19400 }, { "epoch": 1.59, "learning_rate": 0.00014110169491525422, "loss": 0.1463, "step": 19500 }, { "epoch": 1.6, "eval_accuracy": 86.10659072416598, "eval_average_metrics": 86.10659072416598, "eval_loss": 0.16554424166679382, "eval_runtime": 69.3189, "eval_samples_per_second": 141.837, "step": 19600 }, { "epoch": 1.61, "eval_accuracy": 85.8726606997559, "eval_average_metrics": 85.8726606997559, "eval_loss": 0.1639343500137329, "eval_runtime": 70.0392, "eval_samples_per_second": 140.378, "step": 19800 }, { "epoch": 1.63, "learning_rate": 0.00013702737940026073, "loss": 0.1435, "step": 20000 }, { "epoch": 1.63, "eval_accuracy": 85.79129373474369, "eval_average_metrics": 85.79129373474369, "eval_loss": 0.1651633232831955, "eval_runtime": 72.4148, "eval_samples_per_second": 135.773, "step": 20000 }, { "epoch": 1.65, "eval_accuracy": 85.90317331163547, "eval_average_metrics": 85.90317331163547, "eval_loss": 0.163535937666893, "eval_runtime": 73.7758, "eval_samples_per_second": 133.269, "step": 20200 }, { "epoch": 1.66, "eval_accuracy": 85.81163547599675, "eval_average_metrics": 85.81163547599675, "eval_loss": 0.16132992506027222, "eval_runtime": 74.1683, "eval_samples_per_second": 132.563, "step": 20400 }, { "epoch": 1.67, "learning_rate": 0.00013295306388526727, "loss": 0.1393, "step": 20500 }, { "epoch": 1.68, "eval_accuracy": 85.86248982912937, "eval_average_metrics": 85.86248982912937, "eval_loss": 0.16424906253814697, "eval_runtime": 75.4388, "eval_samples_per_second": 130.331, "step": 20600 }, { "epoch": 1.69, "eval_accuracy": 86.20829943043124, "eval_average_metrics": 86.20829943043124, "eval_loss": 0.16280879080295563, "eval_runtime": 73.6216, "eval_samples_per_second": 133.548, "step": 20800 }, { "epoch": 1.71, "learning_rate": 0.00012887874837027379, "loss": 0.1476, "step": 21000 }, { "epoch": 1.71, "eval_accuracy": 86.32017900732303, "eval_average_metrics": 86.32017900732303, "eval_loss": 0.1631232500076294, "eval_runtime": 75.2489, "eval_samples_per_second": 130.66, "step": 21000 }, { "epoch": 1.73, "eval_accuracy": 85.97436940602115, "eval_average_metrics": 85.97436940602115, "eval_loss": 0.16299067437648773, "eval_runtime": 66.4642, "eval_samples_per_second": 147.929, "step": 21200 }, { "epoch": 1.74, "eval_accuracy": 86.14727420667208, "eval_average_metrics": 86.14727420667208, "eval_loss": 0.16605544090270996, "eval_runtime": 72.8565, "eval_samples_per_second": 134.95, "step": 21400 }, { "epoch": 1.75, "learning_rate": 0.0001248044328552803, "loss": 0.1434, "step": 21500 }, { "epoch": 1.76, "eval_accuracy": 85.73026851098454, "eval_average_metrics": 85.73026851098454, "eval_loss": 0.16752640902996063, "eval_runtime": 73.5899, "eval_samples_per_second": 133.605, "step": 21600 }, { "epoch": 1.78, "eval_accuracy": 86.05573637103336, "eval_average_metrics": 86.05573637103336, "eval_loss": 0.1640099287033081, "eval_runtime": 70.9879, "eval_samples_per_second": 138.503, "step": 21800 }, { "epoch": 1.79, "learning_rate": 0.00012073011734028682, "loss": 0.1425, "step": 22000 }, { "epoch": 1.79, "eval_accuracy": 86.02522375915377, "eval_average_metrics": 86.02522375915377, "eval_loss": 0.1621551811695099, "eval_runtime": 67.2101, "eval_samples_per_second": 146.288, "step": 22000 }, { "epoch": 1.81, "eval_accuracy": 86.01505288852725, "eval_average_metrics": 86.01505288852725, "eval_loss": 0.1614847183227539, "eval_runtime": 68.053, "eval_samples_per_second": 144.476, "step": 22200 }, { "epoch": 1.83, "eval_accuracy": 85.84214808787632, "eval_average_metrics": 85.84214808787632, "eval_loss": 0.1623518317937851, "eval_runtime": 67.5192, "eval_samples_per_second": 145.618, "step": 22400 }, { "epoch": 1.83, "learning_rate": 0.00011665580182529335, "loss": 0.1441, "step": 22500 }, { "epoch": 1.84, "eval_accuracy": 86.06590724165989, "eval_average_metrics": 86.06590724165989, "eval_loss": 0.1657322645187378, "eval_runtime": 65.1547, "eval_samples_per_second": 150.902, "step": 22600 }, { "epoch": 1.86, "eval_accuracy": 86.00488201790073, "eval_average_metrics": 86.00488201790073, "eval_loss": 0.16235147416591644, "eval_runtime": 57.9601, "eval_samples_per_second": 169.634, "step": 22800 }, { "epoch": 1.87, "learning_rate": 0.00011258148631029986, "loss": 0.1391, "step": 23000 }, { "epoch": 1.87, "eval_accuracy": 85.82180634662328, "eval_average_metrics": 85.82180634662328, "eval_loss": 0.15935710072517395, "eval_runtime": 57.3417, "eval_samples_per_second": 171.463, "step": 23000 }, { "epoch": 1.89, "eval_accuracy": 85.81163547599675, "eval_average_metrics": 85.81163547599675, "eval_loss": 0.1635563224554062, "eval_runtime": 56.1817, "eval_samples_per_second": 175.003, "step": 23200 }, { "epoch": 1.91, "eval_accuracy": 85.7099267697315, "eval_average_metrics": 85.7099267697315, "eval_loss": 0.16560596227645874, "eval_runtime": 59.2499, "eval_samples_per_second": 165.941, "step": 23400 }, { "epoch": 1.91, "learning_rate": 0.00010850717079530637, "loss": 0.1382, "step": 23500 }, { "epoch": 1.92, "eval_accuracy": 86.02522375915377, "eval_average_metrics": 86.02522375915377, "eval_loss": 0.1604122817516327, "eval_runtime": 59.8309, "eval_samples_per_second": 164.33, "step": 23600 }, { "epoch": 1.94, "eval_accuracy": 85.80146460537021, "eval_average_metrics": 85.80146460537021, "eval_loss": 0.16524049639701843, "eval_runtime": 61.3749, "eval_samples_per_second": 160.196, "step": 23800 }, { "epoch": 1.96, "learning_rate": 0.0001044328552803129, "loss": 0.1379, "step": 24000 }, { "epoch": 1.96, "eval_accuracy": 85.73026851098454, "eval_average_metrics": 85.73026851098454, "eval_loss": 0.16642265021800995, "eval_runtime": 61.5426, "eval_samples_per_second": 159.759, "step": 24000 }, { "epoch": 1.97, "eval_accuracy": 86.23881204231083, "eval_average_metrics": 86.23881204231083, "eval_loss": 0.1592371165752411, "eval_runtime": 61.4113, "eval_samples_per_second": 160.101, "step": 24200 }, { "epoch": 1.99, "eval_accuracy": 86.23881204231083, "eval_average_metrics": 86.23881204231083, "eval_loss": 0.16144132614135742, "eval_runtime": 60.7982, "eval_samples_per_second": 161.715, "step": 24400 }, { "epoch": 2.0, "learning_rate": 0.00010035853976531943, "loss": 0.1454, "step": 24500 }, { "epoch": 2.0, "eval_accuracy": 86.09641985353946, "eval_average_metrics": 86.09641985353946, "eval_loss": 0.1667686253786087, "eval_runtime": 60.7535, "eval_samples_per_second": 161.834, "step": 24600 }, { "epoch": 2.02, "eval_accuracy": 86.06590724165989, "eval_average_metrics": 86.06590724165989, "eval_loss": 0.1680220663547516, "eval_runtime": 58.6093, "eval_samples_per_second": 167.755, "step": 24800 }, { "epoch": 2.04, "learning_rate": 9.628422425032592e-05, "loss": 0.138, "step": 25000 }, { "epoch": 2.04, "eval_accuracy": 85.84214808787632, "eval_average_metrics": 85.84214808787632, "eval_loss": 0.16406849026679993, "eval_runtime": 56.628, "eval_samples_per_second": 173.624, "step": 25000 }, { "epoch": 2.05, "eval_accuracy": 85.89300244100895, "eval_average_metrics": 85.89300244100895, "eval_loss": 0.16717499494552612, "eval_runtime": 55.5415, "eval_samples_per_second": 177.021, "step": 25200 }, { "epoch": 2.07, "eval_accuracy": 86.18795768917819, "eval_average_metrics": 86.18795768917819, "eval_loss": 0.16641969978809357, "eval_runtime": 52.5873, "eval_samples_per_second": 186.965, "step": 25400 }, { "epoch": 2.08, "learning_rate": 9.220990873533245e-05, "loss": 0.1356, "step": 25500 }, { "epoch": 2.09, "eval_accuracy": 86.0353946297803, "eval_average_metrics": 86.0353946297803, "eval_loss": 0.16517092287540436, "eval_runtime": 54.8202, "eval_samples_per_second": 179.35, "step": 25600 }, { "epoch": 2.1, "eval_accuracy": 86.20829943043124, "eval_average_metrics": 86.20829943043124, "eval_loss": 0.1606525331735611, "eval_runtime": 58.0473, "eval_samples_per_second": 169.379, "step": 25800 }, { "epoch": 2.12, "learning_rate": 8.813559322033898e-05, "loss": 0.138, "step": 26000 }, { "epoch": 2.12, "eval_accuracy": 85.75061025223759, "eval_average_metrics": 85.75061025223759, "eval_loss": 0.1638970524072647, "eval_runtime": 60.2198, "eval_samples_per_second": 163.268, "step": 26000 }, { "epoch": 2.13, "eval_accuracy": 85.913344182262, "eval_average_metrics": 85.913344182262, "eval_loss": 0.16841016709804535, "eval_runtime": 59.4106, "eval_samples_per_second": 165.492, "step": 26200 }, { "epoch": 2.15, "eval_accuracy": 86.11676159479251, "eval_average_metrics": 86.11676159479251, "eval_loss": 0.16477040946483612, "eval_runtime": 59.3558, "eval_samples_per_second": 165.645, "step": 26400 }, { "epoch": 2.16, "learning_rate": 8.406127770534549e-05, "loss": 0.1315, "step": 26500 }, { "epoch": 2.17, "eval_accuracy": 85.56753458096013, "eval_average_metrics": 85.56753458096013, "eval_loss": 0.17194555699825287, "eval_runtime": 59.1291, "eval_samples_per_second": 166.28, "step": 26600 }, { "epoch": 2.18, "eval_accuracy": 85.94385679414158, "eval_average_metrics": 85.94385679414158, "eval_loss": 0.16446976363658905, "eval_runtime": 46.5169, "eval_samples_per_second": 211.364, "step": 26800 }, { "epoch": 2.2, "learning_rate": 7.998696219035201e-05, "loss": 0.1332, "step": 27000 }, { "epoch": 2.2, "eval_accuracy": 85.81163547599675, "eval_average_metrics": 85.81163547599675, "eval_loss": 0.1695818454027176, "eval_runtime": 45.2415, "eval_samples_per_second": 217.323, "step": 27000 }, { "epoch": 2.22, "eval_accuracy": 85.93368592351506, "eval_average_metrics": 85.93368592351506, "eval_loss": 0.1703195720911026, "eval_runtime": 45.7196, "eval_samples_per_second": 215.05, "step": 27200 }, { "epoch": 2.23, "eval_accuracy": 86.14727420667208, "eval_average_metrics": 86.14727420667208, "eval_loss": 0.1657610833644867, "eval_runtime": 48.3877, "eval_samples_per_second": 203.192, "step": 27400 }, { "epoch": 2.24, "learning_rate": 7.591264667535854e-05, "loss": 0.1354, "step": 27500 }, { "epoch": 2.25, "eval_accuracy": 86.09641985353946, "eval_average_metrics": 86.09641985353946, "eval_loss": 0.1658048778772354, "eval_runtime": 49.7336, "eval_samples_per_second": 197.693, "step": 27600 }, { "epoch": 2.27, "eval_accuracy": 85.9947111472742, "eval_average_metrics": 85.9947111472742, "eval_loss": 0.16919544339179993, "eval_runtime": 56.1273, "eval_samples_per_second": 175.173, "step": 27800 }, { "epoch": 2.28, "learning_rate": 7.183833116036505e-05, "loss": 0.1311, "step": 28000 }, { "epoch": 2.28, "eval_accuracy": 86.14727420667208, "eval_average_metrics": 86.14727420667208, "eval_loss": 0.16675373911857605, "eval_runtime": 48.3863, "eval_samples_per_second": 203.198, "step": 28000 }, { "epoch": 2.3, "eval_accuracy": 85.73026851098454, "eval_average_metrics": 85.73026851098454, "eval_loss": 0.16785795986652374, "eval_runtime": 49.8131, "eval_samples_per_second": 197.378, "step": 28200 }, { "epoch": 2.31, "eval_accuracy": 85.62855980471929, "eval_average_metrics": 85.62855980471929, "eval_loss": 0.1680869162082672, "eval_runtime": 49.9453, "eval_samples_per_second": 196.855, "step": 28400 }, { "epoch": 2.32, "learning_rate": 6.776401564537158e-05, "loss": 0.1351, "step": 28500 }, { "epoch": 2.33, "eval_accuracy": 85.86248982912937, "eval_average_metrics": 85.86248982912937, "eval_loss": 0.16826093196868896, "eval_runtime": 47.9335, "eval_samples_per_second": 205.117, "step": 28600 }, { "epoch": 2.35, "eval_accuracy": 85.84214808787632, "eval_average_metrics": 85.84214808787632, "eval_loss": 0.16651229560375214, "eval_runtime": 49.892, "eval_samples_per_second": 197.066, "step": 28800 }, { "epoch": 2.36, "learning_rate": 6.368970013037809e-05, "loss": 0.1347, "step": 29000 }, { "epoch": 2.36, "eval_accuracy": 86.01505288852725, "eval_average_metrics": 86.01505288852725, "eval_loss": 0.16772997379302979, "eval_runtime": 49.8947, "eval_samples_per_second": 197.055, "step": 29000 }, { "epoch": 2.38, "eval_accuracy": 86.2693246541904, "eval_average_metrics": 86.2693246541904, "eval_loss": 0.16580338776111603, "eval_runtime": 50.1072, "eval_samples_per_second": 196.219, "step": 29200 }, { "epoch": 2.4, "eval_accuracy": 86.20829943043124, "eval_average_metrics": 86.20829943043124, "eval_loss": 0.16437767446041107, "eval_runtime": 52.1067, "eval_samples_per_second": 188.69, "step": 29400 }, { "epoch": 2.4, "learning_rate": 5.961538461538461e-05, "loss": 0.1308, "step": 29500 }, { "epoch": 2.41, "eval_accuracy": 86.10659072416598, "eval_average_metrics": 86.10659072416598, "eval_loss": 0.16909147799015045, "eval_runtime": 48.4579, "eval_samples_per_second": 202.898, "step": 29600 }, { "epoch": 2.43, "eval_accuracy": 86.07607811228641, "eval_average_metrics": 86.07607811228641, "eval_loss": 0.16544800996780396, "eval_runtime": 53.3838, "eval_samples_per_second": 184.176, "step": 29800 }, { "epoch": 2.44, "learning_rate": 5.554106910039113e-05, "loss": 0.1301, "step": 30000 }, { "epoch": 2.44, "eval_accuracy": 86.15744507729862, "eval_average_metrics": 86.15744507729862, "eval_loss": 0.16652615368366241, "eval_runtime": 45.6168, "eval_samples_per_second": 215.535, "step": 30000 }, { "epoch": 2.46, "eval_accuracy": 86.00488201790073, "eval_average_metrics": 86.00488201790073, "eval_loss": 0.16784194111824036, "eval_runtime": 46.0567, "eval_samples_per_second": 213.476, "step": 30200 }, { "epoch": 2.48, "eval_accuracy": 86.12693246541903, "eval_average_metrics": 86.12693246541903, "eval_loss": 0.16726775467395782, "eval_runtime": 53.9156, "eval_samples_per_second": 182.359, "step": 30400 }, { "epoch": 2.49, "learning_rate": 5.146675358539765e-05, "loss": 0.1324, "step": 30500 }, { "epoch": 2.49, "eval_accuracy": 86.2693246541904, "eval_average_metrics": 86.2693246541904, "eval_loss": 0.16430824995040894, "eval_runtime": 48.9757, "eval_samples_per_second": 200.753, "step": 30600 }, { "epoch": 2.51, "eval_accuracy": 85.88283157038242, "eval_average_metrics": 85.88283157038242, "eval_loss": 0.16779069602489471, "eval_runtime": 45.747, "eval_samples_per_second": 214.921, "step": 30800 }, { "epoch": 2.53, "learning_rate": 4.7392438070404173e-05, "loss": 0.1333, "step": 31000 }, { "epoch": 2.53, "eval_accuracy": 86.12693246541903, "eval_average_metrics": 86.12693246541903, "eval_loss": 0.16358087956905365, "eval_runtime": 45.8832, "eval_samples_per_second": 214.283, "step": 31000 }, { "epoch": 2.54, "eval_accuracy": 86.20829943043124, "eval_average_metrics": 86.20829943043124, "eval_loss": 0.16272908449172974, "eval_runtime": 44.9382, "eval_samples_per_second": 218.789, "step": 31200 }, { "epoch": 2.56, "eval_accuracy": 86.21847030105776, "eval_average_metrics": 86.21847030105776, "eval_loss": 0.16233167052268982, "eval_runtime": 45.3879, "eval_samples_per_second": 216.621, "step": 31400 }, { "epoch": 2.57, "learning_rate": 4.3318122555410686e-05, "loss": 0.1366, "step": 31500 }, { "epoch": 2.57, "eval_accuracy": 86.12693246541903, "eval_average_metrics": 86.12693246541903, "eval_loss": 0.16067072749137878, "eval_runtime": 45.3275, "eval_samples_per_second": 216.91, "step": 31600 }, { "epoch": 2.59, "eval_accuracy": 86.00488201790073, "eval_average_metrics": 86.00488201790073, "eval_loss": 0.16438935697078705, "eval_runtime": 45.069, "eval_samples_per_second": 218.154, "step": 31800 }, { "epoch": 2.61, "learning_rate": 3.924380704041721e-05, "loss": 0.1401, "step": 32000 }, { "epoch": 2.61, "eval_accuracy": 86.13710333604556, "eval_average_metrics": 86.13710333604556, "eval_loss": 0.15911179780960083, "eval_runtime": 45.1936, "eval_samples_per_second": 217.553, "step": 32000 }, { "epoch": 2.62, "eval_accuracy": 86.0353946297803, "eval_average_metrics": 86.0353946297803, "eval_loss": 0.16360121965408325, "eval_runtime": 46.2972, "eval_samples_per_second": 212.367, "step": 32200 }, { "epoch": 2.64, "eval_accuracy": 86.19812855980472, "eval_average_metrics": 86.19812855980472, "eval_loss": 0.1620582789182663, "eval_runtime": 45.431, "eval_samples_per_second": 216.416, "step": 32400 }, { "epoch": 2.65, "learning_rate": 3.5169491525423724e-05, "loss": 0.1343, "step": 32500 }, { "epoch": 2.66, "eval_accuracy": 86.08624898291293, "eval_average_metrics": 86.08624898291293, "eval_loss": 0.16568879783153534, "eval_runtime": 45.3557, "eval_samples_per_second": 216.775, "step": 32600 }, { "epoch": 2.67, "eval_accuracy": 86.3506916192026, "eval_average_metrics": 86.3506916192026, "eval_loss": 0.16203464567661285, "eval_runtime": 44.9934, "eval_samples_per_second": 218.521, "step": 32800 }, { "epoch": 2.69, "learning_rate": 3.109517601043025e-05, "loss": 0.1345, "step": 33000 }, { "epoch": 2.69, "eval_accuracy": 86.2286411716843, "eval_average_metrics": 86.2286411716843, "eval_loss": 0.1651608943939209, "eval_runtime": 45.6781, "eval_samples_per_second": 215.246, "step": 33000 }, { "epoch": 2.71, "eval_accuracy": 86.28966639544345, "eval_average_metrics": 86.28966639544345, "eval_loss": 0.16327986121177673, "eval_runtime": 45.4189, "eval_samples_per_second": 216.474, "step": 33200 }, { "epoch": 2.72, "eval_accuracy": 86.32017900732303, "eval_average_metrics": 86.32017900732303, "eval_loss": 0.16431905329227448, "eval_runtime": 44.9451, "eval_samples_per_second": 218.756, "step": 33400 }, { "epoch": 2.73, "learning_rate": 2.7020860495436762e-05, "loss": 0.1321, "step": 33500 }, { "epoch": 2.74, "eval_accuracy": 86.12693246541903, "eval_average_metrics": 86.12693246541903, "eval_loss": 0.16271112859249115, "eval_runtime": 45.4697, "eval_samples_per_second": 216.232, "step": 33600 }, { "epoch": 2.75, "eval_accuracy": 86.27949552481692, "eval_average_metrics": 86.27949552481692, "eval_loss": 0.16375945508480072, "eval_runtime": 45.4502, "eval_samples_per_second": 216.325, "step": 33800 }, { "epoch": 2.77, "learning_rate": 2.294654498044328e-05, "loss": 0.1348, "step": 34000 }, { "epoch": 2.77, "eval_accuracy": 86.12693246541903, "eval_average_metrics": 86.12693246541903, "eval_loss": 0.16407504677772522, "eval_runtime": 45.1689, "eval_samples_per_second": 217.672, "step": 34000 }, { "epoch": 2.79, "eval_accuracy": 86.2693246541904, "eval_average_metrics": 86.2693246541904, "eval_loss": 0.16450707614421844, "eval_runtime": 45.2377, "eval_samples_per_second": 217.341, "step": 34200 }, { "epoch": 2.8, "eval_accuracy": 86.10659072416598, "eval_average_metrics": 86.10659072416598, "eval_loss": 0.16434065997600555, "eval_runtime": 45.1061, "eval_samples_per_second": 217.975, "step": 34400 }, { "epoch": 2.81, "learning_rate": 1.8872229465449803e-05, "loss": 0.1361, "step": 34500 }, { "epoch": 2.82, "eval_accuracy": 86.2286411716843, "eval_average_metrics": 86.2286411716843, "eval_loss": 0.16249413788318634, "eval_runtime": 45.4837, "eval_samples_per_second": 216.165, "step": 34600 }, { "epoch": 2.84, "eval_accuracy": 86.14727420667208, "eval_average_metrics": 86.14727420667208, "eval_loss": 0.1645725518465042, "eval_runtime": 45.3804, "eval_samples_per_second": 216.657, "step": 34800 }, { "epoch": 2.85, "learning_rate": 1.4797913950456322e-05, "loss": 0.1335, "step": 35000 }, { "epoch": 2.85, "eval_accuracy": 86.16761594792514, "eval_average_metrics": 86.16761594792514, "eval_loss": 0.16331711411476135, "eval_runtime": 45.6878, "eval_samples_per_second": 215.2, "step": 35000 }, { "epoch": 2.87, "eval_accuracy": 86.12693246541903, "eval_average_metrics": 86.12693246541903, "eval_loss": 0.16130615770816803, "eval_runtime": 45.0413, "eval_samples_per_second": 218.289, "step": 35200 }, { "epoch": 2.88, "eval_accuracy": 86.25915378356387, "eval_average_metrics": 86.25915378356387, "eval_loss": 0.16118405759334564, "eval_runtime": 45.4606, "eval_samples_per_second": 216.275, "step": 35400 }, { "epoch": 2.89, "learning_rate": 1.0723598435462841e-05, "loss": 0.1368, "step": 35500 }, { "epoch": 2.9, "eval_accuracy": 86.20829943043124, "eval_average_metrics": 86.20829943043124, "eval_loss": 0.16242747008800507, "eval_runtime": 45.4511, "eval_samples_per_second": 216.32, "step": 35600 }, { "epoch": 2.92, "eval_accuracy": 86.16761594792514, "eval_average_metrics": 86.16761594792514, "eval_loss": 0.16256776452064514, "eval_runtime": 45.2281, "eval_samples_per_second": 217.387, "step": 35800 }, { "epoch": 2.93, "learning_rate": 6.649282920469361e-06, "loss": 0.1323, "step": 36000 }, { "epoch": 2.93, "eval_accuracy": 86.33034987794956, "eval_average_metrics": 86.33034987794956, "eval_loss": 0.1619912087917328, "eval_runtime": 45.4939, "eval_samples_per_second": 216.117, "step": 36000 }, { "epoch": 2.95, "eval_accuracy": 86.20829943043124, "eval_average_metrics": 86.20829943043124, "eval_loss": 0.1618933379650116, "eval_runtime": 45.7568, "eval_samples_per_second": 214.875, "step": 36200 }, { "epoch": 2.97, "eval_accuracy": 86.17778681855167, "eval_average_metrics": 86.17778681855167, "eval_loss": 0.1621612161397934, "eval_runtime": 44.9098, "eval_samples_per_second": 218.928, "step": 36400 }, { "epoch": 2.97, "learning_rate": 2.5749674054758798e-06, "loss": 0.1334, "step": 36500 }, { "epoch": 2.98, "eval_accuracy": 86.21847030105776, "eval_average_metrics": 86.21847030105776, "eval_loss": 0.1621207445859909, "eval_runtime": 45.2906, "eval_samples_per_second": 217.087, "step": 36600 }, { "epoch": 3.0, "eval_accuracy": 86.19812855980472, "eval_average_metrics": 86.19812855980472, "eval_loss": 0.16204114258289337, "eval_runtime": 45.2396, "eval_samples_per_second": 217.332, "step": 36800 }, { "epoch": 3.0, "step": 36816, "total_flos": 1.4734111386140467e+17, "train_loss": 0.14615808979732375, "train_runtime": 23668.4692, "train_samples_per_second": 49.775, "train_steps_per_second": 1.555 } ], "max_steps": 36816, "num_train_epochs": 3, "total_flos": 1.4734111386140467e+17, "trial_name": null, "trial_params": null }