| { | |
| "best_metric": 86.3506916192026, | |
| "best_model_checkpoint": "outputs/bitfit/t5-base/mnli/checkpoint-32800", | |
| "epoch": 3.0, | |
| "global_step": 36816, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.02, | |
| "eval_accuracy": 84.0520748576078, | |
| "eval_average_metrics": 84.0520748576078, | |
| "eval_loss": 0.19396202266216278, | |
| "eval_runtime": 73.5315, | |
| "eval_samples_per_second": 133.711, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_accuracy": 85.08950366151342, | |
| "eval_average_metrics": 85.08950366151342, | |
| "eval_loss": 0.1736125648021698, | |
| "eval_runtime": 77.6953, | |
| "eval_samples_per_second": 126.546, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "learning_rate": 0.0002959256844850065, | |
| "loss": 0.2786, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_accuracy": 85.25223759153783, | |
| "eval_average_metrics": 85.25223759153783, | |
| "eval_loss": 0.1704595386981964, | |
| "eval_runtime": 71.3646, | |
| "eval_samples_per_second": 137.771, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_accuracy": 85.120016273393, | |
| "eval_average_metrics": 85.120016273393, | |
| "eval_loss": 0.17760007083415985, | |
| "eval_runtime": 70.572, | |
| "eval_samples_per_second": 139.319, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "learning_rate": 0.000291851368970013, | |
| "loss": 0.1754, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_accuracy": 85.81163547599675, | |
| "eval_average_metrics": 85.81163547599675, | |
| "eval_loss": 0.17395834624767303, | |
| "eval_runtime": 68.4178, | |
| "eval_samples_per_second": 143.705, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_accuracy": 86.04556550040684, | |
| "eval_average_metrics": 86.04556550040684, | |
| "eval_loss": 0.16720984876155853, | |
| "eval_runtime": 74.935, | |
| "eval_samples_per_second": 131.207, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_accuracy": 85.85231895850285, | |
| "eval_average_metrics": 85.85231895850285, | |
| "eval_loss": 0.1686050444841385, | |
| "eval_runtime": 65.5334, | |
| "eval_samples_per_second": 150.03, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "learning_rate": 0.00028777705345501956, | |
| "loss": 0.17, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_accuracy": 84.83523189585028, | |
| "eval_average_metrics": 84.83523189585028, | |
| "eval_loss": 0.17506256699562073, | |
| "eval_runtime": 72.658, | |
| "eval_samples_per_second": 135.319, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_accuracy": 85.77095199349064, | |
| "eval_average_metrics": 85.77095199349064, | |
| "eval_loss": 0.16318167746067047, | |
| "eval_runtime": 71.0929, | |
| "eval_samples_per_second": 138.298, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 0.000283702737940026, | |
| "loss": 0.1593, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_accuracy": 84.98779495524816, | |
| "eval_average_metrics": 84.98779495524816, | |
| "eval_loss": 0.17500561475753784, | |
| "eval_runtime": 76.1036, | |
| "eval_samples_per_second": 129.192, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_accuracy": 85.22172497965825, | |
| "eval_average_metrics": 85.22172497965825, | |
| "eval_loss": 0.16974958777427673, | |
| "eval_runtime": 73.5318, | |
| "eval_samples_per_second": 133.711, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_accuracy": 84.93694060211554, | |
| "eval_average_metrics": 84.93694060211554, | |
| "eval_loss": 0.16803883016109467, | |
| "eval_runtime": 71.8068, | |
| "eval_samples_per_second": 136.923, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "learning_rate": 0.00027962842242503253, | |
| "loss": 0.1626, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_accuracy": 85.64890154597234, | |
| "eval_average_metrics": 85.64890154597234, | |
| "eval_loss": 0.16620652377605438, | |
| "eval_runtime": 70.4457, | |
| "eval_samples_per_second": 139.569, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_accuracy": 85.17087062652563, | |
| "eval_average_metrics": 85.17087062652563, | |
| "eval_loss": 0.16685815155506134, | |
| "eval_runtime": 70.323, | |
| "eval_samples_per_second": 139.812, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "learning_rate": 0.0002755541069100391, | |
| "loss": 0.1637, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_accuracy": 85.08950366151342, | |
| "eval_average_metrics": 85.08950366151342, | |
| "eval_loss": 0.1686829775571823, | |
| "eval_runtime": 68.4934, | |
| "eval_samples_per_second": 143.547, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_accuracy": 84.67249796582588, | |
| "eval_average_metrics": 84.67249796582588, | |
| "eval_loss": 0.1832115650177002, | |
| "eval_runtime": 69.9104, | |
| "eval_samples_per_second": 140.637, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 85.74043938161107, | |
| "eval_average_metrics": 85.74043938161107, | |
| "eval_loss": 0.15809670090675354, | |
| "eval_runtime": 71.8214, | |
| "eval_samples_per_second": 136.895, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 0.0002714797913950456, | |
| "loss": 0.1611, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_accuracy": 85.82180634662328, | |
| "eval_average_metrics": 85.82180634662328, | |
| "eval_loss": 0.16679300367832184, | |
| "eval_runtime": 68.4877, | |
| "eval_samples_per_second": 143.559, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_accuracy": 85.69975589910497, | |
| "eval_average_metrics": 85.69975589910497, | |
| "eval_loss": 0.1635247766971588, | |
| "eval_runtime": 70.3174, | |
| "eval_samples_per_second": 139.823, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "learning_rate": 0.0002674054758800521, | |
| "loss": 0.1546, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_accuracy": 85.8319772172498, | |
| "eval_average_metrics": 85.8319772172498, | |
| "eval_loss": 0.16676998138427734, | |
| "eval_runtime": 73.2289, | |
| "eval_samples_per_second": 134.264, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_accuracy": 85.50650935720098, | |
| "eval_average_metrics": 85.50650935720098, | |
| "eval_loss": 0.17212657630443573, | |
| "eval_runtime": 74.584, | |
| "eval_samples_per_second": 131.824, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_accuracy": 85.54719283970708, | |
| "eval_average_metrics": 85.54719283970708, | |
| "eval_loss": 0.17162065207958221, | |
| "eval_runtime": 69.8754, | |
| "eval_samples_per_second": 140.708, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00026333116036505864, | |
| "loss": 0.1531, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 85.96419853539463, | |
| "eval_average_metrics": 85.96419853539463, | |
| "eval_loss": 0.16573481261730194, | |
| "eval_runtime": 67.7552, | |
| "eval_samples_per_second": 145.111, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_accuracy": 85.78112286411717, | |
| "eval_average_metrics": 85.78112286411717, | |
| "eval_loss": 0.16396570205688477, | |
| "eval_runtime": 63.3281, | |
| "eval_samples_per_second": 155.255, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "learning_rate": 0.0002592568448500652, | |
| "loss": 0.1566, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_accuracy": 85.73026851098454, | |
| "eval_average_metrics": 85.73026851098454, | |
| "eval_loss": 0.16679789125919342, | |
| "eval_runtime": 68.8228, | |
| "eval_samples_per_second": 142.86, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_accuracy": 85.68958502847845, | |
| "eval_average_metrics": 85.68958502847845, | |
| "eval_loss": 0.16058295965194702, | |
| "eval_runtime": 66.5592, | |
| "eval_samples_per_second": 147.718, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_accuracy": 85.66924328722538, | |
| "eval_average_metrics": 85.66924328722538, | |
| "eval_loss": 0.1740991473197937, | |
| "eval_runtime": 66.4465, | |
| "eval_samples_per_second": 147.969, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 0.00025518252933507166, | |
| "loss": 0.1514, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_accuracy": 84.66232709519936, | |
| "eval_average_metrics": 84.66232709519936, | |
| "eval_loss": 0.18576639890670776, | |
| "eval_runtime": 66.3297, | |
| "eval_samples_per_second": 148.229, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 85.88283157038242, | |
| "eval_average_metrics": 85.88283157038242, | |
| "eval_loss": 0.16450409591197968, | |
| "eval_runtime": 64.7793, | |
| "eval_samples_per_second": 151.777, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "learning_rate": 0.0002511082138200782, | |
| "loss": 0.1531, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_accuracy": 85.13018714401953, | |
| "eval_average_metrics": 85.13018714401953, | |
| "eval_loss": 0.17607340216636658, | |
| "eval_runtime": 63.5814, | |
| "eval_samples_per_second": 154.637, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_accuracy": 85.35394629780309, | |
| "eval_average_metrics": 85.35394629780309, | |
| "eval_loss": 0.16797170042991638, | |
| "eval_runtime": 63.9708, | |
| "eval_samples_per_second": 153.695, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_accuracy": 85.75061025223759, | |
| "eval_average_metrics": 85.75061025223759, | |
| "eval_loss": 0.16036862134933472, | |
| "eval_runtime": 64.9464, | |
| "eval_samples_per_second": 151.386, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "learning_rate": 0.00024703389830508474, | |
| "loss": 0.1544, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_accuracy": 85.64890154597234, | |
| "eval_average_metrics": 85.64890154597234, | |
| "eval_loss": 0.16572105884552002, | |
| "eval_runtime": 67.1736, | |
| "eval_samples_per_second": 146.367, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_accuracy": 85.40480065093573, | |
| "eval_average_metrics": 85.40480065093573, | |
| "eval_loss": 0.16141638159751892, | |
| "eval_runtime": 67.3069, | |
| "eval_samples_per_second": 146.077, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "learning_rate": 0.00024295958279009125, | |
| "loss": 0.1549, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_accuracy": 85.84214808787632, | |
| "eval_average_metrics": 85.84214808787632, | |
| "eval_loss": 0.1581791192293167, | |
| "eval_runtime": 70.2525, | |
| "eval_samples_per_second": 139.952, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_accuracy": 85.78112286411717, | |
| "eval_average_metrics": 85.78112286411717, | |
| "eval_loss": 0.1584727168083191, | |
| "eval_runtime": 70.5829, | |
| "eval_samples_per_second": 139.297, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_accuracy": 85.84214808787632, | |
| "eval_average_metrics": 85.84214808787632, | |
| "eval_loss": 0.16103526949882507, | |
| "eval_runtime": 66.5529, | |
| "eval_samples_per_second": 147.732, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 0.00023888526727509777, | |
| "loss": 0.1572, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_accuracy": 86.00488201790073, | |
| "eval_average_metrics": 86.00488201790073, | |
| "eval_loss": 0.17075441777706146, | |
| "eval_runtime": 61.313, | |
| "eval_samples_per_second": 160.358, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_accuracy": 85.89300244100895, | |
| "eval_average_metrics": 85.89300244100895, | |
| "eval_loss": 0.1631649136543274, | |
| "eval_runtime": 70.2006, | |
| "eval_samples_per_second": 140.056, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.0002348109517601043, | |
| "loss": 0.1552, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 86.00488201790073, | |
| "eval_average_metrics": 86.00488201790073, | |
| "eval_loss": 0.15884214639663696, | |
| "eval_runtime": 70.0105, | |
| "eval_samples_per_second": 140.436, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_accuracy": 85.42514239218877, | |
| "eval_average_metrics": 85.42514239218877, | |
| "eval_loss": 0.16717489063739777, | |
| "eval_runtime": 67.9572, | |
| "eval_samples_per_second": 144.679, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_accuracy": 85.66924328722538, | |
| "eval_average_metrics": 85.66924328722538, | |
| "eval_loss": 0.16362008452415466, | |
| "eval_runtime": 71.9947, | |
| "eval_samples_per_second": 136.566, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "learning_rate": 0.00023073663624511082, | |
| "loss": 0.1518, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_accuracy": 85.49633848657446, | |
| "eval_average_metrics": 85.49633848657446, | |
| "eval_loss": 0.16621538996696472, | |
| "eval_runtime": 70.1174, | |
| "eval_samples_per_second": 140.222, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_accuracy": 85.98454027664768, | |
| "eval_average_metrics": 85.98454027664768, | |
| "eval_loss": 0.16499604284763336, | |
| "eval_runtime": 68.6629, | |
| "eval_samples_per_second": 143.192, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "learning_rate": 0.0002266623207301173, | |
| "loss": 0.1514, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_accuracy": 86.09641985353946, | |
| "eval_average_metrics": 86.09641985353946, | |
| "eval_loss": 0.1623799055814743, | |
| "eval_runtime": 70.2156, | |
| "eval_samples_per_second": 140.026, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 85.54719283970708, | |
| "eval_average_metrics": 85.54719283970708, | |
| "eval_loss": 0.1693897545337677, | |
| "eval_runtime": 64.4752, | |
| "eval_samples_per_second": 152.493, | |
| "step": 9200 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_accuracy": 85.15052888527258, | |
| "eval_average_metrics": 85.15052888527258, | |
| "eval_loss": 0.16912253201007843, | |
| "eval_runtime": 71.6316, | |
| "eval_samples_per_second": 137.258, | |
| "step": 9400 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 0.00022258800521512384, | |
| "loss": 0.1492, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_accuracy": 85.81163547599675, | |
| "eval_average_metrics": 85.81163547599675, | |
| "eval_loss": 0.16445724666118622, | |
| "eval_runtime": 70.0896, | |
| "eval_samples_per_second": 140.278, | |
| "step": 9600 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_accuracy": 85.51668022782751, | |
| "eval_average_metrics": 85.51668022782751, | |
| "eval_loss": 0.171467587351799, | |
| "eval_runtime": 67.0912, | |
| "eval_samples_per_second": 146.547, | |
| "step": 9800 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "learning_rate": 0.00021851368970013035, | |
| "loss": 0.1465, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_accuracy": 85.76078112286412, | |
| "eval_average_metrics": 85.76078112286412, | |
| "eval_loss": 0.16485248506069183, | |
| "eval_runtime": 71.6667, | |
| "eval_samples_per_second": 137.191, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_accuracy": 85.54719283970708, | |
| "eval_average_metrics": 85.54719283970708, | |
| "eval_loss": 0.16628311574459076, | |
| "eval_runtime": 69.3952, | |
| "eval_samples_per_second": 141.681, | |
| "step": 10200 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_accuracy": 85.72009764035802, | |
| "eval_average_metrics": 85.72009764035802, | |
| "eval_loss": 0.1626047044992447, | |
| "eval_runtime": 63.0097, | |
| "eval_samples_per_second": 156.039, | |
| "step": 10400 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 0.0002144393741851369, | |
| "loss": 0.1478, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_accuracy": 85.64890154597234, | |
| "eval_average_metrics": 85.64890154597234, | |
| "eval_loss": 0.16279704868793488, | |
| "eval_runtime": 72.6628, | |
| "eval_samples_per_second": 135.31, | |
| "step": 10600 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_accuracy": 85.80146460537021, | |
| "eval_average_metrics": 85.80146460537021, | |
| "eval_loss": 0.1637255698442459, | |
| "eval_runtime": 71.5577, | |
| "eval_samples_per_second": 137.4, | |
| "step": 10800 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 0.0002103650586701434, | |
| "loss": 0.1509, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_accuracy": 85.79129373474369, | |
| "eval_average_metrics": 85.79129373474369, | |
| "eval_loss": 0.16114714741706848, | |
| "eval_runtime": 72.3627, | |
| "eval_samples_per_second": 135.871, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_accuracy": 85.8319772172498, | |
| "eval_average_metrics": 85.8319772172498, | |
| "eval_loss": 0.15985067188739777, | |
| "eval_runtime": 68.9168, | |
| "eval_samples_per_second": 142.665, | |
| "step": 11200 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 85.40480065093573, | |
| "eval_average_metrics": 85.40480065093573, | |
| "eval_loss": 0.17174053192138672, | |
| "eval_runtime": 68.3249, | |
| "eval_samples_per_second": 143.901, | |
| "step": 11400 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "learning_rate": 0.00020629074315514992, | |
| "loss": 0.15, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_accuracy": 85.97436940602115, | |
| "eval_average_metrics": 85.97436940602115, | |
| "eval_loss": 0.15932144224643707, | |
| "eval_runtime": 68.7187, | |
| "eval_samples_per_second": 143.076, | |
| "step": 11600 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_accuracy": 85.59804719283972, | |
| "eval_average_metrics": 85.59804719283972, | |
| "eval_loss": 0.16465091705322266, | |
| "eval_runtime": 68.0374, | |
| "eval_samples_per_second": 144.509, | |
| "step": 11800 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "learning_rate": 0.00020221642764015643, | |
| "loss": 0.1514, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 85.88283157038242, | |
| "eval_average_metrics": 85.88283157038242, | |
| "eval_loss": 0.15999911725521088, | |
| "eval_runtime": 67.678, | |
| "eval_samples_per_second": 145.276, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_accuracy": 86.0353946297803, | |
| "eval_average_metrics": 86.0353946297803, | |
| "eval_loss": 0.16025537252426147, | |
| "eval_runtime": 63.6019, | |
| "eval_samples_per_second": 154.587, | |
| "step": 12200 | |
| }, | |
| { | |
| "epoch": 1.01, | |
| "eval_accuracy": 85.8726606997559, | |
| "eval_average_metrics": 85.8726606997559, | |
| "eval_loss": 0.1621241718530655, | |
| "eval_runtime": 68.3301, | |
| "eval_samples_per_second": 143.89, | |
| "step": 12400 | |
| }, | |
| { | |
| "epoch": 1.02, | |
| "learning_rate": 0.00019814211212516294, | |
| "loss": 0.147, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_accuracy": 85.76078112286412, | |
| "eval_average_metrics": 85.76078112286412, | |
| "eval_loss": 0.17349866032600403, | |
| "eval_runtime": 66.9259, | |
| "eval_samples_per_second": 146.909, | |
| "step": 12600 | |
| }, | |
| { | |
| "epoch": 1.04, | |
| "eval_accuracy": 85.78112286411717, | |
| "eval_average_metrics": 85.78112286411717, | |
| "eval_loss": 0.1615545153617859, | |
| "eval_runtime": 69.1346, | |
| "eval_samples_per_second": 142.215, | |
| "step": 12800 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "learning_rate": 0.00019406779661016945, | |
| "loss": 0.143, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.06, | |
| "eval_accuracy": 85.84214808787632, | |
| "eval_average_metrics": 85.84214808787632, | |
| "eval_loss": 0.1661369502544403, | |
| "eval_runtime": 62.8833, | |
| "eval_samples_per_second": 156.353, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 1.08, | |
| "eval_accuracy": 86.06590724165989, | |
| "eval_average_metrics": 86.06590724165989, | |
| "eval_loss": 0.16263821721076965, | |
| "eval_runtime": 68.8917, | |
| "eval_samples_per_second": 142.717, | |
| "step": 13200 | |
| }, | |
| { | |
| "epoch": 1.09, | |
| "eval_accuracy": 85.69975589910497, | |
| "eval_average_metrics": 85.69975589910497, | |
| "eval_loss": 0.1634710133075714, | |
| "eval_runtime": 62.7526, | |
| "eval_samples_per_second": 156.679, | |
| "step": 13400 | |
| }, | |
| { | |
| "epoch": 1.1, | |
| "learning_rate": 0.000189993481095176, | |
| "loss": 0.1444, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 1.11, | |
| "eval_accuracy": 85.78112286411717, | |
| "eval_average_metrics": 85.78112286411717, | |
| "eval_loss": 0.16405758261680603, | |
| "eval_runtime": 66.2833, | |
| "eval_samples_per_second": 148.333, | |
| "step": 13600 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_accuracy": 85.9540276647681, | |
| "eval_average_metrics": 85.9540276647681, | |
| "eval_loss": 0.16064594686031342, | |
| "eval_runtime": 63.0321, | |
| "eval_samples_per_second": 155.984, | |
| "step": 13800 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "learning_rate": 0.0001859191655801825, | |
| "loss": 0.1466, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.14, | |
| "eval_accuracy": 86.31000813669651, | |
| "eval_average_metrics": 86.31000813669651, | |
| "eval_loss": 0.15932226181030273, | |
| "eval_runtime": 66.0369, | |
| "eval_samples_per_second": 148.886, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 1.16, | |
| "eval_accuracy": 85.72009764035802, | |
| "eval_average_metrics": 85.72009764035802, | |
| "eval_loss": 0.16476964950561523, | |
| "eval_runtime": 67.6592, | |
| "eval_samples_per_second": 145.316, | |
| "step": 14200 | |
| }, | |
| { | |
| "epoch": 1.17, | |
| "eval_accuracy": 85.36411716842962, | |
| "eval_average_metrics": 85.36411716842962, | |
| "eval_loss": 0.17415712773799896, | |
| "eval_runtime": 69.9268, | |
| "eval_samples_per_second": 140.604, | |
| "step": 14400 | |
| }, | |
| { | |
| "epoch": 1.18, | |
| "learning_rate": 0.00018184485006518905, | |
| "loss": 0.1493, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 1.19, | |
| "eval_accuracy": 85.65907241659886, | |
| "eval_average_metrics": 85.65907241659886, | |
| "eval_loss": 0.1634403020143509, | |
| "eval_runtime": 70.839, | |
| "eval_samples_per_second": 138.794, | |
| "step": 14600 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_accuracy": 85.94385679414158, | |
| "eval_average_metrics": 85.94385679414158, | |
| "eval_loss": 0.16072088479995728, | |
| "eval_runtime": 67.4524, | |
| "eval_samples_per_second": 145.762, | |
| "step": 14800 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "learning_rate": 0.00017777053455019556, | |
| "loss": 0.1453, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.22, | |
| "eval_accuracy": 85.97436940602115, | |
| "eval_average_metrics": 85.97436940602115, | |
| "eval_loss": 0.16354931890964508, | |
| "eval_runtime": 71.4854, | |
| "eval_samples_per_second": 137.539, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 1.24, | |
| "eval_accuracy": 86.04556550040684, | |
| "eval_average_metrics": 86.04556550040684, | |
| "eval_loss": 0.16333648562431335, | |
| "eval_runtime": 68.0001, | |
| "eval_samples_per_second": 144.588, | |
| "step": 15200 | |
| }, | |
| { | |
| "epoch": 1.25, | |
| "eval_accuracy": 86.10659072416598, | |
| "eval_average_metrics": 86.10659072416598, | |
| "eval_loss": 0.1655624806880951, | |
| "eval_runtime": 70.1652, | |
| "eval_samples_per_second": 140.126, | |
| "step": 15400 | |
| }, | |
| { | |
| "epoch": 1.26, | |
| "learning_rate": 0.00017369621903520204, | |
| "loss": 0.1409, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.27, | |
| "eval_accuracy": 86.08624898291293, | |
| "eval_average_metrics": 86.08624898291293, | |
| "eval_loss": 0.16712406277656555, | |
| "eval_runtime": 70.4214, | |
| "eval_samples_per_second": 139.617, | |
| "step": 15600 | |
| }, | |
| { | |
| "epoch": 1.29, | |
| "eval_accuracy": 85.65907241659886, | |
| "eval_average_metrics": 85.65907241659886, | |
| "eval_loss": 0.16410161554813385, | |
| "eval_runtime": 69.5518, | |
| "eval_samples_per_second": 141.362, | |
| "step": 15800 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "learning_rate": 0.00016962190352020858, | |
| "loss": 0.144, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.3, | |
| "eval_accuracy": 86.0353946297803, | |
| "eval_average_metrics": 86.0353946297803, | |
| "eval_loss": 0.1600012332201004, | |
| "eval_runtime": 69.4158, | |
| "eval_samples_per_second": 141.639, | |
| "step": 16000 | |
| }, | |
| { | |
| "epoch": 1.32, | |
| "eval_accuracy": 85.9947111472742, | |
| "eval_average_metrics": 85.9947111472742, | |
| "eval_loss": 0.1664758175611496, | |
| "eval_runtime": 70.4437, | |
| "eval_samples_per_second": 139.572, | |
| "step": 16200 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "eval_accuracy": 86.10659072416598, | |
| "eval_average_metrics": 86.10659072416598, | |
| "eval_loss": 0.16372230648994446, | |
| "eval_runtime": 70.0549, | |
| "eval_samples_per_second": 140.347, | |
| "step": 16400 | |
| }, | |
| { | |
| "epoch": 1.34, | |
| "learning_rate": 0.0001655475880052151, | |
| "loss": 0.142, | |
| "step": 16500 | |
| }, | |
| { | |
| "epoch": 1.35, | |
| "eval_accuracy": 86.01505288852725, | |
| "eval_average_metrics": 86.01505288852725, | |
| "eval_loss": 0.16394633054733276, | |
| "eval_runtime": 73.1757, | |
| "eval_samples_per_second": 134.362, | |
| "step": 16600 | |
| }, | |
| { | |
| "epoch": 1.37, | |
| "eval_accuracy": 86.19812855980472, | |
| "eval_average_metrics": 86.19812855980472, | |
| "eval_loss": 0.16236965358257294, | |
| "eval_runtime": 67.0918, | |
| "eval_samples_per_second": 146.545, | |
| "step": 16800 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "learning_rate": 0.00016147327249022163, | |
| "loss": 0.1469, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.39, | |
| "eval_accuracy": 86.06590724165989, | |
| "eval_average_metrics": 86.06590724165989, | |
| "eval_loss": 0.15920616686344147, | |
| "eval_runtime": 73.7251, | |
| "eval_samples_per_second": 133.36, | |
| "step": 17000 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_accuracy": 86.31000813669651, | |
| "eval_average_metrics": 86.31000813669651, | |
| "eval_loss": 0.16292713582515717, | |
| "eval_runtime": 70.7554, | |
| "eval_samples_per_second": 138.958, | |
| "step": 17200 | |
| }, | |
| { | |
| "epoch": 1.42, | |
| "eval_accuracy": 85.86248982912937, | |
| "eval_average_metrics": 85.86248982912937, | |
| "eval_loss": 0.16360752284526825, | |
| "eval_runtime": 72.9243, | |
| "eval_samples_per_second": 134.825, | |
| "step": 17400 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "learning_rate": 0.00015739895697522815, | |
| "loss": 0.145, | |
| "step": 17500 | |
| }, | |
| { | |
| "epoch": 1.43, | |
| "eval_accuracy": 85.88283157038242, | |
| "eval_average_metrics": 85.88283157038242, | |
| "eval_loss": 0.16182997822761536, | |
| "eval_runtime": 71.4174, | |
| "eval_samples_per_second": 137.67, | |
| "step": 17600 | |
| }, | |
| { | |
| "epoch": 1.45, | |
| "eval_accuracy": 85.79129373474369, | |
| "eval_average_metrics": 85.79129373474369, | |
| "eval_loss": 0.16671514511108398, | |
| "eval_runtime": 72.782, | |
| "eval_samples_per_second": 135.088, | |
| "step": 17800 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "learning_rate": 0.00015332464146023469, | |
| "loss": 0.1416, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.47, | |
| "eval_accuracy": 85.79129373474369, | |
| "eval_average_metrics": 85.79129373474369, | |
| "eval_loss": 0.16370686888694763, | |
| "eval_runtime": 69.6237, | |
| "eval_samples_per_second": 141.216, | |
| "step": 18000 | |
| }, | |
| { | |
| "epoch": 1.48, | |
| "eval_accuracy": 85.77095199349064, | |
| "eval_average_metrics": 85.77095199349064, | |
| "eval_loss": 0.15921832621097565, | |
| "eval_runtime": 69.2043, | |
| "eval_samples_per_second": 142.072, | |
| "step": 18200 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_accuracy": 85.82180634662328, | |
| "eval_average_metrics": 85.82180634662328, | |
| "eval_loss": 0.1640625149011612, | |
| "eval_runtime": 68.8973, | |
| "eval_samples_per_second": 142.705, | |
| "step": 18400 | |
| }, | |
| { | |
| "epoch": 1.51, | |
| "learning_rate": 0.00014925032594524117, | |
| "loss": 0.1453, | |
| "step": 18500 | |
| }, | |
| { | |
| "epoch": 1.52, | |
| "eval_accuracy": 85.36411716842962, | |
| "eval_average_metrics": 85.36411716842962, | |
| "eval_loss": 0.1784326434135437, | |
| "eval_runtime": 73.5853, | |
| "eval_samples_per_second": 133.614, | |
| "step": 18600 | |
| }, | |
| { | |
| "epoch": 1.53, | |
| "eval_accuracy": 85.89300244100895, | |
| "eval_average_metrics": 85.89300244100895, | |
| "eval_loss": 0.16068434715270996, | |
| "eval_runtime": 69.0604, | |
| "eval_samples_per_second": 142.368, | |
| "step": 18800 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "learning_rate": 0.0001451760104302477, | |
| "loss": 0.1414, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.55, | |
| "eval_accuracy": 85.75061025223759, | |
| "eval_average_metrics": 85.75061025223759, | |
| "eval_loss": 0.164332315325737, | |
| "eval_runtime": 75.2094, | |
| "eval_samples_per_second": 130.728, | |
| "step": 19000 | |
| }, | |
| { | |
| "epoch": 1.56, | |
| "eval_accuracy": 85.66924328722538, | |
| "eval_average_metrics": 85.66924328722538, | |
| "eval_loss": 0.15945520997047424, | |
| "eval_runtime": 70.4879, | |
| "eval_samples_per_second": 139.485, | |
| "step": 19200 | |
| }, | |
| { | |
| "epoch": 1.58, | |
| "eval_accuracy": 85.74043938161107, | |
| "eval_average_metrics": 85.74043938161107, | |
| "eval_loss": 0.15915806591510773, | |
| "eval_runtime": 72.0792, | |
| "eval_samples_per_second": 136.405, | |
| "step": 19400 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00014110169491525422, | |
| "loss": 0.1463, | |
| "step": 19500 | |
| }, | |
| { | |
| "epoch": 1.6, | |
| "eval_accuracy": 86.10659072416598, | |
| "eval_average_metrics": 86.10659072416598, | |
| "eval_loss": 0.16554424166679382, | |
| "eval_runtime": 69.3189, | |
| "eval_samples_per_second": 141.837, | |
| "step": 19600 | |
| }, | |
| { | |
| "epoch": 1.61, | |
| "eval_accuracy": 85.8726606997559, | |
| "eval_average_metrics": 85.8726606997559, | |
| "eval_loss": 0.1639343500137329, | |
| "eval_runtime": 70.0392, | |
| "eval_samples_per_second": 140.378, | |
| "step": 19800 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "learning_rate": 0.00013702737940026073, | |
| "loss": 0.1435, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.63, | |
| "eval_accuracy": 85.79129373474369, | |
| "eval_average_metrics": 85.79129373474369, | |
| "eval_loss": 0.1651633232831955, | |
| "eval_runtime": 72.4148, | |
| "eval_samples_per_second": 135.773, | |
| "step": 20000 | |
| }, | |
| { | |
| "epoch": 1.65, | |
| "eval_accuracy": 85.90317331163547, | |
| "eval_average_metrics": 85.90317331163547, | |
| "eval_loss": 0.163535937666893, | |
| "eval_runtime": 73.7758, | |
| "eval_samples_per_second": 133.269, | |
| "step": 20200 | |
| }, | |
| { | |
| "epoch": 1.66, | |
| "eval_accuracy": 85.81163547599675, | |
| "eval_average_metrics": 85.81163547599675, | |
| "eval_loss": 0.16132992506027222, | |
| "eval_runtime": 74.1683, | |
| "eval_samples_per_second": 132.563, | |
| "step": 20400 | |
| }, | |
| { | |
| "epoch": 1.67, | |
| "learning_rate": 0.00013295306388526727, | |
| "loss": 0.1393, | |
| "step": 20500 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_accuracy": 85.86248982912937, | |
| "eval_average_metrics": 85.86248982912937, | |
| "eval_loss": 0.16424906253814697, | |
| "eval_runtime": 75.4388, | |
| "eval_samples_per_second": 130.331, | |
| "step": 20600 | |
| }, | |
| { | |
| "epoch": 1.69, | |
| "eval_accuracy": 86.20829943043124, | |
| "eval_average_metrics": 86.20829943043124, | |
| "eval_loss": 0.16280879080295563, | |
| "eval_runtime": 73.6216, | |
| "eval_samples_per_second": 133.548, | |
| "step": 20800 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "learning_rate": 0.00012887874837027379, | |
| "loss": 0.1476, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.71, | |
| "eval_accuracy": 86.32017900732303, | |
| "eval_average_metrics": 86.32017900732303, | |
| "eval_loss": 0.1631232500076294, | |
| "eval_runtime": 75.2489, | |
| "eval_samples_per_second": 130.66, | |
| "step": 21000 | |
| }, | |
| { | |
| "epoch": 1.73, | |
| "eval_accuracy": 85.97436940602115, | |
| "eval_average_metrics": 85.97436940602115, | |
| "eval_loss": 0.16299067437648773, | |
| "eval_runtime": 66.4642, | |
| "eval_samples_per_second": 147.929, | |
| "step": 21200 | |
| }, | |
| { | |
| "epoch": 1.74, | |
| "eval_accuracy": 86.14727420667208, | |
| "eval_average_metrics": 86.14727420667208, | |
| "eval_loss": 0.16605544090270996, | |
| "eval_runtime": 72.8565, | |
| "eval_samples_per_second": 134.95, | |
| "step": 21400 | |
| }, | |
| { | |
| "epoch": 1.75, | |
| "learning_rate": 0.0001248044328552803, | |
| "loss": 0.1434, | |
| "step": 21500 | |
| }, | |
| { | |
| "epoch": 1.76, | |
| "eval_accuracy": 85.73026851098454, | |
| "eval_average_metrics": 85.73026851098454, | |
| "eval_loss": 0.16752640902996063, | |
| "eval_runtime": 73.5899, | |
| "eval_samples_per_second": 133.605, | |
| "step": 21600 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_accuracy": 86.05573637103336, | |
| "eval_average_metrics": 86.05573637103336, | |
| "eval_loss": 0.1640099287033081, | |
| "eval_runtime": 70.9879, | |
| "eval_samples_per_second": 138.503, | |
| "step": 21800 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "learning_rate": 0.00012073011734028682, | |
| "loss": 0.1425, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.79, | |
| "eval_accuracy": 86.02522375915377, | |
| "eval_average_metrics": 86.02522375915377, | |
| "eval_loss": 0.1621551811695099, | |
| "eval_runtime": 67.2101, | |
| "eval_samples_per_second": 146.288, | |
| "step": 22000 | |
| }, | |
| { | |
| "epoch": 1.81, | |
| "eval_accuracy": 86.01505288852725, | |
| "eval_average_metrics": 86.01505288852725, | |
| "eval_loss": 0.1614847183227539, | |
| "eval_runtime": 68.053, | |
| "eval_samples_per_second": 144.476, | |
| "step": 22200 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "eval_accuracy": 85.84214808787632, | |
| "eval_average_metrics": 85.84214808787632, | |
| "eval_loss": 0.1623518317937851, | |
| "eval_runtime": 67.5192, | |
| "eval_samples_per_second": 145.618, | |
| "step": 22400 | |
| }, | |
| { | |
| "epoch": 1.83, | |
| "learning_rate": 0.00011665580182529335, | |
| "loss": 0.1441, | |
| "step": 22500 | |
| }, | |
| { | |
| "epoch": 1.84, | |
| "eval_accuracy": 86.06590724165989, | |
| "eval_average_metrics": 86.06590724165989, | |
| "eval_loss": 0.1657322645187378, | |
| "eval_runtime": 65.1547, | |
| "eval_samples_per_second": 150.902, | |
| "step": 22600 | |
| }, | |
| { | |
| "epoch": 1.86, | |
| "eval_accuracy": 86.00488201790073, | |
| "eval_average_metrics": 86.00488201790073, | |
| "eval_loss": 0.16235147416591644, | |
| "eval_runtime": 57.9601, | |
| "eval_samples_per_second": 169.634, | |
| "step": 22800 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.00011258148631029986, | |
| "loss": 0.1391, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_accuracy": 85.82180634662328, | |
| "eval_average_metrics": 85.82180634662328, | |
| "eval_loss": 0.15935710072517395, | |
| "eval_runtime": 57.3417, | |
| "eval_samples_per_second": 171.463, | |
| "step": 23000 | |
| }, | |
| { | |
| "epoch": 1.89, | |
| "eval_accuracy": 85.81163547599675, | |
| "eval_average_metrics": 85.81163547599675, | |
| "eval_loss": 0.1635563224554062, | |
| "eval_runtime": 56.1817, | |
| "eval_samples_per_second": 175.003, | |
| "step": 23200 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "eval_accuracy": 85.7099267697315, | |
| "eval_average_metrics": 85.7099267697315, | |
| "eval_loss": 0.16560596227645874, | |
| "eval_runtime": 59.2499, | |
| "eval_samples_per_second": 165.941, | |
| "step": 23400 | |
| }, | |
| { | |
| "epoch": 1.91, | |
| "learning_rate": 0.00010850717079530637, | |
| "loss": 0.1382, | |
| "step": 23500 | |
| }, | |
| { | |
| "epoch": 1.92, | |
| "eval_accuracy": 86.02522375915377, | |
| "eval_average_metrics": 86.02522375915377, | |
| "eval_loss": 0.1604122817516327, | |
| "eval_runtime": 59.8309, | |
| "eval_samples_per_second": 164.33, | |
| "step": 23600 | |
| }, | |
| { | |
| "epoch": 1.94, | |
| "eval_accuracy": 85.80146460537021, | |
| "eval_average_metrics": 85.80146460537021, | |
| "eval_loss": 0.16524049639701843, | |
| "eval_runtime": 61.3749, | |
| "eval_samples_per_second": 160.196, | |
| "step": 23800 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.0001044328552803129, | |
| "loss": 0.1379, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_accuracy": 85.73026851098454, | |
| "eval_average_metrics": 85.73026851098454, | |
| "eval_loss": 0.16642265021800995, | |
| "eval_runtime": 61.5426, | |
| "eval_samples_per_second": 159.759, | |
| "step": 24000 | |
| }, | |
| { | |
| "epoch": 1.97, | |
| "eval_accuracy": 86.23881204231083, | |
| "eval_average_metrics": 86.23881204231083, | |
| "eval_loss": 0.1592371165752411, | |
| "eval_runtime": 61.4113, | |
| "eval_samples_per_second": 160.101, | |
| "step": 24200 | |
| }, | |
| { | |
| "epoch": 1.99, | |
| "eval_accuracy": 86.23881204231083, | |
| "eval_average_metrics": 86.23881204231083, | |
| "eval_loss": 0.16144132614135742, | |
| "eval_runtime": 60.7982, | |
| "eval_samples_per_second": 161.715, | |
| "step": 24400 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 0.00010035853976531943, | |
| "loss": 0.1454, | |
| "step": 24500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 86.09641985353946, | |
| "eval_average_metrics": 86.09641985353946, | |
| "eval_loss": 0.1667686253786087, | |
| "eval_runtime": 60.7535, | |
| "eval_samples_per_second": 161.834, | |
| "step": 24600 | |
| }, | |
| { | |
| "epoch": 2.02, | |
| "eval_accuracy": 86.06590724165989, | |
| "eval_average_metrics": 86.06590724165989, | |
| "eval_loss": 0.1680220663547516, | |
| "eval_runtime": 58.6093, | |
| "eval_samples_per_second": 167.755, | |
| "step": 24800 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "learning_rate": 9.628422425032592e-05, | |
| "loss": 0.138, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.04, | |
| "eval_accuracy": 85.84214808787632, | |
| "eval_average_metrics": 85.84214808787632, | |
| "eval_loss": 0.16406849026679993, | |
| "eval_runtime": 56.628, | |
| "eval_samples_per_second": 173.624, | |
| "step": 25000 | |
| }, | |
| { | |
| "epoch": 2.05, | |
| "eval_accuracy": 85.89300244100895, | |
| "eval_average_metrics": 85.89300244100895, | |
| "eval_loss": 0.16717499494552612, | |
| "eval_runtime": 55.5415, | |
| "eval_samples_per_second": 177.021, | |
| "step": 25200 | |
| }, | |
| { | |
| "epoch": 2.07, | |
| "eval_accuracy": 86.18795768917819, | |
| "eval_average_metrics": 86.18795768917819, | |
| "eval_loss": 0.16641969978809357, | |
| "eval_runtime": 52.5873, | |
| "eval_samples_per_second": 186.965, | |
| "step": 25400 | |
| }, | |
| { | |
| "epoch": 2.08, | |
| "learning_rate": 9.220990873533245e-05, | |
| "loss": 0.1356, | |
| "step": 25500 | |
| }, | |
| { | |
| "epoch": 2.09, | |
| "eval_accuracy": 86.0353946297803, | |
| "eval_average_metrics": 86.0353946297803, | |
| "eval_loss": 0.16517092287540436, | |
| "eval_runtime": 54.8202, | |
| "eval_samples_per_second": 179.35, | |
| "step": 25600 | |
| }, | |
| { | |
| "epoch": 2.1, | |
| "eval_accuracy": 86.20829943043124, | |
| "eval_average_metrics": 86.20829943043124, | |
| "eval_loss": 0.1606525331735611, | |
| "eval_runtime": 58.0473, | |
| "eval_samples_per_second": 169.379, | |
| "step": 25800 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "learning_rate": 8.813559322033898e-05, | |
| "loss": 0.138, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.12, | |
| "eval_accuracy": 85.75061025223759, | |
| "eval_average_metrics": 85.75061025223759, | |
| "eval_loss": 0.1638970524072647, | |
| "eval_runtime": 60.2198, | |
| "eval_samples_per_second": 163.268, | |
| "step": 26000 | |
| }, | |
| { | |
| "epoch": 2.13, | |
| "eval_accuracy": 85.913344182262, | |
| "eval_average_metrics": 85.913344182262, | |
| "eval_loss": 0.16841016709804535, | |
| "eval_runtime": 59.4106, | |
| "eval_samples_per_second": 165.492, | |
| "step": 26200 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_accuracy": 86.11676159479251, | |
| "eval_average_metrics": 86.11676159479251, | |
| "eval_loss": 0.16477040946483612, | |
| "eval_runtime": 59.3558, | |
| "eval_samples_per_second": 165.645, | |
| "step": 26400 | |
| }, | |
| { | |
| "epoch": 2.16, | |
| "learning_rate": 8.406127770534549e-05, | |
| "loss": 0.1315, | |
| "step": 26500 | |
| }, | |
| { | |
| "epoch": 2.17, | |
| "eval_accuracy": 85.56753458096013, | |
| "eval_average_metrics": 85.56753458096013, | |
| "eval_loss": 0.17194555699825287, | |
| "eval_runtime": 59.1291, | |
| "eval_samples_per_second": 166.28, | |
| "step": 26600 | |
| }, | |
| { | |
| "epoch": 2.18, | |
| "eval_accuracy": 85.94385679414158, | |
| "eval_average_metrics": 85.94385679414158, | |
| "eval_loss": 0.16446976363658905, | |
| "eval_runtime": 46.5169, | |
| "eval_samples_per_second": 211.364, | |
| "step": 26800 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "learning_rate": 7.998696219035201e-05, | |
| "loss": 0.1332, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.2, | |
| "eval_accuracy": 85.81163547599675, | |
| "eval_average_metrics": 85.81163547599675, | |
| "eval_loss": 0.1695818454027176, | |
| "eval_runtime": 45.2415, | |
| "eval_samples_per_second": 217.323, | |
| "step": 27000 | |
| }, | |
| { | |
| "epoch": 2.22, | |
| "eval_accuracy": 85.93368592351506, | |
| "eval_average_metrics": 85.93368592351506, | |
| "eval_loss": 0.1703195720911026, | |
| "eval_runtime": 45.7196, | |
| "eval_samples_per_second": 215.05, | |
| "step": 27200 | |
| }, | |
| { | |
| "epoch": 2.23, | |
| "eval_accuracy": 86.14727420667208, | |
| "eval_average_metrics": 86.14727420667208, | |
| "eval_loss": 0.1657610833644867, | |
| "eval_runtime": 48.3877, | |
| "eval_samples_per_second": 203.192, | |
| "step": 27400 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 7.591264667535854e-05, | |
| "loss": 0.1354, | |
| "step": 27500 | |
| }, | |
| { | |
| "epoch": 2.25, | |
| "eval_accuracy": 86.09641985353946, | |
| "eval_average_metrics": 86.09641985353946, | |
| "eval_loss": 0.1658048778772354, | |
| "eval_runtime": 49.7336, | |
| "eval_samples_per_second": 197.693, | |
| "step": 27600 | |
| }, | |
| { | |
| "epoch": 2.27, | |
| "eval_accuracy": 85.9947111472742, | |
| "eval_average_metrics": 85.9947111472742, | |
| "eval_loss": 0.16919544339179993, | |
| "eval_runtime": 56.1273, | |
| "eval_samples_per_second": 175.173, | |
| "step": 27800 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "learning_rate": 7.183833116036505e-05, | |
| "loss": 0.1311, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.28, | |
| "eval_accuracy": 86.14727420667208, | |
| "eval_average_metrics": 86.14727420667208, | |
| "eval_loss": 0.16675373911857605, | |
| "eval_runtime": 48.3863, | |
| "eval_samples_per_second": 203.198, | |
| "step": 28000 | |
| }, | |
| { | |
| "epoch": 2.3, | |
| "eval_accuracy": 85.73026851098454, | |
| "eval_average_metrics": 85.73026851098454, | |
| "eval_loss": 0.16785795986652374, | |
| "eval_runtime": 49.8131, | |
| "eval_samples_per_second": 197.378, | |
| "step": 28200 | |
| }, | |
| { | |
| "epoch": 2.31, | |
| "eval_accuracy": 85.62855980471929, | |
| "eval_average_metrics": 85.62855980471929, | |
| "eval_loss": 0.1680869162082672, | |
| "eval_runtime": 49.9453, | |
| "eval_samples_per_second": 196.855, | |
| "step": 28400 | |
| }, | |
| { | |
| "epoch": 2.32, | |
| "learning_rate": 6.776401564537158e-05, | |
| "loss": 0.1351, | |
| "step": 28500 | |
| }, | |
| { | |
| "epoch": 2.33, | |
| "eval_accuracy": 85.86248982912937, | |
| "eval_average_metrics": 85.86248982912937, | |
| "eval_loss": 0.16826093196868896, | |
| "eval_runtime": 47.9335, | |
| "eval_samples_per_second": 205.117, | |
| "step": 28600 | |
| }, | |
| { | |
| "epoch": 2.35, | |
| "eval_accuracy": 85.84214808787632, | |
| "eval_average_metrics": 85.84214808787632, | |
| "eval_loss": 0.16651229560375214, | |
| "eval_runtime": 49.892, | |
| "eval_samples_per_second": 197.066, | |
| "step": 28800 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "learning_rate": 6.368970013037809e-05, | |
| "loss": 0.1347, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.36, | |
| "eval_accuracy": 86.01505288852725, | |
| "eval_average_metrics": 86.01505288852725, | |
| "eval_loss": 0.16772997379302979, | |
| "eval_runtime": 49.8947, | |
| "eval_samples_per_second": 197.055, | |
| "step": 29000 | |
| }, | |
| { | |
| "epoch": 2.38, | |
| "eval_accuracy": 86.2693246541904, | |
| "eval_average_metrics": 86.2693246541904, | |
| "eval_loss": 0.16580338776111603, | |
| "eval_runtime": 50.1072, | |
| "eval_samples_per_second": 196.219, | |
| "step": 29200 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "eval_accuracy": 86.20829943043124, | |
| "eval_average_metrics": 86.20829943043124, | |
| "eval_loss": 0.16437767446041107, | |
| "eval_runtime": 52.1067, | |
| "eval_samples_per_second": 188.69, | |
| "step": 29400 | |
| }, | |
| { | |
| "epoch": 2.4, | |
| "learning_rate": 5.961538461538461e-05, | |
| "loss": 0.1308, | |
| "step": 29500 | |
| }, | |
| { | |
| "epoch": 2.41, | |
| "eval_accuracy": 86.10659072416598, | |
| "eval_average_metrics": 86.10659072416598, | |
| "eval_loss": 0.16909147799015045, | |
| "eval_runtime": 48.4579, | |
| "eval_samples_per_second": 202.898, | |
| "step": 29600 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_accuracy": 86.07607811228641, | |
| "eval_average_metrics": 86.07607811228641, | |
| "eval_loss": 0.16544800996780396, | |
| "eval_runtime": 53.3838, | |
| "eval_samples_per_second": 184.176, | |
| "step": 29800 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "learning_rate": 5.554106910039113e-05, | |
| "loss": 0.1301, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.44, | |
| "eval_accuracy": 86.15744507729862, | |
| "eval_average_metrics": 86.15744507729862, | |
| "eval_loss": 0.16652615368366241, | |
| "eval_runtime": 45.6168, | |
| "eval_samples_per_second": 215.535, | |
| "step": 30000 | |
| }, | |
| { | |
| "epoch": 2.46, | |
| "eval_accuracy": 86.00488201790073, | |
| "eval_average_metrics": 86.00488201790073, | |
| "eval_loss": 0.16784194111824036, | |
| "eval_runtime": 46.0567, | |
| "eval_samples_per_second": 213.476, | |
| "step": 30200 | |
| }, | |
| { | |
| "epoch": 2.48, | |
| "eval_accuracy": 86.12693246541903, | |
| "eval_average_metrics": 86.12693246541903, | |
| "eval_loss": 0.16726775467395782, | |
| "eval_runtime": 53.9156, | |
| "eval_samples_per_second": 182.359, | |
| "step": 30400 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "learning_rate": 5.146675358539765e-05, | |
| "loss": 0.1324, | |
| "step": 30500 | |
| }, | |
| { | |
| "epoch": 2.49, | |
| "eval_accuracy": 86.2693246541904, | |
| "eval_average_metrics": 86.2693246541904, | |
| "eval_loss": 0.16430824995040894, | |
| "eval_runtime": 48.9757, | |
| "eval_samples_per_second": 200.753, | |
| "step": 30600 | |
| }, | |
| { | |
| "epoch": 2.51, | |
| "eval_accuracy": 85.88283157038242, | |
| "eval_average_metrics": 85.88283157038242, | |
| "eval_loss": 0.16779069602489471, | |
| "eval_runtime": 45.747, | |
| "eval_samples_per_second": 214.921, | |
| "step": 30800 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "learning_rate": 4.7392438070404173e-05, | |
| "loss": 0.1333, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.53, | |
| "eval_accuracy": 86.12693246541903, | |
| "eval_average_metrics": 86.12693246541903, | |
| "eval_loss": 0.16358087956905365, | |
| "eval_runtime": 45.8832, | |
| "eval_samples_per_second": 214.283, | |
| "step": 31000 | |
| }, | |
| { | |
| "epoch": 2.54, | |
| "eval_accuracy": 86.20829943043124, | |
| "eval_average_metrics": 86.20829943043124, | |
| "eval_loss": 0.16272908449172974, | |
| "eval_runtime": 44.9382, | |
| "eval_samples_per_second": 218.789, | |
| "step": 31200 | |
| }, | |
| { | |
| "epoch": 2.56, | |
| "eval_accuracy": 86.21847030105776, | |
| "eval_average_metrics": 86.21847030105776, | |
| "eval_loss": 0.16233167052268982, | |
| "eval_runtime": 45.3879, | |
| "eval_samples_per_second": 216.621, | |
| "step": 31400 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "learning_rate": 4.3318122555410686e-05, | |
| "loss": 0.1366, | |
| "step": 31500 | |
| }, | |
| { | |
| "epoch": 2.57, | |
| "eval_accuracy": 86.12693246541903, | |
| "eval_average_metrics": 86.12693246541903, | |
| "eval_loss": 0.16067072749137878, | |
| "eval_runtime": 45.3275, | |
| "eval_samples_per_second": 216.91, | |
| "step": 31600 | |
| }, | |
| { | |
| "epoch": 2.59, | |
| "eval_accuracy": 86.00488201790073, | |
| "eval_average_metrics": 86.00488201790073, | |
| "eval_loss": 0.16438935697078705, | |
| "eval_runtime": 45.069, | |
| "eval_samples_per_second": 218.154, | |
| "step": 31800 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "learning_rate": 3.924380704041721e-05, | |
| "loss": 0.1401, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.61, | |
| "eval_accuracy": 86.13710333604556, | |
| "eval_average_metrics": 86.13710333604556, | |
| "eval_loss": 0.15911179780960083, | |
| "eval_runtime": 45.1936, | |
| "eval_samples_per_second": 217.553, | |
| "step": 32000 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_accuracy": 86.0353946297803, | |
| "eval_average_metrics": 86.0353946297803, | |
| "eval_loss": 0.16360121965408325, | |
| "eval_runtime": 46.2972, | |
| "eval_samples_per_second": 212.367, | |
| "step": 32200 | |
| }, | |
| { | |
| "epoch": 2.64, | |
| "eval_accuracy": 86.19812855980472, | |
| "eval_average_metrics": 86.19812855980472, | |
| "eval_loss": 0.1620582789182663, | |
| "eval_runtime": 45.431, | |
| "eval_samples_per_second": 216.416, | |
| "step": 32400 | |
| }, | |
| { | |
| "epoch": 2.65, | |
| "learning_rate": 3.5169491525423724e-05, | |
| "loss": 0.1343, | |
| "step": 32500 | |
| }, | |
| { | |
| "epoch": 2.66, | |
| "eval_accuracy": 86.08624898291293, | |
| "eval_average_metrics": 86.08624898291293, | |
| "eval_loss": 0.16568879783153534, | |
| "eval_runtime": 45.3557, | |
| "eval_samples_per_second": 216.775, | |
| "step": 32600 | |
| }, | |
| { | |
| "epoch": 2.67, | |
| "eval_accuracy": 86.3506916192026, | |
| "eval_average_metrics": 86.3506916192026, | |
| "eval_loss": 0.16203464567661285, | |
| "eval_runtime": 44.9934, | |
| "eval_samples_per_second": 218.521, | |
| "step": 32800 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "learning_rate": 3.109517601043025e-05, | |
| "loss": 0.1345, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.69, | |
| "eval_accuracy": 86.2286411716843, | |
| "eval_average_metrics": 86.2286411716843, | |
| "eval_loss": 0.1651608943939209, | |
| "eval_runtime": 45.6781, | |
| "eval_samples_per_second": 215.246, | |
| "step": 33000 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_accuracy": 86.28966639544345, | |
| "eval_average_metrics": 86.28966639544345, | |
| "eval_loss": 0.16327986121177673, | |
| "eval_runtime": 45.4189, | |
| "eval_samples_per_second": 216.474, | |
| "step": 33200 | |
| }, | |
| { | |
| "epoch": 2.72, | |
| "eval_accuracy": 86.32017900732303, | |
| "eval_average_metrics": 86.32017900732303, | |
| "eval_loss": 0.16431905329227448, | |
| "eval_runtime": 44.9451, | |
| "eval_samples_per_second": 218.756, | |
| "step": 33400 | |
| }, | |
| { | |
| "epoch": 2.73, | |
| "learning_rate": 2.7020860495436762e-05, | |
| "loss": 0.1321, | |
| "step": 33500 | |
| }, | |
| { | |
| "epoch": 2.74, | |
| "eval_accuracy": 86.12693246541903, | |
| "eval_average_metrics": 86.12693246541903, | |
| "eval_loss": 0.16271112859249115, | |
| "eval_runtime": 45.4697, | |
| "eval_samples_per_second": 216.232, | |
| "step": 33600 | |
| }, | |
| { | |
| "epoch": 2.75, | |
| "eval_accuracy": 86.27949552481692, | |
| "eval_average_metrics": 86.27949552481692, | |
| "eval_loss": 0.16375945508480072, | |
| "eval_runtime": 45.4502, | |
| "eval_samples_per_second": 216.325, | |
| "step": 33800 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "learning_rate": 2.294654498044328e-05, | |
| "loss": 0.1348, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.77, | |
| "eval_accuracy": 86.12693246541903, | |
| "eval_average_metrics": 86.12693246541903, | |
| "eval_loss": 0.16407504677772522, | |
| "eval_runtime": 45.1689, | |
| "eval_samples_per_second": 217.672, | |
| "step": 34000 | |
| }, | |
| { | |
| "epoch": 2.79, | |
| "eval_accuracy": 86.2693246541904, | |
| "eval_average_metrics": 86.2693246541904, | |
| "eval_loss": 0.16450707614421844, | |
| "eval_runtime": 45.2377, | |
| "eval_samples_per_second": 217.341, | |
| "step": 34200 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_accuracy": 86.10659072416598, | |
| "eval_average_metrics": 86.10659072416598, | |
| "eval_loss": 0.16434065997600555, | |
| "eval_runtime": 45.1061, | |
| "eval_samples_per_second": 217.975, | |
| "step": 34400 | |
| }, | |
| { | |
| "epoch": 2.81, | |
| "learning_rate": 1.8872229465449803e-05, | |
| "loss": 0.1361, | |
| "step": 34500 | |
| }, | |
| { | |
| "epoch": 2.82, | |
| "eval_accuracy": 86.2286411716843, | |
| "eval_average_metrics": 86.2286411716843, | |
| "eval_loss": 0.16249413788318634, | |
| "eval_runtime": 45.4837, | |
| "eval_samples_per_second": 216.165, | |
| "step": 34600 | |
| }, | |
| { | |
| "epoch": 2.84, | |
| "eval_accuracy": 86.14727420667208, | |
| "eval_average_metrics": 86.14727420667208, | |
| "eval_loss": 0.1645725518465042, | |
| "eval_runtime": 45.3804, | |
| "eval_samples_per_second": 216.657, | |
| "step": 34800 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "learning_rate": 1.4797913950456322e-05, | |
| "loss": 0.1335, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.85, | |
| "eval_accuracy": 86.16761594792514, | |
| "eval_average_metrics": 86.16761594792514, | |
| "eval_loss": 0.16331711411476135, | |
| "eval_runtime": 45.6878, | |
| "eval_samples_per_second": 215.2, | |
| "step": 35000 | |
| }, | |
| { | |
| "epoch": 2.87, | |
| "eval_accuracy": 86.12693246541903, | |
| "eval_average_metrics": 86.12693246541903, | |
| "eval_loss": 0.16130615770816803, | |
| "eval_runtime": 45.0413, | |
| "eval_samples_per_second": 218.289, | |
| "step": 35200 | |
| }, | |
| { | |
| "epoch": 2.88, | |
| "eval_accuracy": 86.25915378356387, | |
| "eval_average_metrics": 86.25915378356387, | |
| "eval_loss": 0.16118405759334564, | |
| "eval_runtime": 45.4606, | |
| "eval_samples_per_second": 216.275, | |
| "step": 35400 | |
| }, | |
| { | |
| "epoch": 2.89, | |
| "learning_rate": 1.0723598435462841e-05, | |
| "loss": 0.1368, | |
| "step": 35500 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_accuracy": 86.20829943043124, | |
| "eval_average_metrics": 86.20829943043124, | |
| "eval_loss": 0.16242747008800507, | |
| "eval_runtime": 45.4511, | |
| "eval_samples_per_second": 216.32, | |
| "step": 35600 | |
| }, | |
| { | |
| "epoch": 2.92, | |
| "eval_accuracy": 86.16761594792514, | |
| "eval_average_metrics": 86.16761594792514, | |
| "eval_loss": 0.16256776452064514, | |
| "eval_runtime": 45.2281, | |
| "eval_samples_per_second": 217.387, | |
| "step": 35800 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "learning_rate": 6.649282920469361e-06, | |
| "loss": 0.1323, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_accuracy": 86.33034987794956, | |
| "eval_average_metrics": 86.33034987794956, | |
| "eval_loss": 0.1619912087917328, | |
| "eval_runtime": 45.4939, | |
| "eval_samples_per_second": 216.117, | |
| "step": 36000 | |
| }, | |
| { | |
| "epoch": 2.95, | |
| "eval_accuracy": 86.20829943043124, | |
| "eval_average_metrics": 86.20829943043124, | |
| "eval_loss": 0.1618933379650116, | |
| "eval_runtime": 45.7568, | |
| "eval_samples_per_second": 214.875, | |
| "step": 36200 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "eval_accuracy": 86.17778681855167, | |
| "eval_average_metrics": 86.17778681855167, | |
| "eval_loss": 0.1621612161397934, | |
| "eval_runtime": 44.9098, | |
| "eval_samples_per_second": 218.928, | |
| "step": 36400 | |
| }, | |
| { | |
| "epoch": 2.97, | |
| "learning_rate": 2.5749674054758798e-06, | |
| "loss": 0.1334, | |
| "step": 36500 | |
| }, | |
| { | |
| "epoch": 2.98, | |
| "eval_accuracy": 86.21847030105776, | |
| "eval_average_metrics": 86.21847030105776, | |
| "eval_loss": 0.1621207445859909, | |
| "eval_runtime": 45.2906, | |
| "eval_samples_per_second": 217.087, | |
| "step": 36600 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 86.19812855980472, | |
| "eval_average_metrics": 86.19812855980472, | |
| "eval_loss": 0.16204114258289337, | |
| "eval_runtime": 45.2396, | |
| "eval_samples_per_second": 217.332, | |
| "step": 36800 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "step": 36816, | |
| "total_flos": 1.4734111386140467e+17, | |
| "train_loss": 0.14615808979732375, | |
| "train_runtime": 23668.4692, | |
| "train_samples_per_second": 49.775, | |
| "train_steps_per_second": 1.555 | |
| } | |
| ], | |
| "max_steps": 36816, | |
| "num_train_epochs": 3, | |
| "total_flos": 1.4734111386140467e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |