{ "best_metric": 94.22774869109946, "best_model_checkpoint": "outputs/bitfit/t5-base/qqp/checkpoint-8600", "epoch": 3.0, "global_step": 34017, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.02, "eval_accuracy": 93.60000000000001, "eval_average_metrics": 92.41643835616439, "eval_f1": 91.23287671232877, "eval_loss": 0.06451932340860367, "eval_runtime": 4.5218, "eval_samples_per_second": 221.15, "step": 200 }, { "epoch": 0.04, "eval_accuracy": 90.2, "eval_average_metrics": 89.15339805825244, "eval_f1": 88.10679611650487, "eval_loss": 0.08826350420713425, "eval_runtime": 4.5364, "eval_samples_per_second": 220.438, "step": 400 }, { "epoch": 0.04, "learning_rate": 0.00029559044007408056, "loss": 0.1448, "step": 500 }, { "epoch": 0.05, "eval_accuracy": 93.7, "eval_average_metrics": 92.76439688715953, "eval_f1": 91.82879377431907, "eval_loss": 0.061929114162921906, "eval_runtime": 4.5766, "eval_samples_per_second": 218.503, "step": 600 }, { "epoch": 0.07, "eval_accuracy": 93.30000000000001, "eval_average_metrics": 92.3385456885457, "eval_f1": 91.37709137709138, "eval_loss": 0.0669504851102829, "eval_runtime": 4.5237, "eval_samples_per_second": 221.059, "step": 800 }, { "epoch": 0.09, "learning_rate": 0.0002911808801481612, "loss": 0.0782, "step": 1000 }, { "epoch": 0.09, "eval_accuracy": 93.7, "eval_average_metrics": 92.74308996088658, "eval_f1": 91.78617992177314, "eval_loss": 0.06603064388036728, "eval_runtime": 4.727, "eval_samples_per_second": 211.549, "step": 1000 }, { "epoch": 0.11, "eval_accuracy": 93.30000000000001, "eval_average_metrics": 92.14125168236878, "eval_f1": 90.98250336473754, "eval_loss": 0.06394415348768234, "eval_runtime": 4.6179, "eval_samples_per_second": 216.548, "step": 1200 }, { "epoch": 0.12, "eval_accuracy": 93.30000000000001, "eval_average_metrics": 92.07974079126876, "eval_f1": 90.85948158253751, "eval_loss": 0.059915054589509964, "eval_runtime": 4.4912, "eval_samples_per_second": 222.655, "step": 1400 }, { "epoch": 0.13, "learning_rate": 0.0002867713202222418, "loss": 0.0773, "step": 1500 }, { "epoch": 0.14, "eval_accuracy": 93.0, "eval_average_metrics": 92.0242966751918, "eval_f1": 91.04859335038363, "eval_loss": 0.06896140426397324, "eval_runtime": 4.614, "eval_samples_per_second": 216.732, "step": 1600 }, { "epoch": 0.16, "eval_accuracy": 93.60000000000001, "eval_average_metrics": 92.61151832460735, "eval_f1": 91.62303664921467, "eval_loss": 0.06234096363186836, "eval_runtime": 4.6818, "eval_samples_per_second": 213.593, "step": 1800 }, { "epoch": 0.18, "learning_rate": 0.0002823617602963224, "loss": 0.0746, "step": 2000 }, { "epoch": 0.18, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.23333333333332, "eval_f1": 92.26666666666667, "eval_loss": 0.0571160614490509, "eval_runtime": 4.7897, "eval_samples_per_second": 208.783, "step": 2000 }, { "epoch": 0.19, "eval_accuracy": 93.60000000000001, "eval_average_metrics": 92.54468085106384, "eval_f1": 91.48936170212767, "eval_loss": 0.059621669352054596, "eval_runtime": 4.6087, "eval_samples_per_second": 216.979, "step": 2200 }, { "epoch": 0.21, "eval_accuracy": 93.60000000000001, "eval_average_metrics": 92.36786703601109, "eval_f1": 91.13573407202216, "eval_loss": 0.06162749230861664, "eval_runtime": 4.7312, "eval_samples_per_second": 211.361, "step": 2400 }, { "epoch": 0.22, "learning_rate": 0.00027795220037040303, "loss": 0.0771, "step": 2500 }, { "epoch": 0.23, "eval_accuracy": 93.10000000000001, "eval_average_metrics": 91.91912751677853, "eval_f1": 90.73825503355705, "eval_loss": 0.06196223199367523, "eval_runtime": 4.4778, "eval_samples_per_second": 223.322, "step": 2600 }, { "epoch": 0.25, "eval_accuracy": 93.7, "eval_average_metrics": 92.72155963302754, "eval_f1": 91.74311926605506, "eval_loss": 0.059104129672050476, "eval_runtime": 4.462, "eval_samples_per_second": 224.113, "step": 2800 }, { "epoch": 0.26, "learning_rate": 0.0002735426404444836, "loss": 0.0764, "step": 3000 }, { "epoch": 0.26, "eval_accuracy": 94.1, "eval_average_metrics": 93.1633069828722, "eval_f1": 92.2266139657444, "eval_loss": 0.059123676270246506, "eval_runtime": 4.5787, "eval_samples_per_second": 218.4, "step": 3000 }, { "epoch": 0.28, "eval_accuracy": 91.5, "eval_average_metrics": 90.49660074165637, "eval_f1": 89.49320148331273, "eval_loss": 0.07883985340595245, "eval_runtime": 4.5148, "eval_samples_per_second": 221.496, "step": 3200 }, { "epoch": 0.3, "eval_accuracy": 93.10000000000001, "eval_average_metrics": 92.02837483617301, "eval_f1": 90.956749672346, "eval_loss": 0.06356123834848404, "eval_runtime": 4.5836, "eval_samples_per_second": 218.171, "step": 3400 }, { "epoch": 0.31, "learning_rate": 0.0002691330805185642, "loss": 0.0732, "step": 3500 }, { "epoch": 0.32, "eval_accuracy": 93.7, "eval_average_metrics": 92.598987854251, "eval_f1": 91.49797570850201, "eval_loss": 0.058661118149757385, "eval_runtime": 4.581, "eval_samples_per_second": 218.292, "step": 3600 }, { "epoch": 0.34, "eval_accuracy": 93.10000000000001, "eval_average_metrics": 92.00454545454545, "eval_f1": 90.90909090909089, "eval_loss": 0.06345341354608536, "eval_runtime": 4.6337, "eval_samples_per_second": 215.808, "step": 3800 }, { "epoch": 0.35, "learning_rate": 0.00026472352059264486, "loss": 0.0745, "step": 4000 }, { "epoch": 0.35, "eval_accuracy": 92.7, "eval_average_metrics": 91.72389100126742, "eval_f1": 90.74778200253485, "eval_loss": 0.07112478464841843, "eval_runtime": 4.5897, "eval_samples_per_second": 217.881, "step": 4000 }, { "epoch": 0.37, "eval_accuracy": 92.4, "eval_average_metrics": 91.38987341772153, "eval_f1": 90.37974683544304, "eval_loss": 0.06926184892654419, "eval_runtime": 4.5334, "eval_samples_per_second": 220.585, "step": 4200 }, { "epoch": 0.39, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.2021505376344, "eval_f1": 92.20430107526882, "eval_loss": 0.06072888895869255, "eval_runtime": 4.7545, "eval_samples_per_second": 210.329, "step": 4400 }, { "epoch": 0.4, "learning_rate": 0.00026031396066672545, "loss": 0.0766, "step": 4500 }, { "epoch": 0.41, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 92.8779038718291, "eval_f1": 91.85580774365822, "eval_loss": 0.06117413192987442, "eval_runtime": 4.5554, "eval_samples_per_second": 219.52, "step": 4600 }, { "epoch": 0.42, "eval_accuracy": 94.0, "eval_average_metrics": 93.13402061855669, "eval_f1": 92.2680412371134, "eval_loss": 0.060421667993068695, "eval_runtime": 4.5444, "eval_samples_per_second": 220.049, "step": 4800 }, { "epoch": 0.44, "learning_rate": 0.00025590440074080604, "loss": 0.0729, "step": 5000 }, { "epoch": 0.44, "eval_accuracy": 93.0, "eval_average_metrics": 91.95454545454547, "eval_f1": 90.90909090909092, "eval_loss": 0.06169410049915314, "eval_runtime": 4.6688, "eval_samples_per_second": 214.188, "step": 5000 }, { "epoch": 0.46, "eval_accuracy": 94.3, "eval_average_metrics": 93.3244966442953, "eval_f1": 92.34899328859059, "eval_loss": 0.0613214485347271, "eval_runtime": 4.5049, "eval_samples_per_second": 221.979, "step": 5200 }, { "epoch": 0.48, "eval_accuracy": 93.7, "eval_average_metrics": 92.71070959264125, "eval_f1": 91.72141918528251, "eval_loss": 0.06161003187298775, "eval_runtime": 4.5685, "eval_samples_per_second": 218.891, "step": 5400 }, { "epoch": 0.49, "learning_rate": 0.0002514948408148867, "loss": 0.071, "step": 5500 }, { "epoch": 0.49, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 92.95262123197902, "eval_f1": 92.00524246395806, "eval_loss": 0.06118384748697281, "eval_runtime": 4.564, "eval_samples_per_second": 219.107, "step": 5600 }, { "epoch": 0.51, "eval_accuracy": 93.0, "eval_average_metrics": 92.0470737913486, "eval_f1": 91.09414758269719, "eval_loss": 0.06946446746587753, "eval_runtime": 4.5787, "eval_samples_per_second": 218.4, "step": 5800 }, { "epoch": 0.53, "learning_rate": 0.0002470852808889673, "loss": 0.0748, "step": 6000 }, { "epoch": 0.53, "eval_accuracy": 94.6, "eval_average_metrics": 93.7095744680851, "eval_f1": 92.81914893617021, "eval_loss": 0.05765092372894287, "eval_runtime": 4.5272, "eval_samples_per_second": 220.889, "step": 6000 }, { "epoch": 0.55, "eval_accuracy": 94.3, "eval_average_metrics": 93.3244966442953, "eval_f1": 92.34899328859059, "eval_loss": 0.05894589051604271, "eval_runtime": 4.6099, "eval_samples_per_second": 216.924, "step": 6200 }, { "epoch": 0.56, "eval_accuracy": 93.7, "eval_average_metrics": 92.598987854251, "eval_f1": 91.49797570850201, "eval_loss": 0.061102479696273804, "eval_runtime": 4.6948, "eval_samples_per_second": 213.001, "step": 6400 }, { "epoch": 0.57, "learning_rate": 0.00024267572096304786, "loss": 0.074, "step": 6500 }, { "epoch": 0.58, "eval_accuracy": 93.0, "eval_average_metrics": 92.03571428571429, "eval_f1": 91.07142857142858, "eval_loss": 0.06452207267284393, "eval_runtime": 4.6106, "eval_samples_per_second": 216.891, "step": 6600 }, { "epoch": 0.6, "eval_accuracy": 93.5, "eval_average_metrics": 92.55645161290323, "eval_f1": 91.61290322580645, "eval_loss": 0.05938281863927841, "eval_runtime": 4.5228, "eval_samples_per_second": 221.102, "step": 6800 }, { "epoch": 0.62, "learning_rate": 0.00023826616103712848, "loss": 0.0738, "step": 7000 }, { "epoch": 0.62, "eval_accuracy": 94.6, "eval_average_metrics": 93.77519582245431, "eval_f1": 92.95039164490862, "eval_loss": 0.057858582586050034, "eval_runtime": 4.5704, "eval_samples_per_second": 218.797, "step": 7000 }, { "epoch": 0.63, "eval_accuracy": 94.6, "eval_average_metrics": 93.71909814323607, "eval_f1": 92.83819628647215, "eval_loss": 0.05671229586005211, "eval_runtime": 4.4966, "eval_samples_per_second": 222.39, "step": 7200 }, { "epoch": 0.65, "eval_accuracy": 94.5, "eval_average_metrics": 93.64580602883355, "eval_f1": 92.7916120576671, "eval_loss": 0.059491805732250214, "eval_runtime": 4.5973, "eval_samples_per_second": 217.521, "step": 7400 }, { "epoch": 0.66, "learning_rate": 0.00023385660111120907, "loss": 0.0746, "step": 7500 }, { "epoch": 0.67, "eval_accuracy": 94.5, "eval_average_metrics": 93.53879892037787, "eval_f1": 92.57759784075573, "eval_loss": 0.057486891746520996, "eval_runtime": 4.6372, "eval_samples_per_second": 215.649, "step": 7600 }, { "epoch": 0.69, "eval_accuracy": 94.69999999999999, "eval_average_metrics": 93.81194926568757, "eval_f1": 92.92389853137516, "eval_loss": 0.05628298968076706, "eval_runtime": 4.6937, "eval_samples_per_second": 213.051, "step": 7800 }, { "epoch": 0.71, "learning_rate": 0.0002294470411852897, "loss": 0.0762, "step": 8000 }, { "epoch": 0.71, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.4566844919786, "eval_f1": 92.51336898395722, "eval_loss": 0.05849047377705574, "eval_runtime": 4.6139, "eval_samples_per_second": 216.737, "step": 8000 }, { "epoch": 0.72, "eval_accuracy": 94.69999999999999, "eval_average_metrics": 93.86773981603153, "eval_f1": 93.03547963206307, "eval_loss": 0.056792281568050385, "eval_runtime": 4.6916, "eval_samples_per_second": 213.147, "step": 8200 }, { "epoch": 0.74, "eval_accuracy": 94.5, "eval_average_metrics": 93.59794156706508, "eval_f1": 92.69588313413014, "eval_loss": 0.05638590082526207, "eval_runtime": 4.6952, "eval_samples_per_second": 212.982, "step": 8400 }, { "epoch": 0.75, "learning_rate": 0.0002250374812593703, "loss": 0.0726, "step": 8500 }, { "epoch": 0.76, "eval_accuracy": 95.0, "eval_average_metrics": 94.22774869109946, "eval_f1": 93.45549738219894, "eval_loss": 0.055720701813697815, "eval_runtime": 4.5004, "eval_samples_per_second": 222.204, "step": 8600 }, { "epoch": 0.78, "eval_accuracy": 94.0, "eval_average_metrics": 93.08355091383812, "eval_f1": 92.16710182767625, "eval_loss": 0.06084197014570236, "eval_runtime": 4.5822, "eval_samples_per_second": 218.238, "step": 8800 }, { "epoch": 0.79, "learning_rate": 0.0002206279213334509, "loss": 0.0734, "step": 9000 }, { "epoch": 0.79, "eval_accuracy": 93.10000000000001, "eval_average_metrics": 92.14386973180078, "eval_f1": 91.18773946360155, "eval_loss": 0.06530317664146423, "eval_runtime": 4.5035, "eval_samples_per_second": 222.05, "step": 9000 }, { "epoch": 0.81, "eval_accuracy": 93.8, "eval_average_metrics": 92.76666666666665, "eval_f1": 91.73333333333332, "eval_loss": 0.05946441367268562, "eval_runtime": 4.8109, "eval_samples_per_second": 207.861, "step": 9200 }, { "epoch": 0.83, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.4566844919786, "eval_f1": 92.51336898395722, "eval_loss": 0.059339020401239395, "eval_runtime": 4.5265, "eval_samples_per_second": 220.922, "step": 9400 }, { "epoch": 0.84, "learning_rate": 0.00021621836140753152, "loss": 0.0731, "step": 9500 }, { "epoch": 0.85, "eval_accuracy": 92.60000000000001, "eval_average_metrics": 91.64005037783375, "eval_f1": 90.6801007556675, "eval_loss": 0.07186109572649002, "eval_runtime": 4.5431, "eval_samples_per_second": 220.114, "step": 9600 }, { "epoch": 0.86, "eval_accuracy": 94.1, "eval_average_metrics": 93.1633069828722, "eval_f1": 92.2266139657444, "eval_loss": 0.05946135148406029, "eval_runtime": 4.5655, "eval_samples_per_second": 219.036, "step": 9800 }, { "epoch": 0.88, "learning_rate": 0.0002118088014816121, "loss": 0.0733, "step": 10000 }, { "epoch": 0.88, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 92.95262123197902, "eval_f1": 92.00524246395806, "eval_loss": 0.06076710671186447, "eval_runtime": 4.5407, "eval_samples_per_second": 220.229, "step": 10000 }, { "epoch": 0.9, "eval_accuracy": 94.3, "eval_average_metrics": 93.34492656875835, "eval_f1": 92.3898531375167, "eval_loss": 0.055939000099897385, "eval_runtime": 4.4996, "eval_samples_per_second": 222.24, "step": 10200 }, { "epoch": 0.92, "eval_accuracy": 94.8, "eval_average_metrics": 93.96084656084656, "eval_f1": 93.12169312169313, "eval_loss": 0.05636580288410187, "eval_runtime": 4.4643, "eval_samples_per_second": 223.998, "step": 10400 }, { "epoch": 0.93, "learning_rate": 0.00020739924155569272, "loss": 0.0738, "step": 10500 }, { "epoch": 0.93, "eval_accuracy": 93.8, "eval_average_metrics": 92.71081081081081, "eval_f1": 91.62162162162161, "eval_loss": 0.059175312519073486, "eval_runtime": 4.7877, "eval_samples_per_second": 208.867, "step": 10600 }, { "epoch": 0.95, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 92.91026490066224, "eval_f1": 91.9205298013245, "eval_loss": 0.059644319117069244, "eval_runtime": 4.6781, "eval_samples_per_second": 213.761, "step": 10800 }, { "epoch": 0.97, "learning_rate": 0.0002029896816297733, "loss": 0.0752, "step": 11000 }, { "epoch": 0.97, "eval_accuracy": 93.60000000000001, "eval_average_metrics": 92.622454308094, "eval_f1": 91.644908616188, "eval_loss": 0.061212606728076935, "eval_runtime": 4.4874, "eval_samples_per_second": 222.845, "step": 11000 }, { "epoch": 0.99, "eval_accuracy": 94.3, "eval_average_metrics": 93.41474442988203, "eval_f1": 92.52948885976409, "eval_loss": 0.059587035328149796, "eval_runtime": 4.5784, "eval_samples_per_second": 218.418, "step": 11200 }, { "epoch": 1.01, "eval_accuracy": 94.1, "eval_average_metrics": 93.2533462033462, "eval_f1": 92.4066924066924, "eval_loss": 0.060919877141714096, "eval_runtime": 4.5416, "eval_samples_per_second": 220.185, "step": 11400 }, { "epoch": 1.01, "learning_rate": 0.00019858012170385393, "loss": 0.0716, "step": 11500 }, { "epoch": 1.02, "eval_accuracy": 94.1, "eval_average_metrics": 93.11141522029372, "eval_f1": 92.12283044058745, "eval_loss": 0.05961688980460167, "eval_runtime": 4.5998, "eval_samples_per_second": 217.402, "step": 11600 }, { "epoch": 1.04, "eval_accuracy": 94.0, "eval_average_metrics": 93.0212201591512, "eval_f1": 92.04244031830238, "eval_loss": 0.06122226640582085, "eval_runtime": 4.6213, "eval_samples_per_second": 216.391, "step": 11800 }, { "epoch": 1.06, "learning_rate": 0.00019417056177793455, "loss": 0.0713, "step": 12000 }, { "epoch": 1.06, "eval_accuracy": 94.0, "eval_average_metrics": 92.96774193548387, "eval_f1": 91.93548387096774, "eval_loss": 0.06119931861758232, "eval_runtime": 4.5888, "eval_samples_per_second": 217.92, "step": 12000 }, { "epoch": 1.08, "eval_accuracy": 94.3, "eval_average_metrics": 93.31419919246298, "eval_f1": 92.32839838492598, "eval_loss": 0.05847727879881859, "eval_runtime": 4.4633, "eval_samples_per_second": 224.05, "step": 12200 }, { "epoch": 1.09, "eval_accuracy": 93.4, "eval_average_metrics": 92.39190600522193, "eval_f1": 91.38381201044386, "eval_loss": 0.06247144192457199, "eval_runtime": 4.5667, "eval_samples_per_second": 218.978, "step": 12400 }, { "epoch": 1.1, "learning_rate": 0.00018976100185201514, "loss": 0.0687, "step": 12500 }, { "epoch": 1.11, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 93.04475032010242, "eval_f1": 92.18950064020484, "eval_loss": 0.0635332465171814, "eval_runtime": 4.5944, "eval_samples_per_second": 217.654, "step": 12600 }, { "epoch": 1.13, "eval_accuracy": 94.0, "eval_average_metrics": 93.05263157894737, "eval_f1": 92.10526315789474, "eval_loss": 0.06063272804021835, "eval_runtime": 4.5058, "eval_samples_per_second": 221.934, "step": 12800 }, { "epoch": 1.15, "learning_rate": 0.00018535144192609576, "loss": 0.0711, "step": 13000 }, { "epoch": 1.15, "eval_accuracy": 94.0, "eval_average_metrics": 93.12403100775194, "eval_f1": 92.24806201550389, "eval_loss": 0.06045162305235863, "eval_runtime": 4.6598, "eval_samples_per_second": 214.601, "step": 13000 }, { "epoch": 1.16, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.50606860158311, "eval_f1": 92.61213720316623, "eval_loss": 0.06117108836770058, "eval_runtime": 4.501, "eval_samples_per_second": 222.172, "step": 13200 }, { "epoch": 1.18, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.4962962962963, "eval_f1": 92.5925925925926, "eval_loss": 0.05846463143825531, "eval_runtime": 4.4931, "eval_samples_per_second": 222.561, "step": 13400 }, { "epoch": 1.19, "learning_rate": 0.00018094188200017637, "loss": 0.0694, "step": 13500 }, { "epoch": 1.2, "eval_accuracy": 94.1, "eval_average_metrics": 93.06889338731443, "eval_f1": 92.03778677462888, "eval_loss": 0.05917409434914589, "eval_runtime": 4.6082, "eval_samples_per_second": 217.003, "step": 13600 }, { "epoch": 1.22, "eval_accuracy": 93.8, "eval_average_metrics": 92.85300261096606, "eval_f1": 91.9060052219321, "eval_loss": 0.06280769407749176, "eval_runtime": 4.5282, "eval_samples_per_second": 220.836, "step": 13800 }, { "epoch": 1.23, "learning_rate": 0.00017653232207425696, "loss": 0.0741, "step": 14000 }, { "epoch": 1.23, "eval_accuracy": 93.60000000000001, "eval_average_metrics": 92.65492227979274, "eval_f1": 91.70984455958549, "eval_loss": 0.06333824247121811, "eval_runtime": 4.4743, "eval_samples_per_second": 223.497, "step": 14000 }, { "epoch": 1.25, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 92.94211563731932, "eval_f1": 91.98423127463865, "eval_loss": 0.06064913421869278, "eval_runtime": 4.6765, "eval_samples_per_second": 213.836, "step": 14200 }, { "epoch": 1.27, "eval_accuracy": 92.60000000000001, "eval_average_metrics": 91.62828282828283, "eval_f1": 90.65656565656566, "eval_loss": 0.07161322236061096, "eval_runtime": 4.5138, "eval_samples_per_second": 221.545, "step": 14400 }, { "epoch": 1.28, "learning_rate": 0.00017212276214833758, "loss": 0.0715, "step": 14500 }, { "epoch": 1.29, "eval_accuracy": 93.7, "eval_average_metrics": 92.6998023715415, "eval_f1": 91.699604743083, "eval_loss": 0.06242042034864426, "eval_runtime": 4.764, "eval_samples_per_second": 209.909, "step": 14600 }, { "epoch": 1.31, "eval_accuracy": 93.7, "eval_average_metrics": 92.73235294117647, "eval_f1": 91.76470588235294, "eval_loss": 0.0626644566655159, "eval_runtime": 4.4732, "eval_samples_per_second": 223.555, "step": 14800 }, { "epoch": 1.32, "learning_rate": 0.0001677132022224182, "loss": 0.0714, "step": 15000 }, { "epoch": 1.32, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.54464751958224, "eval_f1": 92.68929503916449, "eval_loss": 0.05990656465291977, "eval_runtime": 4.5922, "eval_samples_per_second": 217.762, "step": 15000 }, { "epoch": 1.34, "eval_accuracy": 94.6, "eval_average_metrics": 93.73799472295514, "eval_f1": 92.87598944591029, "eval_loss": 0.060957495123147964, "eval_runtime": 4.4536, "eval_samples_per_second": 224.537, "step": 15200 }, { "epoch": 1.36, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.51578947368421, "eval_f1": 92.63157894736842, "eval_loss": 0.06167261675000191, "eval_runtime": 4.4865, "eval_samples_per_second": 222.89, "step": 15400 }, { "epoch": 1.37, "learning_rate": 0.0001633036422964988, "loss": 0.0707, "step": 15500 }, { "epoch": 1.38, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.51578947368421, "eval_f1": 92.63157894736842, "eval_loss": 0.061066027730703354, "eval_runtime": 4.5716, "eval_samples_per_second": 218.74, "step": 15600 }, { "epoch": 1.39, "eval_accuracy": 94.1, "eval_average_metrics": 93.1633069828722, "eval_f1": 92.2266139657444, "eval_loss": 0.06235107034444809, "eval_runtime": 4.499, "eval_samples_per_second": 222.27, "step": 15800 }, { "epoch": 1.41, "learning_rate": 0.00015889408237057938, "loss": 0.0709, "step": 16000 }, { "epoch": 1.41, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.47659574468085, "eval_f1": 92.55319148936171, "eval_loss": 0.06194847822189331, "eval_runtime": 4.5231, "eval_samples_per_second": 221.086, "step": 16000 }, { "epoch": 1.43, "eval_accuracy": 94.6, "eval_average_metrics": 93.76596858638743, "eval_f1": 92.93193717277488, "eval_loss": 0.05966123938560486, "eval_runtime": 4.637, "eval_samples_per_second": 215.656, "step": 16200 }, { "epoch": 1.45, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.44664879356569, "eval_f1": 92.49329758713138, "eval_loss": 0.06104936823248863, "eval_runtime": 4.5374, "eval_samples_per_second": 220.391, "step": 16400 }, { "epoch": 1.46, "learning_rate": 0.00015448452244466002, "loss": 0.0729, "step": 16500 }, { "epoch": 1.46, "eval_accuracy": 94.69999999999999, "eval_average_metrics": 93.87686762778506, "eval_f1": 93.05373525557013, "eval_loss": 0.06205834820866585, "eval_runtime": 4.4838, "eval_samples_per_second": 223.023, "step": 16600 }, { "epoch": 1.48, "eval_accuracy": 94.6, "eval_average_metrics": 93.78437500000001, "eval_f1": 92.96875000000001, "eval_loss": 0.06089754402637482, "eval_runtime": 4.5038, "eval_samples_per_second": 222.035, "step": 16800 }, { "epoch": 1.5, "learning_rate": 0.00015007496251874061, "loss": 0.07, "step": 17000 }, { "epoch": 1.5, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.36288659793814, "eval_f1": 92.52577319587628, "eval_loss": 0.06112566590309143, "eval_runtime": 4.6026, "eval_samples_per_second": 217.269, "step": 17000 }, { "epoch": 1.52, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.48647214854111, "eval_f1": 92.57294429708222, "eval_loss": 0.06089947372674942, "eval_runtime": 4.5389, "eval_samples_per_second": 220.318, "step": 17200 }, { "epoch": 1.53, "eval_accuracy": 94.1, "eval_average_metrics": 93.24354838709678, "eval_f1": 92.38709677419355, "eval_loss": 0.06110972911119461, "eval_runtime": 4.6859, "eval_samples_per_second": 213.405, "step": 17400 }, { "epoch": 1.54, "learning_rate": 0.00014566540259282123, "loss": 0.0669, "step": 17500 }, { "epoch": 1.55, "eval_accuracy": 94.3, "eval_average_metrics": 93.38513870541613, "eval_f1": 92.47027741083225, "eval_loss": 0.06174538657069206, "eval_runtime": 4.6674, "eval_samples_per_second": 214.254, "step": 17600 }, { "epoch": 1.57, "eval_accuracy": 94.6, "eval_average_metrics": 93.7095744680851, "eval_f1": 92.81914893617021, "eval_loss": 0.059681929647922516, "eval_runtime": 4.6196, "eval_samples_per_second": 216.471, "step": 17800 }, { "epoch": 1.59, "learning_rate": 0.00014125584266690182, "loss": 0.07, "step": 18000 }, { "epoch": 1.59, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.29422572178477, "eval_f1": 92.38845144356955, "eval_loss": 0.061346184462308884, "eval_runtime": 4.4984, "eval_samples_per_second": 222.301, "step": 18000 }, { "epoch": 1.61, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.28421052631577, "eval_f1": 92.36842105263158, "eval_loss": 0.06077203154563904, "eval_runtime": 4.454, "eval_samples_per_second": 224.518, "step": 18200 }, { "epoch": 1.62, "eval_accuracy": 94.1, "eval_average_metrics": 93.13233731739707, "eval_f1": 92.16467463479415, "eval_loss": 0.05959217995405197, "eval_runtime": 4.588, "eval_samples_per_second": 217.959, "step": 18400 }, { "epoch": 1.63, "learning_rate": 0.00013684628274098244, "loss": 0.069, "step": 18500 }, { "epoch": 1.64, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.46666666666665, "eval_f1": 92.53333333333332, "eval_loss": 0.06017257645726204, "eval_runtime": 4.483, "eval_samples_per_second": 223.066, "step": 18600 }, { "epoch": 1.66, "eval_accuracy": 94.5, "eval_average_metrics": 93.58821571238349, "eval_f1": 92.67643142476697, "eval_loss": 0.058851905167102814, "eval_runtime": 4.5048, "eval_samples_per_second": 221.985, "step": 18800 }, { "epoch": 1.68, "learning_rate": 0.00013243672281506306, "loss": 0.0713, "step": 19000 }, { "epoch": 1.68, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 92.97346805736636, "eval_f1": 92.04693611473273, "eval_loss": 0.06167756766080856, "eval_runtime": 4.4859, "eval_samples_per_second": 222.921, "step": 19000 }, { "epoch": 1.69, "eval_accuracy": 93.7, "eval_average_metrics": 92.6998023715415, "eval_f1": 91.699604743083, "eval_loss": 0.06253690272569656, "eval_runtime": 4.582, "eval_samples_per_second": 218.244, "step": 19200 }, { "epoch": 1.71, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 93.00433376455368, "eval_f1": 92.10866752910736, "eval_loss": 0.06255872547626495, "eval_runtime": 4.5188, "eval_samples_per_second": 221.296, "step": 19400 }, { "epoch": 1.72, "learning_rate": 0.00012802716288914365, "loss": 0.0699, "step": 19500 }, { "epoch": 1.73, "eval_accuracy": 94.3, "eval_average_metrics": 93.40492772667542, "eval_f1": 92.50985545335085, "eval_loss": 0.062451381236314774, "eval_runtime": 4.5919, "eval_samples_per_second": 217.773, "step": 19600 }, { "epoch": 1.75, "eval_accuracy": 94.0, "eval_average_metrics": 93.01063829787235, "eval_f1": 92.0212765957447, "eval_loss": 0.06319490820169449, "eval_runtime": 4.591, "eval_samples_per_second": 217.817, "step": 19800 }, { "epoch": 1.76, "learning_rate": 0.00012361760296322426, "loss": 0.0698, "step": 20000 }, { "epoch": 1.76, "eval_accuracy": 93.5, "eval_average_metrics": 92.51271186440678, "eval_f1": 91.52542372881356, "eval_loss": 0.06364640593528748, "eval_runtime": 4.5171, "eval_samples_per_second": 221.382, "step": 20000 }, { "epoch": 1.78, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 92.99409857328145, "eval_f1": 92.0881971465629, "eval_loss": 0.06635148823261261, "eval_runtime": 4.6206, "eval_samples_per_second": 216.422, "step": 20200 }, { "epoch": 1.8, "eval_accuracy": 94.1, "eval_average_metrics": 93.11141522029372, "eval_f1": 92.12283044058745, "eval_loss": 0.0606299452483654, "eval_runtime": 4.4605, "eval_samples_per_second": 224.19, "step": 20400 }, { "epoch": 1.81, "learning_rate": 0.00011920804303730487, "loss": 0.0703, "step": 20500 }, { "epoch": 1.82, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.32395833333332, "eval_f1": 92.44791666666666, "eval_loss": 0.060722097754478455, "eval_runtime": 4.5249, "eval_samples_per_second": 221.001, "step": 20600 }, { "epoch": 1.83, "eval_accuracy": 93.8, "eval_average_metrics": 92.76666666666665, "eval_f1": 91.73333333333332, "eval_loss": 0.05862819775938988, "eval_runtime": 4.5187, "eval_samples_per_second": 221.304, "step": 20800 }, { "epoch": 1.85, "learning_rate": 0.00011479848311138547, "loss": 0.0698, "step": 21000 }, { "epoch": 1.85, "eval_accuracy": 93.8, "eval_average_metrics": 92.79947089947089, "eval_f1": 91.7989417989418, "eval_loss": 0.06128830835223198, "eval_runtime": 4.521, "eval_samples_per_second": 221.188, "step": 21000 }, { "epoch": 1.87, "eval_accuracy": 93.8, "eval_average_metrics": 92.87402597402597, "eval_f1": 91.94805194805194, "eval_loss": 0.06374780088663101, "eval_runtime": 4.4879, "eval_samples_per_second": 222.822, "step": 21200 }, { "epoch": 1.89, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 92.88874833555259, "eval_f1": 91.87749667110519, "eval_loss": 0.06154455617070198, "eval_runtime": 4.6011, "eval_samples_per_second": 217.337, "step": 21400 }, { "epoch": 1.9, "learning_rate": 0.00011038892318546609, "loss": 0.0709, "step": 21500 }, { "epoch": 1.9, "eval_accuracy": 94.5, "eval_average_metrics": 93.57843791722297, "eval_f1": 92.65687583444593, "eval_loss": 0.060043178498744965, "eval_runtime": 4.46, "eval_samples_per_second": 224.215, "step": 21600 }, { "epoch": 1.92, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 92.93155467720685, "eval_f1": 91.9631093544137, "eval_loss": 0.061132512986660004, "eval_runtime": 4.4987, "eval_samples_per_second": 222.287, "step": 21800 }, { "epoch": 1.94, "learning_rate": 0.00010597936325954669, "loss": 0.0695, "step": 22000 }, { "epoch": 1.94, "eval_accuracy": 93.5, "eval_average_metrics": 92.56724581724582, "eval_f1": 91.63449163449164, "eval_loss": 0.06395059078931808, "eval_runtime": 4.6548, "eval_samples_per_second": 214.832, "step": 22000 }, { "epoch": 1.96, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.29422572178477, "eval_f1": 92.38845144356955, "eval_loss": 0.06141780689358711, "eval_runtime": 4.4836, "eval_samples_per_second": 223.034, "step": 22200 }, { "epoch": 1.98, "eval_accuracy": 94.5, "eval_average_metrics": 93.65522875816993, "eval_f1": 92.81045751633987, "eval_loss": 0.058759015053510666, "eval_runtime": 4.5162, "eval_samples_per_second": 221.426, "step": 22400 }, { "epoch": 1.98, "learning_rate": 0.0001015698033336273, "loss": 0.0715, "step": 22500 }, { "epoch": 1.99, "eval_accuracy": 93.89999999999999, "eval_average_metrics": 92.97346805736636, "eval_f1": 92.04693611473273, "eval_loss": 0.06228160858154297, "eval_runtime": 4.4726, "eval_samples_per_second": 223.582, "step": 22600 }, { "epoch": 2.01, "eval_accuracy": 94.6, "eval_average_metrics": 93.73799472295514, "eval_f1": 92.87598944591029, "eval_loss": 0.05991463363170624, "eval_runtime": 4.5003, "eval_samples_per_second": 222.206, "step": 22800 }, { "epoch": 2.03, "learning_rate": 9.71602434077079e-05, "loss": 0.0682, "step": 23000 }, { "epoch": 2.03, "eval_accuracy": 94.0, "eval_average_metrics": 93.1038961038961, "eval_f1": 92.20779220779221, "eval_loss": 0.061682794243097305, "eval_runtime": 4.611, "eval_samples_per_second": 216.874, "step": 23000 }, { "epoch": 2.05, "eval_accuracy": 93.5, "eval_average_metrics": 92.55645161290323, "eval_f1": 91.61290322580645, "eval_loss": 0.06373216211795807, "eval_runtime": 4.6044, "eval_samples_per_second": 217.186, "step": 23200 }, { "epoch": 2.06, "eval_accuracy": 94.3, "eval_average_metrics": 93.34492656875835, "eval_f1": 92.3898531375167, "eval_loss": 0.05869932472705841, "eval_runtime": 4.4706, "eval_samples_per_second": 223.684, "step": 23400 }, { "epoch": 2.07, "learning_rate": 9.27506834817885e-05, "loss": 0.0652, "step": 23500 }, { "epoch": 2.08, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.50606860158311, "eval_f1": 92.61213720316623, "eval_loss": 0.06166525185108185, "eval_runtime": 4.5725, "eval_samples_per_second": 218.699, "step": 23600 }, { "epoch": 2.1, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.33376623376623, "eval_f1": 92.46753246753246, "eval_loss": 0.06055561453104019, "eval_runtime": 4.5492, "eval_samples_per_second": 219.818, "step": 23800 }, { "epoch": 2.12, "learning_rate": 8.834112355586911e-05, "loss": 0.0691, "step": 24000 }, { "epoch": 2.12, "eval_accuracy": 93.7, "eval_average_metrics": 92.75377113133939, "eval_f1": 91.8075422626788, "eval_loss": 0.06339309364557266, "eval_runtime": 4.6365, "eval_samples_per_second": 215.678, "step": 24000 }, { "epoch": 2.13, "eval_accuracy": 94.1, "eval_average_metrics": 93.1633069828722, "eval_f1": 92.2266139657444, "eval_loss": 0.06319531798362732, "eval_runtime": 4.5722, "eval_samples_per_second": 218.712, "step": 24200 }, { "epoch": 2.15, "eval_accuracy": 94.1, "eval_average_metrics": 93.14271523178809, "eval_f1": 92.18543046357617, "eval_loss": 0.060979247093200684, "eval_runtime": 4.4363, "eval_samples_per_second": 225.412, "step": 24400 }, { "epoch": 2.16, "learning_rate": 8.393156362994973e-05, "loss": 0.0679, "step": 24500 }, { "epoch": 2.17, "eval_accuracy": 94.3, "eval_average_metrics": 93.38513870541613, "eval_f1": 92.47027741083225, "eval_loss": 0.061841148883104324, "eval_runtime": 4.4945, "eval_samples_per_second": 222.493, "step": 24600 }, { "epoch": 2.19, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.51578947368421, "eval_f1": 92.63157894736842, "eval_loss": 0.06020021066069603, "eval_runtime": 4.6482, "eval_samples_per_second": 215.136, "step": 24800 }, { "epoch": 2.2, "learning_rate": 7.952200370403033e-05, "loss": 0.0678, "step": 25000 }, { "epoch": 2.2, "eval_accuracy": 94.69999999999999, "eval_average_metrics": 93.86773981603153, "eval_f1": 93.03547963206307, "eval_loss": 0.061626460403203964, "eval_runtime": 4.4847, "eval_samples_per_second": 222.982, "step": 25000 }, { "epoch": 2.22, "eval_accuracy": 94.3, "eval_average_metrics": 93.35505992010653, "eval_f1": 92.41011984021304, "eval_loss": 0.05932234972715378, "eval_runtime": 4.444, "eval_samples_per_second": 225.02, "step": 25200 }, { "epoch": 2.24, "eval_accuracy": 94.3, "eval_average_metrics": 93.35505992010653, "eval_f1": 92.41011984021304, "eval_loss": 0.05860959738492966, "eval_runtime": 4.4729, "eval_samples_per_second": 223.568, "step": 25400 }, { "epoch": 2.25, "learning_rate": 7.511244377811093e-05, "loss": 0.0687, "step": 25500 }, { "epoch": 2.26, "eval_accuracy": 94.6, "eval_average_metrics": 93.74736842105261, "eval_f1": 92.89473684210525, "eval_loss": 0.05995591729879379, "eval_runtime": 4.6311, "eval_samples_per_second": 215.933, "step": 25600 }, { "epoch": 2.28, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.51578947368421, "eval_f1": 92.63157894736842, "eval_loss": 0.06067919358611107, "eval_runtime": 4.4705, "eval_samples_per_second": 223.69, "step": 25800 }, { "epoch": 2.29, "learning_rate": 7.070288385219154e-05, "loss": 0.0665, "step": 26000 }, { "epoch": 2.29, "eval_accuracy": 94.6, "eval_average_metrics": 93.74736842105261, "eval_f1": 92.89473684210525, "eval_loss": 0.06090604141354561, "eval_runtime": 4.4777, "eval_samples_per_second": 223.33, "step": 26000 }, { "epoch": 2.31, "eval_accuracy": 94.5, "eval_average_metrics": 93.63633377135348, "eval_f1": 92.77266754270696, "eval_loss": 0.06175965070724487, "eval_runtime": 4.5456, "eval_samples_per_second": 219.993, "step": 26200 }, { "epoch": 2.33, "eval_accuracy": 94.1, "eval_average_metrics": 93.1937908496732, "eval_f1": 92.2875816993464, "eval_loss": 0.062108419835567474, "eval_runtime": 4.5414, "eval_samples_per_second": 220.196, "step": 26400 }, { "epoch": 2.34, "learning_rate": 6.629332392627216e-05, "loss": 0.0681, "step": 26500 }, { "epoch": 2.35, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.48647214854111, "eval_f1": 92.57294429708222, "eval_loss": 0.060741446912288666, "eval_runtime": 4.4624, "eval_samples_per_second": 224.096, "step": 26600 }, { "epoch": 2.36, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.4962962962963, "eval_f1": 92.5925925925926, "eval_loss": 0.06029416620731354, "eval_runtime": 4.5256, "eval_samples_per_second": 220.966, "step": 26800 }, { "epoch": 2.38, "learning_rate": 6.188376400035276e-05, "loss": 0.0667, "step": 27000 }, { "epoch": 2.38, "eval_accuracy": 94.69999999999999, "eval_average_metrics": 93.84933949801848, "eval_f1": 92.99867899603699, "eval_loss": 0.059210509061813354, "eval_runtime": 4.8741, "eval_samples_per_second": 205.167, "step": 27000 }, { "epoch": 2.4, "eval_accuracy": 94.3, "eval_average_metrics": 93.41474442988203, "eval_f1": 92.52948885976409, "eval_loss": 0.0605180561542511, "eval_runtime": 4.5293, "eval_samples_per_second": 220.783, "step": 27200 }, { "epoch": 2.42, "eval_accuracy": 94.5, "eval_average_metrics": 93.64580602883355, "eval_f1": 92.7916120576671, "eval_loss": 0.060811493545770645, "eval_runtime": 4.5424, "eval_samples_per_second": 220.147, "step": 27400 }, { "epoch": 2.43, "learning_rate": 5.747420407443336e-05, "loss": 0.0685, "step": 27500 }, { "epoch": 2.43, "eval_accuracy": 94.1, "eval_average_metrics": 93.11141522029372, "eval_f1": 92.12283044058745, "eval_loss": 0.05978462100028992, "eval_runtime": 4.4831, "eval_samples_per_second": 223.06, "step": 27600 }, { "epoch": 2.45, "eval_accuracy": 93.8, "eval_average_metrics": 92.87402597402597, "eval_f1": 91.94805194805194, "eval_loss": 0.06267183274030685, "eval_runtime": 4.4576, "eval_samples_per_second": 224.334, "step": 27800 }, { "epoch": 2.47, "learning_rate": 5.3064644148513973e-05, "loss": 0.0672, "step": 28000 }, { "epoch": 2.47, "eval_accuracy": 94.0, "eval_average_metrics": 93.06299212598425, "eval_f1": 92.1259842519685, "eval_loss": 0.061355073004961014, "eval_runtime": 4.5194, "eval_samples_per_second": 221.27, "step": 28000 }, { "epoch": 2.49, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.29422572178477, "eval_f1": 92.38845144356955, "eval_loss": 0.06131287291646004, "eval_runtime": 4.5837, "eval_samples_per_second": 218.165, "step": 28200 }, { "epoch": 2.5, "eval_accuracy": 94.0, "eval_average_metrics": 93.04221635883906, "eval_f1": 92.0844327176781, "eval_loss": 0.06105473265051842, "eval_runtime": 4.5101, "eval_samples_per_second": 221.726, "step": 28400 }, { "epoch": 2.51, "learning_rate": 4.8655084222594584e-05, "loss": 0.0656, "step": 28500 }, { "epoch": 2.52, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.25384615384615, "eval_f1": 92.3076923076923, "eval_loss": 0.06093791127204895, "eval_runtime": 4.6588, "eval_samples_per_second": 214.647, "step": 28600 }, { "epoch": 2.54, "eval_accuracy": 94.3, "eval_average_metrics": 93.40492772667542, "eval_f1": 92.50985545335085, "eval_loss": 0.061501096934080124, "eval_runtime": 4.5262, "eval_samples_per_second": 220.936, "step": 28800 }, { "epoch": 2.56, "learning_rate": 4.424552429667519e-05, "loss": 0.067, "step": 29000 }, { "epoch": 2.56, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.27414248021108, "eval_f1": 92.34828496042218, "eval_loss": 0.05971948057413101, "eval_runtime": 4.5243, "eval_samples_per_second": 221.027, "step": 29000 }, { "epoch": 2.58, "eval_accuracy": 93.60000000000001, "eval_average_metrics": 92.65492227979274, "eval_f1": 91.70984455958549, "eval_loss": 0.063376285135746, "eval_runtime": 4.6334, "eval_samples_per_second": 215.825, "step": 29200 }, { "epoch": 2.59, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.26402116402116, "eval_f1": 92.32804232804234, "eval_loss": 0.06081530451774597, "eval_runtime": 4.7045, "eval_samples_per_second": 212.561, "step": 29400 }, { "epoch": 2.6, "learning_rate": 3.98359643707558e-05, "loss": 0.0675, "step": 29500 }, { "epoch": 2.61, "eval_accuracy": 94.1, "eval_average_metrics": 93.18368283093054, "eval_f1": 92.26736566186108, "eval_loss": 0.062262628227472305, "eval_runtime": 4.6273, "eval_samples_per_second": 216.108, "step": 29600 }, { "epoch": 2.63, "eval_accuracy": 94.3, "eval_average_metrics": 93.3751655629139, "eval_f1": 92.45033112582782, "eval_loss": 0.06007382273674011, "eval_runtime": 4.5698, "eval_samples_per_second": 218.83, "step": 29800 }, { "epoch": 2.65, "learning_rate": 3.54264044448364e-05, "loss": 0.0682, "step": 30000 }, { "epoch": 2.65, "eval_accuracy": 94.1, "eval_average_metrics": 93.1633069828722, "eval_f1": 92.2266139657444, "eval_loss": 0.0607917495071888, "eval_runtime": 4.6423, "eval_samples_per_second": 215.411, "step": 30000 }, { "epoch": 2.66, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.5254593175853, "eval_f1": 92.6509186351706, "eval_loss": 0.06171978637576103, "eval_runtime": 4.4956, "eval_samples_per_second": 222.439, "step": 30200 }, { "epoch": 2.68, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.4566844919786, "eval_f1": 92.51336898395722, "eval_loss": 0.05954898148775101, "eval_runtime": 4.5069, "eval_samples_per_second": 221.881, "step": 30400 }, { "epoch": 2.69, "learning_rate": 3.1016844518917006e-05, "loss": 0.0684, "step": 30500 }, { "epoch": 2.7, "eval_accuracy": 94.5, "eval_average_metrics": 93.64580602883355, "eval_f1": 92.7916120576671, "eval_loss": 0.06073066592216492, "eval_runtime": 4.5568, "eval_samples_per_second": 219.452, "step": 30600 }, { "epoch": 2.72, "eval_accuracy": 94.5, "eval_average_metrics": 93.64580602883355, "eval_f1": 92.7916120576671, "eval_loss": 0.06212097778916359, "eval_runtime": 4.4991, "eval_samples_per_second": 222.265, "step": 30800 }, { "epoch": 2.73, "learning_rate": 2.6607284592997617e-05, "loss": 0.0644, "step": 31000 }, { "epoch": 2.73, "eval_accuracy": 94.5, "eval_average_metrics": 93.64580602883355, "eval_f1": 92.7916120576671, "eval_loss": 0.061464857310056686, "eval_runtime": 4.6313, "eval_samples_per_second": 215.924, "step": 31000 }, { "epoch": 2.75, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.28421052631577, "eval_f1": 92.36842105263158, "eval_loss": 0.06165764480829239, "eval_runtime": 4.4772, "eval_samples_per_second": 223.356, "step": 31200 }, { "epoch": 2.77, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.27414248021108, "eval_f1": 92.34828496042218, "eval_loss": 0.061222758144140244, "eval_runtime": 4.485, "eval_samples_per_second": 222.965, "step": 31400 }, { "epoch": 2.78, "learning_rate": 2.219772466707822e-05, "loss": 0.0656, "step": 31500 }, { "epoch": 2.79, "eval_accuracy": 94.3, "eval_average_metrics": 93.40492772667542, "eval_f1": 92.50985545335085, "eval_loss": 0.06175553798675537, "eval_runtime": 4.4473, "eval_samples_per_second": 224.857, "step": 31600 }, { "epoch": 2.8, "eval_accuracy": 94.0, "eval_average_metrics": 93.04221635883906, "eval_f1": 92.0844327176781, "eval_loss": 0.06141304597258568, "eval_runtime": 4.5384, "eval_samples_per_second": 220.341, "step": 31800 }, { "epoch": 2.82, "learning_rate": 1.778816474115883e-05, "loss": 0.0682, "step": 32000 }, { "epoch": 2.82, "eval_accuracy": 94.3, "eval_average_metrics": 93.39505928853755, "eval_f1": 92.49011857707511, "eval_loss": 0.06122256815433502, "eval_runtime": 4.532, "eval_samples_per_second": 220.652, "step": 32000 }, { "epoch": 2.84, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.5254593175853, "eval_f1": 92.6509186351706, "eval_loss": 0.06179660186171532, "eval_runtime": 4.5432, "eval_samples_per_second": 220.11, "step": 32200 }, { "epoch": 2.86, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.26402116402116, "eval_f1": 92.32804232804234, "eval_loss": 0.060935478657484055, "eval_runtime": 4.5308, "eval_samples_per_second": 220.712, "step": 32400 }, { "epoch": 2.87, "learning_rate": 1.3378604815239437e-05, "loss": 0.0628, "step": 32500 }, { "epoch": 2.88, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.51578947368421, "eval_f1": 92.63157894736842, "eval_loss": 0.06167520210146904, "eval_runtime": 4.5363, "eval_samples_per_second": 220.442, "step": 32600 }, { "epoch": 2.89, "eval_accuracy": 94.39999999999999, "eval_average_metrics": 93.51578947368421, "eval_f1": 92.63157894736842, "eval_loss": 0.061225228011608124, "eval_runtime": 4.5208, "eval_samples_per_second": 221.199, "step": 32800 }, { "epoch": 2.91, "learning_rate": 8.969044889320046e-06, "loss": 0.0659, "step": 33000 }, { "epoch": 2.91, "eval_accuracy": 94.3, "eval_average_metrics": 93.3751655629139, "eval_f1": 92.45033112582782, "eval_loss": 0.06039771810173988, "eval_runtime": 4.5643, "eval_samples_per_second": 219.093, "step": 33000 }, { "epoch": 2.93, "eval_accuracy": 94.3, "eval_average_metrics": 93.40492772667542, "eval_f1": 92.50985545335085, "eval_loss": 0.06096240133047104, "eval_runtime": 4.5827, "eval_samples_per_second": 218.214, "step": 33200 }, { "epoch": 2.95, "eval_accuracy": 94.3, "eval_average_metrics": 93.38513870541613, "eval_f1": 92.47027741083225, "eval_loss": 0.060673393309116364, "eval_runtime": 4.9126, "eval_samples_per_second": 203.559, "step": 33400 }, { "epoch": 2.95, "learning_rate": 4.559484963400652e-06, "loss": 0.0692, "step": 33500 }, { "epoch": 2.96, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.26402116402116, "eval_f1": 92.32804232804234, "eval_loss": 0.06072871759533882, "eval_runtime": 4.5081, "eval_samples_per_second": 221.824, "step": 33600 }, { "epoch": 2.98, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.26402116402116, "eval_f1": 92.32804232804234, "eval_loss": 0.06088118627667427, "eval_runtime": 4.513, "eval_samples_per_second": 221.581, "step": 33800 }, { "epoch": 3.0, "learning_rate": 1.4992503748125936e-07, "loss": 0.0654, "step": 34000 }, { "epoch": 3.0, "eval_accuracy": 94.19999999999999, "eval_average_metrics": 93.26402116402116, "eval_f1": 92.32804232804234, "eval_loss": 0.060787323862314224, "eval_runtime": 4.526, "eval_samples_per_second": 220.947, "step": 34000 }, { "epoch": 3.0, "step": 34017, "total_flos": 1.0629344517601075e+17, "train_loss": 0.07169761398949699, "train_runtime": 13428.6442, "train_samples_per_second": 81.061, "train_steps_per_second": 2.533 } ], "max_steps": 34017, "num_train_epochs": 3, "total_flos": 1.0629344517601075e+17, "trial_name": null, "trial_params": null }