| { | |
| "best_metric": 0.8358047604560852, | |
| "best_model_checkpoint": "./vit-eGTZANplus\\checkpoint-480", | |
| "epoch": 50.0, | |
| "eval_steps": 10, | |
| "global_step": 5350, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.09, | |
| "learning_rate": 0.00019962616822429908, | |
| "loss": 2.4098, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_accuracy": 0.14285714285714285, | |
| "eval_loss": 2.3848845958709717, | |
| "eval_runtime": 3.3561, | |
| "eval_samples_per_second": 56.316, | |
| "eval_steps_per_second": 7.151, | |
| "step": 10 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 0.00019925233644859814, | |
| "loss": 2.3376, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_accuracy": 0.21164021164021163, | |
| "eval_loss": 2.257709264755249, | |
| "eval_runtime": 3.2453, | |
| "eval_samples_per_second": 58.238, | |
| "eval_steps_per_second": 7.395, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "learning_rate": 0.0001988785046728972, | |
| "loss": 2.2047, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_accuracy": 0.2962962962962963, | |
| "eval_loss": 2.158203125, | |
| "eval_runtime": 2.9918, | |
| "eval_samples_per_second": 63.172, | |
| "eval_steps_per_second": 8.022, | |
| "step": 30 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "learning_rate": 0.00019850467289719628, | |
| "loss": 2.1267, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_accuracy": 0.3862433862433862, | |
| "eval_loss": 1.942036747932434, | |
| "eval_runtime": 3.0218, | |
| "eval_samples_per_second": 62.546, | |
| "eval_steps_per_second": 7.942, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "learning_rate": 0.00019813084112149535, | |
| "loss": 2.0365, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_accuracy": 0.36507936507936506, | |
| "eval_loss": 1.9487409591674805, | |
| "eval_runtime": 2.938, | |
| "eval_samples_per_second": 64.329, | |
| "eval_steps_per_second": 8.169, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "learning_rate": 0.00019775700934579439, | |
| "loss": 1.9884, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_accuracy": 0.4973544973544973, | |
| "eval_loss": 1.7399966716766357, | |
| "eval_runtime": 3.2957, | |
| "eval_samples_per_second": 57.348, | |
| "eval_steps_per_second": 7.282, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "learning_rate": 0.00019738317757009345, | |
| "loss": 1.8822, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_accuracy": 0.455026455026455, | |
| "eval_loss": 1.6512662172317505, | |
| "eval_runtime": 3.1742, | |
| "eval_samples_per_second": 59.542, | |
| "eval_steps_per_second": 7.561, | |
| "step": 70 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "learning_rate": 0.00019700934579439255, | |
| "loss": 1.7083, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_accuracy": 0.4603174603174603, | |
| "eval_loss": 1.560472011566162, | |
| "eval_runtime": 3.0275, | |
| "eval_samples_per_second": 62.429, | |
| "eval_steps_per_second": 7.927, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "learning_rate": 0.00019663551401869161, | |
| "loss": 1.6416, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_accuracy": 0.49206349206349204, | |
| "eval_loss": 1.5726529359817505, | |
| "eval_runtime": 3.0636, | |
| "eval_samples_per_second": 61.691, | |
| "eval_steps_per_second": 7.834, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 0.00019626168224299065, | |
| "loss": 1.6473, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_accuracy": 0.4656084656084656, | |
| "eval_loss": 1.648504614830017, | |
| "eval_runtime": 3.2766, | |
| "eval_samples_per_second": 57.682, | |
| "eval_steps_per_second": 7.325, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "learning_rate": 0.0001959252336448598, | |
| "loss": 1.3355, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.03, | |
| "eval_accuracy": 0.5343915343915344, | |
| "eval_loss": 1.4000248908996582, | |
| "eval_runtime": 3.1074, | |
| "eval_samples_per_second": 60.823, | |
| "eval_steps_per_second": 7.724, | |
| "step": 110 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "learning_rate": 0.00019555140186915888, | |
| "loss": 1.4677, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.12, | |
| "eval_accuracy": 0.544973544973545, | |
| "eval_loss": 1.3445547819137573, | |
| "eval_runtime": 3.4486, | |
| "eval_samples_per_second": 54.805, | |
| "eval_steps_per_second": 6.959, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "learning_rate": 0.00019517757009345797, | |
| "loss": 1.3832, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.21, | |
| "eval_accuracy": 0.5555555555555556, | |
| "eval_loss": 1.3656994104385376, | |
| "eval_runtime": 3.0673, | |
| "eval_samples_per_second": 61.618, | |
| "eval_steps_per_second": 7.824, | |
| "step": 130 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "learning_rate": 0.00019480373831775701, | |
| "loss": 1.3364, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.31, | |
| "eval_accuracy": 0.582010582010582, | |
| "eval_loss": 1.255820631980896, | |
| "eval_runtime": 3.1137, | |
| "eval_samples_per_second": 60.7, | |
| "eval_steps_per_second": 7.708, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "learning_rate": 0.00019442990654205608, | |
| "loss": 1.3741, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.4, | |
| "eval_accuracy": 0.5343915343915344, | |
| "eval_loss": 1.4308785200119019, | |
| "eval_runtime": 3.1126, | |
| "eval_samples_per_second": 60.721, | |
| "eval_steps_per_second": 7.711, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "learning_rate": 0.00019405607476635515, | |
| "loss": 1.3806, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.5, | |
| "eval_accuracy": 0.5873015873015873, | |
| "eval_loss": 1.3130360841751099, | |
| "eval_runtime": 3.1851, | |
| "eval_samples_per_second": 59.339, | |
| "eval_steps_per_second": 7.535, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "learning_rate": 0.00019368224299065422, | |
| "loss": 1.096, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.59, | |
| "eval_accuracy": 0.6084656084656085, | |
| "eval_loss": 1.2385209798812866, | |
| "eval_runtime": 3.165, | |
| "eval_samples_per_second": 59.716, | |
| "eval_steps_per_second": 7.583, | |
| "step": 170 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "learning_rate": 0.00019330841121495328, | |
| "loss": 1.2139, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.68, | |
| "eval_accuracy": 0.5925925925925926, | |
| "eval_loss": 1.2447173595428467, | |
| "eval_runtime": 3.2024, | |
| "eval_samples_per_second": 59.019, | |
| "eval_steps_per_second": 7.494, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "learning_rate": 0.00019293457943925235, | |
| "loss": 1.1646, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.78, | |
| "eval_accuracy": 0.6243386243386243, | |
| "eval_loss": 1.1505087614059448, | |
| "eval_runtime": 3.3803, | |
| "eval_samples_per_second": 55.912, | |
| "eval_steps_per_second": 7.1, | |
| "step": 190 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "learning_rate": 0.00019256074766355142, | |
| "loss": 1.1851, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.87, | |
| "eval_accuracy": 0.5555555555555556, | |
| "eval_loss": 1.292531967163086, | |
| "eval_runtime": 3.3243, | |
| "eval_samples_per_second": 56.854, | |
| "eval_steps_per_second": 7.22, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "learning_rate": 0.00019218691588785048, | |
| "loss": 1.0773, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_accuracy": 0.6825396825396826, | |
| "eval_loss": 1.025804042816162, | |
| "eval_runtime": 3.2438, | |
| "eval_samples_per_second": 58.265, | |
| "eval_steps_per_second": 7.399, | |
| "step": 210 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "learning_rate": 0.00019181308411214952, | |
| "loss": 1.2694, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.06, | |
| "eval_accuracy": 0.5978835978835979, | |
| "eval_loss": 1.1972746849060059, | |
| "eval_runtime": 3.0541, | |
| "eval_samples_per_second": 61.884, | |
| "eval_steps_per_second": 7.858, | |
| "step": 220 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "learning_rate": 0.00019143925233644862, | |
| "loss": 0.8254, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.15, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 0.9814253449440002, | |
| "eval_runtime": 3.1455, | |
| "eval_samples_per_second": 60.086, | |
| "eval_steps_per_second": 7.63, | |
| "step": 230 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "learning_rate": 0.0001910654205607477, | |
| "loss": 0.8614, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.24, | |
| "eval_accuracy": 0.656084656084656, | |
| "eval_loss": 1.1236768960952759, | |
| "eval_runtime": 3.1907, | |
| "eval_samples_per_second": 59.235, | |
| "eval_steps_per_second": 7.522, | |
| "step": 240 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "learning_rate": 0.00019069158878504673, | |
| "loss": 0.961, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.34, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 1.0817521810531616, | |
| "eval_runtime": 3.2851, | |
| "eval_samples_per_second": 57.533, | |
| "eval_steps_per_second": 7.306, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "learning_rate": 0.0001903177570093458, | |
| "loss": 0.8305, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.43, | |
| "eval_accuracy": 0.6613756613756614, | |
| "eval_loss": 1.0328330993652344, | |
| "eval_runtime": 3.1629, | |
| "eval_samples_per_second": 59.756, | |
| "eval_steps_per_second": 7.588, | |
| "step": 260 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "learning_rate": 0.00018994392523364486, | |
| "loss": 0.882, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.52, | |
| "eval_accuracy": 0.6349206349206349, | |
| "eval_loss": 1.1382650136947632, | |
| "eval_runtime": 3.1128, | |
| "eval_samples_per_second": 60.718, | |
| "eval_steps_per_second": 7.71, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "learning_rate": 0.00018957009345794396, | |
| "loss": 0.9153, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.62, | |
| "eval_accuracy": 0.6507936507936508, | |
| "eval_loss": 1.0411267280578613, | |
| "eval_runtime": 3.1356, | |
| "eval_samples_per_second": 60.275, | |
| "eval_steps_per_second": 7.654, | |
| "step": 280 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "learning_rate": 0.000189196261682243, | |
| "loss": 0.8855, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.71, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 0.9475411772727966, | |
| "eval_runtime": 3.2668, | |
| "eval_samples_per_second": 57.854, | |
| "eval_steps_per_second": 7.347, | |
| "step": 290 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "learning_rate": 0.00018882242990654206, | |
| "loss": 0.8792, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.8, | |
| "eval_accuracy": 0.5978835978835979, | |
| "eval_loss": 1.1673120260238647, | |
| "eval_runtime": 3.2828, | |
| "eval_samples_per_second": 57.574, | |
| "eval_steps_per_second": 7.311, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "learning_rate": 0.00018844859813084113, | |
| "loss": 0.8555, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.9, | |
| "eval_accuracy": 0.7407407407407407, | |
| "eval_loss": 0.8777327537536621, | |
| "eval_runtime": 3.2563, | |
| "eval_samples_per_second": 58.041, | |
| "eval_steps_per_second": 7.37, | |
| "step": 310 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "learning_rate": 0.0001880747663551402, | |
| "loss": 0.8841, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 2.99, | |
| "eval_accuracy": 0.671957671957672, | |
| "eval_loss": 1.0181235074996948, | |
| "eval_runtime": 3.2414, | |
| "eval_samples_per_second": 58.308, | |
| "eval_steps_per_second": 7.404, | |
| "step": 320 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "learning_rate": 0.00018770093457943926, | |
| "loss": 0.5579, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.08, | |
| "eval_accuracy": 0.6613756613756614, | |
| "eval_loss": 1.052778959274292, | |
| "eval_runtime": 3.3555, | |
| "eval_samples_per_second": 56.326, | |
| "eval_steps_per_second": 7.153, | |
| "step": 330 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "learning_rate": 0.00018732710280373833, | |
| "loss": 0.5953, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.18, | |
| "eval_accuracy": 0.6455026455026455, | |
| "eval_loss": 1.095900058746338, | |
| "eval_runtime": 3.2945, | |
| "eval_samples_per_second": 57.369, | |
| "eval_steps_per_second": 7.285, | |
| "step": 340 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "learning_rate": 0.0001869532710280374, | |
| "loss": 0.62, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.27, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 0.9120954275131226, | |
| "eval_runtime": 3.6012, | |
| "eval_samples_per_second": 52.483, | |
| "eval_steps_per_second": 6.664, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "learning_rate": 0.00018657943925233644, | |
| "loss": 0.7633, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.36, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 0.926105797290802, | |
| "eval_runtime": 3.4368, | |
| "eval_samples_per_second": 54.992, | |
| "eval_steps_per_second": 6.983, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "learning_rate": 0.0001862056074766355, | |
| "loss": 0.7009, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 3.46, | |
| "eval_accuracy": 0.6296296296296297, | |
| "eval_loss": 1.1945137977600098, | |
| "eval_runtime": 3.788, | |
| "eval_samples_per_second": 49.895, | |
| "eval_steps_per_second": 6.336, | |
| "step": 370 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "learning_rate": 0.0001858317757009346, | |
| "loss": 0.8007, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 3.55, | |
| "eval_accuracy": 0.6296296296296297, | |
| "eval_loss": 1.0851304531097412, | |
| "eval_runtime": 3.287, | |
| "eval_samples_per_second": 57.499, | |
| "eval_steps_per_second": 7.301, | |
| "step": 380 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "learning_rate": 0.00018545794392523367, | |
| "loss": 0.7921, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 3.64, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 0.935813307762146, | |
| "eval_runtime": 3.1749, | |
| "eval_samples_per_second": 59.529, | |
| "eval_steps_per_second": 7.559, | |
| "step": 390 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "learning_rate": 0.0001850841121495327, | |
| "loss": 0.5837, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.74, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 0.987457811832428, | |
| "eval_runtime": 3.1206, | |
| "eval_samples_per_second": 60.565, | |
| "eval_steps_per_second": 7.691, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "learning_rate": 0.00018471028037383178, | |
| "loss": 0.6557, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 3.83, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 0.9543613791465759, | |
| "eval_runtime": 3.2455, | |
| "eval_samples_per_second": 58.235, | |
| "eval_steps_per_second": 7.395, | |
| "step": 410 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "learning_rate": 0.00018433644859813084, | |
| "loss": 0.8081, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 3.93, | |
| "eval_accuracy": 0.656084656084656, | |
| "eval_loss": 1.087867259979248, | |
| "eval_runtime": 3.0993, | |
| "eval_samples_per_second": 60.981, | |
| "eval_steps_per_second": 7.744, | |
| "step": 420 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "learning_rate": 0.0001839626168224299, | |
| "loss": 0.7486, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.02, | |
| "eval_accuracy": 0.6190476190476191, | |
| "eval_loss": 1.266100287437439, | |
| "eval_runtime": 3.1838, | |
| "eval_samples_per_second": 59.363, | |
| "eval_steps_per_second": 7.538, | |
| "step": 430 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "learning_rate": 0.00018358878504672898, | |
| "loss": 0.5166, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 4.11, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 0.9324920773506165, | |
| "eval_runtime": 3.1917, | |
| "eval_samples_per_second": 59.216, | |
| "eval_steps_per_second": 7.52, | |
| "step": 440 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "learning_rate": 0.00018321495327102804, | |
| "loss": 0.4375, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.21, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 0.8819901943206787, | |
| "eval_runtime": 3.1479, | |
| "eval_samples_per_second": 60.04, | |
| "eval_steps_per_second": 7.624, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "learning_rate": 0.0001828411214953271, | |
| "loss": 0.4839, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 4.3, | |
| "eval_accuracy": 0.6825396825396826, | |
| "eval_loss": 1.0533747673034668, | |
| "eval_runtime": 3.1426, | |
| "eval_samples_per_second": 60.141, | |
| "eval_steps_per_second": 7.637, | |
| "step": 460 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "learning_rate": 0.00018246728971962618, | |
| "loss": 0.3932, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 4.39, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.0015952587127686, | |
| "eval_runtime": 3.1739, | |
| "eval_samples_per_second": 59.548, | |
| "eval_steps_per_second": 7.562, | |
| "step": 470 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "learning_rate": 0.00018209345794392525, | |
| "loss": 0.4672, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 4.49, | |
| "eval_accuracy": 0.746031746031746, | |
| "eval_loss": 0.8358047604560852, | |
| "eval_runtime": 3.1086, | |
| "eval_samples_per_second": 60.8, | |
| "eval_steps_per_second": 7.721, | |
| "step": 480 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "learning_rate": 0.0001817196261682243, | |
| "loss": 0.4839, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 4.58, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 1.0010868310928345, | |
| "eval_runtime": 3.436, | |
| "eval_samples_per_second": 55.006, | |
| "eval_steps_per_second": 6.985, | |
| "step": 490 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "learning_rate": 0.00018134579439252338, | |
| "loss": 0.4536, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.67, | |
| "eval_accuracy": 0.5925925925925926, | |
| "eval_loss": 1.4390077590942383, | |
| "eval_runtime": 3.2086, | |
| "eval_samples_per_second": 58.905, | |
| "eval_steps_per_second": 7.48, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "learning_rate": 0.00018097196261682242, | |
| "loss": 0.5925, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 4.77, | |
| "eval_accuracy": 0.6455026455026455, | |
| "eval_loss": 1.0699883699417114, | |
| "eval_runtime": 3.2145, | |
| "eval_samples_per_second": 58.796, | |
| "eval_steps_per_second": 7.466, | |
| "step": 510 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "learning_rate": 0.0001805981308411215, | |
| "loss": 0.4905, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 4.86, | |
| "eval_accuracy": 0.6772486772486772, | |
| "eval_loss": 1.0934034585952759, | |
| "eval_runtime": 3.1633, | |
| "eval_samples_per_second": 59.748, | |
| "eval_steps_per_second": 7.587, | |
| "step": 520 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "learning_rate": 0.00018022429906542058, | |
| "loss": 0.6323, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 4.95, | |
| "eval_accuracy": 0.656084656084656, | |
| "eval_loss": 1.074127197265625, | |
| "eval_runtime": 3.0611, | |
| "eval_samples_per_second": 61.742, | |
| "eval_steps_per_second": 7.84, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "learning_rate": 0.00017985046728971965, | |
| "loss": 0.3686, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 5.05, | |
| "eval_accuracy": 0.6613756613756614, | |
| "eval_loss": 1.17642080783844, | |
| "eval_runtime": 3.1625, | |
| "eval_samples_per_second": 59.764, | |
| "eval_steps_per_second": 7.589, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "learning_rate": 0.0001794766355140187, | |
| "loss": 0.3233, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 5.14, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 0.9969061017036438, | |
| "eval_runtime": 3.1186, | |
| "eval_samples_per_second": 60.604, | |
| "eval_steps_per_second": 7.696, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "learning_rate": 0.00017910280373831776, | |
| "loss": 0.2791, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 5.23, | |
| "eval_accuracy": 0.6613756613756614, | |
| "eval_loss": 1.20182466506958, | |
| "eval_runtime": 3.2809, | |
| "eval_samples_per_second": 57.607, | |
| "eval_steps_per_second": 7.315, | |
| "step": 560 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "learning_rate": 0.00017872897196261682, | |
| "loss": 0.2798, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 5.33, | |
| "eval_accuracy": 0.6772486772486772, | |
| "eval_loss": 1.0335559844970703, | |
| "eval_runtime": 3.1419, | |
| "eval_samples_per_second": 60.155, | |
| "eval_steps_per_second": 7.639, | |
| "step": 570 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "learning_rate": 0.0001783551401869159, | |
| "loss": 0.2892, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 5.42, | |
| "eval_accuracy": 0.6296296296296297, | |
| "eval_loss": 1.3315926790237427, | |
| "eval_runtime": 3.2144, | |
| "eval_samples_per_second": 58.797, | |
| "eval_steps_per_second": 7.466, | |
| "step": 580 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "learning_rate": 0.00017798130841121496, | |
| "loss": 0.317, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 5.51, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.0159733295440674, | |
| "eval_runtime": 3.1281, | |
| "eval_samples_per_second": 60.42, | |
| "eval_steps_per_second": 7.672, | |
| "step": 590 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "learning_rate": 0.00017760747663551403, | |
| "loss": 0.3673, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 5.61, | |
| "eval_accuracy": 0.6190476190476191, | |
| "eval_loss": 1.3200335502624512, | |
| "eval_runtime": 2.9997, | |
| "eval_samples_per_second": 63.006, | |
| "eval_steps_per_second": 8.001, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "learning_rate": 0.0001772336448598131, | |
| "loss": 0.4733, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.7, | |
| "eval_accuracy": 0.6455026455026455, | |
| "eval_loss": 1.2423778772354126, | |
| "eval_runtime": 3.2132, | |
| "eval_samples_per_second": 58.819, | |
| "eval_steps_per_second": 7.469, | |
| "step": 610 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "learning_rate": 0.00017685981308411216, | |
| "loss": 0.4683, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 5.79, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 0.8893383145332336, | |
| "eval_runtime": 3.1811, | |
| "eval_samples_per_second": 59.413, | |
| "eval_steps_per_second": 7.545, | |
| "step": 620 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "learning_rate": 0.00017648598130841123, | |
| "loss": 0.4179, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 5.89, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.0192750692367554, | |
| "eval_runtime": 3.1066, | |
| "eval_samples_per_second": 60.839, | |
| "eval_steps_per_second": 7.726, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "learning_rate": 0.0001761121495327103, | |
| "loss": 0.3667, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 5.98, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 0.9950593113899231, | |
| "eval_runtime": 3.1029, | |
| "eval_samples_per_second": 60.911, | |
| "eval_steps_per_second": 7.735, | |
| "step": 640 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "learning_rate": 0.00017573831775700936, | |
| "loss": 0.2212, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 6.07, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 0.9019126296043396, | |
| "eval_runtime": 3.0407, | |
| "eval_samples_per_second": 62.156, | |
| "eval_steps_per_second": 7.893, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "learning_rate": 0.0001753644859813084, | |
| "loss": 0.1881, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 6.17, | |
| "eval_accuracy": 0.656084656084656, | |
| "eval_loss": 1.1512494087219238, | |
| "eval_runtime": 3.0574, | |
| "eval_samples_per_second": 61.818, | |
| "eval_steps_per_second": 7.85, | |
| "step": 660 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "learning_rate": 0.0001749906542056075, | |
| "loss": 0.2403, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 6.26, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.0092432498931885, | |
| "eval_runtime": 3.1346, | |
| "eval_samples_per_second": 60.295, | |
| "eval_steps_per_second": 7.657, | |
| "step": 670 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "learning_rate": 0.00017461682242990656, | |
| "loss": 0.2597, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 6.36, | |
| "eval_accuracy": 0.6825396825396826, | |
| "eval_loss": 1.1817060708999634, | |
| "eval_runtime": 3.0592, | |
| "eval_samples_per_second": 61.781, | |
| "eval_steps_per_second": 7.845, | |
| "step": 680 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "learning_rate": 0.00017424299065420563, | |
| "loss": 0.2644, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 6.45, | |
| "eval_accuracy": 0.6507936507936508, | |
| "eval_loss": 1.258557677268982, | |
| "eval_runtime": 3.1473, | |
| "eval_samples_per_second": 60.051, | |
| "eval_steps_per_second": 7.626, | |
| "step": 690 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "learning_rate": 0.00017386915887850467, | |
| "loss": 0.2562, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 6.54, | |
| "eval_accuracy": 0.7195767195767195, | |
| "eval_loss": 1.0005096197128296, | |
| "eval_runtime": 3.1123, | |
| "eval_samples_per_second": 60.726, | |
| "eval_steps_per_second": 7.711, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "learning_rate": 0.00017349532710280374, | |
| "loss": 0.1487, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 6.64, | |
| "eval_accuracy": 0.6507936507936508, | |
| "eval_loss": 1.2767467498779297, | |
| "eval_runtime": 3.1275, | |
| "eval_samples_per_second": 60.433, | |
| "eval_steps_per_second": 7.674, | |
| "step": 710 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "learning_rate": 0.00017312149532710283, | |
| "loss": 0.2953, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 6.73, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.253779649734497, | |
| "eval_runtime": 3.2123, | |
| "eval_samples_per_second": 58.836, | |
| "eval_steps_per_second": 7.471, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "learning_rate": 0.00017274766355140187, | |
| "loss": 0.1725, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 6.82, | |
| "eval_accuracy": 0.656084656084656, | |
| "eval_loss": 1.2238225936889648, | |
| "eval_runtime": 3.1844, | |
| "eval_samples_per_second": 59.353, | |
| "eval_steps_per_second": 7.537, | |
| "step": 730 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "learning_rate": 0.00017237383177570094, | |
| "loss": 0.2037, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 6.92, | |
| "eval_accuracy": 0.6084656084656085, | |
| "eval_loss": 1.468080759048462, | |
| "eval_runtime": 3.1309, | |
| "eval_samples_per_second": 60.366, | |
| "eval_steps_per_second": 7.666, | |
| "step": 740 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "learning_rate": 0.000172, | |
| "loss": 0.2592, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 7.01, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.2074000835418701, | |
| "eval_runtime": 3.2681, | |
| "eval_samples_per_second": 57.831, | |
| "eval_steps_per_second": 7.344, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "learning_rate": 0.00017162616822429907, | |
| "loss": 0.1851, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 7.1, | |
| "eval_accuracy": 0.6825396825396826, | |
| "eval_loss": 1.1313153505325317, | |
| "eval_runtime": 3.1857, | |
| "eval_samples_per_second": 59.328, | |
| "eval_steps_per_second": 7.534, | |
| "step": 760 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "learning_rate": 0.00017125233644859814, | |
| "loss": 0.0958, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 7.2, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 1.222944974899292, | |
| "eval_runtime": 3.1213, | |
| "eval_samples_per_second": 60.551, | |
| "eval_steps_per_second": 7.689, | |
| "step": 770 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "learning_rate": 0.0001708785046728972, | |
| "loss": 0.0947, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 7.29, | |
| "eval_accuracy": 0.6613756613756614, | |
| "eval_loss": 1.3873189687728882, | |
| "eval_runtime": 3.0939, | |
| "eval_samples_per_second": 61.087, | |
| "eval_steps_per_second": 7.757, | |
| "step": 780 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "learning_rate": 0.00017050467289719628, | |
| "loss": 0.2053, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 7.38, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 1.4111433029174805, | |
| "eval_runtime": 3.0678, | |
| "eval_samples_per_second": 61.607, | |
| "eval_steps_per_second": 7.823, | |
| "step": 790 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "learning_rate": 0.00017013084112149534, | |
| "loss": 0.2165, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 7.48, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.3087962865829468, | |
| "eval_runtime": 3.1862, | |
| "eval_samples_per_second": 59.319, | |
| "eval_steps_per_second": 7.533, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "learning_rate": 0.00016975700934579438, | |
| "loss": 0.2425, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 7.57, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.3991620540618896, | |
| "eval_runtime": 3.2153, | |
| "eval_samples_per_second": 58.781, | |
| "eval_steps_per_second": 7.464, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "learning_rate": 0.00016938317757009348, | |
| "loss": 0.1644, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 7.66, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.080649733543396, | |
| "eval_runtime": 3.2137, | |
| "eval_samples_per_second": 58.811, | |
| "eval_steps_per_second": 7.468, | |
| "step": 820 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "learning_rate": 0.00016900934579439254, | |
| "loss": 0.3093, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 7.76, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.214190125465393, | |
| "eval_runtime": 3.1342, | |
| "eval_samples_per_second": 60.302, | |
| "eval_steps_per_second": 7.657, | |
| "step": 830 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "learning_rate": 0.0001686355140186916, | |
| "loss": 0.2525, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 7.85, | |
| "eval_accuracy": 0.656084656084656, | |
| "eval_loss": 1.3408259153366089, | |
| "eval_runtime": 3.0897, | |
| "eval_samples_per_second": 61.171, | |
| "eval_steps_per_second": 7.768, | |
| "step": 840 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "learning_rate": 0.00016826168224299065, | |
| "loss": 0.3339, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 7.94, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.1808757781982422, | |
| "eval_runtime": 3.2528, | |
| "eval_samples_per_second": 58.104, | |
| "eval_steps_per_second": 7.378, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "learning_rate": 0.00016788785046728972, | |
| "loss": 0.1044, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 8.04, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 1.2688654661178589, | |
| "eval_runtime": 3.09, | |
| "eval_samples_per_second": 61.165, | |
| "eval_steps_per_second": 7.767, | |
| "step": 860 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "learning_rate": 0.0001675140186915888, | |
| "loss": 0.093, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 8.13, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.209309697151184, | |
| "eval_runtime": 3.1281, | |
| "eval_samples_per_second": 60.421, | |
| "eval_steps_per_second": 7.672, | |
| "step": 870 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "learning_rate": 0.00016714018691588785, | |
| "loss": 0.2934, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 8.22, | |
| "eval_accuracy": 0.671957671957672, | |
| "eval_loss": 1.1540151834487915, | |
| "eval_runtime": 3.27, | |
| "eval_samples_per_second": 57.798, | |
| "eval_steps_per_second": 7.339, | |
| "step": 880 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "learning_rate": 0.00016676635514018692, | |
| "loss": 0.2133, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 8.32, | |
| "eval_accuracy": 0.6349206349206349, | |
| "eval_loss": 1.5835676193237305, | |
| "eval_runtime": 3.2878, | |
| "eval_samples_per_second": 57.485, | |
| "eval_steps_per_second": 7.3, | |
| "step": 890 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "learning_rate": 0.000166392523364486, | |
| "loss": 0.2045, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 8.41, | |
| "eval_accuracy": 0.6507936507936508, | |
| "eval_loss": 1.2564616203308105, | |
| "eval_runtime": 3.1746, | |
| "eval_samples_per_second": 59.536, | |
| "eval_steps_per_second": 7.56, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "learning_rate": 0.00016601869158878506, | |
| "loss": 0.2886, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 8.5, | |
| "eval_accuracy": 0.6613756613756614, | |
| "eval_loss": 1.3674818277359009, | |
| "eval_runtime": 3.2413, | |
| "eval_samples_per_second": 58.311, | |
| "eval_steps_per_second": 7.405, | |
| "step": 910 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "learning_rate": 0.00016564485981308412, | |
| "loss": 0.128, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 8.6, | |
| "eval_accuracy": 0.671957671957672, | |
| "eval_loss": 1.2137342691421509, | |
| "eval_runtime": 3.222, | |
| "eval_samples_per_second": 58.658, | |
| "eval_steps_per_second": 7.449, | |
| "step": 920 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "learning_rate": 0.0001652710280373832, | |
| "loss": 0.0785, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 8.69, | |
| "eval_accuracy": 0.6825396825396826, | |
| "eval_loss": 1.3206517696380615, | |
| "eval_runtime": 3.21, | |
| "eval_samples_per_second": 58.878, | |
| "eval_steps_per_second": 7.477, | |
| "step": 930 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "learning_rate": 0.00016489719626168226, | |
| "loss": 0.1409, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 8.79, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.2279229164123535, | |
| "eval_runtime": 3.1127, | |
| "eval_samples_per_second": 60.719, | |
| "eval_steps_per_second": 7.71, | |
| "step": 940 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "learning_rate": 0.00016452336448598132, | |
| "loss": 0.1221, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 8.88, | |
| "eval_accuracy": 0.7513227513227513, | |
| "eval_loss": 0.9318807125091553, | |
| "eval_runtime": 3.2281, | |
| "eval_samples_per_second": 58.549, | |
| "eval_steps_per_second": 7.435, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "learning_rate": 0.00016414953271028036, | |
| "loss": 0.112, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 8.97, | |
| "eval_accuracy": 0.671957671957672, | |
| "eval_loss": 1.267318606376648, | |
| "eval_runtime": 3.1888, | |
| "eval_samples_per_second": 59.27, | |
| "eval_steps_per_second": 7.526, | |
| "step": 960 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "learning_rate": 0.00016377570093457946, | |
| "loss": 0.0863, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 9.07, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 1.3446311950683594, | |
| "eval_runtime": 3.2112, | |
| "eval_samples_per_second": 58.857, | |
| "eval_steps_per_second": 7.474, | |
| "step": 970 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "learning_rate": 0.00016340186915887853, | |
| "loss": 0.0915, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 9.16, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.1720484495162964, | |
| "eval_runtime": 3.1413, | |
| "eval_samples_per_second": 60.167, | |
| "eval_steps_per_second": 7.64, | |
| "step": 980 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "learning_rate": 0.00016302803738317757, | |
| "loss": 0.0911, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 9.25, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.2161829471588135, | |
| "eval_runtime": 3.2839, | |
| "eval_samples_per_second": 57.553, | |
| "eval_steps_per_second": 7.308, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "learning_rate": 0.00016265420560747663, | |
| "loss": 0.0763, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 9.35, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.1872042417526245, | |
| "eval_runtime": 3.1612, | |
| "eval_samples_per_second": 59.787, | |
| "eval_steps_per_second": 7.592, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "learning_rate": 0.0001622803738317757, | |
| "loss": 0.0524, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 9.44, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.3205093145370483, | |
| "eval_runtime": 3.1614, | |
| "eval_samples_per_second": 59.784, | |
| "eval_steps_per_second": 7.592, | |
| "step": 1010 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "learning_rate": 0.0001619065420560748, | |
| "loss": 0.1508, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 9.53, | |
| "eval_accuracy": 0.6507936507936508, | |
| "eval_loss": 1.5726176500320435, | |
| "eval_runtime": 3.161, | |
| "eval_samples_per_second": 59.791, | |
| "eval_steps_per_second": 7.593, | |
| "step": 1020 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "learning_rate": 0.00016153271028037383, | |
| "loss": 0.0994, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 9.63, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 1.3505302667617798, | |
| "eval_runtime": 3.1505, | |
| "eval_samples_per_second": 59.99, | |
| "eval_steps_per_second": 7.618, | |
| "step": 1030 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "learning_rate": 0.0001611588785046729, | |
| "loss": 0.223, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 9.72, | |
| "eval_accuracy": 0.671957671957672, | |
| "eval_loss": 1.3896968364715576, | |
| "eval_runtime": 3.1746, | |
| "eval_samples_per_second": 59.536, | |
| "eval_steps_per_second": 7.56, | |
| "step": 1040 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "learning_rate": 0.00016078504672897197, | |
| "loss": 0.1115, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 9.81, | |
| "eval_accuracy": 0.6772486772486772, | |
| "eval_loss": 1.3965896368026733, | |
| "eval_runtime": 3.2706, | |
| "eval_samples_per_second": 57.787, | |
| "eval_steps_per_second": 7.338, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "learning_rate": 0.00016041121495327104, | |
| "loss": 0.1485, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 9.91, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.4686475992202759, | |
| "eval_runtime": 3.1507, | |
| "eval_samples_per_second": 59.987, | |
| "eval_steps_per_second": 7.617, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 0.0001600373831775701, | |
| "loss": 0.18, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.2233675718307495, | |
| "eval_runtime": 3.1543, | |
| "eval_samples_per_second": 59.919, | |
| "eval_steps_per_second": 7.609, | |
| "step": 1070 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "learning_rate": 0.00015966355140186917, | |
| "loss": 0.1366, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 10.09, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 1.4836784601211548, | |
| "eval_runtime": 3.2142, | |
| "eval_samples_per_second": 58.802, | |
| "eval_steps_per_second": 7.467, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "learning_rate": 0.00015928971962616824, | |
| "loss": 0.149, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 10.19, | |
| "eval_accuracy": 0.671957671957672, | |
| "eval_loss": 1.4587175846099854, | |
| "eval_runtime": 3.2536, | |
| "eval_samples_per_second": 58.09, | |
| "eval_steps_per_second": 7.377, | |
| "step": 1090 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "learning_rate": 0.0001589158878504673, | |
| "loss": 0.1618, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 10.28, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 1.3593031167984009, | |
| "eval_runtime": 3.1414, | |
| "eval_samples_per_second": 60.165, | |
| "eval_steps_per_second": 7.64, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 10.37, | |
| "learning_rate": 0.00015854205607476635, | |
| "loss": 0.1302, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 10.37, | |
| "eval_accuracy": 0.6349206349206349, | |
| "eval_loss": 1.5082346200942993, | |
| "eval_runtime": 3.1372, | |
| "eval_samples_per_second": 60.244, | |
| "eval_steps_per_second": 7.65, | |
| "step": 1110 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "learning_rate": 0.00015816822429906544, | |
| "loss": 0.0208, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 10.47, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.4162836074829102, | |
| "eval_runtime": 3.2255, | |
| "eval_samples_per_second": 58.596, | |
| "eval_steps_per_second": 7.441, | |
| "step": 1120 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "learning_rate": 0.0001577943925233645, | |
| "loss": 0.0314, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 10.56, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.2450639009475708, | |
| "eval_runtime": 3.2013, | |
| "eval_samples_per_second": 59.038, | |
| "eval_steps_per_second": 7.497, | |
| "step": 1130 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "learning_rate": 0.00015742056074766355, | |
| "loss": 0.0355, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 10.65, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.3142927885055542, | |
| "eval_runtime": 3.2738, | |
| "eval_samples_per_second": 57.732, | |
| "eval_steps_per_second": 7.331, | |
| "step": 1140 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "learning_rate": 0.00015704672897196261, | |
| "loss": 0.1024, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 10.75, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.4214942455291748, | |
| "eval_runtime": 3.2147, | |
| "eval_samples_per_second": 58.792, | |
| "eval_steps_per_second": 7.466, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "learning_rate": 0.00015667289719626168, | |
| "loss": 0.0733, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 10.84, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.2427018880844116, | |
| "eval_runtime": 3.1456, | |
| "eval_samples_per_second": 60.085, | |
| "eval_steps_per_second": 7.63, | |
| "step": 1160 | |
| }, | |
| { | |
| "epoch": 10.93, | |
| "learning_rate": 0.00015629906542056078, | |
| "loss": 0.0542, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 10.93, | |
| "eval_accuracy": 0.6825396825396826, | |
| "eval_loss": 1.5809307098388672, | |
| "eval_runtime": 3.2372, | |
| "eval_samples_per_second": 58.384, | |
| "eval_steps_per_second": 7.414, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "learning_rate": 0.00015592523364485982, | |
| "loss": 0.0995, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 11.03, | |
| "eval_accuracy": 0.6613756613756614, | |
| "eval_loss": 1.5994837284088135, | |
| "eval_runtime": 3.153, | |
| "eval_samples_per_second": 59.943, | |
| "eval_steps_per_second": 7.612, | |
| "step": 1180 | |
| }, | |
| { | |
| "epoch": 11.12, | |
| "learning_rate": 0.00015555140186915888, | |
| "loss": 0.0653, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 11.12, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.3932106494903564, | |
| "eval_runtime": 3.2168, | |
| "eval_samples_per_second": 58.753, | |
| "eval_steps_per_second": 7.461, | |
| "step": 1190 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "learning_rate": 0.00015517757009345795, | |
| "loss": 0.0339, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 11.21, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.2856649160385132, | |
| "eval_runtime": 3.0564, | |
| "eval_samples_per_second": 61.837, | |
| "eval_steps_per_second": 7.852, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 11.31, | |
| "learning_rate": 0.00015480373831775702, | |
| "loss": 0.1038, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 11.31, | |
| "eval_accuracy": 0.7407407407407407, | |
| "eval_loss": 1.2895965576171875, | |
| "eval_runtime": 3.1971, | |
| "eval_samples_per_second": 59.115, | |
| "eval_steps_per_second": 7.507, | |
| "step": 1210 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "learning_rate": 0.00015442990654205608, | |
| "loss": 0.0415, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.4, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.2908622026443481, | |
| "eval_runtime": 3.2149, | |
| "eval_samples_per_second": 58.789, | |
| "eval_steps_per_second": 7.465, | |
| "step": 1220 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "learning_rate": 0.00015405607476635515, | |
| "loss": 0.0629, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 11.5, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.3047831058502197, | |
| "eval_runtime": 3.228, | |
| "eval_samples_per_second": 58.55, | |
| "eval_steps_per_second": 7.435, | |
| "step": 1230 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "learning_rate": 0.00015368224299065422, | |
| "loss": 0.0137, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 11.59, | |
| "eval_accuracy": 0.7195767195767195, | |
| "eval_loss": 1.3543046712875366, | |
| "eval_runtime": 3.1086, | |
| "eval_samples_per_second": 60.8, | |
| "eval_steps_per_second": 7.721, | |
| "step": 1240 | |
| }, | |
| { | |
| "epoch": 11.68, | |
| "learning_rate": 0.0001533084112149533, | |
| "loss": 0.035, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 11.68, | |
| "eval_accuracy": 0.7301587301587301, | |
| "eval_loss": 1.2130463123321533, | |
| "eval_runtime": 3.4203, | |
| "eval_samples_per_second": 55.258, | |
| "eval_steps_per_second": 7.017, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "learning_rate": 0.00015293457943925233, | |
| "loss": 0.0102, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 11.78, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.2087303400039673, | |
| "eval_runtime": 3.3746, | |
| "eval_samples_per_second": 56.006, | |
| "eval_steps_per_second": 7.112, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 11.87, | |
| "learning_rate": 0.00015256074766355142, | |
| "loss": 0.0409, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 11.87, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.2329652309417725, | |
| "eval_runtime": 3.1081, | |
| "eval_samples_per_second": 60.809, | |
| "eval_steps_per_second": 7.722, | |
| "step": 1270 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "learning_rate": 0.0001521869158878505, | |
| "loss": 0.0659, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 11.96, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.3084936141967773, | |
| "eval_runtime": 3.3561, | |
| "eval_samples_per_second": 56.315, | |
| "eval_steps_per_second": 7.151, | |
| "step": 1280 | |
| }, | |
| { | |
| "epoch": 12.06, | |
| "learning_rate": 0.00015181308411214953, | |
| "loss": 0.035, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 12.06, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.405351161956787, | |
| "eval_runtime": 3.4254, | |
| "eval_samples_per_second": 55.175, | |
| "eval_steps_per_second": 7.006, | |
| "step": 1290 | |
| }, | |
| { | |
| "epoch": 12.15, | |
| "learning_rate": 0.0001514392523364486, | |
| "loss": 0.103, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 12.15, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.464030146598816, | |
| "eval_runtime": 3.2557, | |
| "eval_samples_per_second": 58.053, | |
| "eval_steps_per_second": 7.372, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "learning_rate": 0.00015106542056074766, | |
| "loss": 0.0238, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 12.24, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.347579002380371, | |
| "eval_runtime": 3.048, | |
| "eval_samples_per_second": 62.008, | |
| "eval_steps_per_second": 7.874, | |
| "step": 1310 | |
| }, | |
| { | |
| "epoch": 12.34, | |
| "learning_rate": 0.00015069158878504676, | |
| "loss": 0.0196, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 12.34, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.40040123462677, | |
| "eval_runtime": 3.1832, | |
| "eval_samples_per_second": 59.375, | |
| "eval_steps_per_second": 7.54, | |
| "step": 1320 | |
| }, | |
| { | |
| "epoch": 12.43, | |
| "learning_rate": 0.0001503177570093458, | |
| "loss": 0.009, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 12.43, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.4277156591415405, | |
| "eval_runtime": 3.2679, | |
| "eval_samples_per_second": 57.836, | |
| "eval_steps_per_second": 7.344, | |
| "step": 1330 | |
| }, | |
| { | |
| "epoch": 12.52, | |
| "learning_rate": 0.00014994392523364486, | |
| "loss": 0.0238, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 12.52, | |
| "eval_accuracy": 0.7407407407407407, | |
| "eval_loss": 1.4086812734603882, | |
| "eval_runtime": 3.2187, | |
| "eval_samples_per_second": 58.719, | |
| "eval_steps_per_second": 7.456, | |
| "step": 1340 | |
| }, | |
| { | |
| "epoch": 12.62, | |
| "learning_rate": 0.00014957009345794393, | |
| "loss": 0.0468, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 12.62, | |
| "eval_accuracy": 0.7195767195767195, | |
| "eval_loss": 1.3357652425765991, | |
| "eval_runtime": 3.1607, | |
| "eval_samples_per_second": 59.798, | |
| "eval_steps_per_second": 7.593, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 12.71, | |
| "learning_rate": 0.000149196261682243, | |
| "loss": 0.0207, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 12.71, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.4172828197479248, | |
| "eval_runtime": 3.0888, | |
| "eval_samples_per_second": 61.188, | |
| "eval_steps_per_second": 7.77, | |
| "step": 1360 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "learning_rate": 0.00014882242990654207, | |
| "loss": 0.0138, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 12.8, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.4307596683502197, | |
| "eval_runtime": 3.1887, | |
| "eval_samples_per_second": 59.271, | |
| "eval_steps_per_second": 7.526, | |
| "step": 1370 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "learning_rate": 0.00014844859813084113, | |
| "loss": 0.0241, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 12.9, | |
| "eval_accuracy": 0.6772486772486772, | |
| "eval_loss": 1.6381709575653076, | |
| "eval_runtime": 3.2282, | |
| "eval_samples_per_second": 58.546, | |
| "eval_steps_per_second": 7.434, | |
| "step": 1380 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "learning_rate": 0.0001480747663551402, | |
| "loss": 0.0224, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 12.99, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.513045072555542, | |
| "eval_runtime": 3.1555, | |
| "eval_samples_per_second": 59.896, | |
| "eval_steps_per_second": 7.606, | |
| "step": 1390 | |
| }, | |
| { | |
| "epoch": 13.08, | |
| "learning_rate": 0.00014770093457943924, | |
| "loss": 0.0367, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 13.08, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.5821019411087036, | |
| "eval_runtime": 3.1013, | |
| "eval_samples_per_second": 60.941, | |
| "eval_steps_per_second": 7.739, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 13.18, | |
| "learning_rate": 0.0001473271028037383, | |
| "loss": 0.0201, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 13.18, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.499505877494812, | |
| "eval_runtime": 3.3162, | |
| "eval_samples_per_second": 56.994, | |
| "eval_steps_per_second": 7.237, | |
| "step": 1410 | |
| }, | |
| { | |
| "epoch": 13.27, | |
| "learning_rate": 0.0001469532710280374, | |
| "loss": 0.0431, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 13.27, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.3571968078613281, | |
| "eval_runtime": 3.1517, | |
| "eval_samples_per_second": 59.968, | |
| "eval_steps_per_second": 7.615, | |
| "step": 1420 | |
| }, | |
| { | |
| "epoch": 13.36, | |
| "learning_rate": 0.00014657943925233647, | |
| "loss": 0.0137, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 13.36, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.3700077533721924, | |
| "eval_runtime": 3.0539, | |
| "eval_samples_per_second": 61.889, | |
| "eval_steps_per_second": 7.859, | |
| "step": 1430 | |
| }, | |
| { | |
| "epoch": 13.46, | |
| "learning_rate": 0.0001462056074766355, | |
| "loss": 0.0498, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 13.46, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.6434003114700317, | |
| "eval_runtime": 3.0715, | |
| "eval_samples_per_second": 61.534, | |
| "eval_steps_per_second": 7.814, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 13.55, | |
| "learning_rate": 0.00014583177570093458, | |
| "loss": 0.0175, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 13.55, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.7298402786254883, | |
| "eval_runtime": 3.1073, | |
| "eval_samples_per_second": 60.825, | |
| "eval_steps_per_second": 7.724, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 13.64, | |
| "learning_rate": 0.00014545794392523364, | |
| "loss": 0.0142, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 13.64, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.5783988237380981, | |
| "eval_runtime": 3.116, | |
| "eval_samples_per_second": 60.655, | |
| "eval_steps_per_second": 7.702, | |
| "step": 1460 | |
| }, | |
| { | |
| "epoch": 13.74, | |
| "learning_rate": 0.0001450841121495327, | |
| "loss": 0.0235, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 13.74, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.6561763286590576, | |
| "eval_runtime": 3.2007, | |
| "eval_samples_per_second": 59.049, | |
| "eval_steps_per_second": 7.498, | |
| "step": 1470 | |
| }, | |
| { | |
| "epoch": 13.83, | |
| "learning_rate": 0.00014471028037383178, | |
| "loss": 0.0524, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 13.83, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.831541657447815, | |
| "eval_runtime": 3.3046, | |
| "eval_samples_per_second": 57.193, | |
| "eval_steps_per_second": 7.263, | |
| "step": 1480 | |
| }, | |
| { | |
| "epoch": 13.93, | |
| "learning_rate": 0.00014433644859813085, | |
| "loss": 0.0506, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 13.93, | |
| "eval_accuracy": 0.6825396825396826, | |
| "eval_loss": 1.6628289222717285, | |
| "eval_runtime": 3.2548, | |
| "eval_samples_per_second": 58.068, | |
| "eval_steps_per_second": 7.374, | |
| "step": 1490 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "learning_rate": 0.0001439626168224299, | |
| "loss": 0.0566, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 14.02, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.6691248416900635, | |
| "eval_runtime": 3.3252, | |
| "eval_samples_per_second": 56.839, | |
| "eval_steps_per_second": 7.218, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "learning_rate": 0.00014358878504672898, | |
| "loss": 0.0872, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 14.11, | |
| "eval_accuracy": 0.6084656084656085, | |
| "eval_loss": 2.196704387664795, | |
| "eval_runtime": 3.2014, | |
| "eval_samples_per_second": 59.037, | |
| "eval_steps_per_second": 7.497, | |
| "step": 1510 | |
| }, | |
| { | |
| "epoch": 14.21, | |
| "learning_rate": 0.00014321495327102805, | |
| "loss": 0.1338, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 14.21, | |
| "eval_accuracy": 0.6507936507936508, | |
| "eval_loss": 1.7845758199691772, | |
| "eval_runtime": 3.176, | |
| "eval_samples_per_second": 59.51, | |
| "eval_steps_per_second": 7.557, | |
| "step": 1520 | |
| }, | |
| { | |
| "epoch": 14.3, | |
| "learning_rate": 0.00014284112149532711, | |
| "loss": 0.0222, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 14.3, | |
| "eval_accuracy": 0.6772486772486772, | |
| "eval_loss": 1.6833900213241577, | |
| "eval_runtime": 3.1793, | |
| "eval_samples_per_second": 59.447, | |
| "eval_steps_per_second": 7.549, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 14.39, | |
| "learning_rate": 0.00014246728971962618, | |
| "loss": 0.0254, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 14.39, | |
| "eval_accuracy": 0.656084656084656, | |
| "eval_loss": 1.9036774635314941, | |
| "eval_runtime": 3.1611, | |
| "eval_samples_per_second": 59.789, | |
| "eval_steps_per_second": 7.592, | |
| "step": 1540 | |
| }, | |
| { | |
| "epoch": 14.49, | |
| "learning_rate": 0.00014209345794392522, | |
| "loss": 0.0244, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 14.49, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.5309627056121826, | |
| "eval_runtime": 3.1613, | |
| "eval_samples_per_second": 59.786, | |
| "eval_steps_per_second": 7.592, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 14.58, | |
| "learning_rate": 0.0001417196261682243, | |
| "loss": 0.0178, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 14.58, | |
| "eval_accuracy": 0.6825396825396826, | |
| "eval_loss": 1.53221595287323, | |
| "eval_runtime": 3.144, | |
| "eval_samples_per_second": 60.114, | |
| "eval_steps_per_second": 7.634, | |
| "step": 1560 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "learning_rate": 0.00014134579439252338, | |
| "loss": 0.0045, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 14.67, | |
| "eval_accuracy": 0.7301587301587301, | |
| "eval_loss": 1.3083724975585938, | |
| "eval_runtime": 3.1864, | |
| "eval_samples_per_second": 59.314, | |
| "eval_steps_per_second": 7.532, | |
| "step": 1570 | |
| }, | |
| { | |
| "epoch": 14.77, | |
| "learning_rate": 0.00014097196261682245, | |
| "loss": 0.0485, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 14.77, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.2855416536331177, | |
| "eval_runtime": 3.2166, | |
| "eval_samples_per_second": 58.758, | |
| "eval_steps_per_second": 7.461, | |
| "step": 1580 | |
| }, | |
| { | |
| "epoch": 14.86, | |
| "learning_rate": 0.0001405981308411215, | |
| "loss": 0.0575, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 14.86, | |
| "eval_accuracy": 0.7301587301587301, | |
| "eval_loss": 1.3779939413070679, | |
| "eval_runtime": 3.2389, | |
| "eval_samples_per_second": 58.353, | |
| "eval_steps_per_second": 7.41, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "learning_rate": 0.00014022429906542056, | |
| "loss": 0.0131, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 14.95, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.6261014938354492, | |
| "eval_runtime": 3.1879, | |
| "eval_samples_per_second": 59.286, | |
| "eval_steps_per_second": 7.528, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 15.05, | |
| "learning_rate": 0.00013985046728971963, | |
| "loss": 0.0059, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 15.05, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 1.6843873262405396, | |
| "eval_runtime": 3.2196, | |
| "eval_samples_per_second": 58.702, | |
| "eval_steps_per_second": 7.454, | |
| "step": 1610 | |
| }, | |
| { | |
| "epoch": 15.14, | |
| "learning_rate": 0.0001394766355140187, | |
| "loss": 0.0113, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 15.14, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.4520890712738037, | |
| "eval_runtime": 3.1975, | |
| "eval_samples_per_second": 59.108, | |
| "eval_steps_per_second": 7.506, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 15.23, | |
| "learning_rate": 0.00013910280373831776, | |
| "loss": 0.0264, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 15.23, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.5394195318222046, | |
| "eval_runtime": 3.2412, | |
| "eval_samples_per_second": 58.312, | |
| "eval_steps_per_second": 7.405, | |
| "step": 1630 | |
| }, | |
| { | |
| "epoch": 15.33, | |
| "learning_rate": 0.00013872897196261683, | |
| "loss": 0.0131, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 15.33, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.521421194076538, | |
| "eval_runtime": 3.2643, | |
| "eval_samples_per_second": 57.899, | |
| "eval_steps_per_second": 7.352, | |
| "step": 1640 | |
| }, | |
| { | |
| "epoch": 15.42, | |
| "learning_rate": 0.0001383551401869159, | |
| "loss": 0.0067, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 15.42, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.8741341829299927, | |
| "eval_runtime": 3.2542, | |
| "eval_samples_per_second": 58.079, | |
| "eval_steps_per_second": 7.375, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 15.51, | |
| "learning_rate": 0.00013798130841121496, | |
| "loss": 0.1502, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 15.51, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.5996290445327759, | |
| "eval_runtime": 3.2543, | |
| "eval_samples_per_second": 58.076, | |
| "eval_steps_per_second": 7.375, | |
| "step": 1660 | |
| }, | |
| { | |
| "epoch": 15.61, | |
| "learning_rate": 0.00013760747663551403, | |
| "loss": 0.0505, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 15.61, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.5197315216064453, | |
| "eval_runtime": 3.3082, | |
| "eval_samples_per_second": 57.131, | |
| "eval_steps_per_second": 7.255, | |
| "step": 1670 | |
| }, | |
| { | |
| "epoch": 15.7, | |
| "learning_rate": 0.0001372336448598131, | |
| "loss": 0.0096, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 15.7, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.562042236328125, | |
| "eval_runtime": 3.2413, | |
| "eval_samples_per_second": 58.31, | |
| "eval_steps_per_second": 7.404, | |
| "step": 1680 | |
| }, | |
| { | |
| "epoch": 15.79, | |
| "learning_rate": 0.00013685981308411216, | |
| "loss": 0.0623, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 15.79, | |
| "eval_accuracy": 0.7301587301587301, | |
| "eval_loss": 1.5186712741851807, | |
| "eval_runtime": 3.3016, | |
| "eval_samples_per_second": 57.244, | |
| "eval_steps_per_second": 7.269, | |
| "step": 1690 | |
| }, | |
| { | |
| "epoch": 15.89, | |
| "learning_rate": 0.0001364859813084112, | |
| "loss": 0.0732, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 15.89, | |
| "eval_accuracy": 0.6825396825396826, | |
| "eval_loss": 1.5817829370498657, | |
| "eval_runtime": 3.2211, | |
| "eval_samples_per_second": 58.675, | |
| "eval_steps_per_second": 7.451, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 15.98, | |
| "learning_rate": 0.00013611214953271027, | |
| "loss": 0.0587, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 15.98, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.331645131111145, | |
| "eval_runtime": 3.254, | |
| "eval_samples_per_second": 58.083, | |
| "eval_steps_per_second": 7.376, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 16.07, | |
| "learning_rate": 0.00013573831775700936, | |
| "loss": 0.0135, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 16.07, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.5572881698608398, | |
| "eval_runtime": 3.1348, | |
| "eval_samples_per_second": 60.292, | |
| "eval_steps_per_second": 7.656, | |
| "step": 1720 | |
| }, | |
| { | |
| "epoch": 16.17, | |
| "learning_rate": 0.00013536448598130843, | |
| "loss": 0.0405, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 16.17, | |
| "eval_accuracy": 0.7195767195767195, | |
| "eval_loss": 1.5583974123001099, | |
| "eval_runtime": 3.1747, | |
| "eval_samples_per_second": 59.534, | |
| "eval_steps_per_second": 7.56, | |
| "step": 1730 | |
| }, | |
| { | |
| "epoch": 16.26, | |
| "learning_rate": 0.00013499065420560747, | |
| "loss": 0.0379, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 16.26, | |
| "eval_accuracy": 0.6613756613756614, | |
| "eval_loss": 1.8542115688323975, | |
| "eval_runtime": 3.4991, | |
| "eval_samples_per_second": 54.014, | |
| "eval_steps_per_second": 6.859, | |
| "step": 1740 | |
| }, | |
| { | |
| "epoch": 16.36, | |
| "learning_rate": 0.00013461682242990654, | |
| "loss": 0.0778, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 16.36, | |
| "eval_accuracy": 0.6772486772486772, | |
| "eval_loss": 1.81163489818573, | |
| "eval_runtime": 3.2013, | |
| "eval_samples_per_second": 59.039, | |
| "eval_steps_per_second": 7.497, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 16.45, | |
| "learning_rate": 0.00013424299065420563, | |
| "loss": 0.0178, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 16.45, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.6405593156814575, | |
| "eval_runtime": 3.1824, | |
| "eval_samples_per_second": 59.389, | |
| "eval_steps_per_second": 7.541, | |
| "step": 1760 | |
| }, | |
| { | |
| "epoch": 16.54, | |
| "learning_rate": 0.00013386915887850467, | |
| "loss": 0.0252, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 16.54, | |
| "eval_accuracy": 0.6772486772486772, | |
| "eval_loss": 1.6421043872833252, | |
| "eval_runtime": 3.2328, | |
| "eval_samples_per_second": 58.464, | |
| "eval_steps_per_second": 7.424, | |
| "step": 1770 | |
| }, | |
| { | |
| "epoch": 16.64, | |
| "learning_rate": 0.00013349532710280374, | |
| "loss": 0.0638, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 16.64, | |
| "eval_accuracy": 0.746031746031746, | |
| "eval_loss": 1.4504343271255493, | |
| "eval_runtime": 3.1566, | |
| "eval_samples_per_second": 59.875, | |
| "eval_steps_per_second": 7.603, | |
| "step": 1780 | |
| }, | |
| { | |
| "epoch": 16.73, | |
| "learning_rate": 0.0001331214953271028, | |
| "loss": 0.0138, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 16.73, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.5848379135131836, | |
| "eval_runtime": 3.2276, | |
| "eval_samples_per_second": 58.557, | |
| "eval_steps_per_second": 7.436, | |
| "step": 1790 | |
| }, | |
| { | |
| "epoch": 16.82, | |
| "learning_rate": 0.00013274766355140188, | |
| "loss": 0.1027, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 16.82, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.5205998420715332, | |
| "eval_runtime": 3.2514, | |
| "eval_samples_per_second": 58.128, | |
| "eval_steps_per_second": 7.381, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "learning_rate": 0.00013237383177570094, | |
| "loss": 0.086, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 16.92, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.3391714096069336, | |
| "eval_runtime": 3.2275, | |
| "eval_samples_per_second": 58.56, | |
| "eval_steps_per_second": 7.436, | |
| "step": 1810 | |
| }, | |
| { | |
| "epoch": 17.01, | |
| "learning_rate": 0.000132, | |
| "loss": 0.1029, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 17.01, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.4613063335418701, | |
| "eval_runtime": 3.268, | |
| "eval_samples_per_second": 57.834, | |
| "eval_steps_per_second": 7.344, | |
| "step": 1820 | |
| }, | |
| { | |
| "epoch": 17.1, | |
| "learning_rate": 0.00013162616822429908, | |
| "loss": 0.115, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 17.1, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 1.7344552278518677, | |
| "eval_runtime": 3.2414, | |
| "eval_samples_per_second": 58.308, | |
| "eval_steps_per_second": 7.404, | |
| "step": 1830 | |
| }, | |
| { | |
| "epoch": 17.2, | |
| "learning_rate": 0.00013125233644859814, | |
| "loss": 0.0286, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 17.2, | |
| "eval_accuracy": 0.7301587301587301, | |
| "eval_loss": 1.5347076654434204, | |
| "eval_runtime": 3.2891, | |
| "eval_samples_per_second": 57.462, | |
| "eval_steps_per_second": 7.297, | |
| "step": 1840 | |
| }, | |
| { | |
| "epoch": 17.29, | |
| "learning_rate": 0.00013087850467289718, | |
| "loss": 0.0608, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 17.29, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 1.5781868696212769, | |
| "eval_runtime": 3.2271, | |
| "eval_samples_per_second": 58.566, | |
| "eval_steps_per_second": 7.437, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 17.38, | |
| "learning_rate": 0.00013050467289719628, | |
| "loss": 0.1215, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 17.38, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 1.5484486818313599, | |
| "eval_runtime": 3.2944, | |
| "eval_samples_per_second": 57.37, | |
| "eval_steps_per_second": 7.285, | |
| "step": 1860 | |
| }, | |
| { | |
| "epoch": 17.48, | |
| "learning_rate": 0.00013013084112149535, | |
| "loss": 0.095, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 17.48, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.5258464813232422, | |
| "eval_runtime": 3.265, | |
| "eval_samples_per_second": 57.887, | |
| "eval_steps_per_second": 7.351, | |
| "step": 1870 | |
| }, | |
| { | |
| "epoch": 17.57, | |
| "learning_rate": 0.0001297570093457944, | |
| "loss": 0.0176, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 17.57, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.5887885093688965, | |
| "eval_runtime": 3.1883, | |
| "eval_samples_per_second": 59.278, | |
| "eval_steps_per_second": 7.527, | |
| "step": 1880 | |
| }, | |
| { | |
| "epoch": 17.66, | |
| "learning_rate": 0.00012938317757009345, | |
| "loss": 0.0208, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 17.66, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 2.017876625061035, | |
| "eval_runtime": 3.1617, | |
| "eval_samples_per_second": 59.778, | |
| "eval_steps_per_second": 7.591, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 17.76, | |
| "learning_rate": 0.00012900934579439252, | |
| "loss": 0.0752, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 17.76, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.8983193635940552, | |
| "eval_runtime": 3.3023, | |
| "eval_samples_per_second": 57.233, | |
| "eval_steps_per_second": 7.268, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "learning_rate": 0.00012863551401869162, | |
| "loss": 0.0609, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 17.85, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.6523683071136475, | |
| "eval_runtime": 3.2531, | |
| "eval_samples_per_second": 58.099, | |
| "eval_steps_per_second": 7.378, | |
| "step": 1910 | |
| }, | |
| { | |
| "epoch": 17.94, | |
| "learning_rate": 0.00012826168224299066, | |
| "loss": 0.0059, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 17.94, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.6338155269622803, | |
| "eval_runtime": 3.2413, | |
| "eval_samples_per_second": 58.309, | |
| "eval_steps_per_second": 7.404, | |
| "step": 1920 | |
| }, | |
| { | |
| "epoch": 18.04, | |
| "learning_rate": 0.00012788785046728972, | |
| "loss": 0.0264, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 18.04, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.6208415031433105, | |
| "eval_runtime": 3.4796, | |
| "eval_samples_per_second": 54.317, | |
| "eval_steps_per_second": 6.897, | |
| "step": 1930 | |
| }, | |
| { | |
| "epoch": 18.13, | |
| "learning_rate": 0.0001275140186915888, | |
| "loss": 0.0128, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 18.13, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.521356225013733, | |
| "eval_runtime": 3.1889, | |
| "eval_samples_per_second": 59.268, | |
| "eval_steps_per_second": 7.526, | |
| "step": 1940 | |
| }, | |
| { | |
| "epoch": 18.22, | |
| "learning_rate": 0.00012714018691588786, | |
| "loss": 0.0053, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 18.22, | |
| "eval_accuracy": 0.7195767195767195, | |
| "eval_loss": 1.5842546224594116, | |
| "eval_runtime": 3.1478, | |
| "eval_samples_per_second": 60.042, | |
| "eval_steps_per_second": 7.624, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 18.32, | |
| "learning_rate": 0.00012676635514018692, | |
| "loss": 0.0129, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 18.32, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.5440059900283813, | |
| "eval_runtime": 4.1754, | |
| "eval_samples_per_second": 45.265, | |
| "eval_steps_per_second": 5.748, | |
| "step": 1960 | |
| }, | |
| { | |
| "epoch": 18.41, | |
| "learning_rate": 0.000126392523364486, | |
| "loss": 0.016, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 18.41, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.4988614320755005, | |
| "eval_runtime": 3.1898, | |
| "eval_samples_per_second": 59.252, | |
| "eval_steps_per_second": 7.524, | |
| "step": 1970 | |
| }, | |
| { | |
| "epoch": 18.5, | |
| "learning_rate": 0.00012601869158878506, | |
| "loss": 0.0696, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 18.5, | |
| "eval_accuracy": 0.7407407407407407, | |
| "eval_loss": 1.4819732904434204, | |
| "eval_runtime": 3.1615, | |
| "eval_samples_per_second": 59.781, | |
| "eval_steps_per_second": 7.591, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 18.6, | |
| "learning_rate": 0.00012564485981308413, | |
| "loss": 0.0217, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 18.6, | |
| "eval_accuracy": 0.7195767195767195, | |
| "eval_loss": 1.4832117557525635, | |
| "eval_runtime": 3.179, | |
| "eval_samples_per_second": 59.452, | |
| "eval_steps_per_second": 7.549, | |
| "step": 1990 | |
| }, | |
| { | |
| "epoch": 18.69, | |
| "learning_rate": 0.00012527102803738317, | |
| "loss": 0.006, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 18.69, | |
| "eval_accuracy": 0.6772486772486772, | |
| "eval_loss": 1.9052395820617676, | |
| "eval_runtime": 3.1938, | |
| "eval_samples_per_second": 59.176, | |
| "eval_steps_per_second": 7.514, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 18.79, | |
| "learning_rate": 0.00012489719626168226, | |
| "loss": 0.0357, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 18.79, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.73564612865448, | |
| "eval_runtime": 3.3743, | |
| "eval_samples_per_second": 56.012, | |
| "eval_steps_per_second": 7.113, | |
| "step": 2010 | |
| }, | |
| { | |
| "epoch": 18.88, | |
| "learning_rate": 0.00012452336448598133, | |
| "loss": 0.0197, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 18.88, | |
| "eval_accuracy": 0.7301587301587301, | |
| "eval_loss": 1.617836594581604, | |
| "eval_runtime": 3.185, | |
| "eval_samples_per_second": 59.34, | |
| "eval_steps_per_second": 7.535, | |
| "step": 2020 | |
| }, | |
| { | |
| "epoch": 18.97, | |
| "learning_rate": 0.00012414953271028037, | |
| "loss": 0.0331, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 18.97, | |
| "eval_accuracy": 0.7407407407407407, | |
| "eval_loss": 1.5125271081924438, | |
| "eval_runtime": 3.228, | |
| "eval_samples_per_second": 58.55, | |
| "eval_steps_per_second": 7.435, | |
| "step": 2030 | |
| }, | |
| { | |
| "epoch": 19.07, | |
| "learning_rate": 0.00012381308411214953, | |
| "loss": 0.2593, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 19.07, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.6546989679336548, | |
| "eval_runtime": 3.1974, | |
| "eval_samples_per_second": 59.111, | |
| "eval_steps_per_second": 7.506, | |
| "step": 2040 | |
| }, | |
| { | |
| "epoch": 19.16, | |
| "learning_rate": 0.0001234392523364486, | |
| "loss": 0.0134, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 19.16, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 1.6934614181518555, | |
| "eval_runtime": 3.2069, | |
| "eval_samples_per_second": 58.936, | |
| "eval_steps_per_second": 7.484, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 19.25, | |
| "learning_rate": 0.0001230654205607477, | |
| "loss": 0.0069, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 19.25, | |
| "eval_accuracy": 0.7407407407407407, | |
| "eval_loss": 1.5514878034591675, | |
| "eval_runtime": 3.2273, | |
| "eval_samples_per_second": 58.563, | |
| "eval_steps_per_second": 7.437, | |
| "step": 2060 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "learning_rate": 0.00012269158878504673, | |
| "loss": 0.0083, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 19.35, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.576420545578003, | |
| "eval_runtime": 3.1752, | |
| "eval_samples_per_second": 59.525, | |
| "eval_steps_per_second": 7.559, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "learning_rate": 0.0001223177570093458, | |
| "loss": 0.0299, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 19.44, | |
| "eval_accuracy": 0.7301587301587301, | |
| "eval_loss": 1.5843783617019653, | |
| "eval_runtime": 3.1803, | |
| "eval_samples_per_second": 59.428, | |
| "eval_steps_per_second": 7.546, | |
| "step": 2080 | |
| }, | |
| { | |
| "epoch": 19.53, | |
| "learning_rate": 0.00012194392523364486, | |
| "loss": 0.002, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 19.53, | |
| "eval_accuracy": 0.746031746031746, | |
| "eval_loss": 1.5910844802856445, | |
| "eval_runtime": 3.1724, | |
| "eval_samples_per_second": 59.576, | |
| "eval_steps_per_second": 7.565, | |
| "step": 2090 | |
| }, | |
| { | |
| "epoch": 19.63, | |
| "learning_rate": 0.00012157009345794393, | |
| "loss": 0.0022, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 19.63, | |
| "eval_accuracy": 0.7513227513227513, | |
| "eval_loss": 1.5894769430160522, | |
| "eval_runtime": 3.2193, | |
| "eval_samples_per_second": 58.708, | |
| "eval_steps_per_second": 7.455, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 19.72, | |
| "learning_rate": 0.00012119626168224301, | |
| "loss": 0.0024, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 19.72, | |
| "eval_accuracy": 0.7513227513227513, | |
| "eval_loss": 1.586985468864441, | |
| "eval_runtime": 3.1897, | |
| "eval_samples_per_second": 59.254, | |
| "eval_steps_per_second": 7.524, | |
| "step": 2110 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "learning_rate": 0.00012082242990654206, | |
| "loss": 0.0082, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 19.81, | |
| "eval_accuracy": 0.7407407407407407, | |
| "eval_loss": 1.582448124885559, | |
| "eval_runtime": 3.1594, | |
| "eval_samples_per_second": 59.821, | |
| "eval_steps_per_second": 7.596, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "learning_rate": 0.00012044859813084113, | |
| "loss": 0.0022, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 19.91, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.5609190464019775, | |
| "eval_runtime": 3.3172, | |
| "eval_samples_per_second": 56.976, | |
| "eval_steps_per_second": 7.235, | |
| "step": 2130 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 0.00012007476635514018, | |
| "loss": 0.02, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.6138461828231812, | |
| "eval_runtime": 3.3238, | |
| "eval_samples_per_second": 56.863, | |
| "eval_steps_per_second": 7.221, | |
| "step": 2140 | |
| }, | |
| { | |
| "epoch": 20.09, | |
| "learning_rate": 0.00011970093457943925, | |
| "loss": 0.0063, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 20.09, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.7616651058197021, | |
| "eval_runtime": 3.2361, | |
| "eval_samples_per_second": 58.403, | |
| "eval_steps_per_second": 7.416, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 20.19, | |
| "learning_rate": 0.00011932710280373833, | |
| "loss": 0.0021, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 20.19, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.7941789627075195, | |
| "eval_runtime": 5.8284, | |
| "eval_samples_per_second": 32.428, | |
| "eval_steps_per_second": 4.118, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 20.28, | |
| "learning_rate": 0.0001189532710280374, | |
| "loss": 0.0068, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 20.28, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.7489572763442993, | |
| "eval_runtime": 3.085, | |
| "eval_samples_per_second": 61.265, | |
| "eval_steps_per_second": 7.78, | |
| "step": 2170 | |
| }, | |
| { | |
| "epoch": 20.37, | |
| "learning_rate": 0.00011857943925233645, | |
| "loss": 0.0078, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 20.37, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.6500416994094849, | |
| "eval_runtime": 3.0679, | |
| "eval_samples_per_second": 61.606, | |
| "eval_steps_per_second": 7.823, | |
| "step": 2180 | |
| }, | |
| { | |
| "epoch": 20.47, | |
| "learning_rate": 0.00011820560747663552, | |
| "loss": 0.0019, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 20.47, | |
| "eval_accuracy": 0.7513227513227513, | |
| "eval_loss": 1.5972764492034912, | |
| "eval_runtime": 3.3043, | |
| "eval_samples_per_second": 57.198, | |
| "eval_steps_per_second": 7.263, | |
| "step": 2190 | |
| }, | |
| { | |
| "epoch": 20.56, | |
| "learning_rate": 0.00011783177570093457, | |
| "loss": 0.0052, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 20.56, | |
| "eval_accuracy": 0.7301587301587301, | |
| "eval_loss": 1.64747154712677, | |
| "eval_runtime": 3.2063, | |
| "eval_samples_per_second": 58.947, | |
| "eval_steps_per_second": 7.485, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 20.65, | |
| "learning_rate": 0.00011745794392523365, | |
| "loss": 0.0169, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 20.65, | |
| "eval_accuracy": 0.7195767195767195, | |
| "eval_loss": 1.6964852809906006, | |
| "eval_runtime": 3.2678, | |
| "eval_samples_per_second": 57.837, | |
| "eval_steps_per_second": 7.344, | |
| "step": 2210 | |
| }, | |
| { | |
| "epoch": 20.75, | |
| "learning_rate": 0.00011708411214953272, | |
| "loss": 0.011, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 20.75, | |
| "eval_accuracy": 0.708994708994709, | |
| "eval_loss": 1.7380739450454712, | |
| "eval_runtime": 3.1484, | |
| "eval_samples_per_second": 60.03, | |
| "eval_steps_per_second": 7.623, | |
| "step": 2220 | |
| }, | |
| { | |
| "epoch": 20.84, | |
| "learning_rate": 0.00011671028037383178, | |
| "loss": 0.0141, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 20.84, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.6947896480560303, | |
| "eval_runtime": 3.3613, | |
| "eval_samples_per_second": 56.228, | |
| "eval_steps_per_second": 7.14, | |
| "step": 2230 | |
| }, | |
| { | |
| "epoch": 20.93, | |
| "learning_rate": 0.00011633644859813084, | |
| "loss": 0.026, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 20.93, | |
| "eval_accuracy": 0.7037037037037037, | |
| "eval_loss": 1.7711342573165894, | |
| "eval_runtime": 3.2033, | |
| "eval_samples_per_second": 59.001, | |
| "eval_steps_per_second": 7.492, | |
| "step": 2240 | |
| }, | |
| { | |
| "epoch": 21.03, | |
| "learning_rate": 0.00011596261682242991, | |
| "loss": 0.0019, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 21.03, | |
| "eval_accuracy": 0.6825396825396826, | |
| "eval_loss": 1.8577070236206055, | |
| "eval_runtime": 3.1485, | |
| "eval_samples_per_second": 60.03, | |
| "eval_steps_per_second": 7.623, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 21.12, | |
| "learning_rate": 0.00011558878504672899, | |
| "loss": 0.0523, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 21.12, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.837315320968628, | |
| "eval_runtime": 3.1213, | |
| "eval_samples_per_second": 60.552, | |
| "eval_steps_per_second": 7.689, | |
| "step": 2260 | |
| }, | |
| { | |
| "epoch": 21.21, | |
| "learning_rate": 0.00011521495327102804, | |
| "loss": 0.0234, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 21.21, | |
| "eval_accuracy": 0.7248677248677249, | |
| "eval_loss": 1.7204312086105347, | |
| "eval_runtime": 3.3214, | |
| "eval_samples_per_second": 56.904, | |
| "eval_steps_per_second": 7.226, | |
| "step": 2270 | |
| }, | |
| { | |
| "epoch": 21.31, | |
| "learning_rate": 0.00011484112149532711, | |
| "loss": 0.0105, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 21.31, | |
| "eval_accuracy": 0.7195767195767195, | |
| "eval_loss": 1.6235790252685547, | |
| "eval_runtime": 3.1507, | |
| "eval_samples_per_second": 59.986, | |
| "eval_steps_per_second": 7.617, | |
| "step": 2280 | |
| }, | |
| { | |
| "epoch": 21.4, | |
| "learning_rate": 0.00011446728971962617, | |
| "loss": 0.048, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 21.4, | |
| "eval_accuracy": 0.6984126984126984, | |
| "eval_loss": 1.8592135906219482, | |
| "eval_runtime": 3.1589, | |
| "eval_samples_per_second": 59.83, | |
| "eval_steps_per_second": 7.598, | |
| "step": 2290 | |
| }, | |
| { | |
| "epoch": 21.5, | |
| "learning_rate": 0.00011409345794392523, | |
| "loss": 0.0456, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 21.5, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.7222115993499756, | |
| "eval_runtime": 3.1346, | |
| "eval_samples_per_second": 60.296, | |
| "eval_steps_per_second": 7.657, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 21.59, | |
| "learning_rate": 0.00011371962616822431, | |
| "loss": 0.0129, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 21.59, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.6260654926300049, | |
| "eval_runtime": 3.1239, | |
| "eval_samples_per_second": 60.501, | |
| "eval_steps_per_second": 7.683, | |
| "step": 2310 | |
| }, | |
| { | |
| "epoch": 21.68, | |
| "learning_rate": 0.00011334579439252338, | |
| "loss": 0.0048, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 21.68, | |
| "eval_accuracy": 0.7142857142857143, | |
| "eval_loss": 1.5359770059585571, | |
| "eval_runtime": 3.254, | |
| "eval_samples_per_second": 58.083, | |
| "eval_steps_per_second": 7.376, | |
| "step": 2320 | |
| }, | |
| { | |
| "epoch": 21.78, | |
| "learning_rate": 0.00011297196261682243, | |
| "loss": 0.0217, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 21.78, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.4278719425201416, | |
| "eval_runtime": 3.2536, | |
| "eval_samples_per_second": 58.09, | |
| "eval_steps_per_second": 7.376, | |
| "step": 2330 | |
| }, | |
| { | |
| "epoch": 21.87, | |
| "learning_rate": 0.0001125981308411215, | |
| "loss": 0.011, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 21.87, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.369935393333435, | |
| "eval_runtime": 3.0331, | |
| "eval_samples_per_second": 62.312, | |
| "eval_steps_per_second": 7.913, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 21.96, | |
| "learning_rate": 0.00011222429906542056, | |
| "loss": 0.0104, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 21.96, | |
| "eval_accuracy": 0.6878306878306878, | |
| "eval_loss": 1.9012395143508911, | |
| "eval_runtime": 3.1254, | |
| "eval_samples_per_second": 60.472, | |
| "eval_steps_per_second": 7.679, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 22.06, | |
| "learning_rate": 0.00011185046728971964, | |
| "loss": 0.0204, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 22.06, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.5529383420944214, | |
| "eval_runtime": 3.1087, | |
| "eval_samples_per_second": 60.796, | |
| "eval_steps_per_second": 7.72, | |
| "step": 2360 | |
| }, | |
| { | |
| "epoch": 22.15, | |
| "learning_rate": 0.0001114766355140187, | |
| "loss": 0.02, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 22.15, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 1.7242515087127686, | |
| "eval_runtime": 3.0296, | |
| "eval_samples_per_second": 62.384, | |
| "eval_steps_per_second": 7.922, | |
| "step": 2370 | |
| }, | |
| { | |
| "epoch": 22.24, | |
| "learning_rate": 0.00011110280373831776, | |
| "loss": 0.0394, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 22.24, | |
| "eval_accuracy": 0.6931216931216931, | |
| "eval_loss": 1.8429406881332397, | |
| "eval_runtime": 3.2178, | |
| "eval_samples_per_second": 58.735, | |
| "eval_steps_per_second": 7.458, | |
| "step": 2380 | |
| }, | |
| { | |
| "epoch": 22.34, | |
| "learning_rate": 0.00011072897196261682, | |
| "loss": 0.0217, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 22.34, | |
| "eval_accuracy": 0.7407407407407407, | |
| "eval_loss": 1.6551986932754517, | |
| "eval_runtime": 3.0937, | |
| "eval_samples_per_second": 61.092, | |
| "eval_steps_per_second": 7.758, | |
| "step": 2390 | |
| }, | |
| { | |
| "epoch": 22.43, | |
| "learning_rate": 0.00011035514018691588, | |
| "loss": 0.0407, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 22.43, | |
| "eval_accuracy": 0.7407407407407407, | |
| "eval_loss": 1.584214687347412, | |
| "eval_runtime": 3.0876, | |
| "eval_samples_per_second": 61.213, | |
| "eval_steps_per_second": 7.773, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 22.52, | |
| "learning_rate": 0.00010998130841121497, | |
| "loss": 0.0075, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 22.52, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.5881952047348022, | |
| "eval_runtime": 3.1026, | |
| "eval_samples_per_second": 60.916, | |
| "eval_steps_per_second": 7.735, | |
| "step": 2410 | |
| }, | |
| { | |
| "epoch": 22.62, | |
| "learning_rate": 0.00010960747663551403, | |
| "loss": 0.0144, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 22.62, | |
| "eval_accuracy": 0.7301587301587301, | |
| "eval_loss": 1.6610509157180786, | |
| "eval_runtime": 3.2448, | |
| "eval_samples_per_second": 58.248, | |
| "eval_steps_per_second": 7.397, | |
| "step": 2420 | |
| }, | |
| { | |
| "epoch": 22.71, | |
| "learning_rate": 0.00010923364485981309, | |
| "loss": 0.0021, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 22.71, | |
| "eval_accuracy": 0.746031746031746, | |
| "eval_loss": 1.7075546979904175, | |
| "eval_runtime": 3.273, | |
| "eval_samples_per_second": 57.746, | |
| "eval_steps_per_second": 7.333, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 22.8, | |
| "learning_rate": 0.00010885981308411215, | |
| "loss": 0.0019, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 22.8, | |
| "eval_accuracy": 0.7354497354497355, | |
| "eval_loss": 1.616317629814148, | |
| "eval_runtime": 3.3746, | |
| "eval_samples_per_second": 56.006, | |
| "eval_steps_per_second": 7.112, | |
| "step": 2440 | |
| }, | |
| { | |
| "epoch": 22.9, | |
| "learning_rate": 0.00010848598130841121, | |
| "loss": 0.0074, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 22.9, | |
| "eval_accuracy": 0.746031746031746, | |
| "eval_loss": 1.5530917644500732, | |
| "eval_runtime": 3.2179, | |
| "eval_samples_per_second": 58.735, | |
| "eval_steps_per_second": 7.458, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "learning_rate": 0.0001081121495327103, | |
| "loss": 0.0195, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 22.99, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5417741537094116, | |
| "eval_runtime": 3.3609, | |
| "eval_samples_per_second": 56.235, | |
| "eval_steps_per_second": 7.141, | |
| "step": 2460 | |
| }, | |
| { | |
| "epoch": 23.08, | |
| "learning_rate": 0.00010773831775700935, | |
| "loss": 0.0085, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 23.08, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.532561182975769, | |
| "eval_runtime": 3.2217, | |
| "eval_samples_per_second": 58.664, | |
| "eval_steps_per_second": 7.449, | |
| "step": 2470 | |
| }, | |
| { | |
| "epoch": 23.18, | |
| "learning_rate": 0.00010736448598130842, | |
| "loss": 0.0015, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 23.18, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.530474305152893, | |
| "eval_runtime": 3.1097, | |
| "eval_samples_per_second": 60.778, | |
| "eval_steps_per_second": 7.718, | |
| "step": 2480 | |
| }, | |
| { | |
| "epoch": 23.27, | |
| "learning_rate": 0.00010699065420560748, | |
| "loss": 0.0015, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 23.27, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.530909776687622, | |
| "eval_runtime": 3.3395, | |
| "eval_samples_per_second": 56.594, | |
| "eval_steps_per_second": 7.187, | |
| "step": 2490 | |
| }, | |
| { | |
| "epoch": 23.36, | |
| "learning_rate": 0.00010661682242990654, | |
| "loss": 0.0058, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 23.36, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5259517431259155, | |
| "eval_runtime": 3.1744, | |
| "eval_samples_per_second": 59.539, | |
| "eval_steps_per_second": 7.56, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 23.46, | |
| "learning_rate": 0.00010624299065420562, | |
| "loss": 0.0015, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 23.46, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5237162113189697, | |
| "eval_runtime": 3.3526, | |
| "eval_samples_per_second": 56.374, | |
| "eval_steps_per_second": 7.159, | |
| "step": 2510 | |
| }, | |
| { | |
| "epoch": 23.55, | |
| "learning_rate": 0.00010586915887850468, | |
| "loss": 0.0083, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 23.55, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.5256061553955078, | |
| "eval_runtime": 3.4146, | |
| "eval_samples_per_second": 55.351, | |
| "eval_steps_per_second": 7.029, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 23.64, | |
| "learning_rate": 0.00010549532710280374, | |
| "loss": 0.0102, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 23.64, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5389765501022339, | |
| "eval_runtime": 3.0883, | |
| "eval_samples_per_second": 61.198, | |
| "eval_steps_per_second": 7.771, | |
| "step": 2530 | |
| }, | |
| { | |
| "epoch": 23.74, | |
| "learning_rate": 0.0001051214953271028, | |
| "loss": 0.0014, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 23.74, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.551665186882019, | |
| "eval_runtime": 3.2146, | |
| "eval_samples_per_second": 58.794, | |
| "eval_steps_per_second": 7.466, | |
| "step": 2540 | |
| }, | |
| { | |
| "epoch": 23.83, | |
| "learning_rate": 0.00010474766355140186, | |
| "loss": 0.0137, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 23.83, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.5628893375396729, | |
| "eval_runtime": 3.2259, | |
| "eval_samples_per_second": 58.589, | |
| "eval_steps_per_second": 7.44, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 23.93, | |
| "learning_rate": 0.00010437383177570095, | |
| "loss": 0.0211, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 23.93, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5249507427215576, | |
| "eval_runtime": 3.1967, | |
| "eval_samples_per_second": 59.123, | |
| "eval_steps_per_second": 7.508, | |
| "step": 2560 | |
| }, | |
| { | |
| "epoch": 24.02, | |
| "learning_rate": 0.00010400000000000001, | |
| "loss": 0.0014, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 24.02, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5051255226135254, | |
| "eval_runtime": 3.3479, | |
| "eval_samples_per_second": 56.454, | |
| "eval_steps_per_second": 7.169, | |
| "step": 2570 | |
| }, | |
| { | |
| "epoch": 24.11, | |
| "learning_rate": 0.00010362616822429907, | |
| "loss": 0.0102, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 24.11, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.4923993349075317, | |
| "eval_runtime": 3.3214, | |
| "eval_samples_per_second": 56.904, | |
| "eval_steps_per_second": 7.226, | |
| "step": 2580 | |
| }, | |
| { | |
| "epoch": 24.21, | |
| "learning_rate": 0.00010325233644859813, | |
| "loss": 0.0032, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 24.21, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.4887175559997559, | |
| "eval_runtime": 3.2614, | |
| "eval_samples_per_second": 57.95, | |
| "eval_steps_per_second": 7.359, | |
| "step": 2590 | |
| }, | |
| { | |
| "epoch": 24.3, | |
| "learning_rate": 0.0001028785046728972, | |
| "loss": 0.0012, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 24.3, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.4899401664733887, | |
| "eval_runtime": 3.1875, | |
| "eval_samples_per_second": 59.295, | |
| "eval_steps_per_second": 7.53, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 24.39, | |
| "learning_rate": 0.00010250467289719628, | |
| "loss": 0.0013, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 24.39, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.4907020330429077, | |
| "eval_runtime": 3.2459, | |
| "eval_samples_per_second": 58.227, | |
| "eval_steps_per_second": 7.394, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 24.49, | |
| "learning_rate": 0.00010213084112149533, | |
| "loss": 0.0013, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 24.49, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.4912313222885132, | |
| "eval_runtime": 3.1875, | |
| "eval_samples_per_second": 59.294, | |
| "eval_steps_per_second": 7.529, | |
| "step": 2620 | |
| }, | |
| { | |
| "epoch": 24.58, | |
| "learning_rate": 0.0001017570093457944, | |
| "loss": 0.0012, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 24.58, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.4922280311584473, | |
| "eval_runtime": 3.428, | |
| "eval_samples_per_second": 55.134, | |
| "eval_steps_per_second": 7.001, | |
| "step": 2630 | |
| }, | |
| { | |
| "epoch": 24.67, | |
| "learning_rate": 0.00010138317757009345, | |
| "loss": 0.0131, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 24.67, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.501592755317688, | |
| "eval_runtime": 3.3214, | |
| "eval_samples_per_second": 56.904, | |
| "eval_steps_per_second": 7.226, | |
| "step": 2640 | |
| }, | |
| { | |
| "epoch": 24.77, | |
| "learning_rate": 0.00010100934579439252, | |
| "loss": 0.0266, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 24.77, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.517897129058838, | |
| "eval_runtime": 3.3613, | |
| "eval_samples_per_second": 56.229, | |
| "eval_steps_per_second": 7.14, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 24.86, | |
| "learning_rate": 0.0001006355140186916, | |
| "loss": 0.0072, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 24.86, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5467013120651245, | |
| "eval_runtime": 3.2995, | |
| "eval_samples_per_second": 57.281, | |
| "eval_steps_per_second": 7.274, | |
| "step": 2660 | |
| }, | |
| { | |
| "epoch": 24.95, | |
| "learning_rate": 0.00010026168224299067, | |
| "loss": 0.0084, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 24.95, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.5633758306503296, | |
| "eval_runtime": 3.3204, | |
| "eval_samples_per_second": 56.921, | |
| "eval_steps_per_second": 7.228, | |
| "step": 2670 | |
| }, | |
| { | |
| "epoch": 25.05, | |
| "learning_rate": 9.988785046728972e-05, | |
| "loss": 0.0141, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 25.05, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.549333095550537, | |
| "eval_runtime": 3.3387, | |
| "eval_samples_per_second": 56.609, | |
| "eval_steps_per_second": 7.188, | |
| "step": 2680 | |
| }, | |
| { | |
| "epoch": 25.14, | |
| "learning_rate": 9.95140186915888e-05, | |
| "loss": 0.006, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 25.14, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5422124862670898, | |
| "eval_runtime": 3.2678, | |
| "eval_samples_per_second": 57.837, | |
| "eval_steps_per_second": 7.344, | |
| "step": 2690 | |
| }, | |
| { | |
| "epoch": 25.23, | |
| "learning_rate": 9.914018691588785e-05, | |
| "loss": 0.0049, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 25.23, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5276082754135132, | |
| "eval_runtime": 3.4271, | |
| "eval_samples_per_second": 55.149, | |
| "eval_steps_per_second": 7.003, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "learning_rate": 9.876635514018692e-05, | |
| "loss": 0.0012, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 25.33, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5173016786575317, | |
| "eval_runtime": 3.3173, | |
| "eval_samples_per_second": 56.973, | |
| "eval_steps_per_second": 7.235, | |
| "step": 2710 | |
| }, | |
| { | |
| "epoch": 25.42, | |
| "learning_rate": 9.839252336448599e-05, | |
| "loss": 0.0012, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 25.42, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5145915746688843, | |
| "eval_runtime": 3.3338, | |
| "eval_samples_per_second": 56.692, | |
| "eval_steps_per_second": 7.199, | |
| "step": 2720 | |
| }, | |
| { | |
| "epoch": 25.51, | |
| "learning_rate": 9.801869158878506e-05, | |
| "loss": 0.0104, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 25.51, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5092376470565796, | |
| "eval_runtime": 3.3686, | |
| "eval_samples_per_second": 56.107, | |
| "eval_steps_per_second": 7.125, | |
| "step": 2730 | |
| }, | |
| { | |
| "epoch": 25.61, | |
| "learning_rate": 9.764485981308412e-05, | |
| "loss": 0.0011, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 25.61, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5066778659820557, | |
| "eval_runtime": 3.2919, | |
| "eval_samples_per_second": 57.415, | |
| "eval_steps_per_second": 7.291, | |
| "step": 2740 | |
| }, | |
| { | |
| "epoch": 25.7, | |
| "learning_rate": 9.727102803738318e-05, | |
| "loss": 0.01, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 25.7, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5135998725891113, | |
| "eval_runtime": 3.3065, | |
| "eval_samples_per_second": 57.16, | |
| "eval_steps_per_second": 7.258, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 25.79, | |
| "learning_rate": 9.689719626168224e-05, | |
| "loss": 0.0012, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 25.79, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5182033777236938, | |
| "eval_runtime": 3.1889, | |
| "eval_samples_per_second": 59.268, | |
| "eval_steps_per_second": 7.526, | |
| "step": 2760 | |
| }, | |
| { | |
| "epoch": 25.89, | |
| "learning_rate": 9.652336448598131e-05, | |
| "loss": 0.0011, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 25.89, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5205999612808228, | |
| "eval_runtime": 3.2413, | |
| "eval_samples_per_second": 58.311, | |
| "eval_steps_per_second": 7.405, | |
| "step": 2770 | |
| }, | |
| { | |
| "epoch": 25.98, | |
| "learning_rate": 9.614953271028038e-05, | |
| "loss": 0.0199, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 25.98, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5461922883987427, | |
| "eval_runtime": 3.1874, | |
| "eval_samples_per_second": 59.295, | |
| "eval_steps_per_second": 7.53, | |
| "step": 2780 | |
| }, | |
| { | |
| "epoch": 26.07, | |
| "learning_rate": 9.577570093457945e-05, | |
| "loss": 0.0105, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 26.07, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.560713291168213, | |
| "eval_runtime": 3.3195, | |
| "eval_samples_per_second": 56.936, | |
| "eval_steps_per_second": 7.23, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 26.17, | |
| "learning_rate": 9.540186915887851e-05, | |
| "loss": 0.0011, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 26.17, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5657832622528076, | |
| "eval_runtime": 3.108, | |
| "eval_samples_per_second": 60.811, | |
| "eval_steps_per_second": 7.722, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 26.26, | |
| "learning_rate": 9.502803738317757e-05, | |
| "loss": 0.004, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 26.26, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5667065382003784, | |
| "eval_runtime": 3.1078, | |
| "eval_samples_per_second": 60.815, | |
| "eval_steps_per_second": 7.723, | |
| "step": 2810 | |
| }, | |
| { | |
| "epoch": 26.36, | |
| "learning_rate": 9.465420560747665e-05, | |
| "loss": 0.0011, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 26.36, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.565108060836792, | |
| "eval_runtime": 3.1483, | |
| "eval_samples_per_second": 60.032, | |
| "eval_steps_per_second": 7.623, | |
| "step": 2820 | |
| }, | |
| { | |
| "epoch": 26.45, | |
| "learning_rate": 9.42803738317757e-05, | |
| "loss": 0.0129, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 26.45, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5749871730804443, | |
| "eval_runtime": 3.086, | |
| "eval_samples_per_second": 61.245, | |
| "eval_steps_per_second": 7.777, | |
| "step": 2830 | |
| }, | |
| { | |
| "epoch": 26.54, | |
| "learning_rate": 9.390654205607478e-05, | |
| "loss": 0.0102, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 26.54, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.574375867843628, | |
| "eval_runtime": 3.141, | |
| "eval_samples_per_second": 60.171, | |
| "eval_steps_per_second": 7.641, | |
| "step": 2840 | |
| }, | |
| { | |
| "epoch": 26.64, | |
| "learning_rate": 9.353271028037384e-05, | |
| "loss": 0.0041, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 26.64, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5728485584259033, | |
| "eval_runtime": 3.0982, | |
| "eval_samples_per_second": 61.002, | |
| "eval_steps_per_second": 7.746, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 26.73, | |
| "learning_rate": 9.31588785046729e-05, | |
| "loss": 0.0082, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 26.73, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5666193962097168, | |
| "eval_runtime": 3.0518, | |
| "eval_samples_per_second": 61.93, | |
| "eval_steps_per_second": 7.864, | |
| "step": 2860 | |
| }, | |
| { | |
| "epoch": 26.82, | |
| "learning_rate": 9.278504672897197e-05, | |
| "loss": 0.0069, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 26.82, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5484100580215454, | |
| "eval_runtime": 3.1504, | |
| "eval_samples_per_second": 59.992, | |
| "eval_steps_per_second": 7.618, | |
| "step": 2870 | |
| }, | |
| { | |
| "epoch": 26.92, | |
| "learning_rate": 9.241121495327104e-05, | |
| "loss": 0.0058, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 26.92, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5442478656768799, | |
| "eval_runtime": 3.2682, | |
| "eval_samples_per_second": 57.831, | |
| "eval_steps_per_second": 7.344, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 27.01, | |
| "learning_rate": 9.20373831775701e-05, | |
| "loss": 0.0011, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 27.01, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5461711883544922, | |
| "eval_runtime": 3.0931, | |
| "eval_samples_per_second": 61.105, | |
| "eval_steps_per_second": 7.759, | |
| "step": 2890 | |
| }, | |
| { | |
| "epoch": 27.1, | |
| "learning_rate": 9.166355140186916e-05, | |
| "loss": 0.0058, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 27.1, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5431169271469116, | |
| "eval_runtime": 3.0975, | |
| "eval_samples_per_second": 61.016, | |
| "eval_steps_per_second": 7.748, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "learning_rate": 9.128971962616823e-05, | |
| "loss": 0.0054, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 27.2, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5305052995681763, | |
| "eval_runtime": 3.1126, | |
| "eval_samples_per_second": 60.722, | |
| "eval_steps_per_second": 7.711, | |
| "step": 2910 | |
| }, | |
| { | |
| "epoch": 27.29, | |
| "learning_rate": 9.091588785046729e-05, | |
| "loss": 0.0064, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 27.29, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5352741479873657, | |
| "eval_runtime": 3.1506, | |
| "eval_samples_per_second": 59.988, | |
| "eval_steps_per_second": 7.618, | |
| "step": 2920 | |
| }, | |
| { | |
| "epoch": 27.38, | |
| "learning_rate": 9.054205607476636e-05, | |
| "loss": 0.0116, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 27.38, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.546917200088501, | |
| "eval_runtime": 3.3601, | |
| "eval_samples_per_second": 56.248, | |
| "eval_steps_per_second": 7.143, | |
| "step": 2930 | |
| }, | |
| { | |
| "epoch": 27.48, | |
| "learning_rate": 9.016822429906543e-05, | |
| "loss": 0.001, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 27.48, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5545085668563843, | |
| "eval_runtime": 3.1606, | |
| "eval_samples_per_second": 59.798, | |
| "eval_steps_per_second": 7.593, | |
| "step": 2940 | |
| }, | |
| { | |
| "epoch": 27.57, | |
| "learning_rate": 8.97943925233645e-05, | |
| "loss": 0.0058, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 27.57, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.554548978805542, | |
| "eval_runtime": 3.2008, | |
| "eval_samples_per_second": 59.047, | |
| "eval_steps_per_second": 7.498, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 27.66, | |
| "learning_rate": 8.942056074766355e-05, | |
| "loss": 0.0056, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 27.66, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.546671748161316, | |
| "eval_runtime": 3.4523, | |
| "eval_samples_per_second": 54.747, | |
| "eval_steps_per_second": 6.952, | |
| "step": 2960 | |
| }, | |
| { | |
| "epoch": 27.76, | |
| "learning_rate": 8.904672897196263e-05, | |
| "loss": 0.001, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 27.76, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5371477603912354, | |
| "eval_runtime": 3.2841, | |
| "eval_samples_per_second": 57.551, | |
| "eval_steps_per_second": 7.308, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 27.85, | |
| "learning_rate": 8.867289719626168e-05, | |
| "loss": 0.0153, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 27.85, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5416946411132812, | |
| "eval_runtime": 3.3871, | |
| "eval_samples_per_second": 55.799, | |
| "eval_steps_per_second": 7.086, | |
| "step": 2980 | |
| }, | |
| { | |
| "epoch": 27.94, | |
| "learning_rate": 8.829906542056075e-05, | |
| "loss": 0.0063, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 27.94, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.556403636932373, | |
| "eval_runtime": 3.2332, | |
| "eval_samples_per_second": 58.456, | |
| "eval_steps_per_second": 7.423, | |
| "step": 2990 | |
| }, | |
| { | |
| "epoch": 28.04, | |
| "learning_rate": 8.792523364485982e-05, | |
| "loss": 0.001, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 28.04, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5654370784759521, | |
| "eval_runtime": 3.2405, | |
| "eval_samples_per_second": 58.325, | |
| "eval_steps_per_second": 7.406, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 28.13, | |
| "learning_rate": 8.755140186915888e-05, | |
| "loss": 0.001, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 28.13, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5694721937179565, | |
| "eval_runtime": 3.2113, | |
| "eval_samples_per_second": 58.854, | |
| "eval_steps_per_second": 7.474, | |
| "step": 3010 | |
| }, | |
| { | |
| "epoch": 28.22, | |
| "learning_rate": 8.717757009345795e-05, | |
| "loss": 0.001, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 28.22, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5711843967437744, | |
| "eval_runtime": 3.1919, | |
| "eval_samples_per_second": 59.213, | |
| "eval_steps_per_second": 7.519, | |
| "step": 3020 | |
| }, | |
| { | |
| "epoch": 28.32, | |
| "learning_rate": 8.6803738317757e-05, | |
| "loss": 0.0043, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 28.32, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.573392391204834, | |
| "eval_runtime": 3.0318, | |
| "eval_samples_per_second": 62.338, | |
| "eval_steps_per_second": 7.916, | |
| "step": 3030 | |
| }, | |
| { | |
| "epoch": 28.41, | |
| "learning_rate": 8.642990654205609e-05, | |
| "loss": 0.0043, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 28.41, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5829499959945679, | |
| "eval_runtime": 3.2944, | |
| "eval_samples_per_second": 57.37, | |
| "eval_steps_per_second": 7.285, | |
| "step": 3040 | |
| }, | |
| { | |
| "epoch": 28.5, | |
| "learning_rate": 8.605607476635514e-05, | |
| "loss": 0.0009, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 28.5, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5897167921066284, | |
| "eval_runtime": 3.1612, | |
| "eval_samples_per_second": 59.787, | |
| "eval_steps_per_second": 7.592, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 28.6, | |
| "learning_rate": 8.56822429906542e-05, | |
| "loss": 0.0009, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 28.6, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5928257703781128, | |
| "eval_runtime": 3.2548, | |
| "eval_samples_per_second": 58.068, | |
| "eval_steps_per_second": 7.374, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 28.69, | |
| "learning_rate": 8.530841121495327e-05, | |
| "loss": 0.0136, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 28.69, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5987612009048462, | |
| "eval_runtime": 3.0819, | |
| "eval_samples_per_second": 61.325, | |
| "eval_steps_per_second": 7.787, | |
| "step": 3070 | |
| }, | |
| { | |
| "epoch": 28.79, | |
| "learning_rate": 8.493457943925234e-05, | |
| "loss": 0.0106, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 28.79, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5925683975219727, | |
| "eval_runtime": 3.4371, | |
| "eval_samples_per_second": 54.988, | |
| "eval_steps_per_second": 6.983, | |
| "step": 3080 | |
| }, | |
| { | |
| "epoch": 28.88, | |
| "learning_rate": 8.456074766355141e-05, | |
| "loss": 0.0169, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 28.88, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5758073329925537, | |
| "eval_runtime": 3.1208, | |
| "eval_samples_per_second": 60.562, | |
| "eval_steps_per_second": 7.69, | |
| "step": 3090 | |
| }, | |
| { | |
| "epoch": 28.97, | |
| "learning_rate": 8.418691588785048e-05, | |
| "loss": 0.0058, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 28.97, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5587332248687744, | |
| "eval_runtime": 3.2941, | |
| "eval_samples_per_second": 57.375, | |
| "eval_steps_per_second": 7.286, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 29.07, | |
| "learning_rate": 8.381308411214953e-05, | |
| "loss": 0.0008, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 29.07, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5488831996917725, | |
| "eval_runtime": 3.104, | |
| "eval_samples_per_second": 60.89, | |
| "eval_steps_per_second": 7.732, | |
| "step": 3110 | |
| }, | |
| { | |
| "epoch": 29.16, | |
| "learning_rate": 8.343925233644861e-05, | |
| "loss": 0.0106, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 29.16, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5524382591247559, | |
| "eval_runtime": 3.1857, | |
| "eval_samples_per_second": 59.327, | |
| "eval_steps_per_second": 7.534, | |
| "step": 3120 | |
| }, | |
| { | |
| "epoch": 29.25, | |
| "learning_rate": 8.306542056074766e-05, | |
| "loss": 0.0066, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 29.25, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5529749393463135, | |
| "eval_runtime": 3.2947, | |
| "eval_samples_per_second": 57.365, | |
| "eval_steps_per_second": 7.284, | |
| "step": 3130 | |
| }, | |
| { | |
| "epoch": 29.35, | |
| "learning_rate": 8.269158878504673e-05, | |
| "loss": 0.0047, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 29.35, | |
| "eval_accuracy": 0.7724867724867724, | |
| "eval_loss": 1.561766266822815, | |
| "eval_runtime": 3.0618, | |
| "eval_samples_per_second": 61.729, | |
| "eval_steps_per_second": 7.839, | |
| "step": 3140 | |
| }, | |
| { | |
| "epoch": 29.44, | |
| "learning_rate": 8.23177570093458e-05, | |
| "loss": 0.0009, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 29.44, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.562992811203003, | |
| "eval_runtime": 4.4425, | |
| "eval_samples_per_second": 42.543, | |
| "eval_steps_per_second": 5.402, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 29.53, | |
| "learning_rate": 8.194392523364487e-05, | |
| "loss": 0.01, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 29.53, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5655356645584106, | |
| "eval_runtime": 3.1679, | |
| "eval_samples_per_second": 59.661, | |
| "eval_steps_per_second": 7.576, | |
| "step": 3160 | |
| }, | |
| { | |
| "epoch": 29.63, | |
| "learning_rate": 8.157009345794393e-05, | |
| "loss": 0.0131, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 29.63, | |
| "eval_accuracy": 0.7724867724867724, | |
| "eval_loss": 1.568717360496521, | |
| "eval_runtime": 4.272, | |
| "eval_samples_per_second": 44.242, | |
| "eval_steps_per_second": 5.618, | |
| "step": 3170 | |
| }, | |
| { | |
| "epoch": 29.72, | |
| "learning_rate": 8.119626168224299e-05, | |
| "loss": 0.0065, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 29.72, | |
| "eval_accuracy": 0.7724867724867724, | |
| "eval_loss": 1.5695167779922485, | |
| "eval_runtime": 3.2729, | |
| "eval_samples_per_second": 57.748, | |
| "eval_steps_per_second": 7.333, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 29.81, | |
| "learning_rate": 8.082242990654207e-05, | |
| "loss": 0.0049, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 29.81, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5673458576202393, | |
| "eval_runtime": 3.2283, | |
| "eval_samples_per_second": 58.545, | |
| "eval_steps_per_second": 7.434, | |
| "step": 3190 | |
| }, | |
| { | |
| "epoch": 29.91, | |
| "learning_rate": 8.044859813084112e-05, | |
| "loss": 0.0008, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 29.91, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5677809715270996, | |
| "eval_runtime": 3.2238, | |
| "eval_samples_per_second": 58.627, | |
| "eval_steps_per_second": 7.445, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 8.00747663551402e-05, | |
| "loss": 0.0009, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.569243311882019, | |
| "eval_runtime": 3.2344, | |
| "eval_samples_per_second": 58.434, | |
| "eval_steps_per_second": 7.42, | |
| "step": 3210 | |
| }, | |
| { | |
| "epoch": 30.09, | |
| "learning_rate": 7.970093457943925e-05, | |
| "loss": 0.0008, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 30.09, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.569542407989502, | |
| "eval_runtime": 3.1865, | |
| "eval_samples_per_second": 59.312, | |
| "eval_steps_per_second": 7.532, | |
| "step": 3220 | |
| }, | |
| { | |
| "epoch": 30.19, | |
| "learning_rate": 7.932710280373832e-05, | |
| "loss": 0.0008, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 30.19, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5703235864639282, | |
| "eval_runtime": 3.1705, | |
| "eval_samples_per_second": 59.612, | |
| "eval_steps_per_second": 7.57, | |
| "step": 3230 | |
| }, | |
| { | |
| "epoch": 30.28, | |
| "learning_rate": 7.895327102803739e-05, | |
| "loss": 0.0047, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 30.28, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5737032890319824, | |
| "eval_runtime": 3.2402, | |
| "eval_samples_per_second": 58.33, | |
| "eval_steps_per_second": 7.407, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 30.37, | |
| "learning_rate": 7.857943925233646e-05, | |
| "loss": 0.0008, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 30.37, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5757778882980347, | |
| "eval_runtime": 3.1947, | |
| "eval_samples_per_second": 59.16, | |
| "eval_steps_per_second": 7.512, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 30.47, | |
| "learning_rate": 7.820560747663552e-05, | |
| "loss": 0.0059, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 30.47, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5871896743774414, | |
| "eval_runtime": 3.2182, | |
| "eval_samples_per_second": 58.729, | |
| "eval_steps_per_second": 7.458, | |
| "step": 3260 | |
| }, | |
| { | |
| "epoch": 30.56, | |
| "learning_rate": 7.783177570093458e-05, | |
| "loss": 0.0091, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 30.56, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.608903169631958, | |
| "eval_runtime": 3.1183, | |
| "eval_samples_per_second": 60.61, | |
| "eval_steps_per_second": 7.696, | |
| "step": 3270 | |
| }, | |
| { | |
| "epoch": 30.65, | |
| "learning_rate": 7.745794392523364e-05, | |
| "loss": 0.0033, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 30.65, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.623267650604248, | |
| "eval_runtime": 3.1768, | |
| "eval_samples_per_second": 59.494, | |
| "eval_steps_per_second": 7.555, | |
| "step": 3280 | |
| }, | |
| { | |
| "epoch": 30.75, | |
| "learning_rate": 7.708411214953271e-05, | |
| "loss": 0.0165, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 30.75, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.611799955368042, | |
| "eval_runtime": 3.266, | |
| "eval_samples_per_second": 57.869, | |
| "eval_steps_per_second": 7.348, | |
| "step": 3290 | |
| }, | |
| { | |
| "epoch": 30.84, | |
| "learning_rate": 7.671028037383178e-05, | |
| "loss": 0.015, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 30.84, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5901665687561035, | |
| "eval_runtime": 3.1817, | |
| "eval_samples_per_second": 59.402, | |
| "eval_steps_per_second": 7.543, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 30.93, | |
| "learning_rate": 7.633644859813085e-05, | |
| "loss": 0.0049, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 30.93, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5764192342758179, | |
| "eval_runtime": 3.4445, | |
| "eval_samples_per_second": 54.87, | |
| "eval_steps_per_second": 6.968, | |
| "step": 3310 | |
| }, | |
| { | |
| "epoch": 31.03, | |
| "learning_rate": 7.596261682242991e-05, | |
| "loss": 0.0008, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 31.03, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5679030418395996, | |
| "eval_runtime": 3.18, | |
| "eval_samples_per_second": 59.433, | |
| "eval_steps_per_second": 7.547, | |
| "step": 3320 | |
| }, | |
| { | |
| "epoch": 31.12, | |
| "learning_rate": 7.558878504672897e-05, | |
| "loss": 0.0071, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 31.12, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5708253383636475, | |
| "eval_runtime": 3.2342, | |
| "eval_samples_per_second": 58.438, | |
| "eval_steps_per_second": 7.421, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 31.21, | |
| "learning_rate": 7.521495327102805e-05, | |
| "loss": 0.0068, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 31.21, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5782489776611328, | |
| "eval_runtime": 3.0933, | |
| "eval_samples_per_second": 61.099, | |
| "eval_steps_per_second": 7.759, | |
| "step": 3340 | |
| }, | |
| { | |
| "epoch": 31.31, | |
| "learning_rate": 7.48411214953271e-05, | |
| "loss": 0.0058, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 31.31, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5822737216949463, | |
| "eval_runtime": 3.2696, | |
| "eval_samples_per_second": 57.806, | |
| "eval_steps_per_second": 7.34, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 31.4, | |
| "learning_rate": 7.446728971962618e-05, | |
| "loss": 0.0008, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 31.4, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5795854330062866, | |
| "eval_runtime": 3.2032, | |
| "eval_samples_per_second": 59.004, | |
| "eval_steps_per_second": 7.493, | |
| "step": 3360 | |
| }, | |
| { | |
| "epoch": 31.5, | |
| "learning_rate": 7.409345794392524e-05, | |
| "loss": 0.0008, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 31.5, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5795681476593018, | |
| "eval_runtime": 3.2182, | |
| "eval_samples_per_second": 58.729, | |
| "eval_steps_per_second": 7.458, | |
| "step": 3370 | |
| }, | |
| { | |
| "epoch": 31.59, | |
| "learning_rate": 7.37196261682243e-05, | |
| "loss": 0.0084, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 31.59, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5760174989700317, | |
| "eval_runtime": 3.1968, | |
| "eval_samples_per_second": 59.122, | |
| "eval_steps_per_second": 7.508, | |
| "step": 3380 | |
| }, | |
| { | |
| "epoch": 31.68, | |
| "learning_rate": 7.334579439252337e-05, | |
| "loss": 0.0074, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 31.68, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5738554000854492, | |
| "eval_runtime": 3.2685, | |
| "eval_samples_per_second": 57.825, | |
| "eval_steps_per_second": 7.343, | |
| "step": 3390 | |
| }, | |
| { | |
| "epoch": 31.78, | |
| "learning_rate": 7.297196261682244e-05, | |
| "loss": 0.0039, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 31.78, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5700509548187256, | |
| "eval_runtime": 3.1115, | |
| "eval_samples_per_second": 60.742, | |
| "eval_steps_per_second": 7.713, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 31.87, | |
| "learning_rate": 7.25981308411215e-05, | |
| "loss": 0.0144, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 31.87, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5835039615631104, | |
| "eval_runtime": 3.3036, | |
| "eval_samples_per_second": 57.211, | |
| "eval_steps_per_second": 7.265, | |
| "step": 3410 | |
| }, | |
| { | |
| "epoch": 31.96, | |
| "learning_rate": 7.222429906542056e-05, | |
| "loss": 0.0051, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 31.96, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5920816659927368, | |
| "eval_runtime": 3.4411, | |
| "eval_samples_per_second": 54.924, | |
| "eval_steps_per_second": 6.975, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 32.06, | |
| "learning_rate": 7.185046728971963e-05, | |
| "loss": 0.0054, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 32.06, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5909570455551147, | |
| "eval_runtime": 3.0254, | |
| "eval_samples_per_second": 62.471, | |
| "eval_steps_per_second": 7.933, | |
| "step": 3430 | |
| }, | |
| { | |
| "epoch": 32.15, | |
| "learning_rate": 7.14766355140187e-05, | |
| "loss": 0.0047, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 32.15, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5870330333709717, | |
| "eval_runtime": 3.3063, | |
| "eval_samples_per_second": 57.163, | |
| "eval_steps_per_second": 7.259, | |
| "step": 3440 | |
| }, | |
| { | |
| "epoch": 32.24, | |
| "learning_rate": 7.110280373831776e-05, | |
| "loss": 0.0125, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 32.24, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.5907707214355469, | |
| "eval_runtime": 3.0785, | |
| "eval_samples_per_second": 61.394, | |
| "eval_steps_per_second": 7.796, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 32.34, | |
| "learning_rate": 7.072897196261683e-05, | |
| "loss": 0.0057, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 32.34, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5986977815628052, | |
| "eval_runtime": 3.0752, | |
| "eval_samples_per_second": 61.459, | |
| "eval_steps_per_second": 7.804, | |
| "step": 3460 | |
| }, | |
| { | |
| "epoch": 32.43, | |
| "learning_rate": 7.03551401869159e-05, | |
| "loss": 0.0007, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 32.43, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.6025718450546265, | |
| "eval_runtime": 3.1619, | |
| "eval_samples_per_second": 59.775, | |
| "eval_steps_per_second": 7.59, | |
| "step": 3470 | |
| }, | |
| { | |
| "epoch": 32.52, | |
| "learning_rate": 6.998130841121495e-05, | |
| "loss": 0.006, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 32.52, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.5968295335769653, | |
| "eval_runtime": 3.0263, | |
| "eval_samples_per_second": 62.452, | |
| "eval_steps_per_second": 7.93, | |
| "step": 3480 | |
| }, | |
| { | |
| "epoch": 32.62, | |
| "learning_rate": 6.960747663551403e-05, | |
| "loss": 0.0007, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 32.62, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.594142198562622, | |
| "eval_runtime": 3.2795, | |
| "eval_samples_per_second": 57.631, | |
| "eval_steps_per_second": 7.318, | |
| "step": 3490 | |
| }, | |
| { | |
| "epoch": 32.71, | |
| "learning_rate": 6.923364485981308e-05, | |
| "loss": 0.0007, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 32.71, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.59373140335083, | |
| "eval_runtime": 3.0541, | |
| "eval_samples_per_second": 61.884, | |
| "eval_steps_per_second": 7.858, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 32.8, | |
| "learning_rate": 6.885981308411215e-05, | |
| "loss": 0.0097, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 32.8, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.600218415260315, | |
| "eval_runtime": 3.105, | |
| "eval_samples_per_second": 60.87, | |
| "eval_steps_per_second": 7.73, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 32.9, | |
| "learning_rate": 6.848598130841122e-05, | |
| "loss": 0.0105, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 32.9, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6051002740859985, | |
| "eval_runtime": 3.4012, | |
| "eval_samples_per_second": 55.569, | |
| "eval_steps_per_second": 7.056, | |
| "step": 3520 | |
| }, | |
| { | |
| "epoch": 32.99, | |
| "learning_rate": 6.811214953271028e-05, | |
| "loss": 0.0007, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 32.99, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6050375699996948, | |
| "eval_runtime": 3.0547, | |
| "eval_samples_per_second": 61.873, | |
| "eval_steps_per_second": 7.857, | |
| "step": 3530 | |
| }, | |
| { | |
| "epoch": 33.08, | |
| "learning_rate": 6.773831775700935e-05, | |
| "loss": 0.0006, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 33.08, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6053138971328735, | |
| "eval_runtime": 3.1748, | |
| "eval_samples_per_second": 59.531, | |
| "eval_steps_per_second": 7.559, | |
| "step": 3540 | |
| }, | |
| { | |
| "epoch": 33.18, | |
| "learning_rate": 6.73644859813084e-05, | |
| "loss": 0.0008, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 33.18, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6058577299118042, | |
| "eval_runtime": 3.1448, | |
| "eval_samples_per_second": 60.099, | |
| "eval_steps_per_second": 7.632, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 33.27, | |
| "learning_rate": 6.699065420560749e-05, | |
| "loss": 0.0061, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 33.27, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.605916976928711, | |
| "eval_runtime": 3.094, | |
| "eval_samples_per_second": 61.086, | |
| "eval_steps_per_second": 7.757, | |
| "step": 3560 | |
| }, | |
| { | |
| "epoch": 33.36, | |
| "learning_rate": 6.661682242990654e-05, | |
| "loss": 0.0098, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 33.36, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6077380180358887, | |
| "eval_runtime": 3.1529, | |
| "eval_samples_per_second": 59.944, | |
| "eval_steps_per_second": 7.612, | |
| "step": 3570 | |
| }, | |
| { | |
| "epoch": 33.46, | |
| "learning_rate": 6.624299065420561e-05, | |
| "loss": 0.005, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 33.46, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6049844026565552, | |
| "eval_runtime": 3.2167, | |
| "eval_samples_per_second": 58.755, | |
| "eval_steps_per_second": 7.461, | |
| "step": 3580 | |
| }, | |
| { | |
| "epoch": 33.55, | |
| "learning_rate": 6.586915887850467e-05, | |
| "loss": 0.0007, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 33.55, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6010342836380005, | |
| "eval_runtime": 3.1643, | |
| "eval_samples_per_second": 59.729, | |
| "eval_steps_per_second": 7.585, | |
| "step": 3590 | |
| }, | |
| { | |
| "epoch": 33.64, | |
| "learning_rate": 6.549532710280374e-05, | |
| "loss": 0.0065, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 33.64, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6033494472503662, | |
| "eval_runtime": 3.3617, | |
| "eval_samples_per_second": 56.222, | |
| "eval_steps_per_second": 7.139, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 33.74, | |
| "learning_rate": 6.512149532710281e-05, | |
| "loss": 0.0047, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 33.74, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.602766513824463, | |
| "eval_runtime": 3.347, | |
| "eval_samples_per_second": 56.469, | |
| "eval_steps_per_second": 7.171, | |
| "step": 3610 | |
| }, | |
| { | |
| "epoch": 33.83, | |
| "learning_rate": 6.474766355140188e-05, | |
| "loss": 0.0174, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 33.83, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.6135550737380981, | |
| "eval_runtime": 3.3024, | |
| "eval_samples_per_second": 57.231, | |
| "eval_steps_per_second": 7.267, | |
| "step": 3620 | |
| }, | |
| { | |
| "epoch": 33.93, | |
| "learning_rate": 6.437383177570093e-05, | |
| "loss": 0.0057, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 33.93, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.6196566820144653, | |
| "eval_runtime": 3.1607, | |
| "eval_samples_per_second": 59.797, | |
| "eval_steps_per_second": 7.593, | |
| "step": 3630 | |
| }, | |
| { | |
| "epoch": 34.02, | |
| "learning_rate": 6.400000000000001e-05, | |
| "loss": 0.0007, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 34.02, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.6192022562026978, | |
| "eval_runtime": 3.2012, | |
| "eval_samples_per_second": 59.04, | |
| "eval_steps_per_second": 7.497, | |
| "step": 3640 | |
| }, | |
| { | |
| "epoch": 34.11, | |
| "learning_rate": 6.362616822429906e-05, | |
| "loss": 0.01, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 34.11, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.616557240486145, | |
| "eval_runtime": 3.0372, | |
| "eval_samples_per_second": 62.228, | |
| "eval_steps_per_second": 7.902, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 34.21, | |
| "learning_rate": 6.325233644859813e-05, | |
| "loss": 0.0144, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 34.21, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.6212772130966187, | |
| "eval_runtime": 3.1343, | |
| "eval_samples_per_second": 60.3, | |
| "eval_steps_per_second": 7.657, | |
| "step": 3660 | |
| }, | |
| { | |
| "epoch": 34.3, | |
| "learning_rate": 6.28785046728972e-05, | |
| "loss": 0.0007, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 34.3, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.6278337240219116, | |
| "eval_runtime": 3.1421, | |
| "eval_samples_per_second": 60.15, | |
| "eval_steps_per_second": 7.638, | |
| "step": 3670 | |
| }, | |
| { | |
| "epoch": 34.39, | |
| "learning_rate": 6.250467289719625e-05, | |
| "loss": 0.0095, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 34.39, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6321161985397339, | |
| "eval_runtime": 3.249, | |
| "eval_samples_per_second": 58.171, | |
| "eval_steps_per_second": 7.387, | |
| "step": 3680 | |
| }, | |
| { | |
| "epoch": 34.49, | |
| "learning_rate": 6.213084112149533e-05, | |
| "loss": 0.0006, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 34.49, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6336792707443237, | |
| "eval_runtime": 3.1451, | |
| "eval_samples_per_second": 60.093, | |
| "eval_steps_per_second": 7.631, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 34.58, | |
| "learning_rate": 6.175700934579439e-05, | |
| "loss": 0.0007, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 34.58, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6350607872009277, | |
| "eval_runtime": 3.1678, | |
| "eval_samples_per_second": 59.662, | |
| "eval_steps_per_second": 7.576, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 34.67, | |
| "learning_rate": 6.138317757009347e-05, | |
| "loss": 0.0006, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 34.67, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6363701820373535, | |
| "eval_runtime": 3.2215, | |
| "eval_samples_per_second": 58.669, | |
| "eval_steps_per_second": 7.45, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 34.77, | |
| "learning_rate": 6.100934579439253e-05, | |
| "loss": 0.0063, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 34.77, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6366699934005737, | |
| "eval_runtime": 3.2135, | |
| "eval_samples_per_second": 58.815, | |
| "eval_steps_per_second": 7.469, | |
| "step": 3720 | |
| }, | |
| { | |
| "epoch": 34.86, | |
| "learning_rate": 6.0635514018691595e-05, | |
| "loss": 0.0062, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 34.86, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6348670721054077, | |
| "eval_runtime": 3.0917, | |
| "eval_samples_per_second": 61.132, | |
| "eval_steps_per_second": 7.763, | |
| "step": 3730 | |
| }, | |
| { | |
| "epoch": 34.95, | |
| "learning_rate": 6.0261682242990656e-05, | |
| "loss": 0.0064, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 34.95, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6241209506988525, | |
| "eval_runtime": 3.38, | |
| "eval_samples_per_second": 55.917, | |
| "eval_steps_per_second": 7.101, | |
| "step": 3740 | |
| }, | |
| { | |
| "epoch": 35.05, | |
| "learning_rate": 5.9887850467289716e-05, | |
| "loss": 0.0006, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 35.05, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6208295822143555, | |
| "eval_runtime": 3.2066, | |
| "eval_samples_per_second": 58.942, | |
| "eval_steps_per_second": 7.485, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 35.14, | |
| "learning_rate": 5.951401869158879e-05, | |
| "loss": 0.0006, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 35.14, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6204679012298584, | |
| "eval_runtime": 3.2685, | |
| "eval_samples_per_second": 57.825, | |
| "eval_steps_per_second": 7.343, | |
| "step": 3760 | |
| }, | |
| { | |
| "epoch": 35.23, | |
| "learning_rate": 5.914018691588785e-05, | |
| "loss": 0.0137, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 35.23, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6377967596054077, | |
| "eval_runtime": 3.1739, | |
| "eval_samples_per_second": 59.549, | |
| "eval_steps_per_second": 7.562, | |
| "step": 3770 | |
| }, | |
| { | |
| "epoch": 35.33, | |
| "learning_rate": 5.8766355140186925e-05, | |
| "loss": 0.0007, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 35.33, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6510041952133179, | |
| "eval_runtime": 3.0141, | |
| "eval_samples_per_second": 62.706, | |
| "eval_steps_per_second": 7.963, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 35.42, | |
| "learning_rate": 5.8392523364485985e-05, | |
| "loss": 0.0113, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 35.42, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6442958116531372, | |
| "eval_runtime": 3.0277, | |
| "eval_samples_per_second": 62.424, | |
| "eval_steps_per_second": 7.927, | |
| "step": 3790 | |
| }, | |
| { | |
| "epoch": 35.51, | |
| "learning_rate": 5.8018691588785046e-05, | |
| "loss": 0.0112, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 35.51, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6433544158935547, | |
| "eval_runtime": 3.1874, | |
| "eval_samples_per_second": 59.295, | |
| "eval_steps_per_second": 7.53, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 35.61, | |
| "learning_rate": 5.764485981308412e-05, | |
| "loss": 0.0006, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 35.61, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6435635089874268, | |
| "eval_runtime": 3.281, | |
| "eval_samples_per_second": 57.604, | |
| "eval_steps_per_second": 7.315, | |
| "step": 3810 | |
| }, | |
| { | |
| "epoch": 35.7, | |
| "learning_rate": 5.727102803738318e-05, | |
| "loss": 0.0006, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 35.7, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6441317796707153, | |
| "eval_runtime": 3.1827, | |
| "eval_samples_per_second": 59.384, | |
| "eval_steps_per_second": 7.541, | |
| "step": 3820 | |
| }, | |
| { | |
| "epoch": 35.79, | |
| "learning_rate": 5.6897196261682254e-05, | |
| "loss": 0.0145, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 35.79, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6258851289749146, | |
| "eval_runtime": 3.2346, | |
| "eval_samples_per_second": 58.431, | |
| "eval_steps_per_second": 7.42, | |
| "step": 3830 | |
| }, | |
| { | |
| "epoch": 35.89, | |
| "learning_rate": 5.6523364485981315e-05, | |
| "loss": 0.0006, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 35.89, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6195772886276245, | |
| "eval_runtime": 3.2109, | |
| "eval_samples_per_second": 58.862, | |
| "eval_steps_per_second": 7.475, | |
| "step": 3840 | |
| }, | |
| { | |
| "epoch": 35.98, | |
| "learning_rate": 5.6149532710280375e-05, | |
| "loss": 0.0047, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 35.98, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6145455837249756, | |
| "eval_runtime": 3.1308, | |
| "eval_samples_per_second": 60.367, | |
| "eval_steps_per_second": 7.666, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 36.07, | |
| "learning_rate": 5.577570093457944e-05, | |
| "loss": 0.0096, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 36.07, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6121631860733032, | |
| "eval_runtime": 3.1516, | |
| "eval_samples_per_second": 59.97, | |
| "eval_steps_per_second": 7.615, | |
| "step": 3860 | |
| }, | |
| { | |
| "epoch": 36.17, | |
| "learning_rate": 5.54018691588785e-05, | |
| "loss": 0.0006, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 36.17, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6133732795715332, | |
| "eval_runtime": 3.2412, | |
| "eval_samples_per_second": 58.312, | |
| "eval_steps_per_second": 7.405, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 36.26, | |
| "learning_rate": 5.502803738317758e-05, | |
| "loss": 0.0063, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 36.26, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6175814867019653, | |
| "eval_runtime": 3.1207, | |
| "eval_samples_per_second": 60.563, | |
| "eval_steps_per_second": 7.691, | |
| "step": 3880 | |
| }, | |
| { | |
| "epoch": 36.36, | |
| "learning_rate": 5.465420560747664e-05, | |
| "loss": 0.0049, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 36.36, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6226632595062256, | |
| "eval_runtime": 3.2034, | |
| "eval_samples_per_second": 59.0, | |
| "eval_steps_per_second": 7.492, | |
| "step": 3890 | |
| }, | |
| { | |
| "epoch": 36.45, | |
| "learning_rate": 5.42803738317757e-05, | |
| "loss": 0.0006, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 36.45, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6221052408218384, | |
| "eval_runtime": 3.2707, | |
| "eval_samples_per_second": 57.786, | |
| "eval_steps_per_second": 7.338, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 36.54, | |
| "learning_rate": 5.390654205607477e-05, | |
| "loss": 0.0057, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 36.54, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.626526951789856, | |
| "eval_runtime": 3.2153, | |
| "eval_samples_per_second": 58.781, | |
| "eval_steps_per_second": 7.464, | |
| "step": 3910 | |
| }, | |
| { | |
| "epoch": 36.64, | |
| "learning_rate": 5.353271028037383e-05, | |
| "loss": 0.0006, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 36.64, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6315840482711792, | |
| "eval_runtime": 3.2376, | |
| "eval_samples_per_second": 58.377, | |
| "eval_steps_per_second": 7.413, | |
| "step": 3920 | |
| }, | |
| { | |
| "epoch": 36.73, | |
| "learning_rate": 5.3158878504672906e-05, | |
| "loss": 0.0095, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 36.73, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6349341869354248, | |
| "eval_runtime": 3.1421, | |
| "eval_samples_per_second": 60.15, | |
| "eval_steps_per_second": 7.638, | |
| "step": 3930 | |
| }, | |
| { | |
| "epoch": 36.82, | |
| "learning_rate": 5.2785046728971966e-05, | |
| "loss": 0.0062, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 36.82, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6396187543869019, | |
| "eval_runtime": 3.1897, | |
| "eval_samples_per_second": 59.254, | |
| "eval_steps_per_second": 7.524, | |
| "step": 3940 | |
| }, | |
| { | |
| "epoch": 36.92, | |
| "learning_rate": 5.241121495327103e-05, | |
| "loss": 0.0062, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 36.92, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6347572803497314, | |
| "eval_runtime": 3.3171, | |
| "eval_samples_per_second": 56.978, | |
| "eval_steps_per_second": 7.235, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 37.01, | |
| "learning_rate": 5.20373831775701e-05, | |
| "loss": 0.0052, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 37.01, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6283901929855347, | |
| "eval_runtime": 3.2006, | |
| "eval_samples_per_second": 59.051, | |
| "eval_steps_per_second": 7.499, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 37.1, | |
| "learning_rate": 5.166355140186916e-05, | |
| "loss": 0.0054, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 37.1, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6296182870864868, | |
| "eval_runtime": 3.2544, | |
| "eval_samples_per_second": 58.075, | |
| "eval_steps_per_second": 7.375, | |
| "step": 3970 | |
| }, | |
| { | |
| "epoch": 37.2, | |
| "learning_rate": 5.128971962616823e-05, | |
| "loss": 0.0142, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 37.2, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6342829465866089, | |
| "eval_runtime": 3.2879, | |
| "eval_samples_per_second": 57.483, | |
| "eval_steps_per_second": 7.299, | |
| "step": 3980 | |
| }, | |
| { | |
| "epoch": 37.29, | |
| "learning_rate": 5.091588785046729e-05, | |
| "loss": 0.0006, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 37.29, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6389538049697876, | |
| "eval_runtime": 3.1746, | |
| "eval_samples_per_second": 59.535, | |
| "eval_steps_per_second": 7.56, | |
| "step": 3990 | |
| }, | |
| { | |
| "epoch": 37.38, | |
| "learning_rate": 5.0542056074766356e-05, | |
| "loss": 0.0056, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 37.38, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.63650643825531, | |
| "eval_runtime": 3.2374, | |
| "eval_samples_per_second": 58.38, | |
| "eval_steps_per_second": 7.413, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 37.48, | |
| "learning_rate": 5.0168224299065423e-05, | |
| "loss": 0.0006, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 37.48, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6352988481521606, | |
| "eval_runtime": 3.2389, | |
| "eval_samples_per_second": 58.353, | |
| "eval_steps_per_second": 7.41, | |
| "step": 4010 | |
| }, | |
| { | |
| "epoch": 37.57, | |
| "learning_rate": 4.9794392523364484e-05, | |
| "loss": 0.0006, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 37.57, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6355526447296143, | |
| "eval_runtime": 3.3214, | |
| "eval_samples_per_second": 56.904, | |
| "eval_steps_per_second": 7.226, | |
| "step": 4020 | |
| }, | |
| { | |
| "epoch": 37.66, | |
| "learning_rate": 4.942056074766355e-05, | |
| "loss": 0.0006, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 37.66, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6356136798858643, | |
| "eval_runtime": 3.3612, | |
| "eval_samples_per_second": 56.23, | |
| "eval_steps_per_second": 7.14, | |
| "step": 4030 | |
| }, | |
| { | |
| "epoch": 37.76, | |
| "learning_rate": 4.904672897196262e-05, | |
| "loss": 0.0006, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 37.76, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6366937160491943, | |
| "eval_runtime": 3.2144, | |
| "eval_samples_per_second": 58.797, | |
| "eval_steps_per_second": 7.466, | |
| "step": 4040 | |
| }, | |
| { | |
| "epoch": 37.85, | |
| "learning_rate": 4.8672897196261686e-05, | |
| "loss": 0.01, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 37.85, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.631759762763977, | |
| "eval_runtime": 3.29, | |
| "eval_samples_per_second": 57.446, | |
| "eval_steps_per_second": 7.295, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 37.94, | |
| "learning_rate": 4.829906542056075e-05, | |
| "loss": 0.0151, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 37.94, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6293249130249023, | |
| "eval_runtime": 3.1536, | |
| "eval_samples_per_second": 59.931, | |
| "eval_steps_per_second": 7.61, | |
| "step": 4060 | |
| }, | |
| { | |
| "epoch": 38.04, | |
| "learning_rate": 4.792523364485981e-05, | |
| "loss": 0.006, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 38.04, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6346244812011719, | |
| "eval_runtime": 3.2143, | |
| "eval_samples_per_second": 58.801, | |
| "eval_steps_per_second": 7.467, | |
| "step": 4070 | |
| }, | |
| { | |
| "epoch": 38.13, | |
| "learning_rate": 4.755140186915888e-05, | |
| "loss": 0.0006, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 38.13, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6401140689849854, | |
| "eval_runtime": 3.257, | |
| "eval_samples_per_second": 58.028, | |
| "eval_steps_per_second": 7.369, | |
| "step": 4080 | |
| }, | |
| { | |
| "epoch": 38.22, | |
| "learning_rate": 4.717757009345795e-05, | |
| "loss": 0.0006, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 38.22, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.642866849899292, | |
| "eval_runtime": 3.1879, | |
| "eval_samples_per_second": 59.286, | |
| "eval_steps_per_second": 7.528, | |
| "step": 4090 | |
| }, | |
| { | |
| "epoch": 38.32, | |
| "learning_rate": 4.6803738317757015e-05, | |
| "loss": 0.0093, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 38.32, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.635596513748169, | |
| "eval_runtime": 3.2354, | |
| "eval_samples_per_second": 58.417, | |
| "eval_steps_per_second": 7.418, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 38.41, | |
| "learning_rate": 4.6429906542056075e-05, | |
| "loss": 0.0046, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 38.41, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6306812763214111, | |
| "eval_runtime": 3.2449, | |
| "eval_samples_per_second": 58.246, | |
| "eval_steps_per_second": 7.396, | |
| "step": 4110 | |
| }, | |
| { | |
| "epoch": 38.5, | |
| "learning_rate": 4.605607476635514e-05, | |
| "loss": 0.0111, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 38.5, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.6402584314346313, | |
| "eval_runtime": 3.1872, | |
| "eval_samples_per_second": 59.299, | |
| "eval_steps_per_second": 7.53, | |
| "step": 4120 | |
| }, | |
| { | |
| "epoch": 38.6, | |
| "learning_rate": 4.56822429906542e-05, | |
| "loss": 0.0005, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 38.6, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.6475365161895752, | |
| "eval_runtime": 3.5346, | |
| "eval_samples_per_second": 53.472, | |
| "eval_steps_per_second": 6.79, | |
| "step": 4130 | |
| }, | |
| { | |
| "epoch": 38.69, | |
| "learning_rate": 4.530841121495327e-05, | |
| "loss": 0.0006, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 38.69, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.6509393453598022, | |
| "eval_runtime": 3.062, | |
| "eval_samples_per_second": 61.724, | |
| "eval_steps_per_second": 7.838, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 38.79, | |
| "learning_rate": 4.493457943925234e-05, | |
| "loss": 0.0006, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 38.79, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.652215600013733, | |
| "eval_runtime": 3.5079, | |
| "eval_samples_per_second": 53.878, | |
| "eval_steps_per_second": 6.842, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 38.88, | |
| "learning_rate": 4.4560747663551405e-05, | |
| "loss": 0.0075, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 38.88, | |
| "eval_accuracy": 0.7566137566137566, | |
| "eval_loss": 1.653160810470581, | |
| "eval_runtime": 3.2762, | |
| "eval_samples_per_second": 57.688, | |
| "eval_steps_per_second": 7.325, | |
| "step": 4160 | |
| }, | |
| { | |
| "epoch": 38.97, | |
| "learning_rate": 4.418691588785047e-05, | |
| "loss": 0.0097, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 38.97, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6547893285751343, | |
| "eval_runtime": 3.2144, | |
| "eval_samples_per_second": 58.797, | |
| "eval_steps_per_second": 7.466, | |
| "step": 4170 | |
| }, | |
| { | |
| "epoch": 39.07, | |
| "learning_rate": 4.381308411214953e-05, | |
| "loss": 0.0058, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 39.07, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6471433639526367, | |
| "eval_runtime": 3.2814, | |
| "eval_samples_per_second": 57.598, | |
| "eval_steps_per_second": 7.314, | |
| "step": 4180 | |
| }, | |
| { | |
| "epoch": 39.16, | |
| "learning_rate": 4.34392523364486e-05, | |
| "loss": 0.0049, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 39.16, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6409401893615723, | |
| "eval_runtime": 3.1609, | |
| "eval_samples_per_second": 59.793, | |
| "eval_steps_per_second": 7.593, | |
| "step": 4190 | |
| }, | |
| { | |
| "epoch": 39.25, | |
| "learning_rate": 4.306542056074767e-05, | |
| "loss": 0.0111, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 39.25, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6414356231689453, | |
| "eval_runtime": 3.2279, | |
| "eval_samples_per_second": 58.552, | |
| "eval_steps_per_second": 7.435, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 39.35, | |
| "learning_rate": 4.2691588785046734e-05, | |
| "loss": 0.0052, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 39.35, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.652433156967163, | |
| "eval_runtime": 3.1373, | |
| "eval_samples_per_second": 60.242, | |
| "eval_steps_per_second": 7.65, | |
| "step": 4210 | |
| }, | |
| { | |
| "epoch": 39.44, | |
| "learning_rate": 4.23177570093458e-05, | |
| "loss": 0.0005, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 39.44, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.657133936882019, | |
| "eval_runtime": 3.2697, | |
| "eval_samples_per_second": 57.804, | |
| "eval_steps_per_second": 7.34, | |
| "step": 4220 | |
| }, | |
| { | |
| "epoch": 39.53, | |
| "learning_rate": 4.194392523364486e-05, | |
| "loss": 0.0052, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 39.53, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6594574451446533, | |
| "eval_runtime": 3.2345, | |
| "eval_samples_per_second": 58.432, | |
| "eval_steps_per_second": 7.42, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 39.63, | |
| "learning_rate": 4.157009345794393e-05, | |
| "loss": 0.0061, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 39.63, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6557565927505493, | |
| "eval_runtime": 3.268, | |
| "eval_samples_per_second": 57.833, | |
| "eval_steps_per_second": 7.344, | |
| "step": 4240 | |
| }, | |
| { | |
| "epoch": 39.72, | |
| "learning_rate": 4.119626168224299e-05, | |
| "loss": 0.0056, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 39.72, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.649285912513733, | |
| "eval_runtime": 3.2248, | |
| "eval_samples_per_second": 58.608, | |
| "eval_steps_per_second": 7.442, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 39.81, | |
| "learning_rate": 4.082242990654206e-05, | |
| "loss": 0.0006, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 39.81, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6455833911895752, | |
| "eval_runtime": 3.1611, | |
| "eval_samples_per_second": 59.789, | |
| "eval_steps_per_second": 7.592, | |
| "step": 4260 | |
| }, | |
| { | |
| "epoch": 39.91, | |
| "learning_rate": 4.0448598130841124e-05, | |
| "loss": 0.011, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 39.91, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6422607898712158, | |
| "eval_runtime": 3.2656, | |
| "eval_samples_per_second": 57.877, | |
| "eval_steps_per_second": 7.349, | |
| "step": 4270 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "learning_rate": 4.0074766355140184e-05, | |
| "loss": 0.006, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6446949243545532, | |
| "eval_runtime": 3.3462, | |
| "eval_samples_per_second": 56.481, | |
| "eval_steps_per_second": 7.172, | |
| "step": 4280 | |
| }, | |
| { | |
| "epoch": 40.09, | |
| "learning_rate": 3.970093457943925e-05, | |
| "loss": 0.0043, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 40.09, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6466460227966309, | |
| "eval_runtime": 3.3213, | |
| "eval_samples_per_second": 56.906, | |
| "eval_steps_per_second": 7.226, | |
| "step": 4290 | |
| }, | |
| { | |
| "epoch": 40.19, | |
| "learning_rate": 3.932710280373832e-05, | |
| "loss": 0.0098, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 40.19, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6464401483535767, | |
| "eval_runtime": 3.1753, | |
| "eval_samples_per_second": 59.521, | |
| "eval_steps_per_second": 7.558, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 40.28, | |
| "learning_rate": 3.8953271028037386e-05, | |
| "loss": 0.006, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 40.28, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6519055366516113, | |
| "eval_runtime": 3.2608, | |
| "eval_samples_per_second": 57.961, | |
| "eval_steps_per_second": 7.36, | |
| "step": 4310 | |
| }, | |
| { | |
| "epoch": 40.37, | |
| "learning_rate": 3.857943925233645e-05, | |
| "loss": 0.0053, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 40.37, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6609773635864258, | |
| "eval_runtime": 3.2634, | |
| "eval_samples_per_second": 57.916, | |
| "eval_steps_per_second": 7.354, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 40.47, | |
| "learning_rate": 3.8205607476635514e-05, | |
| "loss": 0.0005, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 40.47, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6645921468734741, | |
| "eval_runtime": 3.2004, | |
| "eval_samples_per_second": 59.054, | |
| "eval_steps_per_second": 7.499, | |
| "step": 4330 | |
| }, | |
| { | |
| "epoch": 40.56, | |
| "learning_rate": 3.783177570093458e-05, | |
| "loss": 0.0006, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 40.56, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6660394668579102, | |
| "eval_runtime": 3.415, | |
| "eval_samples_per_second": 55.344, | |
| "eval_steps_per_second": 7.028, | |
| "step": 4340 | |
| }, | |
| { | |
| "epoch": 40.65, | |
| "learning_rate": 3.745794392523365e-05, | |
| "loss": 0.0082, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 40.65, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.669595718383789, | |
| "eval_runtime": 3.3898, | |
| "eval_samples_per_second": 55.756, | |
| "eval_steps_per_second": 7.08, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 40.75, | |
| "learning_rate": 3.7084112149532715e-05, | |
| "loss": 0.0064, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 40.75, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.672002911567688, | |
| "eval_runtime": 3.3575, | |
| "eval_samples_per_second": 56.291, | |
| "eval_steps_per_second": 7.148, | |
| "step": 4360 | |
| }, | |
| { | |
| "epoch": 40.84, | |
| "learning_rate": 3.6710280373831776e-05, | |
| "loss": 0.0005, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 40.84, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6732321977615356, | |
| "eval_runtime": 2.9775, | |
| "eval_samples_per_second": 63.477, | |
| "eval_steps_per_second": 8.061, | |
| "step": 4370 | |
| }, | |
| { | |
| "epoch": 40.93, | |
| "learning_rate": 3.633644859813084e-05, | |
| "loss": 0.0065, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 40.93, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6676148176193237, | |
| "eval_runtime": 3.245, | |
| "eval_samples_per_second": 58.243, | |
| "eval_steps_per_second": 7.396, | |
| "step": 4380 | |
| }, | |
| { | |
| "epoch": 41.03, | |
| "learning_rate": 3.5962616822429904e-05, | |
| "loss": 0.006, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 41.03, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6597942113876343, | |
| "eval_runtime": 3.4525, | |
| "eval_samples_per_second": 54.742, | |
| "eval_steps_per_second": 6.951, | |
| "step": 4390 | |
| }, | |
| { | |
| "epoch": 41.12, | |
| "learning_rate": 3.558878504672897e-05, | |
| "loss": 0.0047, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 41.12, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6621626615524292, | |
| "eval_runtime": 3.1882, | |
| "eval_samples_per_second": 59.28, | |
| "eval_steps_per_second": 7.528, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 41.21, | |
| "learning_rate": 3.521495327102804e-05, | |
| "loss": 0.0055, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 41.21, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6600810289382935, | |
| "eval_runtime": 3.1527, | |
| "eval_samples_per_second": 59.949, | |
| "eval_steps_per_second": 7.613, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 41.31, | |
| "learning_rate": 3.4841121495327105e-05, | |
| "loss": 0.0049, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 41.31, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6604359149932861, | |
| "eval_runtime": 3.0872, | |
| "eval_samples_per_second": 61.22, | |
| "eval_steps_per_second": 7.774, | |
| "step": 4420 | |
| }, | |
| { | |
| "epoch": 41.4, | |
| "learning_rate": 3.446728971962617e-05, | |
| "loss": 0.0005, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 41.4, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.664872646331787, | |
| "eval_runtime": 3.2713, | |
| "eval_samples_per_second": 57.776, | |
| "eval_steps_per_second": 7.337, | |
| "step": 4430 | |
| }, | |
| { | |
| "epoch": 41.5, | |
| "learning_rate": 3.409345794392523e-05, | |
| "loss": 0.005, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 41.5, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6663637161254883, | |
| "eval_runtime": 3.1479, | |
| "eval_samples_per_second": 60.04, | |
| "eval_steps_per_second": 7.624, | |
| "step": 4440 | |
| }, | |
| { | |
| "epoch": 41.59, | |
| "learning_rate": 3.37196261682243e-05, | |
| "loss": 0.0098, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 41.59, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6684166193008423, | |
| "eval_runtime": 3.1477, | |
| "eval_samples_per_second": 60.043, | |
| "eval_steps_per_second": 7.625, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 41.68, | |
| "learning_rate": 3.334579439252337e-05, | |
| "loss": 0.0005, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 41.68, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.672025203704834, | |
| "eval_runtime": 3.1099, | |
| "eval_samples_per_second": 60.773, | |
| "eval_steps_per_second": 7.717, | |
| "step": 4460 | |
| }, | |
| { | |
| "epoch": 41.78, | |
| "learning_rate": 3.2971962616822435e-05, | |
| "loss": 0.0148, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 41.78, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6690597534179688, | |
| "eval_runtime": 3.17, | |
| "eval_samples_per_second": 59.622, | |
| "eval_steps_per_second": 7.571, | |
| "step": 4470 | |
| }, | |
| { | |
| "epoch": 41.87, | |
| "learning_rate": 3.25981308411215e-05, | |
| "loss": 0.0005, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 41.87, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6645516157150269, | |
| "eval_runtime": 3.0775, | |
| "eval_samples_per_second": 61.414, | |
| "eval_steps_per_second": 7.799, | |
| "step": 4480 | |
| }, | |
| { | |
| "epoch": 41.96, | |
| "learning_rate": 3.222429906542056e-05, | |
| "loss": 0.0052, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 41.96, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6626436710357666, | |
| "eval_runtime": 3.1486, | |
| "eval_samples_per_second": 60.027, | |
| "eval_steps_per_second": 7.622, | |
| "step": 4490 | |
| }, | |
| { | |
| "epoch": 42.06, | |
| "learning_rate": 3.185046728971963e-05, | |
| "loss": 0.0052, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 42.06, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6587588787078857, | |
| "eval_runtime": 3.3613, | |
| "eval_samples_per_second": 56.229, | |
| "eval_steps_per_second": 7.14, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 42.15, | |
| "learning_rate": 3.147663551401869e-05, | |
| "loss": 0.0044, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 42.15, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6567378044128418, | |
| "eval_runtime": 3.1117, | |
| "eval_samples_per_second": 60.738, | |
| "eval_steps_per_second": 7.713, | |
| "step": 4510 | |
| }, | |
| { | |
| "epoch": 42.24, | |
| "learning_rate": 3.110280373831776e-05, | |
| "loss": 0.0059, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 42.24, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6559375524520874, | |
| "eval_runtime": 3.1594, | |
| "eval_samples_per_second": 59.821, | |
| "eval_steps_per_second": 7.596, | |
| "step": 4520 | |
| }, | |
| { | |
| "epoch": 42.34, | |
| "learning_rate": 3.0728971962616824e-05, | |
| "loss": 0.0005, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 42.34, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6584206819534302, | |
| "eval_runtime": 3.215, | |
| "eval_samples_per_second": 58.786, | |
| "eval_steps_per_second": 7.465, | |
| "step": 4530 | |
| }, | |
| { | |
| "epoch": 42.43, | |
| "learning_rate": 3.0355140186915888e-05, | |
| "loss": 0.0046, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 42.43, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.658771276473999, | |
| "eval_runtime": 3.081, | |
| "eval_samples_per_second": 61.344, | |
| "eval_steps_per_second": 7.79, | |
| "step": 4540 | |
| }, | |
| { | |
| "epoch": 42.52, | |
| "learning_rate": 2.9981308411214952e-05, | |
| "loss": 0.0005, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 42.52, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6556823253631592, | |
| "eval_runtime": 3.7215, | |
| "eval_samples_per_second": 50.786, | |
| "eval_steps_per_second": 6.449, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 42.62, | |
| "learning_rate": 2.960747663551402e-05, | |
| "loss": 0.0005, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 42.62, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6551687717437744, | |
| "eval_runtime": 3.2069, | |
| "eval_samples_per_second": 58.935, | |
| "eval_steps_per_second": 7.484, | |
| "step": 4560 | |
| }, | |
| { | |
| "epoch": 42.71, | |
| "learning_rate": 2.9233644859813087e-05, | |
| "loss": 0.0005, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 42.71, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.655452013015747, | |
| "eval_runtime": 3.2606, | |
| "eval_samples_per_second": 57.964, | |
| "eval_steps_per_second": 7.361, | |
| "step": 4570 | |
| }, | |
| { | |
| "epoch": 42.8, | |
| "learning_rate": 2.8859813084112154e-05, | |
| "loss": 0.0107, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 42.8, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6596006155014038, | |
| "eval_runtime": 3.241, | |
| "eval_samples_per_second": 58.315, | |
| "eval_steps_per_second": 7.405, | |
| "step": 4580 | |
| }, | |
| { | |
| "epoch": 42.9, | |
| "learning_rate": 2.8485981308411214e-05, | |
| "loss": 0.015, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 42.9, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6657310724258423, | |
| "eval_runtime": 3.1747, | |
| "eval_samples_per_second": 59.534, | |
| "eval_steps_per_second": 7.56, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 42.99, | |
| "learning_rate": 2.811214953271028e-05, | |
| "loss": 0.0052, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 42.99, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6676826477050781, | |
| "eval_runtime": 3.254, | |
| "eval_samples_per_second": 58.083, | |
| "eval_steps_per_second": 7.376, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 43.08, | |
| "learning_rate": 2.7738317757009345e-05, | |
| "loss": 0.0051, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 43.08, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.668189525604248, | |
| "eval_runtime": 3.1876, | |
| "eval_samples_per_second": 59.293, | |
| "eval_steps_per_second": 7.529, | |
| "step": 4610 | |
| }, | |
| { | |
| "epoch": 43.18, | |
| "learning_rate": 2.7364485981308413e-05, | |
| "loss": 0.0051, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 43.18, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6663062572479248, | |
| "eval_runtime": 3.3928, | |
| "eval_samples_per_second": 55.707, | |
| "eval_steps_per_second": 7.074, | |
| "step": 4620 | |
| }, | |
| { | |
| "epoch": 43.27, | |
| "learning_rate": 2.699065420560748e-05, | |
| "loss": 0.0005, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 43.27, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.664025068283081, | |
| "eval_runtime": 3.2939, | |
| "eval_samples_per_second": 57.378, | |
| "eval_steps_per_second": 7.286, | |
| "step": 4630 | |
| }, | |
| { | |
| "epoch": 43.36, | |
| "learning_rate": 2.6616822429906547e-05, | |
| "loss": 0.0088, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 43.36, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6622815132141113, | |
| "eval_runtime": 3.135, | |
| "eval_samples_per_second": 60.287, | |
| "eval_steps_per_second": 7.655, | |
| "step": 4640 | |
| }, | |
| { | |
| "epoch": 43.46, | |
| "learning_rate": 2.6242990654205607e-05, | |
| "loss": 0.0053, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 43.46, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.664080262184143, | |
| "eval_runtime": 3.2274, | |
| "eval_samples_per_second": 58.56, | |
| "eval_steps_per_second": 7.436, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 43.55, | |
| "learning_rate": 2.5869158878504675e-05, | |
| "loss": 0.0064, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 43.55, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.665493130683899, | |
| "eval_runtime": 3.1518, | |
| "eval_samples_per_second": 59.966, | |
| "eval_steps_per_second": 7.615, | |
| "step": 4660 | |
| }, | |
| { | |
| "epoch": 43.64, | |
| "learning_rate": 2.549532710280374e-05, | |
| "loss": 0.0005, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 43.64, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6664865016937256, | |
| "eval_runtime": 3.2866, | |
| "eval_samples_per_second": 57.506, | |
| "eval_steps_per_second": 7.302, | |
| "step": 4670 | |
| }, | |
| { | |
| "epoch": 43.74, | |
| "learning_rate": 2.5121495327102806e-05, | |
| "loss": 0.005, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 43.74, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6677204370498657, | |
| "eval_runtime": 3.2047, | |
| "eval_samples_per_second": 58.976, | |
| "eval_steps_per_second": 7.489, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 43.83, | |
| "learning_rate": 2.474766355140187e-05, | |
| "loss": 0.0049, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 43.83, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6758949756622314, | |
| "eval_runtime": 3.4806, | |
| "eval_samples_per_second": 54.301, | |
| "eval_steps_per_second": 6.895, | |
| "step": 4690 | |
| }, | |
| { | |
| "epoch": 43.93, | |
| "learning_rate": 2.4373831775700937e-05, | |
| "loss": 0.0055, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 43.93, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6764609813690186, | |
| "eval_runtime": 3.2004, | |
| "eval_samples_per_second": 59.055, | |
| "eval_steps_per_second": 7.499, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 44.02, | |
| "learning_rate": 2.4e-05, | |
| "loss": 0.0144, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 44.02, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6740403175354004, | |
| "eval_runtime": 3.2944, | |
| "eval_samples_per_second": 57.37, | |
| "eval_steps_per_second": 7.285, | |
| "step": 4710 | |
| }, | |
| { | |
| "epoch": 44.11, | |
| "learning_rate": 2.3626168224299068e-05, | |
| "loss": 0.0005, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 44.11, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6708952188491821, | |
| "eval_runtime": 3.0279, | |
| "eval_samples_per_second": 62.42, | |
| "eval_steps_per_second": 7.926, | |
| "step": 4720 | |
| }, | |
| { | |
| "epoch": 44.21, | |
| "learning_rate": 2.325233644859813e-05, | |
| "loss": 0.0051, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 44.21, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6715834140777588, | |
| "eval_runtime": 3.2347, | |
| "eval_samples_per_second": 58.428, | |
| "eval_steps_per_second": 7.419, | |
| "step": 4730 | |
| }, | |
| { | |
| "epoch": 44.3, | |
| "learning_rate": 2.2878504672897196e-05, | |
| "loss": 0.005, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 44.3, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6689761877059937, | |
| "eval_runtime": 3.1331, | |
| "eval_samples_per_second": 60.324, | |
| "eval_steps_per_second": 7.66, | |
| "step": 4740 | |
| }, | |
| { | |
| "epoch": 44.39, | |
| "learning_rate": 2.2504672897196263e-05, | |
| "loss": 0.0005, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 44.39, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6669028997421265, | |
| "eval_runtime": 2.9298, | |
| "eval_samples_per_second": 64.509, | |
| "eval_steps_per_second": 8.192, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 44.49, | |
| "learning_rate": 2.2130841121495327e-05, | |
| "loss": 0.005, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 44.49, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6714545488357544, | |
| "eval_runtime": 3.0674, | |
| "eval_samples_per_second": 61.615, | |
| "eval_steps_per_second": 7.824, | |
| "step": 4760 | |
| }, | |
| { | |
| "epoch": 44.58, | |
| "learning_rate": 2.1757009345794394e-05, | |
| "loss": 0.0005, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 44.58, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6736085414886475, | |
| "eval_runtime": 2.9894, | |
| "eval_samples_per_second": 63.222, | |
| "eval_steps_per_second": 8.028, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 44.67, | |
| "learning_rate": 2.138317757009346e-05, | |
| "loss": 0.0046, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 44.67, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.676229476928711, | |
| "eval_runtime": 2.9735, | |
| "eval_samples_per_second": 63.562, | |
| "eval_steps_per_second": 8.071, | |
| "step": 4780 | |
| }, | |
| { | |
| "epoch": 44.77, | |
| "learning_rate": 2.1009345794392525e-05, | |
| "loss": 0.0055, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 44.77, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6757352352142334, | |
| "eval_runtime": 3.1208, | |
| "eval_samples_per_second": 60.561, | |
| "eval_steps_per_second": 7.69, | |
| "step": 4790 | |
| }, | |
| { | |
| "epoch": 44.86, | |
| "learning_rate": 2.063551401869159e-05, | |
| "loss": 0.0098, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 44.86, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.670250654220581, | |
| "eval_runtime": 3.1041, | |
| "eval_samples_per_second": 60.887, | |
| "eval_steps_per_second": 7.732, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 44.95, | |
| "learning_rate": 2.0261682242990653e-05, | |
| "loss": 0.005, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 44.95, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6712110042572021, | |
| "eval_runtime": 3.374, | |
| "eval_samples_per_second": 56.016, | |
| "eval_steps_per_second": 7.113, | |
| "step": 4810 | |
| }, | |
| { | |
| "epoch": 45.05, | |
| "learning_rate": 1.988785046728972e-05, | |
| "loss": 0.005, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 45.05, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6744420528411865, | |
| "eval_runtime": 3.1947, | |
| "eval_samples_per_second": 59.161, | |
| "eval_steps_per_second": 7.513, | |
| "step": 4820 | |
| }, | |
| { | |
| "epoch": 45.14, | |
| "learning_rate": 1.9514018691588787e-05, | |
| "loss": 0.0005, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 45.14, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6774510145187378, | |
| "eval_runtime": 3.126, | |
| "eval_samples_per_second": 60.46, | |
| "eval_steps_per_second": 7.677, | |
| "step": 4830 | |
| }, | |
| { | |
| "epoch": 45.23, | |
| "learning_rate": 1.914018691588785e-05, | |
| "loss": 0.0005, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 45.23, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6790132522583008, | |
| "eval_runtime": 2.9821, | |
| "eval_samples_per_second": 63.379, | |
| "eval_steps_per_second": 8.048, | |
| "step": 4840 | |
| }, | |
| { | |
| "epoch": 45.33, | |
| "learning_rate": 1.8766355140186918e-05, | |
| "loss": 0.0005, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 45.33, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6794980764389038, | |
| "eval_runtime": 3.139, | |
| "eval_samples_per_second": 60.21, | |
| "eval_steps_per_second": 7.646, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 45.42, | |
| "learning_rate": 1.8392523364485982e-05, | |
| "loss": 0.0049, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 45.42, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6801530122756958, | |
| "eval_runtime": 2.945, | |
| "eval_samples_per_second": 64.176, | |
| "eval_steps_per_second": 8.149, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 45.51, | |
| "learning_rate": 1.8018691588785046e-05, | |
| "loss": 0.0051, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 45.51, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6776082515716553, | |
| "eval_runtime": 2.9852, | |
| "eval_samples_per_second": 63.313, | |
| "eval_steps_per_second": 8.04, | |
| "step": 4870 | |
| }, | |
| { | |
| "epoch": 45.61, | |
| "learning_rate": 1.7644859813084113e-05, | |
| "loss": 0.0049, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 45.61, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6780359745025635, | |
| "eval_runtime": 3.0255, | |
| "eval_samples_per_second": 62.469, | |
| "eval_steps_per_second": 7.933, | |
| "step": 4880 | |
| }, | |
| { | |
| "epoch": 45.7, | |
| "learning_rate": 1.7271028037383177e-05, | |
| "loss": 0.0091, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 45.7, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6785778999328613, | |
| "eval_runtime": 2.9136, | |
| "eval_samples_per_second": 64.868, | |
| "eval_steps_per_second": 8.237, | |
| "step": 4890 | |
| }, | |
| { | |
| "epoch": 45.79, | |
| "learning_rate": 1.6897196261682244e-05, | |
| "loss": 0.0046, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 45.79, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6758513450622559, | |
| "eval_runtime": 2.9184, | |
| "eval_samples_per_second": 64.761, | |
| "eval_steps_per_second": 8.224, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 45.89, | |
| "learning_rate": 1.652336448598131e-05, | |
| "loss": 0.0056, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 45.89, | |
| "eval_accuracy": 0.7724867724867724, | |
| "eval_loss": 1.6727031469345093, | |
| "eval_runtime": 2.9001, | |
| "eval_samples_per_second": 65.169, | |
| "eval_steps_per_second": 8.275, | |
| "step": 4910 | |
| }, | |
| { | |
| "epoch": 45.98, | |
| "learning_rate": 1.6149532710280375e-05, | |
| "loss": 0.011, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 45.98, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6746748685836792, | |
| "eval_runtime": 2.9034, | |
| "eval_samples_per_second": 65.095, | |
| "eval_steps_per_second": 8.266, | |
| "step": 4920 | |
| }, | |
| { | |
| "epoch": 46.07, | |
| "learning_rate": 1.577570093457944e-05, | |
| "loss": 0.0093, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 46.07, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.674180507659912, | |
| "eval_runtime": 2.9574, | |
| "eval_samples_per_second": 63.907, | |
| "eval_steps_per_second": 8.115, | |
| "step": 4930 | |
| }, | |
| { | |
| "epoch": 46.17, | |
| "learning_rate": 1.5401869158878503e-05, | |
| "loss": 0.0047, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 46.17, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6757071018218994, | |
| "eval_runtime": 2.8997, | |
| "eval_samples_per_second": 65.179, | |
| "eval_steps_per_second": 8.277, | |
| "step": 4940 | |
| }, | |
| { | |
| "epoch": 46.26, | |
| "learning_rate": 1.502803738317757e-05, | |
| "loss": 0.0089, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 46.26, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6735711097717285, | |
| "eval_runtime": 3.1476, | |
| "eval_samples_per_second": 60.046, | |
| "eval_steps_per_second": 7.625, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 46.36, | |
| "learning_rate": 1.4654205607476637e-05, | |
| "loss": 0.0005, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 46.36, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6719815731048584, | |
| "eval_runtime": 3.192, | |
| "eval_samples_per_second": 59.21, | |
| "eval_steps_per_second": 7.519, | |
| "step": 4960 | |
| }, | |
| { | |
| "epoch": 46.45, | |
| "learning_rate": 1.4280373831775701e-05, | |
| "loss": 0.0005, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 46.45, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6716129779815674, | |
| "eval_runtime": 2.9471, | |
| "eval_samples_per_second": 64.131, | |
| "eval_steps_per_second": 8.144, | |
| "step": 4970 | |
| }, | |
| { | |
| "epoch": 46.54, | |
| "learning_rate": 1.3906542056074767e-05, | |
| "loss": 0.0097, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 46.54, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.672421932220459, | |
| "eval_runtime": 2.9663, | |
| "eval_samples_per_second": 63.716, | |
| "eval_steps_per_second": 8.091, | |
| "step": 4980 | |
| }, | |
| { | |
| "epoch": 46.64, | |
| "learning_rate": 1.3532710280373834e-05, | |
| "loss": 0.0005, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 46.64, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.671976089477539, | |
| "eval_runtime": 3.0014, | |
| "eval_samples_per_second": 62.971, | |
| "eval_steps_per_second": 7.996, | |
| "step": 4990 | |
| }, | |
| { | |
| "epoch": 46.73, | |
| "learning_rate": 1.3158878504672898e-05, | |
| "loss": 0.0005, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 46.73, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6718426942825317, | |
| "eval_runtime": 3.162, | |
| "eval_samples_per_second": 59.772, | |
| "eval_steps_per_second": 7.59, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 46.82, | |
| "learning_rate": 1.2785046728971963e-05, | |
| "loss": 0.0004, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 46.82, | |
| "eval_accuracy": 0.7671957671957672, | |
| "eval_loss": 1.6721214056015015, | |
| "eval_runtime": 2.9348, | |
| "eval_samples_per_second": 64.4, | |
| "eval_steps_per_second": 8.178, | |
| "step": 5010 | |
| }, | |
| { | |
| "epoch": 46.92, | |
| "learning_rate": 1.2411214953271029e-05, | |
| "loss": 0.0107, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 46.92, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6742489337921143, | |
| "eval_runtime": 2.9616, | |
| "eval_samples_per_second": 63.816, | |
| "eval_steps_per_second": 8.104, | |
| "step": 5020 | |
| }, | |
| { | |
| "epoch": 47.01, | |
| "learning_rate": 1.2037383177570094e-05, | |
| "loss": 0.0051, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 47.01, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6763916015625, | |
| "eval_runtime": 3.3324, | |
| "eval_samples_per_second": 56.716, | |
| "eval_steps_per_second": 7.202, | |
| "step": 5030 | |
| }, | |
| { | |
| "epoch": 47.1, | |
| "learning_rate": 1.166355140186916e-05, | |
| "loss": 0.0004, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 47.1, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6787925958633423, | |
| "eval_runtime": 2.994, | |
| "eval_samples_per_second": 63.127, | |
| "eval_steps_per_second": 8.016, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 47.2, | |
| "learning_rate": 1.1289719626168224e-05, | |
| "loss": 0.0048, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 47.2, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.67880380153656, | |
| "eval_runtime": 2.9739, | |
| "eval_samples_per_second": 63.554, | |
| "eval_steps_per_second": 8.07, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 47.29, | |
| "learning_rate": 1.0915887850467291e-05, | |
| "loss": 0.0005, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 47.29, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6779260635375977, | |
| "eval_runtime": 2.9453, | |
| "eval_samples_per_second": 64.171, | |
| "eval_steps_per_second": 8.149, | |
| "step": 5060 | |
| }, | |
| { | |
| "epoch": 47.38, | |
| "learning_rate": 1.0542056074766356e-05, | |
| "loss": 0.0048, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 47.38, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6772257089614868, | |
| "eval_runtime": 3.0548, | |
| "eval_samples_per_second": 61.87, | |
| "eval_steps_per_second": 7.856, | |
| "step": 5070 | |
| }, | |
| { | |
| "epoch": 47.48, | |
| "learning_rate": 1.016822429906542e-05, | |
| "loss": 0.0044, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 47.48, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.677033543586731, | |
| "eval_runtime": 3.0759, | |
| "eval_samples_per_second": 61.446, | |
| "eval_steps_per_second": 7.803, | |
| "step": 5080 | |
| }, | |
| { | |
| "epoch": 47.57, | |
| "learning_rate": 9.794392523364486e-06, | |
| "loss": 0.0004, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 47.57, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6759369373321533, | |
| "eval_runtime": 3.134, | |
| "eval_samples_per_second": 60.306, | |
| "eval_steps_per_second": 7.658, | |
| "step": 5090 | |
| }, | |
| { | |
| "epoch": 47.66, | |
| "learning_rate": 9.420560747663553e-06, | |
| "loss": 0.0053, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 47.66, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6769102811813354, | |
| "eval_runtime": 3.2383, | |
| "eval_samples_per_second": 58.363, | |
| "eval_steps_per_second": 7.411, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 47.76, | |
| "learning_rate": 9.046728971962617e-06, | |
| "loss": 0.0093, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 47.76, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6791408061981201, | |
| "eval_runtime": 3.1946, | |
| "eval_samples_per_second": 59.163, | |
| "eval_steps_per_second": 7.513, | |
| "step": 5110 | |
| }, | |
| { | |
| "epoch": 47.85, | |
| "learning_rate": 8.672897196261682e-06, | |
| "loss": 0.0046, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 47.85, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6781762838363647, | |
| "eval_runtime": 3.0007, | |
| "eval_samples_per_second": 62.986, | |
| "eval_steps_per_second": 7.998, | |
| "step": 5120 | |
| }, | |
| { | |
| "epoch": 47.94, | |
| "learning_rate": 8.299065420560748e-06, | |
| "loss": 0.0101, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 47.94, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.679875135421753, | |
| "eval_runtime": 2.8961, | |
| "eval_samples_per_second": 65.259, | |
| "eval_steps_per_second": 8.287, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 48.04, | |
| "learning_rate": 7.925233644859813e-06, | |
| "loss": 0.0055, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 48.04, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6814370155334473, | |
| "eval_runtime": 2.99, | |
| "eval_samples_per_second": 63.21, | |
| "eval_steps_per_second": 8.027, | |
| "step": 5140 | |
| }, | |
| { | |
| "epoch": 48.13, | |
| "learning_rate": 7.551401869158879e-06, | |
| "loss": 0.0004, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 48.13, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6820155382156372, | |
| "eval_runtime": 3.0809, | |
| "eval_samples_per_second": 61.346, | |
| "eval_steps_per_second": 7.79, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 48.22, | |
| "learning_rate": 7.1775700934579445e-06, | |
| "loss": 0.0005, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 48.22, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6822861433029175, | |
| "eval_runtime": 2.9449, | |
| "eval_samples_per_second": 64.179, | |
| "eval_steps_per_second": 8.15, | |
| "step": 5160 | |
| }, | |
| { | |
| "epoch": 48.32, | |
| "learning_rate": 6.803738317757009e-06, | |
| "loss": 0.005, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 48.32, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.682709813117981, | |
| "eval_runtime": 3.0319, | |
| "eval_samples_per_second": 62.337, | |
| "eval_steps_per_second": 7.916, | |
| "step": 5170 | |
| }, | |
| { | |
| "epoch": 48.41, | |
| "learning_rate": 6.429906542056075e-06, | |
| "loss": 0.0093, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 48.41, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6838692426681519, | |
| "eval_runtime": 3.0335, | |
| "eval_samples_per_second": 62.304, | |
| "eval_steps_per_second": 7.912, | |
| "step": 5180 | |
| }, | |
| { | |
| "epoch": 48.5, | |
| "learning_rate": 6.05607476635514e-06, | |
| "loss": 0.0048, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 48.5, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6845488548278809, | |
| "eval_runtime": 3.0676, | |
| "eval_samples_per_second": 61.612, | |
| "eval_steps_per_second": 7.824, | |
| "step": 5190 | |
| }, | |
| { | |
| "epoch": 48.6, | |
| "learning_rate": 5.682242990654206e-06, | |
| "loss": 0.0005, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 48.6, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6849009990692139, | |
| "eval_runtime": 3.008, | |
| "eval_samples_per_second": 62.832, | |
| "eval_steps_per_second": 7.979, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 48.69, | |
| "learning_rate": 5.308411214953271e-06, | |
| "loss": 0.0005, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 48.69, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6851400136947632, | |
| "eval_runtime": 2.9174, | |
| "eval_samples_per_second": 64.784, | |
| "eval_steps_per_second": 8.227, | |
| "step": 5210 | |
| }, | |
| { | |
| "epoch": 48.79, | |
| "learning_rate": 4.934579439252337e-06, | |
| "loss": 0.0136, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 48.79, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.686295747756958, | |
| "eval_runtime": 2.9681, | |
| "eval_samples_per_second": 63.678, | |
| "eval_steps_per_second": 8.086, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 48.88, | |
| "learning_rate": 4.560747663551402e-06, | |
| "loss": 0.005, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 48.88, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6866832971572876, | |
| "eval_runtime": 2.9518, | |
| "eval_samples_per_second": 64.029, | |
| "eval_steps_per_second": 8.131, | |
| "step": 5230 | |
| }, | |
| { | |
| "epoch": 48.97, | |
| "learning_rate": 4.186915887850468e-06, | |
| "loss": 0.0096, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 48.97, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.685899257659912, | |
| "eval_runtime": 3.0006, | |
| "eval_samples_per_second": 62.987, | |
| "eval_steps_per_second": 7.998, | |
| "step": 5240 | |
| }, | |
| { | |
| "epoch": 49.07, | |
| "learning_rate": 3.813084112149533e-06, | |
| "loss": 0.0048, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 49.07, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6844896078109741, | |
| "eval_runtime": 2.9876, | |
| "eval_samples_per_second": 63.261, | |
| "eval_steps_per_second": 8.033, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 49.16, | |
| "learning_rate": 3.4392523364485985e-06, | |
| "loss": 0.0048, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 49.16, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6853784322738647, | |
| "eval_runtime": 2.9873, | |
| "eval_samples_per_second": 63.267, | |
| "eval_steps_per_second": 8.034, | |
| "step": 5260 | |
| }, | |
| { | |
| "epoch": 49.25, | |
| "learning_rate": 3.0654205607476637e-06, | |
| "loss": 0.0093, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 49.25, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6857768297195435, | |
| "eval_runtime": 2.9803, | |
| "eval_samples_per_second": 63.416, | |
| "eval_steps_per_second": 8.053, | |
| "step": 5270 | |
| }, | |
| { | |
| "epoch": 49.35, | |
| "learning_rate": 2.691588785046729e-06, | |
| "loss": 0.0004, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 49.35, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6857463121414185, | |
| "eval_runtime": 3.0292, | |
| "eval_samples_per_second": 62.393, | |
| "eval_steps_per_second": 7.923, | |
| "step": 5280 | |
| }, | |
| { | |
| "epoch": 49.44, | |
| "learning_rate": 2.3177570093457947e-06, | |
| "loss": 0.0095, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 49.44, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.685395359992981, | |
| "eval_runtime": 2.9645, | |
| "eval_samples_per_second": 63.755, | |
| "eval_steps_per_second": 8.096, | |
| "step": 5290 | |
| }, | |
| { | |
| "epoch": 49.53, | |
| "learning_rate": 1.94392523364486e-06, | |
| "loss": 0.0005, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 49.53, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6847246885299683, | |
| "eval_runtime": 3.0054, | |
| "eval_samples_per_second": 62.887, | |
| "eval_steps_per_second": 7.986, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 49.63, | |
| "learning_rate": 1.5700934579439254e-06, | |
| "loss": 0.0005, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 49.63, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6844700574874878, | |
| "eval_runtime": 2.9223, | |
| "eval_samples_per_second": 64.676, | |
| "eval_steps_per_second": 8.213, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 49.72, | |
| "learning_rate": 1.1962616822429907e-06, | |
| "loss": 0.0092, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 49.72, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6847366094589233, | |
| "eval_runtime": 3.0496, | |
| "eval_samples_per_second": 61.975, | |
| "eval_steps_per_second": 7.87, | |
| "step": 5320 | |
| }, | |
| { | |
| "epoch": 49.81, | |
| "learning_rate": 8.224299065420561e-07, | |
| "loss": 0.0005, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 49.81, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6845016479492188, | |
| "eval_runtime": 3.1606, | |
| "eval_samples_per_second": 59.798, | |
| "eval_steps_per_second": 7.593, | |
| "step": 5330 | |
| }, | |
| { | |
| "epoch": 49.91, | |
| "learning_rate": 4.4859813084112153e-07, | |
| "loss": 0.0092, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 49.91, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.684584140777588, | |
| "eval_runtime": 2.921, | |
| "eval_samples_per_second": 64.704, | |
| "eval_steps_per_second": 8.216, | |
| "step": 5340 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "learning_rate": 7.476635514018692e-08, | |
| "loss": 0.0005, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 0.7619047619047619, | |
| "eval_loss": 1.6846909523010254, | |
| "eval_runtime": 2.9853, | |
| "eval_samples_per_second": 63.309, | |
| "eval_steps_per_second": 8.039, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "step": 5350, | |
| "total_flos": 6.575784632757043e+18, | |
| "train_loss": 0.14066274270554568, | |
| "train_runtime": 4434.2749, | |
| "train_samples_per_second": 19.135, | |
| "train_steps_per_second": 1.207 | |
| } | |
| ], | |
| "logging_steps": 10, | |
| "max_steps": 5350, | |
| "num_train_epochs": 50, | |
| "save_steps": 10, | |
| "total_flos": 6.575784632757043e+18, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |