| { | |
| "best_metric": 0.9622641509433962, | |
| "best_model_checkpoint": "wav2vec2-2Class-easy-train-test-large/checkpoint-2520", | |
| "epoch": 782.2222222222222, | |
| "eval_steps": 500, | |
| "global_step": 8800, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.98, | |
| "eval_accuracy": 0.4088050314465409, | |
| "eval_loss": 0.7003181576728821, | |
| "eval_runtime": 1.8048, | |
| "eval_samples_per_second": 88.1, | |
| "eval_steps_per_second": 5.541, | |
| "step": 11 | |
| }, | |
| { | |
| "epoch": 1.96, | |
| "eval_accuracy": 0.4088050314465409, | |
| "eval_loss": 0.7001124620437622, | |
| "eval_runtime": 1.7728, | |
| "eval_samples_per_second": 89.69, | |
| "eval_steps_per_second": 5.641, | |
| "step": 22 | |
| }, | |
| { | |
| "epoch": 2.93, | |
| "eval_accuracy": 0.41509433962264153, | |
| "eval_loss": 0.69970703125, | |
| "eval_runtime": 1.7593, | |
| "eval_samples_per_second": 90.375, | |
| "eval_steps_per_second": 5.684, | |
| "step": 33 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.42138364779874216, | |
| "eval_loss": 0.6991450786590576, | |
| "eval_runtime": 1.7582, | |
| "eval_samples_per_second": 90.433, | |
| "eval_steps_per_second": 5.688, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 4.44, | |
| "grad_norm": 0.8353477716445923, | |
| "learning_rate": 1.7045454545454546e-06, | |
| "loss": 0.6976, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 4.98, | |
| "eval_accuracy": 0.4276729559748428, | |
| "eval_loss": 0.6984724998474121, | |
| "eval_runtime": 1.7849, | |
| "eval_samples_per_second": 89.08, | |
| "eval_steps_per_second": 5.603, | |
| "step": 56 | |
| }, | |
| { | |
| "epoch": 5.96, | |
| "eval_accuracy": 0.44025157232704404, | |
| "eval_loss": 0.697744607925415, | |
| "eval_runtime": 2.127, | |
| "eval_samples_per_second": 74.753, | |
| "eval_steps_per_second": 4.701, | |
| "step": 67 | |
| }, | |
| { | |
| "epoch": 6.93, | |
| "eval_accuracy": 0.44654088050314467, | |
| "eval_loss": 0.6968724727630615, | |
| "eval_runtime": 2.2513, | |
| "eval_samples_per_second": 70.624, | |
| "eval_steps_per_second": 4.442, | |
| "step": 78 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.46540880503144655, | |
| "eval_loss": 0.6957085728645325, | |
| "eval_runtime": 2.1194, | |
| "eval_samples_per_second": 75.021, | |
| "eval_steps_per_second": 4.718, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 8.89, | |
| "grad_norm": 0.45805710554122925, | |
| "learning_rate": 3.409090909090909e-06, | |
| "loss": 0.6952, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 8.98, | |
| "eval_accuracy": 0.46540880503144655, | |
| "eval_loss": 0.6945385932922363, | |
| "eval_runtime": 2.2918, | |
| "eval_samples_per_second": 69.378, | |
| "eval_steps_per_second": 4.363, | |
| "step": 101 | |
| }, | |
| { | |
| "epoch": 9.96, | |
| "eval_accuracy": 0.4779874213836478, | |
| "eval_loss": 0.6933900117874146, | |
| "eval_runtime": 2.2504, | |
| "eval_samples_per_second": 70.654, | |
| "eval_steps_per_second": 4.444, | |
| "step": 112 | |
| }, | |
| { | |
| "epoch": 10.93, | |
| "eval_accuracy": 0.49056603773584906, | |
| "eval_loss": 0.692146360874176, | |
| "eval_runtime": 2.1543, | |
| "eval_samples_per_second": 73.804, | |
| "eval_steps_per_second": 4.642, | |
| "step": 123 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.5471698113207547, | |
| "eval_loss": 0.6906170845031738, | |
| "eval_runtime": 2.0832, | |
| "eval_samples_per_second": 76.326, | |
| "eval_steps_per_second": 4.8, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 12.98, | |
| "eval_accuracy": 0.610062893081761, | |
| "eval_loss": 0.6892228722572327, | |
| "eval_runtime": 2.0269, | |
| "eval_samples_per_second": 78.443, | |
| "eval_steps_per_second": 4.934, | |
| "step": 146 | |
| }, | |
| { | |
| "epoch": 13.33, | |
| "grad_norm": 0.6493268609046936, | |
| "learning_rate": 5.1136363636363635e-06, | |
| "loss": 0.6911, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 13.96, | |
| "eval_accuracy": 0.6037735849056604, | |
| "eval_loss": 0.6878040432929993, | |
| "eval_runtime": 2.1502, | |
| "eval_samples_per_second": 73.946, | |
| "eval_steps_per_second": 4.651, | |
| "step": 157 | |
| }, | |
| { | |
| "epoch": 14.93, | |
| "eval_accuracy": 0.5911949685534591, | |
| "eval_loss": 0.6863483190536499, | |
| "eval_runtime": 2.0844, | |
| "eval_samples_per_second": 76.279, | |
| "eval_steps_per_second": 4.797, | |
| "step": 168 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.5911949685534591, | |
| "eval_loss": 0.6847361326217651, | |
| "eval_runtime": 2.1372, | |
| "eval_samples_per_second": 74.395, | |
| "eval_steps_per_second": 4.679, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 16.98, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6830993294715881, | |
| "eval_runtime": 2.3473, | |
| "eval_samples_per_second": 67.739, | |
| "eval_steps_per_second": 4.26, | |
| "step": 191 | |
| }, | |
| { | |
| "epoch": 17.78, | |
| "grad_norm": 0.5862739086151123, | |
| "learning_rate": 6.818181818181818e-06, | |
| "loss": 0.6852, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 17.96, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6815393567085266, | |
| "eval_runtime": 2.1307, | |
| "eval_samples_per_second": 74.623, | |
| "eval_steps_per_second": 4.693, | |
| "step": 202 | |
| }, | |
| { | |
| "epoch": 18.93, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.679994523525238, | |
| "eval_runtime": 2.082, | |
| "eval_samples_per_second": 76.37, | |
| "eval_steps_per_second": 4.803, | |
| "step": 213 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6782289147377014, | |
| "eval_runtime": 2.1302, | |
| "eval_samples_per_second": 74.641, | |
| "eval_steps_per_second": 4.694, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 20.98, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6765275001525879, | |
| "eval_runtime": 2.0229, | |
| "eval_samples_per_second": 78.601, | |
| "eval_steps_per_second": 4.943, | |
| "step": 236 | |
| }, | |
| { | |
| "epoch": 21.96, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6749551892280579, | |
| "eval_runtime": 2.0505, | |
| "eval_samples_per_second": 77.542, | |
| "eval_steps_per_second": 4.877, | |
| "step": 247 | |
| }, | |
| { | |
| "epoch": 22.22, | |
| "grad_norm": 0.10243403911590576, | |
| "learning_rate": 8.522727272727273e-06, | |
| "loss": 0.6783, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 22.93, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6732170581817627, | |
| "eval_runtime": 2.0616, | |
| "eval_samples_per_second": 77.125, | |
| "eval_steps_per_second": 4.851, | |
| "step": 258 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6713252067565918, | |
| "eval_runtime": 2.1605, | |
| "eval_samples_per_second": 73.595, | |
| "eval_steps_per_second": 4.629, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 24.98, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6694673895835876, | |
| "eval_runtime": 2.0526, | |
| "eval_samples_per_second": 77.462, | |
| "eval_steps_per_second": 4.872, | |
| "step": 281 | |
| }, | |
| { | |
| "epoch": 25.96, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6674391031265259, | |
| "eval_runtime": 2.1284, | |
| "eval_samples_per_second": 74.704, | |
| "eval_steps_per_second": 4.698, | |
| "step": 292 | |
| }, | |
| { | |
| "epoch": 26.67, | |
| "grad_norm": 0.3114006221294403, | |
| "learning_rate": 1.0227272727272727e-05, | |
| "loss": 0.6676, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 26.93, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6654335856437683, | |
| "eval_runtime": 1.9991, | |
| "eval_samples_per_second": 79.535, | |
| "eval_steps_per_second": 5.002, | |
| "step": 303 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6630644202232361, | |
| "eval_runtime": 2.0451, | |
| "eval_samples_per_second": 77.745, | |
| "eval_steps_per_second": 4.89, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 28.98, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6605831980705261, | |
| "eval_runtime": 2.0625, | |
| "eval_samples_per_second": 77.092, | |
| "eval_steps_per_second": 4.849, | |
| "step": 326 | |
| }, | |
| { | |
| "epoch": 29.96, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6578991413116455, | |
| "eval_runtime": 2.0381, | |
| "eval_samples_per_second": 78.014, | |
| "eval_steps_per_second": 4.907, | |
| "step": 337 | |
| }, | |
| { | |
| "epoch": 30.93, | |
| "eval_accuracy": 0.5849056603773585, | |
| "eval_loss": 0.6539114713668823, | |
| "eval_runtime": 1.9774, | |
| "eval_samples_per_second": 80.407, | |
| "eval_steps_per_second": 5.057, | |
| "step": 348 | |
| }, | |
| { | |
| "epoch": 31.11, | |
| "grad_norm": 0.2134709656238556, | |
| "learning_rate": 1.1931818181818181e-05, | |
| "loss": 0.6516, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.5974842767295597, | |
| "eval_loss": 0.6492742896080017, | |
| "eval_runtime": 2.0601, | |
| "eval_samples_per_second": 77.182, | |
| "eval_steps_per_second": 4.854, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 32.98, | |
| "eval_accuracy": 0.610062893081761, | |
| "eval_loss": 0.6441397070884705, | |
| "eval_runtime": 2.0739, | |
| "eval_samples_per_second": 76.667, | |
| "eval_steps_per_second": 4.822, | |
| "step": 371 | |
| }, | |
| { | |
| "epoch": 33.96, | |
| "eval_accuracy": 0.6226415094339622, | |
| "eval_loss": 0.6348815560340881, | |
| "eval_runtime": 2.1526, | |
| "eval_samples_per_second": 73.865, | |
| "eval_steps_per_second": 4.646, | |
| "step": 382 | |
| }, | |
| { | |
| "epoch": 34.93, | |
| "eval_accuracy": 0.6289308176100629, | |
| "eval_loss": 0.6257140040397644, | |
| "eval_runtime": 2.0081, | |
| "eval_samples_per_second": 79.179, | |
| "eval_steps_per_second": 4.98, | |
| "step": 393 | |
| }, | |
| { | |
| "epoch": 35.56, | |
| "grad_norm": 0.8974349498748779, | |
| "learning_rate": 1.3636363636363637e-05, | |
| "loss": 0.6124, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.6415094339622641, | |
| "eval_loss": 0.611738920211792, | |
| "eval_runtime": 1.9854, | |
| "eval_samples_per_second": 80.083, | |
| "eval_steps_per_second": 5.037, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 36.98, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 0.5910706520080566, | |
| "eval_runtime": 2.0618, | |
| "eval_samples_per_second": 77.117, | |
| "eval_steps_per_second": 4.85, | |
| "step": 416 | |
| }, | |
| { | |
| "epoch": 37.96, | |
| "eval_accuracy": 0.6918238993710691, | |
| "eval_loss": 0.5672016143798828, | |
| "eval_runtime": 2.0402, | |
| "eval_samples_per_second": 77.932, | |
| "eval_steps_per_second": 4.901, | |
| "step": 427 | |
| }, | |
| { | |
| "epoch": 38.93, | |
| "eval_accuracy": 0.7232704402515723, | |
| "eval_loss": 0.5392354130744934, | |
| "eval_runtime": 2.2936, | |
| "eval_samples_per_second": 69.324, | |
| "eval_steps_per_second": 4.36, | |
| "step": 438 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "grad_norm": 0.7736309170722961, | |
| "learning_rate": 1.534090909090909e-05, | |
| "loss": 0.5073, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.7547169811320755, | |
| "eval_loss": 0.5041937232017517, | |
| "eval_runtime": 2.1247, | |
| "eval_samples_per_second": 74.835, | |
| "eval_steps_per_second": 4.707, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 40.98, | |
| "eval_accuracy": 0.7672955974842768, | |
| "eval_loss": 0.47902750968933105, | |
| "eval_runtime": 2.163, | |
| "eval_samples_per_second": 73.509, | |
| "eval_steps_per_second": 4.623, | |
| "step": 461 | |
| }, | |
| { | |
| "epoch": 41.96, | |
| "eval_accuracy": 0.779874213836478, | |
| "eval_loss": 0.47594940662384033, | |
| "eval_runtime": 2.1321, | |
| "eval_samples_per_second": 74.574, | |
| "eval_steps_per_second": 4.69, | |
| "step": 472 | |
| }, | |
| { | |
| "epoch": 42.93, | |
| "eval_accuracy": 0.7987421383647799, | |
| "eval_loss": 0.4369964003562927, | |
| "eval_runtime": 2.1555, | |
| "eval_samples_per_second": 73.765, | |
| "eval_steps_per_second": 4.639, | |
| "step": 483 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.7987421383647799, | |
| "eval_loss": 0.43516698479652405, | |
| "eval_runtime": 2.032, | |
| "eval_samples_per_second": 78.249, | |
| "eval_steps_per_second": 4.921, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 44.44, | |
| "grad_norm": 0.4976819157600403, | |
| "learning_rate": 1.7045454545454546e-05, | |
| "loss": 0.3489, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 44.98, | |
| "eval_accuracy": 0.7987421383647799, | |
| "eval_loss": 0.4422326385974884, | |
| "eval_runtime": 2.1135, | |
| "eval_samples_per_second": 75.231, | |
| "eval_steps_per_second": 4.732, | |
| "step": 506 | |
| }, | |
| { | |
| "epoch": 45.96, | |
| "eval_accuracy": 0.8050314465408805, | |
| "eval_loss": 0.41540881991386414, | |
| "eval_runtime": 2.0847, | |
| "eval_samples_per_second": 76.27, | |
| "eval_steps_per_second": 4.797, | |
| "step": 517 | |
| }, | |
| { | |
| "epoch": 46.93, | |
| "eval_accuracy": 0.8050314465408805, | |
| "eval_loss": 0.4131433367729187, | |
| "eval_runtime": 1.9752, | |
| "eval_samples_per_second": 80.498, | |
| "eval_steps_per_second": 5.063, | |
| "step": 528 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.8113207547169812, | |
| "eval_loss": 0.3975575864315033, | |
| "eval_runtime": 2.01, | |
| "eval_samples_per_second": 79.104, | |
| "eval_steps_per_second": 4.975, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 48.89, | |
| "grad_norm": 0.5197520852088928, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.2962, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 48.98, | |
| "eval_accuracy": 0.8113207547169812, | |
| "eval_loss": 0.39397454261779785, | |
| "eval_runtime": 2.0261, | |
| "eval_samples_per_second": 78.474, | |
| "eval_steps_per_second": 4.935, | |
| "step": 551 | |
| }, | |
| { | |
| "epoch": 49.96, | |
| "eval_accuracy": 0.8238993710691824, | |
| "eval_loss": 0.371494859457016, | |
| "eval_runtime": 2.0246, | |
| "eval_samples_per_second": 78.535, | |
| "eval_steps_per_second": 4.939, | |
| "step": 562 | |
| }, | |
| { | |
| "epoch": 50.93, | |
| "eval_accuracy": 0.8427672955974843, | |
| "eval_loss": 0.34951409697532654, | |
| "eval_runtime": 2.3286, | |
| "eval_samples_per_second": 68.281, | |
| "eval_steps_per_second": 4.294, | |
| "step": 573 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.8364779874213837, | |
| "eval_loss": 0.3481156826019287, | |
| "eval_runtime": 1.9542, | |
| "eval_samples_per_second": 81.362, | |
| "eval_steps_per_second": 5.117, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 52.98, | |
| "eval_accuracy": 0.8176100628930818, | |
| "eval_loss": 0.3817409873008728, | |
| "eval_runtime": 2.0789, | |
| "eval_samples_per_second": 76.484, | |
| "eval_steps_per_second": 4.81, | |
| "step": 596 | |
| }, | |
| { | |
| "epoch": 53.33, | |
| "grad_norm": 0.5608111023902893, | |
| "learning_rate": 2.0454545454545454e-05, | |
| "loss": 0.2573, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 53.96, | |
| "eval_accuracy": 0.8490566037735849, | |
| "eval_loss": 0.3412492871284485, | |
| "eval_runtime": 2.0746, | |
| "eval_samples_per_second": 76.642, | |
| "eval_steps_per_second": 4.82, | |
| "step": 607 | |
| }, | |
| { | |
| "epoch": 54.93, | |
| "eval_accuracy": 0.8490566037735849, | |
| "eval_loss": 0.32929155230522156, | |
| "eval_runtime": 1.9991, | |
| "eval_samples_per_second": 79.538, | |
| "eval_steps_per_second": 5.002, | |
| "step": 618 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.8427672955974843, | |
| "eval_loss": 0.3547687232494354, | |
| "eval_runtime": 2.1242, | |
| "eval_samples_per_second": 74.851, | |
| "eval_steps_per_second": 4.708, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 56.98, | |
| "eval_accuracy": 0.8427672955974843, | |
| "eval_loss": 0.3044220209121704, | |
| "eval_runtime": 2.0508, | |
| "eval_samples_per_second": 77.532, | |
| "eval_steps_per_second": 4.876, | |
| "step": 641 | |
| }, | |
| { | |
| "epoch": 57.78, | |
| "grad_norm": 0.894092321395874, | |
| "learning_rate": 2.215909090909091e-05, | |
| "loss": 0.2279, | |
| "step": 650 | |
| }, | |
| { | |
| "epoch": 57.96, | |
| "eval_accuracy": 0.8490566037735849, | |
| "eval_loss": 0.32347577810287476, | |
| "eval_runtime": 2.2095, | |
| "eval_samples_per_second": 71.963, | |
| "eval_steps_per_second": 4.526, | |
| "step": 652 | |
| }, | |
| { | |
| "epoch": 58.93, | |
| "eval_accuracy": 0.8490566037735849, | |
| "eval_loss": 0.3371436297893524, | |
| "eval_runtime": 2.1055, | |
| "eval_samples_per_second": 75.518, | |
| "eval_steps_per_second": 4.75, | |
| "step": 663 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.8490566037735849, | |
| "eval_loss": 0.31275492906570435, | |
| "eval_runtime": 2.1311, | |
| "eval_samples_per_second": 74.61, | |
| "eval_steps_per_second": 4.692, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 60.98, | |
| "eval_accuracy": 0.8553459119496856, | |
| "eval_loss": 0.32111966609954834, | |
| "eval_runtime": 2.0639, | |
| "eval_samples_per_second": 77.038, | |
| "eval_steps_per_second": 4.845, | |
| "step": 686 | |
| }, | |
| { | |
| "epoch": 61.96, | |
| "eval_accuracy": 0.8616352201257862, | |
| "eval_loss": 0.302960604429245, | |
| "eval_runtime": 2.0241, | |
| "eval_samples_per_second": 78.552, | |
| "eval_steps_per_second": 4.94, | |
| "step": 697 | |
| }, | |
| { | |
| "epoch": 62.22, | |
| "grad_norm": 0.4315973222255707, | |
| "learning_rate": 2.3863636363636362e-05, | |
| "loss": 0.2167, | |
| "step": 700 | |
| }, | |
| { | |
| "epoch": 62.93, | |
| "eval_accuracy": 0.8616352201257862, | |
| "eval_loss": 0.29696550965309143, | |
| "eval_runtime": 2.034, | |
| "eval_samples_per_second": 78.169, | |
| "eval_steps_per_second": 4.916, | |
| "step": 708 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.8679245283018868, | |
| "eval_loss": 0.29949402809143066, | |
| "eval_runtime": 2.095, | |
| "eval_samples_per_second": 75.897, | |
| "eval_steps_per_second": 4.773, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 64.98, | |
| "eval_accuracy": 0.8742138364779874, | |
| "eval_loss": 0.2867083251476288, | |
| "eval_runtime": 2.0417, | |
| "eval_samples_per_second": 77.876, | |
| "eval_steps_per_second": 4.898, | |
| "step": 731 | |
| }, | |
| { | |
| "epoch": 65.96, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.26363295316696167, | |
| "eval_runtime": 2.1382, | |
| "eval_samples_per_second": 74.363, | |
| "eval_steps_per_second": 4.677, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 66.67, | |
| "grad_norm": 0.37665870785713196, | |
| "learning_rate": 2.556818181818182e-05, | |
| "loss": 0.207, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 66.93, | |
| "eval_accuracy": 0.8805031446540881, | |
| "eval_loss": 0.28482353687286377, | |
| "eval_runtime": 2.1166, | |
| "eval_samples_per_second": 75.119, | |
| "eval_steps_per_second": 4.724, | |
| "step": 753 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.8867924528301887, | |
| "eval_loss": 0.2750767767429352, | |
| "eval_runtime": 2.1981, | |
| "eval_samples_per_second": 72.336, | |
| "eval_steps_per_second": 4.549, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 68.98, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.256393700838089, | |
| "eval_runtime": 2.033, | |
| "eval_samples_per_second": 78.211, | |
| "eval_steps_per_second": 4.919, | |
| "step": 776 | |
| }, | |
| { | |
| "epoch": 69.96, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.25443732738494873, | |
| "eval_runtime": 2.0096, | |
| "eval_samples_per_second": 79.121, | |
| "eval_steps_per_second": 4.976, | |
| "step": 787 | |
| }, | |
| { | |
| "epoch": 70.93, | |
| "eval_accuracy": 0.8742138364779874, | |
| "eval_loss": 0.2954423129558563, | |
| "eval_runtime": 2.1018, | |
| "eval_samples_per_second": 75.649, | |
| "eval_steps_per_second": 4.758, | |
| "step": 798 | |
| }, | |
| { | |
| "epoch": 71.11, | |
| "grad_norm": 0.7302255630493164, | |
| "learning_rate": 2.7272727272727273e-05, | |
| "loss": 0.1899, | |
| "step": 800 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.25169771909713745, | |
| "eval_runtime": 2.041, | |
| "eval_samples_per_second": 77.904, | |
| "eval_steps_per_second": 4.9, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 72.98, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.2506076693534851, | |
| "eval_runtime": 2.0257, | |
| "eval_samples_per_second": 78.49, | |
| "eval_steps_per_second": 4.936, | |
| "step": 821 | |
| }, | |
| { | |
| "epoch": 73.96, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.2434261441230774, | |
| "eval_runtime": 2.0325, | |
| "eval_samples_per_second": 78.23, | |
| "eval_steps_per_second": 4.92, | |
| "step": 832 | |
| }, | |
| { | |
| "epoch": 74.93, | |
| "eval_accuracy": 0.89937106918239, | |
| "eval_loss": 0.23832084238529205, | |
| "eval_runtime": 2.1871, | |
| "eval_samples_per_second": 72.699, | |
| "eval_steps_per_second": 4.572, | |
| "step": 843 | |
| }, | |
| { | |
| "epoch": 75.56, | |
| "grad_norm": 0.5180615186691284, | |
| "learning_rate": 2.897727272727273e-05, | |
| "loss": 0.1801, | |
| "step": 850 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.89937106918239, | |
| "eval_loss": 0.23464229702949524, | |
| "eval_runtime": 2.026, | |
| "eval_samples_per_second": 78.48, | |
| "eval_steps_per_second": 4.936, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 76.98, | |
| "eval_accuracy": 0.89937106918239, | |
| "eval_loss": 0.22975026071071625, | |
| "eval_runtime": 2.0881, | |
| "eval_samples_per_second": 76.147, | |
| "eval_steps_per_second": 4.789, | |
| "step": 866 | |
| }, | |
| { | |
| "epoch": 77.96, | |
| "eval_accuracy": 0.9056603773584906, | |
| "eval_loss": 0.2403678596019745, | |
| "eval_runtime": 2.075, | |
| "eval_samples_per_second": 76.626, | |
| "eval_steps_per_second": 4.819, | |
| "step": 877 | |
| }, | |
| { | |
| "epoch": 78.93, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.2674010097980499, | |
| "eval_runtime": 2.037, | |
| "eval_samples_per_second": 78.057, | |
| "eval_steps_per_second": 4.909, | |
| "step": 888 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "grad_norm": 1.2135472297668457, | |
| "learning_rate": 2.9924242424242427e-05, | |
| "loss": 0.1692, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.89937106918239, | |
| "eval_loss": 0.2231501042842865, | |
| "eval_runtime": 2.0398, | |
| "eval_samples_per_second": 77.949, | |
| "eval_steps_per_second": 4.902, | |
| "step": 900 | |
| }, | |
| { | |
| "epoch": 80.98, | |
| "eval_accuracy": 0.89937106918239, | |
| "eval_loss": 0.2390480935573578, | |
| "eval_runtime": 1.9822, | |
| "eval_samples_per_second": 80.213, | |
| "eval_steps_per_second": 5.045, | |
| "step": 911 | |
| }, | |
| { | |
| "epoch": 81.96, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.20583955943584442, | |
| "eval_runtime": 2.0665, | |
| "eval_samples_per_second": 76.94, | |
| "eval_steps_per_second": 4.839, | |
| "step": 922 | |
| }, | |
| { | |
| "epoch": 82.93, | |
| "eval_accuracy": 0.9056603773584906, | |
| "eval_loss": 0.2114023119211197, | |
| "eval_runtime": 2.0736, | |
| "eval_samples_per_second": 76.678, | |
| "eval_steps_per_second": 4.823, | |
| "step": 933 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.89937106918239, | |
| "eval_loss": 0.24830691516399384, | |
| "eval_runtime": 2.0148, | |
| "eval_samples_per_second": 78.915, | |
| "eval_steps_per_second": 4.963, | |
| "step": 945 | |
| }, | |
| { | |
| "epoch": 84.44, | |
| "grad_norm": 0.5111488103866577, | |
| "learning_rate": 2.9734848484848486e-05, | |
| "loss": 0.1691, | |
| "step": 950 | |
| }, | |
| { | |
| "epoch": 84.98, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.2259017676115036, | |
| "eval_runtime": 2.2201, | |
| "eval_samples_per_second": 71.618, | |
| "eval_steps_per_second": 4.504, | |
| "step": 956 | |
| }, | |
| { | |
| "epoch": 85.96, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.20239894092082977, | |
| "eval_runtime": 2.0671, | |
| "eval_samples_per_second": 76.918, | |
| "eval_steps_per_second": 4.838, | |
| "step": 967 | |
| }, | |
| { | |
| "epoch": 86.93, | |
| "eval_accuracy": 0.89937106918239, | |
| "eval_loss": 0.20193150639533997, | |
| "eval_runtime": 2.0416, | |
| "eval_samples_per_second": 77.879, | |
| "eval_steps_per_second": 4.898, | |
| "step": 978 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.19625458121299744, | |
| "eval_runtime": 2.0196, | |
| "eval_samples_per_second": 78.73, | |
| "eval_steps_per_second": 4.952, | |
| "step": 990 | |
| }, | |
| { | |
| "epoch": 88.89, | |
| "grad_norm": 0.4683234989643097, | |
| "learning_rate": 2.9545454545454545e-05, | |
| "loss": 0.1609, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 88.98, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.21583892405033112, | |
| "eval_runtime": 2.0254, | |
| "eval_samples_per_second": 78.503, | |
| "eval_steps_per_second": 4.937, | |
| "step": 1001 | |
| }, | |
| { | |
| "epoch": 89.96, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.197691410779953, | |
| "eval_runtime": 1.9978, | |
| "eval_samples_per_second": 79.586, | |
| "eval_steps_per_second": 5.005, | |
| "step": 1012 | |
| }, | |
| { | |
| "epoch": 90.93, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.19791610538959503, | |
| "eval_runtime": 2.0853, | |
| "eval_samples_per_second": 76.248, | |
| "eval_steps_per_second": 4.795, | |
| "step": 1023 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.20358721911907196, | |
| "eval_runtime": 2.1963, | |
| "eval_samples_per_second": 72.393, | |
| "eval_steps_per_second": 4.553, | |
| "step": 1035 | |
| }, | |
| { | |
| "epoch": 92.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.19769711792469025, | |
| "eval_runtime": 2.0089, | |
| "eval_samples_per_second": 79.146, | |
| "eval_steps_per_second": 4.978, | |
| "step": 1046 | |
| }, | |
| { | |
| "epoch": 93.33, | |
| "grad_norm": 0.6099847555160522, | |
| "learning_rate": 2.9356060606060604e-05, | |
| "loss": 0.1516, | |
| "step": 1050 | |
| }, | |
| { | |
| "epoch": 93.96, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.1974458247423172, | |
| "eval_runtime": 2.1182, | |
| "eval_samples_per_second": 75.065, | |
| "eval_steps_per_second": 4.721, | |
| "step": 1057 | |
| }, | |
| { | |
| "epoch": 94.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.1993919163942337, | |
| "eval_runtime": 2.0707, | |
| "eval_samples_per_second": 76.787, | |
| "eval_steps_per_second": 4.829, | |
| "step": 1068 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.1955273449420929, | |
| "eval_runtime": 2.0163, | |
| "eval_samples_per_second": 78.858, | |
| "eval_steps_per_second": 4.96, | |
| "step": 1080 | |
| }, | |
| { | |
| "epoch": 96.98, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.19483698904514313, | |
| "eval_runtime": 2.0495, | |
| "eval_samples_per_second": 77.581, | |
| "eval_steps_per_second": 4.879, | |
| "step": 1091 | |
| }, | |
| { | |
| "epoch": 97.78, | |
| "grad_norm": 1.0578981637954712, | |
| "learning_rate": 2.9166666666666666e-05, | |
| "loss": 0.1386, | |
| "step": 1100 | |
| }, | |
| { | |
| "epoch": 97.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.19463855028152466, | |
| "eval_runtime": 2.0625, | |
| "eval_samples_per_second": 77.091, | |
| "eval_steps_per_second": 4.849, | |
| "step": 1102 | |
| }, | |
| { | |
| "epoch": 98.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.19323910772800446, | |
| "eval_runtime": 2.0028, | |
| "eval_samples_per_second": 79.389, | |
| "eval_steps_per_second": 4.993, | |
| "step": 1113 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.1841806173324585, | |
| "eval_runtime": 2.1056, | |
| "eval_samples_per_second": 75.512, | |
| "eval_steps_per_second": 4.749, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 100.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.18839451670646667, | |
| "eval_runtime": 1.9858, | |
| "eval_samples_per_second": 80.07, | |
| "eval_steps_per_second": 5.036, | |
| "step": 1136 | |
| }, | |
| { | |
| "epoch": 101.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.1899903267621994, | |
| "eval_runtime": 2.2196, | |
| "eval_samples_per_second": 71.635, | |
| "eval_steps_per_second": 4.505, | |
| "step": 1147 | |
| }, | |
| { | |
| "epoch": 102.22, | |
| "grad_norm": 0.6229210495948792, | |
| "learning_rate": 2.897727272727273e-05, | |
| "loss": 0.1279, | |
| "step": 1150 | |
| }, | |
| { | |
| "epoch": 102.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.184115469455719, | |
| "eval_runtime": 2.0229, | |
| "eval_samples_per_second": 78.602, | |
| "eval_steps_per_second": 4.944, | |
| "step": 1158 | |
| }, | |
| { | |
| "epoch": 104.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.19207227230072021, | |
| "eval_runtime": 1.9639, | |
| "eval_samples_per_second": 80.962, | |
| "eval_steps_per_second": 5.092, | |
| "step": 1170 | |
| }, | |
| { | |
| "epoch": 104.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.19926591217517853, | |
| "eval_runtime": 2.0509, | |
| "eval_samples_per_second": 77.526, | |
| "eval_steps_per_second": 4.876, | |
| "step": 1181 | |
| }, | |
| { | |
| "epoch": 105.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.19455212354660034, | |
| "eval_runtime": 2.0496, | |
| "eval_samples_per_second": 77.577, | |
| "eval_steps_per_second": 4.879, | |
| "step": 1192 | |
| }, | |
| { | |
| "epoch": 106.67, | |
| "grad_norm": 1.2741256952285767, | |
| "learning_rate": 2.8787878787878788e-05, | |
| "loss": 0.1258, | |
| "step": 1200 | |
| }, | |
| { | |
| "epoch": 106.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.18963727355003357, | |
| "eval_runtime": 2.0026, | |
| "eval_samples_per_second": 79.395, | |
| "eval_steps_per_second": 4.993, | |
| "step": 1203 | |
| }, | |
| { | |
| "epoch": 108.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.1884273737668991, | |
| "eval_runtime": 2.0343, | |
| "eval_samples_per_second": 78.16, | |
| "eval_steps_per_second": 4.916, | |
| "step": 1215 | |
| }, | |
| { | |
| "epoch": 108.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.17940251529216766, | |
| "eval_runtime": 2.1734, | |
| "eval_samples_per_second": 73.156, | |
| "eval_steps_per_second": 4.601, | |
| "step": 1226 | |
| }, | |
| { | |
| "epoch": 109.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.18589730560779572, | |
| "eval_runtime": 2.0874, | |
| "eval_samples_per_second": 76.17, | |
| "eval_steps_per_second": 4.791, | |
| "step": 1237 | |
| }, | |
| { | |
| "epoch": 110.93, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.2194768339395523, | |
| "eval_runtime": 2.0717, | |
| "eval_samples_per_second": 76.747, | |
| "eval_steps_per_second": 4.827, | |
| "step": 1248 | |
| }, | |
| { | |
| "epoch": 111.11, | |
| "grad_norm": 0.3613344430923462, | |
| "learning_rate": 2.859848484848485e-05, | |
| "loss": 0.1258, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 112.0, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.20826272666454315, | |
| "eval_runtime": 1.9861, | |
| "eval_samples_per_second": 80.057, | |
| "eval_steps_per_second": 5.035, | |
| "step": 1260 | |
| }, | |
| { | |
| "epoch": 112.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.21202689409255981, | |
| "eval_runtime": 2.0132, | |
| "eval_samples_per_second": 78.98, | |
| "eval_steps_per_second": 4.967, | |
| "step": 1271 | |
| }, | |
| { | |
| "epoch": 113.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.20663346350193024, | |
| "eval_runtime": 2.02, | |
| "eval_samples_per_second": 78.711, | |
| "eval_steps_per_second": 4.95, | |
| "step": 1282 | |
| }, | |
| { | |
| "epoch": 114.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.1931203156709671, | |
| "eval_runtime": 2.033, | |
| "eval_samples_per_second": 78.208, | |
| "eval_steps_per_second": 4.919, | |
| "step": 1293 | |
| }, | |
| { | |
| "epoch": 115.56, | |
| "grad_norm": 0.7503376007080078, | |
| "learning_rate": 2.8409090909090912e-05, | |
| "loss": 0.1023, | |
| "step": 1300 | |
| }, | |
| { | |
| "epoch": 116.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.19000084698200226, | |
| "eval_runtime": 2.0014, | |
| "eval_samples_per_second": 79.446, | |
| "eval_steps_per_second": 4.997, | |
| "step": 1305 | |
| }, | |
| { | |
| "epoch": 116.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.20288796722888947, | |
| "eval_runtime": 2.0774, | |
| "eval_samples_per_second": 76.539, | |
| "eval_steps_per_second": 4.814, | |
| "step": 1316 | |
| }, | |
| { | |
| "epoch": 117.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.19505923986434937, | |
| "eval_runtime": 2.0552, | |
| "eval_samples_per_second": 77.366, | |
| "eval_steps_per_second": 4.866, | |
| "step": 1327 | |
| }, | |
| { | |
| "epoch": 118.93, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.20838169753551483, | |
| "eval_runtime": 2.2371, | |
| "eval_samples_per_second": 71.074, | |
| "eval_steps_per_second": 4.47, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 120.0, | |
| "grad_norm": 0.2376416176557541, | |
| "learning_rate": 2.821969696969697e-05, | |
| "loss": 0.0997, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 120.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.2159019112586975, | |
| "eval_runtime": 2.0579, | |
| "eval_samples_per_second": 77.264, | |
| "eval_steps_per_second": 4.859, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 120.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.21662545204162598, | |
| "eval_runtime": 2.0756, | |
| "eval_samples_per_second": 76.605, | |
| "eval_steps_per_second": 4.818, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 121.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.197323277592659, | |
| "eval_runtime": 2.0227, | |
| "eval_samples_per_second": 78.607, | |
| "eval_steps_per_second": 4.944, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 122.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.18507684767246246, | |
| "eval_runtime": 2.0728, | |
| "eval_samples_per_second": 76.706, | |
| "eval_steps_per_second": 4.824, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 124.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.20666691660881042, | |
| "eval_runtime": 1.9717, | |
| "eval_samples_per_second": 80.642, | |
| "eval_steps_per_second": 5.072, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 124.44, | |
| "grad_norm": 0.3115290403366089, | |
| "learning_rate": 2.803030303030303e-05, | |
| "loss": 0.1021, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 124.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.19534242153167725, | |
| "eval_runtime": 2.0497, | |
| "eval_samples_per_second": 77.571, | |
| "eval_steps_per_second": 4.879, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 125.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.17650572955608368, | |
| "eval_runtime": 2.239, | |
| "eval_samples_per_second": 71.015, | |
| "eval_steps_per_second": 4.466, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 126.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.18782062828540802, | |
| "eval_runtime": 2.0533, | |
| "eval_samples_per_second": 77.437, | |
| "eval_steps_per_second": 4.87, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 128.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.20708344876766205, | |
| "eval_runtime": 2.0414, | |
| "eval_samples_per_second": 77.887, | |
| "eval_steps_per_second": 4.899, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 128.89, | |
| "grad_norm": 1.2413551807403564, | |
| "learning_rate": 2.784090909090909e-05, | |
| "loss": 0.0883, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 128.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.2241077572107315, | |
| "eval_runtime": 1.9826, | |
| "eval_samples_per_second": 80.197, | |
| "eval_steps_per_second": 5.044, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 129.96, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.23481474816799164, | |
| "eval_runtime": 1.9747, | |
| "eval_samples_per_second": 80.518, | |
| "eval_steps_per_second": 5.064, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 130.93, | |
| "eval_accuracy": 0.9056603773584906, | |
| "eval_loss": 0.24748335778713226, | |
| "eval_runtime": 1.9737, | |
| "eval_samples_per_second": 80.559, | |
| "eval_steps_per_second": 5.067, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 132.0, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.21596243977546692, | |
| "eval_runtime": 2.0455, | |
| "eval_samples_per_second": 77.733, | |
| "eval_steps_per_second": 4.889, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 132.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.20896825194358826, | |
| "eval_runtime": 2.047, | |
| "eval_samples_per_second": 77.675, | |
| "eval_steps_per_second": 4.885, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 133.33, | |
| "grad_norm": 0.56540846824646, | |
| "learning_rate": 2.7651515151515152e-05, | |
| "loss": 0.0769, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 133.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.21468934416770935, | |
| "eval_runtime": 1.9936, | |
| "eval_samples_per_second": 79.754, | |
| "eval_steps_per_second": 5.016, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 134.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.22008037567138672, | |
| "eval_runtime": 2.0857, | |
| "eval_samples_per_second": 76.234, | |
| "eval_steps_per_second": 4.795, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 136.0, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.23723578453063965, | |
| "eval_runtime": 2.1872, | |
| "eval_samples_per_second": 72.695, | |
| "eval_steps_per_second": 4.572, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 136.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.21990692615509033, | |
| "eval_runtime": 2.0473, | |
| "eval_samples_per_second": 77.664, | |
| "eval_steps_per_second": 4.885, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 137.78, | |
| "grad_norm": 1.0245180130004883, | |
| "learning_rate": 2.7462121212121214e-05, | |
| "loss": 0.0786, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 137.96, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.2087443619966507, | |
| "eval_runtime": 2.0577, | |
| "eval_samples_per_second": 77.271, | |
| "eval_steps_per_second": 4.86, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 138.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.18779344856739044, | |
| "eval_runtime": 2.0799, | |
| "eval_samples_per_second": 76.447, | |
| "eval_steps_per_second": 4.808, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 140.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.1914655864238739, | |
| "eval_runtime": 2.043, | |
| "eval_samples_per_second": 77.827, | |
| "eval_steps_per_second": 4.895, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 140.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.23168283700942993, | |
| "eval_runtime": 2.0313, | |
| "eval_samples_per_second": 78.277, | |
| "eval_steps_per_second": 4.923, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 141.96, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.2865447700023651, | |
| "eval_runtime": 2.0095, | |
| "eval_samples_per_second": 79.125, | |
| "eval_steps_per_second": 4.976, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 142.22, | |
| "grad_norm": 1.393044352531433, | |
| "learning_rate": 2.7272727272727273e-05, | |
| "loss": 0.0714, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 142.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.22998519241809845, | |
| "eval_runtime": 2.1842, | |
| "eval_samples_per_second": 72.794, | |
| "eval_steps_per_second": 4.578, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 144.0, | |
| "eval_accuracy": 0.9056603773584906, | |
| "eval_loss": 0.27265357971191406, | |
| "eval_runtime": 2.0318, | |
| "eval_samples_per_second": 78.258, | |
| "eval_steps_per_second": 4.922, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 144.98, | |
| "eval_accuracy": 0.9056603773584906, | |
| "eval_loss": 0.28114742040634155, | |
| "eval_runtime": 2.0949, | |
| "eval_samples_per_second": 75.9, | |
| "eval_steps_per_second": 4.774, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 145.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.21014899015426636, | |
| "eval_runtime": 2.0829, | |
| "eval_samples_per_second": 76.335, | |
| "eval_steps_per_second": 4.801, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 146.67, | |
| "grad_norm": 1.1527929306030273, | |
| "learning_rate": 2.7083333333333335e-05, | |
| "loss": 0.0702, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 146.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.20363318920135498, | |
| "eval_runtime": 2.0224, | |
| "eval_samples_per_second": 78.618, | |
| "eval_steps_per_second": 4.945, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 148.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22154641151428223, | |
| "eval_runtime": 2.0286, | |
| "eval_samples_per_second": 78.378, | |
| "eval_steps_per_second": 4.929, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 148.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.21356013417243958, | |
| "eval_runtime": 1.9745, | |
| "eval_samples_per_second": 80.526, | |
| "eval_steps_per_second": 5.065, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 149.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.20560431480407715, | |
| "eval_runtime": 2.0343, | |
| "eval_samples_per_second": 78.161, | |
| "eval_steps_per_second": 4.916, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 150.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.20028233528137207, | |
| "eval_runtime": 2.0476, | |
| "eval_samples_per_second": 77.65, | |
| "eval_steps_per_second": 4.884, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 151.11, | |
| "grad_norm": 0.6037131547927856, | |
| "learning_rate": 2.6893939393939398e-05, | |
| "loss": 0.0676, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 152.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.22495229542255402, | |
| "eval_runtime": 2.0653, | |
| "eval_samples_per_second": 76.985, | |
| "eval_steps_per_second": 4.842, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 152.98, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.1910940259695053, | |
| "eval_runtime": 2.2097, | |
| "eval_samples_per_second": 71.955, | |
| "eval_steps_per_second": 4.525, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 153.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.2189728170633316, | |
| "eval_runtime": 2.049, | |
| "eval_samples_per_second": 77.598, | |
| "eval_steps_per_second": 4.88, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 154.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.1975589245557785, | |
| "eval_runtime": 2.0536, | |
| "eval_samples_per_second": 77.426, | |
| "eval_steps_per_second": 4.87, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 155.56, | |
| "grad_norm": 0.9841188788414001, | |
| "learning_rate": 2.6704545454545453e-05, | |
| "loss": 0.0674, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 156.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.18743836879730225, | |
| "eval_runtime": 2.0593, | |
| "eval_samples_per_second": 77.211, | |
| "eval_steps_per_second": 4.856, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 156.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2022770792245865, | |
| "eval_runtime": 2.0432, | |
| "eval_samples_per_second": 77.821, | |
| "eval_steps_per_second": 4.894, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 157.96, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.21527531743049622, | |
| "eval_runtime": 1.9951, | |
| "eval_samples_per_second": 79.694, | |
| "eval_steps_per_second": 5.012, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 158.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.22451625764369965, | |
| "eval_runtime": 2.1442, | |
| "eval_samples_per_second": 74.155, | |
| "eval_steps_per_second": 4.664, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 160.0, | |
| "grad_norm": 0.5377254486083984, | |
| "learning_rate": 2.6515151515151516e-05, | |
| "loss": 0.0548, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 160.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.2431740015745163, | |
| "eval_runtime": 2.2699, | |
| "eval_samples_per_second": 70.046, | |
| "eval_steps_per_second": 4.405, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 160.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2071038782596588, | |
| "eval_runtime": 2.0506, | |
| "eval_samples_per_second": 77.538, | |
| "eval_steps_per_second": 4.877, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 161.96, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.18368059396743774, | |
| "eval_runtime": 2.2081, | |
| "eval_samples_per_second": 72.006, | |
| "eval_steps_per_second": 4.529, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 162.93, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.19161438941955566, | |
| "eval_runtime": 1.9999, | |
| "eval_samples_per_second": 79.505, | |
| "eval_steps_per_second": 5.0, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 164.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.22212089598178864, | |
| "eval_runtime": 2.0001, | |
| "eval_samples_per_second": 79.497, | |
| "eval_steps_per_second": 5.0, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 164.44, | |
| "grad_norm": 0.5433365702629089, | |
| "learning_rate": 2.6325757575757575e-05, | |
| "loss": 0.0616, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 164.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.21204246580600739, | |
| "eval_runtime": 2.035, | |
| "eval_samples_per_second": 78.132, | |
| "eval_steps_per_second": 4.914, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 165.96, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.18882697820663452, | |
| "eval_runtime": 2.0581, | |
| "eval_samples_per_second": 77.256, | |
| "eval_steps_per_second": 4.859, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 166.93, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.19714578986167908, | |
| "eval_runtime": 2.002, | |
| "eval_samples_per_second": 79.422, | |
| "eval_steps_per_second": 4.995, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 168.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.21613995730876923, | |
| "eval_runtime": 2.0979, | |
| "eval_samples_per_second": 75.789, | |
| "eval_steps_per_second": 4.767, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 168.89, | |
| "grad_norm": 0.4616011083126068, | |
| "learning_rate": 2.6136363636363637e-05, | |
| "loss": 0.0467, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 168.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22824302315711975, | |
| "eval_runtime": 2.0023, | |
| "eval_samples_per_second": 79.407, | |
| "eval_steps_per_second": 4.994, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 169.96, | |
| "eval_accuracy": 0.9056603773584906, | |
| "eval_loss": 0.31181007623672485, | |
| "eval_runtime": 2.2272, | |
| "eval_samples_per_second": 71.39, | |
| "eval_steps_per_second": 4.49, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 170.93, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.23191651701927185, | |
| "eval_runtime": 2.0759, | |
| "eval_samples_per_second": 76.592, | |
| "eval_steps_per_second": 4.817, | |
| "step": 1923 | |
| }, | |
| { | |
| "epoch": 172.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.27404358983039856, | |
| "eval_runtime": 2.0769, | |
| "eval_samples_per_second": 76.555, | |
| "eval_steps_per_second": 4.815, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 172.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2666384279727936, | |
| "eval_runtime": 2.1046, | |
| "eval_samples_per_second": 75.548, | |
| "eval_steps_per_second": 4.751, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 173.33, | |
| "grad_norm": 1.0961925983428955, | |
| "learning_rate": 2.59469696969697e-05, | |
| "loss": 0.0609, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 173.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23152852058410645, | |
| "eval_runtime": 2.0323, | |
| "eval_samples_per_second": 78.237, | |
| "eval_steps_per_second": 4.921, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 174.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22292692959308624, | |
| "eval_runtime": 2.0749, | |
| "eval_samples_per_second": 76.629, | |
| "eval_steps_per_second": 4.819, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 176.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.21578945219516754, | |
| "eval_runtime": 2.0472, | |
| "eval_samples_per_second": 77.668, | |
| "eval_steps_per_second": 4.885, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 176.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.22257991135120392, | |
| "eval_runtime": 2.1698, | |
| "eval_samples_per_second": 73.278, | |
| "eval_steps_per_second": 4.609, | |
| "step": 1991 | |
| }, | |
| { | |
| "epoch": 177.78, | |
| "grad_norm": 1.6022953987121582, | |
| "learning_rate": 2.575757575757576e-05, | |
| "loss": 0.0522, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 177.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.22241446375846863, | |
| "eval_runtime": 2.0341, | |
| "eval_samples_per_second": 78.167, | |
| "eval_steps_per_second": 4.916, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 178.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.21375904977321625, | |
| "eval_runtime": 2.1094, | |
| "eval_samples_per_second": 75.377, | |
| "eval_steps_per_second": 4.741, | |
| "step": 2013 | |
| }, | |
| { | |
| "epoch": 180.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.21769364178180695, | |
| "eval_runtime": 1.9898, | |
| "eval_samples_per_second": 79.909, | |
| "eval_steps_per_second": 5.026, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 180.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.19169649481773376, | |
| "eval_runtime": 2.1326, | |
| "eval_samples_per_second": 74.558, | |
| "eval_steps_per_second": 4.689, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 181.96, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.19741381704807281, | |
| "eval_runtime": 2.1931, | |
| "eval_samples_per_second": 72.5, | |
| "eval_steps_per_second": 4.56, | |
| "step": 2047 | |
| }, | |
| { | |
| "epoch": 182.22, | |
| "grad_norm": 0.7399430274963379, | |
| "learning_rate": 2.556818181818182e-05, | |
| "loss": 0.0515, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 182.93, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.21981187164783478, | |
| "eval_runtime": 2.0417, | |
| "eval_samples_per_second": 77.878, | |
| "eval_steps_per_second": 4.898, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 184.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.24247391521930695, | |
| "eval_runtime": 2.1999, | |
| "eval_samples_per_second": 72.278, | |
| "eval_steps_per_second": 4.546, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 184.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.24488882720470428, | |
| "eval_runtime": 2.0767, | |
| "eval_samples_per_second": 76.565, | |
| "eval_steps_per_second": 4.815, | |
| "step": 2081 | |
| }, | |
| { | |
| "epoch": 185.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.23463451862335205, | |
| "eval_runtime": 2.0674, | |
| "eval_samples_per_second": 76.907, | |
| "eval_steps_per_second": 4.837, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 186.67, | |
| "grad_norm": 0.67291659116745, | |
| "learning_rate": 2.5378787878787876e-05, | |
| "loss": 0.045, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 186.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23308323323726654, | |
| "eval_runtime": 2.2603, | |
| "eval_samples_per_second": 70.346, | |
| "eval_steps_per_second": 4.424, | |
| "step": 2103 | |
| }, | |
| { | |
| "epoch": 188.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2660614252090454, | |
| "eval_runtime": 2.0509, | |
| "eval_samples_per_second": 77.527, | |
| "eval_steps_per_second": 4.876, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 188.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22910529375076294, | |
| "eval_runtime": 2.0536, | |
| "eval_samples_per_second": 77.423, | |
| "eval_steps_per_second": 4.869, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 189.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.23477251827716827, | |
| "eval_runtime": 2.0092, | |
| "eval_samples_per_second": 79.134, | |
| "eval_steps_per_second": 4.977, | |
| "step": 2137 | |
| }, | |
| { | |
| "epoch": 190.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23087622225284576, | |
| "eval_runtime": 2.0403, | |
| "eval_samples_per_second": 77.929, | |
| "eval_steps_per_second": 4.901, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 191.11, | |
| "grad_norm": 0.11660194396972656, | |
| "learning_rate": 2.518939393939394e-05, | |
| "loss": 0.0403, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 192.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.27889564633369446, | |
| "eval_runtime": 2.0147, | |
| "eval_samples_per_second": 78.921, | |
| "eval_steps_per_second": 4.964, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 192.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2540048658847809, | |
| "eval_runtime": 2.1082, | |
| "eval_samples_per_second": 75.42, | |
| "eval_steps_per_second": 4.743, | |
| "step": 2171 | |
| }, | |
| { | |
| "epoch": 193.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.23720349371433258, | |
| "eval_runtime": 2.1791, | |
| "eval_samples_per_second": 72.966, | |
| "eval_steps_per_second": 4.589, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 194.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2507873773574829, | |
| "eval_runtime": 1.986, | |
| "eval_samples_per_second": 80.061, | |
| "eval_steps_per_second": 5.035, | |
| "step": 2193 | |
| }, | |
| { | |
| "epoch": 195.56, | |
| "grad_norm": 0.8518453240394592, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0476, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 196.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2193620353937149, | |
| "eval_runtime": 2.1819, | |
| "eval_samples_per_second": 72.874, | |
| "eval_steps_per_second": 4.583, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 196.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.23066306114196777, | |
| "eval_runtime": 2.0482, | |
| "eval_samples_per_second": 77.628, | |
| "eval_steps_per_second": 4.882, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 197.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2719472646713257, | |
| "eval_runtime": 1.9901, | |
| "eval_samples_per_second": 79.896, | |
| "eval_steps_per_second": 5.025, | |
| "step": 2227 | |
| }, | |
| { | |
| "epoch": 198.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.28040099143981934, | |
| "eval_runtime": 2.0617, | |
| "eval_samples_per_second": 77.122, | |
| "eval_steps_per_second": 4.85, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 200.0, | |
| "grad_norm": 0.09039253741502762, | |
| "learning_rate": 2.481060606060606e-05, | |
| "loss": 0.0457, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 200.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2755438983440399, | |
| "eval_runtime": 2.0773, | |
| "eval_samples_per_second": 76.541, | |
| "eval_steps_per_second": 4.814, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 200.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2353052794933319, | |
| "eval_runtime": 1.9899, | |
| "eval_samples_per_second": 79.904, | |
| "eval_steps_per_second": 5.025, | |
| "step": 2261 | |
| }, | |
| { | |
| "epoch": 201.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.21893078088760376, | |
| "eval_runtime": 2.1045, | |
| "eval_samples_per_second": 75.552, | |
| "eval_steps_per_second": 4.752, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 202.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.21625204384326935, | |
| "eval_runtime": 2.0731, | |
| "eval_samples_per_second": 76.697, | |
| "eval_steps_per_second": 4.824, | |
| "step": 2283 | |
| }, | |
| { | |
| "epoch": 204.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2110479772090912, | |
| "eval_runtime": 2.1463, | |
| "eval_samples_per_second": 74.079, | |
| "eval_steps_per_second": 4.659, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 204.44, | |
| "grad_norm": 0.9943685531616211, | |
| "learning_rate": 2.4621212121212123e-05, | |
| "loss": 0.0393, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 204.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23164410889148712, | |
| "eval_runtime": 2.0606, | |
| "eval_samples_per_second": 77.162, | |
| "eval_steps_per_second": 4.853, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 205.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.24650876224040985, | |
| "eval_runtime": 2.0011, | |
| "eval_samples_per_second": 79.455, | |
| "eval_steps_per_second": 4.997, | |
| "step": 2317 | |
| }, | |
| { | |
| "epoch": 206.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23763243854045868, | |
| "eval_runtime": 2.0999, | |
| "eval_samples_per_second": 75.719, | |
| "eval_steps_per_second": 4.762, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 208.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2170635461807251, | |
| "eval_runtime": 2.1575, | |
| "eval_samples_per_second": 73.697, | |
| "eval_steps_per_second": 4.635, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 208.89, | |
| "grad_norm": 0.46173095703125, | |
| "learning_rate": 2.4431818181818185e-05, | |
| "loss": 0.0443, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 208.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.23952844738960266, | |
| "eval_runtime": 2.0014, | |
| "eval_samples_per_second": 79.445, | |
| "eval_steps_per_second": 4.997, | |
| "step": 2351 | |
| }, | |
| { | |
| "epoch": 209.96, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.2906019687652588, | |
| "eval_runtime": 2.0133, | |
| "eval_samples_per_second": 78.977, | |
| "eval_steps_per_second": 4.967, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 210.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2608316242694855, | |
| "eval_runtime": 2.1558, | |
| "eval_samples_per_second": 73.755, | |
| "eval_steps_per_second": 4.639, | |
| "step": 2373 | |
| }, | |
| { | |
| "epoch": 212.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.23210321366786957, | |
| "eval_runtime": 2.0606, | |
| "eval_samples_per_second": 77.161, | |
| "eval_steps_per_second": 4.853, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 212.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.24640053510665894, | |
| "eval_runtime": 2.2148, | |
| "eval_samples_per_second": 71.79, | |
| "eval_steps_per_second": 4.515, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 213.33, | |
| "grad_norm": 0.94215327501297, | |
| "learning_rate": 2.4242424242424244e-05, | |
| "loss": 0.0539, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 213.96, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.2441636025905609, | |
| "eval_runtime": 2.172, | |
| "eval_samples_per_second": 73.203, | |
| "eval_steps_per_second": 4.604, | |
| "step": 2407 | |
| }, | |
| { | |
| "epoch": 214.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.2511676847934723, | |
| "eval_runtime": 2.0176, | |
| "eval_samples_per_second": 78.806, | |
| "eval_steps_per_second": 4.956, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 216.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22649481892585754, | |
| "eval_runtime": 2.0103, | |
| "eval_samples_per_second": 79.091, | |
| "eval_steps_per_second": 4.974, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 216.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.21274729073047638, | |
| "eval_runtime": 2.0508, | |
| "eval_samples_per_second": 77.529, | |
| "eval_steps_per_second": 4.876, | |
| "step": 2441 | |
| }, | |
| { | |
| "epoch": 217.78, | |
| "grad_norm": 0.7381362318992615, | |
| "learning_rate": 2.4053030303030303e-05, | |
| "loss": 0.0415, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 217.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.284365177154541, | |
| "eval_runtime": 2.0321, | |
| "eval_samples_per_second": 78.244, | |
| "eval_steps_per_second": 4.921, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 218.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.24891048669815063, | |
| "eval_runtime": 2.0843, | |
| "eval_samples_per_second": 76.285, | |
| "eval_steps_per_second": 4.798, | |
| "step": 2463 | |
| }, | |
| { | |
| "epoch": 220.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.21200108528137207, | |
| "eval_runtime": 1.9938, | |
| "eval_samples_per_second": 79.748, | |
| "eval_steps_per_second": 5.016, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 220.98, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.2015109807252884, | |
| "eval_runtime": 2.2098, | |
| "eval_samples_per_second": 71.951, | |
| "eval_steps_per_second": 4.525, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 221.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.25095799565315247, | |
| "eval_runtime": 2.0817, | |
| "eval_samples_per_second": 76.381, | |
| "eval_steps_per_second": 4.804, | |
| "step": 2497 | |
| }, | |
| { | |
| "epoch": 222.22, | |
| "grad_norm": 0.3756774961948395, | |
| "learning_rate": 2.3863636363636362e-05, | |
| "loss": 0.0325, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 222.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2875436246395111, | |
| "eval_runtime": 2.0148, | |
| "eval_samples_per_second": 78.915, | |
| "eval_steps_per_second": 4.963, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 224.0, | |
| "eval_accuracy": 0.9622641509433962, | |
| "eval_loss": 0.19936275482177734, | |
| "eval_runtime": 2.0208, | |
| "eval_samples_per_second": 78.682, | |
| "eval_steps_per_second": 4.949, | |
| "step": 2520 | |
| }, | |
| { | |
| "epoch": 224.98, | |
| "eval_accuracy": 0.9622641509433962, | |
| "eval_loss": 0.20330873131752014, | |
| "eval_runtime": 2.1708, | |
| "eval_samples_per_second": 73.243, | |
| "eval_steps_per_second": 4.606, | |
| "step": 2531 | |
| }, | |
| { | |
| "epoch": 225.96, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2391451746225357, | |
| "eval_runtime": 1.9988, | |
| "eval_samples_per_second": 79.549, | |
| "eval_steps_per_second": 5.003, | |
| "step": 2542 | |
| }, | |
| { | |
| "epoch": 226.67, | |
| "grad_norm": 0.6930297017097473, | |
| "learning_rate": 2.3674242424242424e-05, | |
| "loss": 0.0249, | |
| "step": 2550 | |
| }, | |
| { | |
| "epoch": 226.93, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.30440014600753784, | |
| "eval_runtime": 2.0166, | |
| "eval_samples_per_second": 78.847, | |
| "eval_steps_per_second": 4.959, | |
| "step": 2553 | |
| }, | |
| { | |
| "epoch": 228.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2825218439102173, | |
| "eval_runtime": 2.2235, | |
| "eval_samples_per_second": 71.51, | |
| "eval_steps_per_second": 4.497, | |
| "step": 2565 | |
| }, | |
| { | |
| "epoch": 228.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.234725683927536, | |
| "eval_runtime": 2.0151, | |
| "eval_samples_per_second": 78.905, | |
| "eval_steps_per_second": 4.963, | |
| "step": 2576 | |
| }, | |
| { | |
| "epoch": 229.96, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.24049904942512512, | |
| "eval_runtime": 2.0305, | |
| "eval_samples_per_second": 78.304, | |
| "eval_steps_per_second": 4.925, | |
| "step": 2587 | |
| }, | |
| { | |
| "epoch": 230.93, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.25367188453674316, | |
| "eval_runtime": 2.1765, | |
| "eval_samples_per_second": 73.054, | |
| "eval_steps_per_second": 4.595, | |
| "step": 2598 | |
| }, | |
| { | |
| "epoch": 231.11, | |
| "grad_norm": 0.8203662037849426, | |
| "learning_rate": 2.3484848484848487e-05, | |
| "loss": 0.0358, | |
| "step": 2600 | |
| }, | |
| { | |
| "epoch": 232.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.27088040113449097, | |
| "eval_runtime": 2.0677, | |
| "eval_samples_per_second": 76.895, | |
| "eval_steps_per_second": 4.836, | |
| "step": 2610 | |
| }, | |
| { | |
| "epoch": 232.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2444712519645691, | |
| "eval_runtime": 2.123, | |
| "eval_samples_per_second": 74.893, | |
| "eval_steps_per_second": 4.71, | |
| "step": 2621 | |
| }, | |
| { | |
| "epoch": 233.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.24358882009983063, | |
| "eval_runtime": 2.0612, | |
| "eval_samples_per_second": 77.139, | |
| "eval_steps_per_second": 4.852, | |
| "step": 2632 | |
| }, | |
| { | |
| "epoch": 234.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.22266939282417297, | |
| "eval_runtime": 2.0145, | |
| "eval_samples_per_second": 78.929, | |
| "eval_steps_per_second": 4.964, | |
| "step": 2643 | |
| }, | |
| { | |
| "epoch": 235.56, | |
| "grad_norm": 0.7004448771476746, | |
| "learning_rate": 2.3295454545454546e-05, | |
| "loss": 0.0345, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 236.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.22081993520259857, | |
| "eval_runtime": 2.0852, | |
| "eval_samples_per_second": 76.252, | |
| "eval_steps_per_second": 4.796, | |
| "step": 2655 | |
| }, | |
| { | |
| "epoch": 236.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.22930140793323517, | |
| "eval_runtime": 2.038, | |
| "eval_samples_per_second": 78.019, | |
| "eval_steps_per_second": 4.907, | |
| "step": 2666 | |
| }, | |
| { | |
| "epoch": 237.96, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2159855216741562, | |
| "eval_runtime": 2.2011, | |
| "eval_samples_per_second": 72.236, | |
| "eval_steps_per_second": 4.543, | |
| "step": 2677 | |
| }, | |
| { | |
| "epoch": 238.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2085605412721634, | |
| "eval_runtime": 2.0845, | |
| "eval_samples_per_second": 76.277, | |
| "eval_steps_per_second": 4.797, | |
| "step": 2688 | |
| }, | |
| { | |
| "epoch": 240.0, | |
| "grad_norm": 1.642115592956543, | |
| "learning_rate": 2.3106060606060608e-05, | |
| "loss": 0.0339, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 240.0, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.26398828625679016, | |
| "eval_runtime": 1.9895, | |
| "eval_samples_per_second": 79.918, | |
| "eval_steps_per_second": 5.026, | |
| "step": 2700 | |
| }, | |
| { | |
| "epoch": 240.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2953893542289734, | |
| "eval_runtime": 2.0677, | |
| "eval_samples_per_second": 76.899, | |
| "eval_steps_per_second": 4.836, | |
| "step": 2711 | |
| }, | |
| { | |
| "epoch": 241.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2507174611091614, | |
| "eval_runtime": 2.1213, | |
| "eval_samples_per_second": 74.953, | |
| "eval_steps_per_second": 4.714, | |
| "step": 2722 | |
| }, | |
| { | |
| "epoch": 242.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.227327361702919, | |
| "eval_runtime": 1.9774, | |
| "eval_samples_per_second": 80.407, | |
| "eval_steps_per_second": 5.057, | |
| "step": 2733 | |
| }, | |
| { | |
| "epoch": 244.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.24215646088123322, | |
| "eval_runtime": 2.0297, | |
| "eval_samples_per_second": 78.336, | |
| "eval_steps_per_second": 4.927, | |
| "step": 2745 | |
| }, | |
| { | |
| "epoch": 244.44, | |
| "grad_norm": 1.2598336935043335, | |
| "learning_rate": 2.2916666666666667e-05, | |
| "loss": 0.0309, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 244.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2931080758571625, | |
| "eval_runtime": 2.1459, | |
| "eval_samples_per_second": 74.093, | |
| "eval_steps_per_second": 4.66, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 245.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2694746255874634, | |
| "eval_runtime": 2.0392, | |
| "eval_samples_per_second": 77.97, | |
| "eval_steps_per_second": 4.904, | |
| "step": 2767 | |
| }, | |
| { | |
| "epoch": 246.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.26456066966056824, | |
| "eval_runtime": 2.1011, | |
| "eval_samples_per_second": 75.673, | |
| "eval_steps_per_second": 4.759, | |
| "step": 2778 | |
| }, | |
| { | |
| "epoch": 248.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23147591948509216, | |
| "eval_runtime": 2.0349, | |
| "eval_samples_per_second": 78.135, | |
| "eval_steps_per_second": 4.914, | |
| "step": 2790 | |
| }, | |
| { | |
| "epoch": 248.89, | |
| "grad_norm": 1.3385041952133179, | |
| "learning_rate": 2.272727272727273e-05, | |
| "loss": 0.0301, | |
| "step": 2800 | |
| }, | |
| { | |
| "epoch": 248.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2269720882177353, | |
| "eval_runtime": 2.0267, | |
| "eval_samples_per_second": 78.453, | |
| "eval_steps_per_second": 4.934, | |
| "step": 2801 | |
| }, | |
| { | |
| "epoch": 249.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.244718536734581, | |
| "eval_runtime": 2.0507, | |
| "eval_samples_per_second": 77.533, | |
| "eval_steps_per_second": 4.876, | |
| "step": 2812 | |
| }, | |
| { | |
| "epoch": 250.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2586061358451843, | |
| "eval_runtime": 2.0836, | |
| "eval_samples_per_second": 76.312, | |
| "eval_steps_per_second": 4.799, | |
| "step": 2823 | |
| }, | |
| { | |
| "epoch": 252.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3038959503173828, | |
| "eval_runtime": 2.0093, | |
| "eval_samples_per_second": 79.132, | |
| "eval_steps_per_second": 4.977, | |
| "step": 2835 | |
| }, | |
| { | |
| "epoch": 252.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.27771249413490295, | |
| "eval_runtime": 2.1305, | |
| "eval_samples_per_second": 74.63, | |
| "eval_steps_per_second": 4.694, | |
| "step": 2846 | |
| }, | |
| { | |
| "epoch": 253.33, | |
| "grad_norm": 0.40545353293418884, | |
| "learning_rate": 2.2537878787878788e-05, | |
| "loss": 0.0335, | |
| "step": 2850 | |
| }, | |
| { | |
| "epoch": 253.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.256588876247406, | |
| "eval_runtime": 2.1001, | |
| "eval_samples_per_second": 75.709, | |
| "eval_steps_per_second": 4.762, | |
| "step": 2857 | |
| }, | |
| { | |
| "epoch": 254.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.26031869649887085, | |
| "eval_runtime": 2.2094, | |
| "eval_samples_per_second": 71.966, | |
| "eval_steps_per_second": 4.526, | |
| "step": 2868 | |
| }, | |
| { | |
| "epoch": 256.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.26985806226730347, | |
| "eval_runtime": 1.9916, | |
| "eval_samples_per_second": 79.835, | |
| "eval_steps_per_second": 5.021, | |
| "step": 2880 | |
| }, | |
| { | |
| "epoch": 256.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2838137149810791, | |
| "eval_runtime": 1.9992, | |
| "eval_samples_per_second": 79.532, | |
| "eval_steps_per_second": 5.002, | |
| "step": 2891 | |
| }, | |
| { | |
| "epoch": 257.78, | |
| "grad_norm": 0.2661449611186981, | |
| "learning_rate": 2.2348484848484847e-05, | |
| "loss": 0.0249, | |
| "step": 2900 | |
| }, | |
| { | |
| "epoch": 257.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2572626769542694, | |
| "eval_runtime": 2.0448, | |
| "eval_samples_per_second": 77.758, | |
| "eval_steps_per_second": 4.89, | |
| "step": 2902 | |
| }, | |
| { | |
| "epoch": 258.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2652382254600525, | |
| "eval_runtime": 2.0483, | |
| "eval_samples_per_second": 77.627, | |
| "eval_steps_per_second": 4.882, | |
| "step": 2913 | |
| }, | |
| { | |
| "epoch": 260.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.26221606135368347, | |
| "eval_runtime": 1.9761, | |
| "eval_samples_per_second": 80.461, | |
| "eval_steps_per_second": 5.06, | |
| "step": 2925 | |
| }, | |
| { | |
| "epoch": 260.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2583387494087219, | |
| "eval_runtime": 2.0285, | |
| "eval_samples_per_second": 78.384, | |
| "eval_steps_per_second": 4.93, | |
| "step": 2936 | |
| }, | |
| { | |
| "epoch": 261.96, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.23241400718688965, | |
| "eval_runtime": 2.1753, | |
| "eval_samples_per_second": 73.094, | |
| "eval_steps_per_second": 4.597, | |
| "step": 2947 | |
| }, | |
| { | |
| "epoch": 262.22, | |
| "grad_norm": 0.5177292227745056, | |
| "learning_rate": 2.215909090909091e-05, | |
| "loss": 0.0308, | |
| "step": 2950 | |
| }, | |
| { | |
| "epoch": 262.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2781696319580078, | |
| "eval_runtime": 2.0731, | |
| "eval_samples_per_second": 76.695, | |
| "eval_steps_per_second": 4.824, | |
| "step": 2958 | |
| }, | |
| { | |
| "epoch": 264.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2519301474094391, | |
| "eval_runtime": 2.1326, | |
| "eval_samples_per_second": 74.556, | |
| "eval_steps_per_second": 4.689, | |
| "step": 2970 | |
| }, | |
| { | |
| "epoch": 264.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2634475529193878, | |
| "eval_runtime": 2.0868, | |
| "eval_samples_per_second": 76.194, | |
| "eval_steps_per_second": 4.792, | |
| "step": 2981 | |
| }, | |
| { | |
| "epoch": 265.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2647358775138855, | |
| "eval_runtime": 2.023, | |
| "eval_samples_per_second": 78.596, | |
| "eval_steps_per_second": 4.943, | |
| "step": 2992 | |
| }, | |
| { | |
| "epoch": 266.67, | |
| "grad_norm": 0.311382532119751, | |
| "learning_rate": 2.1969696969696972e-05, | |
| "loss": 0.0282, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 266.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.25880536437034607, | |
| "eval_runtime": 2.0166, | |
| "eval_samples_per_second": 78.845, | |
| "eval_steps_per_second": 4.959, | |
| "step": 3003 | |
| }, | |
| { | |
| "epoch": 268.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.23151087760925293, | |
| "eval_runtime": 2.1955, | |
| "eval_samples_per_second": 72.42, | |
| "eval_steps_per_second": 4.555, | |
| "step": 3015 | |
| }, | |
| { | |
| "epoch": 268.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22928977012634277, | |
| "eval_runtime": 2.1352, | |
| "eval_samples_per_second": 74.465, | |
| "eval_steps_per_second": 4.683, | |
| "step": 3026 | |
| }, | |
| { | |
| "epoch": 269.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.23751527070999146, | |
| "eval_runtime": 2.0031, | |
| "eval_samples_per_second": 79.378, | |
| "eval_steps_per_second": 4.992, | |
| "step": 3037 | |
| }, | |
| { | |
| "epoch": 270.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.24385805428028107, | |
| "eval_runtime": 2.109, | |
| "eval_samples_per_second": 75.392, | |
| "eval_steps_per_second": 4.742, | |
| "step": 3048 | |
| }, | |
| { | |
| "epoch": 271.11, | |
| "grad_norm": 0.8252888321876526, | |
| "learning_rate": 2.178030303030303e-05, | |
| "loss": 0.0347, | |
| "step": 3050 | |
| }, | |
| { | |
| "epoch": 272.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2541854679584503, | |
| "eval_runtime": 2.1279, | |
| "eval_samples_per_second": 74.722, | |
| "eval_steps_per_second": 4.699, | |
| "step": 3060 | |
| }, | |
| { | |
| "epoch": 272.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.24015812575817108, | |
| "eval_runtime": 1.9697, | |
| "eval_samples_per_second": 80.724, | |
| "eval_steps_per_second": 5.077, | |
| "step": 3071 | |
| }, | |
| { | |
| "epoch": 273.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2365039885044098, | |
| "eval_runtime": 2.1369, | |
| "eval_samples_per_second": 74.406, | |
| "eval_steps_per_second": 4.68, | |
| "step": 3082 | |
| }, | |
| { | |
| "epoch": 274.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2757132053375244, | |
| "eval_runtime": 2.0094, | |
| "eval_samples_per_second": 79.128, | |
| "eval_steps_per_second": 4.977, | |
| "step": 3093 | |
| }, | |
| { | |
| "epoch": 275.56, | |
| "grad_norm": 0.06441498547792435, | |
| "learning_rate": 2.1590909090909093e-05, | |
| "loss": 0.0211, | |
| "step": 3100 | |
| }, | |
| { | |
| "epoch": 276.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.25078749656677246, | |
| "eval_runtime": 2.0059, | |
| "eval_samples_per_second": 79.266, | |
| "eval_steps_per_second": 4.985, | |
| "step": 3105 | |
| }, | |
| { | |
| "epoch": 276.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.23951387405395508, | |
| "eval_runtime": 2.174, | |
| "eval_samples_per_second": 73.137, | |
| "eval_steps_per_second": 4.6, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 277.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.25363460183143616, | |
| "eval_runtime": 2.0281, | |
| "eval_samples_per_second": 78.399, | |
| "eval_steps_per_second": 4.931, | |
| "step": 3127 | |
| }, | |
| { | |
| "epoch": 278.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.26847586035728455, | |
| "eval_runtime": 2.2802, | |
| "eval_samples_per_second": 69.729, | |
| "eval_steps_per_second": 4.385, | |
| "step": 3138 | |
| }, | |
| { | |
| "epoch": 280.0, | |
| "grad_norm": 0.5554720759391785, | |
| "learning_rate": 2.1401515151515152e-05, | |
| "loss": 0.0248, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 280.0, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.2974900007247925, | |
| "eval_runtime": 2.0423, | |
| "eval_samples_per_second": 77.852, | |
| "eval_steps_per_second": 4.896, | |
| "step": 3150 | |
| }, | |
| { | |
| "epoch": 280.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.3234010636806488, | |
| "eval_runtime": 2.0793, | |
| "eval_samples_per_second": 76.469, | |
| "eval_steps_per_second": 4.809, | |
| "step": 3161 | |
| }, | |
| { | |
| "epoch": 281.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2707124352455139, | |
| "eval_runtime": 2.0919, | |
| "eval_samples_per_second": 76.007, | |
| "eval_steps_per_second": 4.78, | |
| "step": 3172 | |
| }, | |
| { | |
| "epoch": 282.93, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.22501063346862793, | |
| "eval_runtime": 1.9726, | |
| "eval_samples_per_second": 80.606, | |
| "eval_steps_per_second": 5.07, | |
| "step": 3183 | |
| }, | |
| { | |
| "epoch": 284.0, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.23188871145248413, | |
| "eval_runtime": 1.9745, | |
| "eval_samples_per_second": 80.526, | |
| "eval_steps_per_second": 5.065, | |
| "step": 3195 | |
| }, | |
| { | |
| "epoch": 284.44, | |
| "grad_norm": 0.20468498766422272, | |
| "learning_rate": 2.121212121212121e-05, | |
| "loss": 0.0243, | |
| "step": 3200 | |
| }, | |
| { | |
| "epoch": 284.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.25254714488983154, | |
| "eval_runtime": 2.1319, | |
| "eval_samples_per_second": 74.582, | |
| "eval_steps_per_second": 4.691, | |
| "step": 3206 | |
| }, | |
| { | |
| "epoch": 285.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.26610061526298523, | |
| "eval_runtime": 2.0326, | |
| "eval_samples_per_second": 78.226, | |
| "eval_steps_per_second": 4.92, | |
| "step": 3217 | |
| }, | |
| { | |
| "epoch": 286.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.28444719314575195, | |
| "eval_runtime": 2.0467, | |
| "eval_samples_per_second": 77.687, | |
| "eval_steps_per_second": 4.886, | |
| "step": 3228 | |
| }, | |
| { | |
| "epoch": 288.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2571127116680145, | |
| "eval_runtime": 2.1631, | |
| "eval_samples_per_second": 73.504, | |
| "eval_steps_per_second": 4.623, | |
| "step": 3240 | |
| }, | |
| { | |
| "epoch": 288.89, | |
| "grad_norm": 1.0598843097686768, | |
| "learning_rate": 2.1022727272727274e-05, | |
| "loss": 0.0223, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 288.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.251703679561615, | |
| "eval_runtime": 2.09, | |
| "eval_samples_per_second": 76.075, | |
| "eval_steps_per_second": 4.785, | |
| "step": 3251 | |
| }, | |
| { | |
| "epoch": 289.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2636191248893738, | |
| "eval_runtime": 2.0348, | |
| "eval_samples_per_second": 78.14, | |
| "eval_steps_per_second": 4.914, | |
| "step": 3262 | |
| }, | |
| { | |
| "epoch": 290.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.26941102743148804, | |
| "eval_runtime": 2.0598, | |
| "eval_samples_per_second": 77.193, | |
| "eval_steps_per_second": 4.855, | |
| "step": 3273 | |
| }, | |
| { | |
| "epoch": 292.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.23060773313045502, | |
| "eval_runtime": 2.0528, | |
| "eval_samples_per_second": 77.454, | |
| "eval_steps_per_second": 4.871, | |
| "step": 3285 | |
| }, | |
| { | |
| "epoch": 292.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23769862949848175, | |
| "eval_runtime": 2.0936, | |
| "eval_samples_per_second": 75.945, | |
| "eval_steps_per_second": 4.776, | |
| "step": 3296 | |
| }, | |
| { | |
| "epoch": 293.33, | |
| "grad_norm": 0.6022414565086365, | |
| "learning_rate": 2.0833333333333333e-05, | |
| "loss": 0.0234, | |
| "step": 3300 | |
| }, | |
| { | |
| "epoch": 293.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.26981261372566223, | |
| "eval_runtime": 2.0959, | |
| "eval_samples_per_second": 75.861, | |
| "eval_steps_per_second": 4.771, | |
| "step": 3307 | |
| }, | |
| { | |
| "epoch": 294.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.28393277525901794, | |
| "eval_runtime": 2.0125, | |
| "eval_samples_per_second": 79.007, | |
| "eval_steps_per_second": 4.969, | |
| "step": 3318 | |
| }, | |
| { | |
| "epoch": 296.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.25016099214553833, | |
| "eval_runtime": 2.1941, | |
| "eval_samples_per_second": 72.467, | |
| "eval_steps_per_second": 4.558, | |
| "step": 3330 | |
| }, | |
| { | |
| "epoch": 296.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.27042049169540405, | |
| "eval_runtime": 2.0192, | |
| "eval_samples_per_second": 78.742, | |
| "eval_steps_per_second": 4.952, | |
| "step": 3341 | |
| }, | |
| { | |
| "epoch": 297.78, | |
| "grad_norm": 0.03581221029162407, | |
| "learning_rate": 2.0643939393939395e-05, | |
| "loss": 0.0256, | |
| "step": 3350 | |
| }, | |
| { | |
| "epoch": 297.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.28789857029914856, | |
| "eval_runtime": 2.1148, | |
| "eval_samples_per_second": 75.183, | |
| "eval_steps_per_second": 4.729, | |
| "step": 3352 | |
| }, | |
| { | |
| "epoch": 298.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3078269362449646, | |
| "eval_runtime": 2.0062, | |
| "eval_samples_per_second": 79.253, | |
| "eval_steps_per_second": 4.984, | |
| "step": 3363 | |
| }, | |
| { | |
| "epoch": 300.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.31602492928504944, | |
| "eval_runtime": 2.0641, | |
| "eval_samples_per_second": 77.031, | |
| "eval_steps_per_second": 4.845, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 300.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2705954313278198, | |
| "eval_runtime": 2.0316, | |
| "eval_samples_per_second": 78.263, | |
| "eval_steps_per_second": 4.922, | |
| "step": 3386 | |
| }, | |
| { | |
| "epoch": 301.96, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2504004240036011, | |
| "eval_runtime": 2.1492, | |
| "eval_samples_per_second": 73.982, | |
| "eval_steps_per_second": 4.653, | |
| "step": 3397 | |
| }, | |
| { | |
| "epoch": 302.22, | |
| "grad_norm": 2.553766965866089, | |
| "learning_rate": 2.0454545454545454e-05, | |
| "loss": 0.0224, | |
| "step": 3400 | |
| }, | |
| { | |
| "epoch": 302.93, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.24540336430072784, | |
| "eval_runtime": 2.0269, | |
| "eval_samples_per_second": 78.443, | |
| "eval_steps_per_second": 4.934, | |
| "step": 3408 | |
| }, | |
| { | |
| "epoch": 304.0, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.24798454344272614, | |
| "eval_runtime": 2.0863, | |
| "eval_samples_per_second": 76.213, | |
| "eval_steps_per_second": 4.793, | |
| "step": 3420 | |
| }, | |
| { | |
| "epoch": 304.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2511013150215149, | |
| "eval_runtime": 2.0476, | |
| "eval_samples_per_second": 77.651, | |
| "eval_steps_per_second": 4.884, | |
| "step": 3431 | |
| }, | |
| { | |
| "epoch": 305.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2796252369880676, | |
| "eval_runtime": 2.1539, | |
| "eval_samples_per_second": 73.819, | |
| "eval_steps_per_second": 4.643, | |
| "step": 3442 | |
| }, | |
| { | |
| "epoch": 306.67, | |
| "grad_norm": 0.41460466384887695, | |
| "learning_rate": 2.0265151515151516e-05, | |
| "loss": 0.0155, | |
| "step": 3450 | |
| }, | |
| { | |
| "epoch": 306.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.29322367906570435, | |
| "eval_runtime": 2.093, | |
| "eval_samples_per_second": 75.966, | |
| "eval_steps_per_second": 4.778, | |
| "step": 3453 | |
| }, | |
| { | |
| "epoch": 308.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.2996874153614044, | |
| "eval_runtime": 2.0951, | |
| "eval_samples_per_second": 75.893, | |
| "eval_steps_per_second": 4.773, | |
| "step": 3465 | |
| }, | |
| { | |
| "epoch": 308.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3044210970401764, | |
| "eval_runtime": 1.9749, | |
| "eval_samples_per_second": 80.512, | |
| "eval_steps_per_second": 5.064, | |
| "step": 3476 | |
| }, | |
| { | |
| "epoch": 309.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3255678415298462, | |
| "eval_runtime": 2.0175, | |
| "eval_samples_per_second": 78.81, | |
| "eval_steps_per_second": 4.957, | |
| "step": 3487 | |
| }, | |
| { | |
| "epoch": 310.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3400976359844208, | |
| "eval_runtime": 2.0285, | |
| "eval_samples_per_second": 78.381, | |
| "eval_steps_per_second": 4.93, | |
| "step": 3498 | |
| }, | |
| { | |
| "epoch": 311.11, | |
| "grad_norm": 0.5975369811058044, | |
| "learning_rate": 2.007575757575758e-05, | |
| "loss": 0.0226, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 312.0, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.30681127309799194, | |
| "eval_runtime": 2.0805, | |
| "eval_samples_per_second": 76.424, | |
| "eval_steps_per_second": 4.807, | |
| "step": 3510 | |
| }, | |
| { | |
| "epoch": 312.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.30169352889060974, | |
| "eval_runtime": 2.1998, | |
| "eval_samples_per_second": 72.279, | |
| "eval_steps_per_second": 4.546, | |
| "step": 3521 | |
| }, | |
| { | |
| "epoch": 313.96, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.29409661889076233, | |
| "eval_runtime": 2.1625, | |
| "eval_samples_per_second": 73.527, | |
| "eval_steps_per_second": 4.624, | |
| "step": 3532 | |
| }, | |
| { | |
| "epoch": 314.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2840117812156677, | |
| "eval_runtime": 2.0614, | |
| "eval_samples_per_second": 77.134, | |
| "eval_steps_per_second": 4.851, | |
| "step": 3543 | |
| }, | |
| { | |
| "epoch": 315.56, | |
| "grad_norm": 0.4768455922603607, | |
| "learning_rate": 1.9886363636363634e-05, | |
| "loss": 0.0153, | |
| "step": 3550 | |
| }, | |
| { | |
| "epoch": 316.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.28999558091163635, | |
| "eval_runtime": 2.0423, | |
| "eval_samples_per_second": 77.855, | |
| "eval_steps_per_second": 4.897, | |
| "step": 3555 | |
| }, | |
| { | |
| "epoch": 316.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.29232266545295715, | |
| "eval_runtime": 2.0108, | |
| "eval_samples_per_second": 79.073, | |
| "eval_steps_per_second": 4.973, | |
| "step": 3566 | |
| }, | |
| { | |
| "epoch": 317.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2964979112148285, | |
| "eval_runtime": 1.9633, | |
| "eval_samples_per_second": 80.988, | |
| "eval_steps_per_second": 5.094, | |
| "step": 3577 | |
| }, | |
| { | |
| "epoch": 318.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3201989531517029, | |
| "eval_runtime": 2.0683, | |
| "eval_samples_per_second": 76.876, | |
| "eval_steps_per_second": 4.835, | |
| "step": 3588 | |
| }, | |
| { | |
| "epoch": 320.0, | |
| "grad_norm": 0.01774447225034237, | |
| "learning_rate": 1.9696969696969697e-05, | |
| "loss": 0.0183, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 320.0, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.33252981305122375, | |
| "eval_runtime": 1.9991, | |
| "eval_samples_per_second": 79.534, | |
| "eval_steps_per_second": 5.002, | |
| "step": 3600 | |
| }, | |
| { | |
| "epoch": 320.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.34411394596099854, | |
| "eval_runtime": 1.9595, | |
| "eval_samples_per_second": 81.143, | |
| "eval_steps_per_second": 5.103, | |
| "step": 3611 | |
| }, | |
| { | |
| "epoch": 321.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3004206120967865, | |
| "eval_runtime": 2.102, | |
| "eval_samples_per_second": 75.644, | |
| "eval_steps_per_second": 4.757, | |
| "step": 3622 | |
| }, | |
| { | |
| "epoch": 322.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3022076487541199, | |
| "eval_runtime": 2.1248, | |
| "eval_samples_per_second": 74.83, | |
| "eval_steps_per_second": 4.706, | |
| "step": 3633 | |
| }, | |
| { | |
| "epoch": 324.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.29579004645347595, | |
| "eval_runtime": 2.073, | |
| "eval_samples_per_second": 76.702, | |
| "eval_steps_per_second": 4.824, | |
| "step": 3645 | |
| }, | |
| { | |
| "epoch": 324.44, | |
| "grad_norm": 0.43064549565315247, | |
| "learning_rate": 1.950757575757576e-05, | |
| "loss": 0.0257, | |
| "step": 3650 | |
| }, | |
| { | |
| "epoch": 324.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2942567765712738, | |
| "eval_runtime": 2.08, | |
| "eval_samples_per_second": 76.442, | |
| "eval_steps_per_second": 4.808, | |
| "step": 3656 | |
| }, | |
| { | |
| "epoch": 325.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2944892942905426, | |
| "eval_runtime": 1.9313, | |
| "eval_samples_per_second": 82.326, | |
| "eval_steps_per_second": 5.178, | |
| "step": 3667 | |
| }, | |
| { | |
| "epoch": 326.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.29099544882774353, | |
| "eval_runtime": 2.085, | |
| "eval_samples_per_second": 76.26, | |
| "eval_steps_per_second": 4.796, | |
| "step": 3678 | |
| }, | |
| { | |
| "epoch": 328.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.2856423258781433, | |
| "eval_runtime": 2.1029, | |
| "eval_samples_per_second": 75.609, | |
| "eval_steps_per_second": 4.755, | |
| "step": 3690 | |
| }, | |
| { | |
| "epoch": 328.89, | |
| "grad_norm": 0.7020539045333862, | |
| "learning_rate": 1.9318181818181818e-05, | |
| "loss": 0.0164, | |
| "step": 3700 | |
| }, | |
| { | |
| "epoch": 328.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.25798845291137695, | |
| "eval_runtime": 2.0372, | |
| "eval_samples_per_second": 78.047, | |
| "eval_steps_per_second": 4.909, | |
| "step": 3701 | |
| }, | |
| { | |
| "epoch": 329.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2566261291503906, | |
| "eval_runtime": 2.1479, | |
| "eval_samples_per_second": 74.027, | |
| "eval_steps_per_second": 4.656, | |
| "step": 3712 | |
| }, | |
| { | |
| "epoch": 330.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2538098394870758, | |
| "eval_runtime": 2.0665, | |
| "eval_samples_per_second": 76.941, | |
| "eval_steps_per_second": 4.839, | |
| "step": 3723 | |
| }, | |
| { | |
| "epoch": 332.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.24481499195098877, | |
| "eval_runtime": 2.0898, | |
| "eval_samples_per_second": 76.084, | |
| "eval_steps_per_second": 4.785, | |
| "step": 3735 | |
| }, | |
| { | |
| "epoch": 332.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2543666958808899, | |
| "eval_runtime": 2.035, | |
| "eval_samples_per_second": 78.134, | |
| "eval_steps_per_second": 4.914, | |
| "step": 3746 | |
| }, | |
| { | |
| "epoch": 333.33, | |
| "grad_norm": 0.9068632125854492, | |
| "learning_rate": 1.912878787878788e-05, | |
| "loss": 0.0222, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 333.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3075094223022461, | |
| "eval_runtime": 2.101, | |
| "eval_samples_per_second": 75.678, | |
| "eval_steps_per_second": 4.76, | |
| "step": 3757 | |
| }, | |
| { | |
| "epoch": 334.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.27574771642684937, | |
| "eval_runtime": 2.0253, | |
| "eval_samples_per_second": 78.507, | |
| "eval_steps_per_second": 4.938, | |
| "step": 3768 | |
| }, | |
| { | |
| "epoch": 336.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2714598774909973, | |
| "eval_runtime": 2.2265, | |
| "eval_samples_per_second": 71.412, | |
| "eval_steps_per_second": 4.491, | |
| "step": 3780 | |
| }, | |
| { | |
| "epoch": 336.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3330034911632538, | |
| "eval_runtime": 2.0552, | |
| "eval_samples_per_second": 77.365, | |
| "eval_steps_per_second": 4.866, | |
| "step": 3791 | |
| }, | |
| { | |
| "epoch": 337.78, | |
| "grad_norm": 0.03231671825051308, | |
| "learning_rate": 1.893939393939394e-05, | |
| "loss": 0.0212, | |
| "step": 3800 | |
| }, | |
| { | |
| "epoch": 337.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35598525404930115, | |
| "eval_runtime": 2.0188, | |
| "eval_samples_per_second": 78.762, | |
| "eval_steps_per_second": 4.954, | |
| "step": 3802 | |
| }, | |
| { | |
| "epoch": 338.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.28320637345314026, | |
| "eval_runtime": 2.1352, | |
| "eval_samples_per_second": 74.467, | |
| "eval_steps_per_second": 4.683, | |
| "step": 3813 | |
| }, | |
| { | |
| "epoch": 340.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2855217754840851, | |
| "eval_runtime": 2.1886, | |
| "eval_samples_per_second": 72.648, | |
| "eval_steps_per_second": 4.569, | |
| "step": 3825 | |
| }, | |
| { | |
| "epoch": 340.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.30631041526794434, | |
| "eval_runtime": 2.0061, | |
| "eval_samples_per_second": 79.26, | |
| "eval_steps_per_second": 4.985, | |
| "step": 3836 | |
| }, | |
| { | |
| "epoch": 341.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.29151424765586853, | |
| "eval_runtime": 2.0201, | |
| "eval_samples_per_second": 78.71, | |
| "eval_steps_per_second": 4.95, | |
| "step": 3847 | |
| }, | |
| { | |
| "epoch": 342.22, | |
| "grad_norm": 0.07481174916028976, | |
| "learning_rate": 1.8750000000000002e-05, | |
| "loss": 0.016, | |
| "step": 3850 | |
| }, | |
| { | |
| "epoch": 342.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.28358563780784607, | |
| "eval_runtime": 1.9309, | |
| "eval_samples_per_second": 82.344, | |
| "eval_steps_per_second": 5.179, | |
| "step": 3858 | |
| }, | |
| { | |
| "epoch": 344.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.28052231669425964, | |
| "eval_runtime": 1.9926, | |
| "eval_samples_per_second": 79.797, | |
| "eval_steps_per_second": 5.019, | |
| "step": 3870 | |
| }, | |
| { | |
| "epoch": 344.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.26776131987571716, | |
| "eval_runtime": 2.1218, | |
| "eval_samples_per_second": 74.936, | |
| "eval_steps_per_second": 4.713, | |
| "step": 3881 | |
| }, | |
| { | |
| "epoch": 345.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2839824855327606, | |
| "eval_runtime": 2.0764, | |
| "eval_samples_per_second": 76.575, | |
| "eval_steps_per_second": 4.816, | |
| "step": 3892 | |
| }, | |
| { | |
| "epoch": 346.67, | |
| "grad_norm": 1.4776334762573242, | |
| "learning_rate": 1.856060606060606e-05, | |
| "loss": 0.0163, | |
| "step": 3900 | |
| }, | |
| { | |
| "epoch": 346.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3048093914985657, | |
| "eval_runtime": 2.1233, | |
| "eval_samples_per_second": 74.885, | |
| "eval_steps_per_second": 4.71, | |
| "step": 3903 | |
| }, | |
| { | |
| "epoch": 348.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.27605798840522766, | |
| "eval_runtime": 1.9601, | |
| "eval_samples_per_second": 81.117, | |
| "eval_steps_per_second": 5.102, | |
| "step": 3915 | |
| }, | |
| { | |
| "epoch": 348.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.30447614192962646, | |
| "eval_runtime": 2.0457, | |
| "eval_samples_per_second": 77.724, | |
| "eval_steps_per_second": 4.888, | |
| "step": 3926 | |
| }, | |
| { | |
| "epoch": 349.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.26728910207748413, | |
| "eval_runtime": 2.0205, | |
| "eval_samples_per_second": 78.692, | |
| "eval_steps_per_second": 4.949, | |
| "step": 3937 | |
| }, | |
| { | |
| "epoch": 350.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2830033600330353, | |
| "eval_runtime": 2.0741, | |
| "eval_samples_per_second": 76.66, | |
| "eval_steps_per_second": 4.821, | |
| "step": 3948 | |
| }, | |
| { | |
| "epoch": 351.11, | |
| "grad_norm": 0.30603834986686707, | |
| "learning_rate": 1.837121212121212e-05, | |
| "loss": 0.0185, | |
| "step": 3950 | |
| }, | |
| { | |
| "epoch": 352.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.31495675444602966, | |
| "eval_runtime": 2.0088, | |
| "eval_samples_per_second": 79.152, | |
| "eval_steps_per_second": 4.978, | |
| "step": 3960 | |
| }, | |
| { | |
| "epoch": 352.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2967083156108856, | |
| "eval_runtime": 2.0921, | |
| "eval_samples_per_second": 75.999, | |
| "eval_steps_per_second": 4.78, | |
| "step": 3971 | |
| }, | |
| { | |
| "epoch": 353.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2917640507221222, | |
| "eval_runtime": 2.1439, | |
| "eval_samples_per_second": 74.165, | |
| "eval_steps_per_second": 4.664, | |
| "step": 3982 | |
| }, | |
| { | |
| "epoch": 354.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2848517894744873, | |
| "eval_runtime": 2.0244, | |
| "eval_samples_per_second": 78.541, | |
| "eval_steps_per_second": 4.94, | |
| "step": 3993 | |
| }, | |
| { | |
| "epoch": 355.56, | |
| "grad_norm": 0.6905023455619812, | |
| "learning_rate": 1.8181818181818182e-05, | |
| "loss": 0.0189, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 356.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.28043246269226074, | |
| "eval_runtime": 2.0697, | |
| "eval_samples_per_second": 76.823, | |
| "eval_steps_per_second": 4.832, | |
| "step": 4005 | |
| }, | |
| { | |
| "epoch": 356.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.29090604186058044, | |
| "eval_runtime": 2.3048, | |
| "eval_samples_per_second": 68.987, | |
| "eval_steps_per_second": 4.339, | |
| "step": 4016 | |
| }, | |
| { | |
| "epoch": 357.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3029940724372864, | |
| "eval_runtime": 2.0213, | |
| "eval_samples_per_second": 78.661, | |
| "eval_steps_per_second": 4.947, | |
| "step": 4027 | |
| }, | |
| { | |
| "epoch": 358.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.316310852766037, | |
| "eval_runtime": 2.0126, | |
| "eval_samples_per_second": 79.004, | |
| "eval_steps_per_second": 4.969, | |
| "step": 4038 | |
| }, | |
| { | |
| "epoch": 360.0, | |
| "grad_norm": 0.09516480565071106, | |
| "learning_rate": 1.799242424242424e-05, | |
| "loss": 0.0153, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 360.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.32167917490005493, | |
| "eval_runtime": 1.9486, | |
| "eval_samples_per_second": 81.598, | |
| "eval_steps_per_second": 5.132, | |
| "step": 4050 | |
| }, | |
| { | |
| "epoch": 360.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3025132715702057, | |
| "eval_runtime": 2.0179, | |
| "eval_samples_per_second": 78.794, | |
| "eval_steps_per_second": 4.956, | |
| "step": 4061 | |
| }, | |
| { | |
| "epoch": 361.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.297443687915802, | |
| "eval_runtime": 1.9969, | |
| "eval_samples_per_second": 79.622, | |
| "eval_steps_per_second": 5.008, | |
| "step": 4072 | |
| }, | |
| { | |
| "epoch": 362.93, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.28664350509643555, | |
| "eval_runtime": 2.0131, | |
| "eval_samples_per_second": 78.984, | |
| "eval_steps_per_second": 4.968, | |
| "step": 4083 | |
| }, | |
| { | |
| "epoch": 364.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.32455363869667053, | |
| "eval_runtime": 2.1216, | |
| "eval_samples_per_second": 74.943, | |
| "eval_steps_per_second": 4.713, | |
| "step": 4095 | |
| }, | |
| { | |
| "epoch": 364.44, | |
| "grad_norm": 0.14960724115371704, | |
| "learning_rate": 1.7803030303030303e-05, | |
| "loss": 0.0169, | |
| "step": 4100 | |
| }, | |
| { | |
| "epoch": 364.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2801210880279541, | |
| "eval_runtime": 1.87, | |
| "eval_samples_per_second": 85.025, | |
| "eval_steps_per_second": 5.347, | |
| "step": 4106 | |
| }, | |
| { | |
| "epoch": 365.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.31326618790626526, | |
| "eval_runtime": 1.8975, | |
| "eval_samples_per_second": 83.793, | |
| "eval_steps_per_second": 5.27, | |
| "step": 4117 | |
| }, | |
| { | |
| "epoch": 366.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3283620774745941, | |
| "eval_runtime": 1.8154, | |
| "eval_samples_per_second": 87.585, | |
| "eval_steps_per_second": 5.509, | |
| "step": 4128 | |
| }, | |
| { | |
| "epoch": 368.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2716998755931854, | |
| "eval_runtime": 1.7785, | |
| "eval_samples_per_second": 89.401, | |
| "eval_steps_per_second": 5.623, | |
| "step": 4140 | |
| }, | |
| { | |
| "epoch": 368.89, | |
| "grad_norm": 1.529534935951233, | |
| "learning_rate": 1.7613636363636366e-05, | |
| "loss": 0.0207, | |
| "step": 4150 | |
| }, | |
| { | |
| "epoch": 368.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.26920509338378906, | |
| "eval_runtime": 1.777, | |
| "eval_samples_per_second": 89.477, | |
| "eval_steps_per_second": 5.627, | |
| "step": 4151 | |
| }, | |
| { | |
| "epoch": 369.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2673673927783966, | |
| "eval_runtime": 1.8105, | |
| "eval_samples_per_second": 87.819, | |
| "eval_steps_per_second": 5.523, | |
| "step": 4162 | |
| }, | |
| { | |
| "epoch": 370.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.26433154940605164, | |
| "eval_runtime": 1.8098, | |
| "eval_samples_per_second": 87.857, | |
| "eval_steps_per_second": 5.526, | |
| "step": 4173 | |
| }, | |
| { | |
| "epoch": 372.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2969939410686493, | |
| "eval_runtime": 1.7874, | |
| "eval_samples_per_second": 88.954, | |
| "eval_steps_per_second": 5.595, | |
| "step": 4185 | |
| }, | |
| { | |
| "epoch": 372.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2687932550907135, | |
| "eval_runtime": 1.9292, | |
| "eval_samples_per_second": 82.418, | |
| "eval_steps_per_second": 5.184, | |
| "step": 4196 | |
| }, | |
| { | |
| "epoch": 373.33, | |
| "grad_norm": 0.41630563139915466, | |
| "learning_rate": 1.7424242424242425e-05, | |
| "loss": 0.0213, | |
| "step": 4200 | |
| }, | |
| { | |
| "epoch": 373.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2765069603919983, | |
| "eval_runtime": 1.9392, | |
| "eval_samples_per_second": 81.994, | |
| "eval_steps_per_second": 5.157, | |
| "step": 4207 | |
| }, | |
| { | |
| "epoch": 374.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.28704383969306946, | |
| "eval_runtime": 1.8427, | |
| "eval_samples_per_second": 86.287, | |
| "eval_steps_per_second": 5.427, | |
| "step": 4218 | |
| }, | |
| { | |
| "epoch": 376.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.30059266090393066, | |
| "eval_runtime": 1.8146, | |
| "eval_samples_per_second": 87.624, | |
| "eval_steps_per_second": 5.511, | |
| "step": 4230 | |
| }, | |
| { | |
| "epoch": 376.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2943706512451172, | |
| "eval_runtime": 1.7941, | |
| "eval_samples_per_second": 88.625, | |
| "eval_steps_per_second": 5.574, | |
| "step": 4241 | |
| }, | |
| { | |
| "epoch": 377.78, | |
| "grad_norm": 1.3894481658935547, | |
| "learning_rate": 1.7234848484848487e-05, | |
| "loss": 0.02, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 377.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3019978106021881, | |
| "eval_runtime": 1.8046, | |
| "eval_samples_per_second": 88.107, | |
| "eval_steps_per_second": 5.541, | |
| "step": 4252 | |
| }, | |
| { | |
| "epoch": 378.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3074227571487427, | |
| "eval_runtime": 1.7835, | |
| "eval_samples_per_second": 89.152, | |
| "eval_steps_per_second": 5.607, | |
| "step": 4263 | |
| }, | |
| { | |
| "epoch": 380.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.29427269101142883, | |
| "eval_runtime": 1.8177, | |
| "eval_samples_per_second": 87.473, | |
| "eval_steps_per_second": 5.501, | |
| "step": 4275 | |
| }, | |
| { | |
| "epoch": 380.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2825266420841217, | |
| "eval_runtime": 1.8911, | |
| "eval_samples_per_second": 84.077, | |
| "eval_steps_per_second": 5.288, | |
| "step": 4286 | |
| }, | |
| { | |
| "epoch": 381.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2760971188545227, | |
| "eval_runtime": 1.9521, | |
| "eval_samples_per_second": 81.451, | |
| "eval_steps_per_second": 5.123, | |
| "step": 4297 | |
| }, | |
| { | |
| "epoch": 382.22, | |
| "grad_norm": 0.021462175995111465, | |
| "learning_rate": 1.7045454545454546e-05, | |
| "loss": 0.0143, | |
| "step": 4300 | |
| }, | |
| { | |
| "epoch": 382.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.29204800724983215, | |
| "eval_runtime": 1.9261, | |
| "eval_samples_per_second": 82.551, | |
| "eval_steps_per_second": 5.192, | |
| "step": 4308 | |
| }, | |
| { | |
| "epoch": 384.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.29515865445137024, | |
| "eval_runtime": 1.8478, | |
| "eval_samples_per_second": 86.046, | |
| "eval_steps_per_second": 5.412, | |
| "step": 4320 | |
| }, | |
| { | |
| "epoch": 384.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.3164711594581604, | |
| "eval_runtime": 1.7929, | |
| "eval_samples_per_second": 88.684, | |
| "eval_steps_per_second": 5.578, | |
| "step": 4331 | |
| }, | |
| { | |
| "epoch": 385.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2803152799606323, | |
| "eval_runtime": 1.8039, | |
| "eval_samples_per_second": 88.141, | |
| "eval_steps_per_second": 5.543, | |
| "step": 4342 | |
| }, | |
| { | |
| "epoch": 386.67, | |
| "grad_norm": 0.4159376621246338, | |
| "learning_rate": 1.6856060606060605e-05, | |
| "loss": 0.0196, | |
| "step": 4350 | |
| }, | |
| { | |
| "epoch": 386.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.28756093978881836, | |
| "eval_runtime": 1.7845, | |
| "eval_samples_per_second": 89.1, | |
| "eval_steps_per_second": 5.604, | |
| "step": 4353 | |
| }, | |
| { | |
| "epoch": 388.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2759377956390381, | |
| "eval_runtime": 1.8441, | |
| "eval_samples_per_second": 86.221, | |
| "eval_steps_per_second": 5.423, | |
| "step": 4365 | |
| }, | |
| { | |
| "epoch": 388.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2701479494571686, | |
| "eval_runtime": 1.7826, | |
| "eval_samples_per_second": 89.198, | |
| "eval_steps_per_second": 5.61, | |
| "step": 4376 | |
| }, | |
| { | |
| "epoch": 389.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2951464354991913, | |
| "eval_runtime": 1.9039, | |
| "eval_samples_per_second": 83.514, | |
| "eval_steps_per_second": 5.252, | |
| "step": 4387 | |
| }, | |
| { | |
| "epoch": 390.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2950435280799866, | |
| "eval_runtime": 1.8731, | |
| "eval_samples_per_second": 84.885, | |
| "eval_steps_per_second": 5.339, | |
| "step": 4398 | |
| }, | |
| { | |
| "epoch": 391.11, | |
| "grad_norm": 0.057938866317272186, | |
| "learning_rate": 1.6670454545454544e-05, | |
| "loss": 0.0234, | |
| "step": 4400 | |
| }, | |
| { | |
| "epoch": 392.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.29603102803230286, | |
| "eval_runtime": 1.9831, | |
| "eval_samples_per_second": 80.176, | |
| "eval_steps_per_second": 5.043, | |
| "step": 4410 | |
| }, | |
| { | |
| "epoch": 392.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3337320387363434, | |
| "eval_runtime": 1.847, | |
| "eval_samples_per_second": 86.084, | |
| "eval_steps_per_second": 5.414, | |
| "step": 4421 | |
| }, | |
| { | |
| "epoch": 393.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.33828112483024597, | |
| "eval_runtime": 1.8496, | |
| "eval_samples_per_second": 85.964, | |
| "eval_steps_per_second": 5.407, | |
| "step": 4432 | |
| }, | |
| { | |
| "epoch": 394.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3078320026397705, | |
| "eval_runtime": 1.8258, | |
| "eval_samples_per_second": 87.084, | |
| "eval_steps_per_second": 5.477, | |
| "step": 4443 | |
| }, | |
| { | |
| "epoch": 395.56, | |
| "grad_norm": 0.39662787318229675, | |
| "learning_rate": 1.6481060606060606e-05, | |
| "loss": 0.0161, | |
| "step": 4450 | |
| }, | |
| { | |
| "epoch": 396.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3138676881790161, | |
| "eval_runtime": 1.7627, | |
| "eval_samples_per_second": 90.205, | |
| "eval_steps_per_second": 5.673, | |
| "step": 4455 | |
| }, | |
| { | |
| "epoch": 396.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.31875431537628174, | |
| "eval_runtime": 1.7584, | |
| "eval_samples_per_second": 90.422, | |
| "eval_steps_per_second": 5.687, | |
| "step": 4466 | |
| }, | |
| { | |
| "epoch": 397.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3307281732559204, | |
| "eval_runtime": 1.7976, | |
| "eval_samples_per_second": 88.452, | |
| "eval_steps_per_second": 5.563, | |
| "step": 4477 | |
| }, | |
| { | |
| "epoch": 398.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.31634414196014404, | |
| "eval_runtime": 1.8551, | |
| "eval_samples_per_second": 85.711, | |
| "eval_steps_per_second": 5.391, | |
| "step": 4488 | |
| }, | |
| { | |
| "epoch": 400.0, | |
| "grad_norm": 0.7240819931030273, | |
| "learning_rate": 1.6291666666666665e-05, | |
| "loss": 0.0162, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 400.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3018243908882141, | |
| "eval_runtime": 1.9085, | |
| "eval_samples_per_second": 83.313, | |
| "eval_steps_per_second": 5.24, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 400.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2813258469104767, | |
| "eval_runtime": 2.0304, | |
| "eval_samples_per_second": 78.308, | |
| "eval_steps_per_second": 4.925, | |
| "step": 4511 | |
| }, | |
| { | |
| "epoch": 401.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3019176125526428, | |
| "eval_runtime": 1.8259, | |
| "eval_samples_per_second": 87.08, | |
| "eval_steps_per_second": 5.477, | |
| "step": 4522 | |
| }, | |
| { | |
| "epoch": 402.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.28099265694618225, | |
| "eval_runtime": 1.7238, | |
| "eval_samples_per_second": 92.239, | |
| "eval_steps_per_second": 5.801, | |
| "step": 4533 | |
| }, | |
| { | |
| "epoch": 404.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2745566666126251, | |
| "eval_runtime": 1.7857, | |
| "eval_samples_per_second": 89.039, | |
| "eval_steps_per_second": 5.6, | |
| "step": 4545 | |
| }, | |
| { | |
| "epoch": 404.44, | |
| "grad_norm": 0.8649039268493652, | |
| "learning_rate": 1.6102272727272727e-05, | |
| "loss": 0.023, | |
| "step": 4550 | |
| }, | |
| { | |
| "epoch": 404.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2850847542285919, | |
| "eval_runtime": 1.8274, | |
| "eval_samples_per_second": 87.011, | |
| "eval_steps_per_second": 5.472, | |
| "step": 4556 | |
| }, | |
| { | |
| "epoch": 405.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.31582126021385193, | |
| "eval_runtime": 1.742, | |
| "eval_samples_per_second": 91.274, | |
| "eval_steps_per_second": 5.741, | |
| "step": 4567 | |
| }, | |
| { | |
| "epoch": 406.93, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.34668126702308655, | |
| "eval_runtime": 1.8815, | |
| "eval_samples_per_second": 84.506, | |
| "eval_steps_per_second": 5.315, | |
| "step": 4578 | |
| }, | |
| { | |
| "epoch": 408.0, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.34958958625793457, | |
| "eval_runtime": 2.0856, | |
| "eval_samples_per_second": 76.236, | |
| "eval_steps_per_second": 4.795, | |
| "step": 4590 | |
| }, | |
| { | |
| "epoch": 408.89, | |
| "grad_norm": 1.8184185028076172, | |
| "learning_rate": 1.591287878787879e-05, | |
| "loss": 0.0164, | |
| "step": 4600 | |
| }, | |
| { | |
| "epoch": 408.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.33241006731987, | |
| "eval_runtime": 2.1278, | |
| "eval_samples_per_second": 74.727, | |
| "eval_steps_per_second": 4.7, | |
| "step": 4601 | |
| }, | |
| { | |
| "epoch": 409.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.32462239265441895, | |
| "eval_runtime": 2.221, | |
| "eval_samples_per_second": 71.589, | |
| "eval_steps_per_second": 4.502, | |
| "step": 4612 | |
| }, | |
| { | |
| "epoch": 410.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3765309154987335, | |
| "eval_runtime": 2.0273, | |
| "eval_samples_per_second": 78.43, | |
| "eval_steps_per_second": 4.933, | |
| "step": 4623 | |
| }, | |
| { | |
| "epoch": 412.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3543161451816559, | |
| "eval_runtime": 2.0351, | |
| "eval_samples_per_second": 78.129, | |
| "eval_steps_per_second": 4.914, | |
| "step": 4635 | |
| }, | |
| { | |
| "epoch": 412.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3280029594898224, | |
| "eval_runtime": 2.1541, | |
| "eval_samples_per_second": 73.813, | |
| "eval_steps_per_second": 4.642, | |
| "step": 4646 | |
| }, | |
| { | |
| "epoch": 413.33, | |
| "grad_norm": 1.7262401580810547, | |
| "learning_rate": 1.572348484848485e-05, | |
| "loss": 0.0189, | |
| "step": 4650 | |
| }, | |
| { | |
| "epoch": 413.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.30754944682121277, | |
| "eval_runtime": 1.987, | |
| "eval_samples_per_second": 80.018, | |
| "eval_steps_per_second": 5.033, | |
| "step": 4657 | |
| }, | |
| { | |
| "epoch": 414.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3012823462486267, | |
| "eval_runtime": 2.084, | |
| "eval_samples_per_second": 76.297, | |
| "eval_steps_per_second": 4.799, | |
| "step": 4668 | |
| }, | |
| { | |
| "epoch": 416.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3047963082790375, | |
| "eval_runtime": 2.1147, | |
| "eval_samples_per_second": 75.187, | |
| "eval_steps_per_second": 4.729, | |
| "step": 4680 | |
| }, | |
| { | |
| "epoch": 416.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.297464519739151, | |
| "eval_runtime": 2.0439, | |
| "eval_samples_per_second": 77.791, | |
| "eval_steps_per_second": 4.893, | |
| "step": 4691 | |
| }, | |
| { | |
| "epoch": 417.78, | |
| "grad_norm": 0.03005032427608967, | |
| "learning_rate": 1.553409090909091e-05, | |
| "loss": 0.018, | |
| "step": 4700 | |
| }, | |
| { | |
| "epoch": 417.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.30111947655677795, | |
| "eval_runtime": 2.0823, | |
| "eval_samples_per_second": 76.356, | |
| "eval_steps_per_second": 4.802, | |
| "step": 4702 | |
| }, | |
| { | |
| "epoch": 418.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3059113621711731, | |
| "eval_runtime": 2.0164, | |
| "eval_samples_per_second": 78.853, | |
| "eval_steps_per_second": 4.959, | |
| "step": 4713 | |
| }, | |
| { | |
| "epoch": 420.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3002815544605255, | |
| "eval_runtime": 2.0599, | |
| "eval_samples_per_second": 77.187, | |
| "eval_steps_per_second": 4.855, | |
| "step": 4725 | |
| }, | |
| { | |
| "epoch": 420.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2898853123188019, | |
| "eval_runtime": 2.1653, | |
| "eval_samples_per_second": 73.43, | |
| "eval_steps_per_second": 4.618, | |
| "step": 4736 | |
| }, | |
| { | |
| "epoch": 421.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.27394920587539673, | |
| "eval_runtime": 1.976, | |
| "eval_samples_per_second": 80.464, | |
| "eval_steps_per_second": 5.061, | |
| "step": 4747 | |
| }, | |
| { | |
| "epoch": 422.22, | |
| "grad_norm": 0.05734672769904137, | |
| "learning_rate": 1.534469696969697e-05, | |
| "loss": 0.014, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 422.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.28232210874557495, | |
| "eval_runtime": 2.0336, | |
| "eval_samples_per_second": 78.186, | |
| "eval_steps_per_second": 4.917, | |
| "step": 4758 | |
| }, | |
| { | |
| "epoch": 424.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3002234697341919, | |
| "eval_runtime": 2.1015, | |
| "eval_samples_per_second": 75.661, | |
| "eval_steps_per_second": 4.759, | |
| "step": 4770 | |
| }, | |
| { | |
| "epoch": 424.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.31039535999298096, | |
| "eval_runtime": 2.0591, | |
| "eval_samples_per_second": 77.218, | |
| "eval_steps_per_second": 4.856, | |
| "step": 4781 | |
| }, | |
| { | |
| "epoch": 425.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2992786467075348, | |
| "eval_runtime": 2.219, | |
| "eval_samples_per_second": 71.652, | |
| "eval_steps_per_second": 4.506, | |
| "step": 4792 | |
| }, | |
| { | |
| "epoch": 426.67, | |
| "grad_norm": 0.20316560566425323, | |
| "learning_rate": 1.5155303030303031e-05, | |
| "loss": 0.0161, | |
| "step": 4800 | |
| }, | |
| { | |
| "epoch": 426.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.28384503722190857, | |
| "eval_runtime": 2.3212, | |
| "eval_samples_per_second": 68.5, | |
| "eval_steps_per_second": 4.308, | |
| "step": 4803 | |
| }, | |
| { | |
| "epoch": 428.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.303459495306015, | |
| "eval_runtime": 2.0531, | |
| "eval_samples_per_second": 77.442, | |
| "eval_steps_per_second": 4.871, | |
| "step": 4815 | |
| }, | |
| { | |
| "epoch": 428.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.31719303131103516, | |
| "eval_runtime": 2.0034, | |
| "eval_samples_per_second": 79.365, | |
| "eval_steps_per_second": 4.992, | |
| "step": 4826 | |
| }, | |
| { | |
| "epoch": 429.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2884739935398102, | |
| "eval_runtime": 2.1854, | |
| "eval_samples_per_second": 72.756, | |
| "eval_steps_per_second": 4.576, | |
| "step": 4837 | |
| }, | |
| { | |
| "epoch": 430.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2915368676185608, | |
| "eval_runtime": 2.0672, | |
| "eval_samples_per_second": 76.914, | |
| "eval_steps_per_second": 4.837, | |
| "step": 4848 | |
| }, | |
| { | |
| "epoch": 431.11, | |
| "grad_norm": 0.1926555037498474, | |
| "learning_rate": 1.496590909090909e-05, | |
| "loss": 0.0181, | |
| "step": 4850 | |
| }, | |
| { | |
| "epoch": 432.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.32380226254463196, | |
| "eval_runtime": 2.0107, | |
| "eval_samples_per_second": 79.076, | |
| "eval_steps_per_second": 4.973, | |
| "step": 4860 | |
| }, | |
| { | |
| "epoch": 432.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3051411807537079, | |
| "eval_runtime": 2.0979, | |
| "eval_samples_per_second": 75.789, | |
| "eval_steps_per_second": 4.767, | |
| "step": 4871 | |
| }, | |
| { | |
| "epoch": 433.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2746570408344269, | |
| "eval_runtime": 2.0978, | |
| "eval_samples_per_second": 75.795, | |
| "eval_steps_per_second": 4.767, | |
| "step": 4882 | |
| }, | |
| { | |
| "epoch": 434.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.27779048681259155, | |
| "eval_runtime": 2.1681, | |
| "eval_samples_per_second": 73.336, | |
| "eval_steps_per_second": 4.612, | |
| "step": 4893 | |
| }, | |
| { | |
| "epoch": 435.56, | |
| "grad_norm": 0.2639506757259369, | |
| "learning_rate": 1.4776515151515152e-05, | |
| "loss": 0.0152, | |
| "step": 4900 | |
| }, | |
| { | |
| "epoch": 436.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3142688274383545, | |
| "eval_runtime": 2.0074, | |
| "eval_samples_per_second": 79.208, | |
| "eval_steps_per_second": 4.982, | |
| "step": 4905 | |
| }, | |
| { | |
| "epoch": 436.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.29534852504730225, | |
| "eval_runtime": 2.0119, | |
| "eval_samples_per_second": 79.031, | |
| "eval_steps_per_second": 4.97, | |
| "step": 4916 | |
| }, | |
| { | |
| "epoch": 437.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2987271249294281, | |
| "eval_runtime": 2.0466, | |
| "eval_samples_per_second": 77.691, | |
| "eval_steps_per_second": 4.886, | |
| "step": 4927 | |
| }, | |
| { | |
| "epoch": 438.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3240003287792206, | |
| "eval_runtime": 2.1303, | |
| "eval_samples_per_second": 74.638, | |
| "eval_steps_per_second": 4.694, | |
| "step": 4938 | |
| }, | |
| { | |
| "epoch": 440.0, | |
| "grad_norm": 1.0273933410644531, | |
| "learning_rate": 1.4587121212121213e-05, | |
| "loss": 0.0233, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 440.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2931964099407196, | |
| "eval_runtime": 2.0028, | |
| "eval_samples_per_second": 79.388, | |
| "eval_steps_per_second": 4.993, | |
| "step": 4950 | |
| }, | |
| { | |
| "epoch": 440.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.30667683482170105, | |
| "eval_runtime": 2.1028, | |
| "eval_samples_per_second": 75.614, | |
| "eval_steps_per_second": 4.756, | |
| "step": 4961 | |
| }, | |
| { | |
| "epoch": 441.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.31695908308029175, | |
| "eval_runtime": 2.1429, | |
| "eval_samples_per_second": 74.198, | |
| "eval_steps_per_second": 4.667, | |
| "step": 4972 | |
| }, | |
| { | |
| "epoch": 442.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.33484575152397156, | |
| "eval_runtime": 2.2487, | |
| "eval_samples_per_second": 70.709, | |
| "eval_steps_per_second": 4.447, | |
| "step": 4983 | |
| }, | |
| { | |
| "epoch": 444.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3350779116153717, | |
| "eval_runtime": 2.2089, | |
| "eval_samples_per_second": 71.981, | |
| "eval_steps_per_second": 4.527, | |
| "step": 4995 | |
| }, | |
| { | |
| "epoch": 444.44, | |
| "grad_norm": 0.05571739375591278, | |
| "learning_rate": 1.4397727272727274e-05, | |
| "loss": 0.0134, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 444.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.33779439330101013, | |
| "eval_runtime": 2.155, | |
| "eval_samples_per_second": 73.781, | |
| "eval_steps_per_second": 4.64, | |
| "step": 5006 | |
| }, | |
| { | |
| "epoch": 445.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.32037729024887085, | |
| "eval_runtime": 2.1415, | |
| "eval_samples_per_second": 74.247, | |
| "eval_steps_per_second": 4.67, | |
| "step": 5017 | |
| }, | |
| { | |
| "epoch": 446.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.30960965156555176, | |
| "eval_runtime": 2.0664, | |
| "eval_samples_per_second": 76.947, | |
| "eval_steps_per_second": 4.839, | |
| "step": 5028 | |
| }, | |
| { | |
| "epoch": 448.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3135194480419159, | |
| "eval_runtime": 2.1609, | |
| "eval_samples_per_second": 73.581, | |
| "eval_steps_per_second": 4.628, | |
| "step": 5040 | |
| }, | |
| { | |
| "epoch": 448.89, | |
| "grad_norm": 1.2499555349349976, | |
| "learning_rate": 1.4208333333333333e-05, | |
| "loss": 0.0185, | |
| "step": 5050 | |
| }, | |
| { | |
| "epoch": 448.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.32047778367996216, | |
| "eval_runtime": 2.0116, | |
| "eval_samples_per_second": 79.04, | |
| "eval_steps_per_second": 4.971, | |
| "step": 5051 | |
| }, | |
| { | |
| "epoch": 449.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3151703476905823, | |
| "eval_runtime": 1.9982, | |
| "eval_samples_per_second": 79.571, | |
| "eval_steps_per_second": 5.004, | |
| "step": 5062 | |
| }, | |
| { | |
| "epoch": 450.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.32720035314559937, | |
| "eval_runtime": 2.0554, | |
| "eval_samples_per_second": 77.357, | |
| "eval_steps_per_second": 4.865, | |
| "step": 5073 | |
| }, | |
| { | |
| "epoch": 452.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.31637299060821533, | |
| "eval_runtime": 2.0655, | |
| "eval_samples_per_second": 76.978, | |
| "eval_steps_per_second": 4.841, | |
| "step": 5085 | |
| }, | |
| { | |
| "epoch": 452.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3297300934791565, | |
| "eval_runtime": 2.0194, | |
| "eval_samples_per_second": 78.737, | |
| "eval_steps_per_second": 4.952, | |
| "step": 5096 | |
| }, | |
| { | |
| "epoch": 453.33, | |
| "grad_norm": 0.4623982012271881, | |
| "learning_rate": 1.4018939393939395e-05, | |
| "loss": 0.0149, | |
| "step": 5100 | |
| }, | |
| { | |
| "epoch": 453.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3299054801464081, | |
| "eval_runtime": 2.027, | |
| "eval_samples_per_second": 78.441, | |
| "eval_steps_per_second": 4.933, | |
| "step": 5107 | |
| }, | |
| { | |
| "epoch": 454.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34270188212394714, | |
| "eval_runtime": 2.0462, | |
| "eval_samples_per_second": 77.705, | |
| "eval_steps_per_second": 4.887, | |
| "step": 5118 | |
| }, | |
| { | |
| "epoch": 456.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3775523006916046, | |
| "eval_runtime": 2.0532, | |
| "eval_samples_per_second": 77.442, | |
| "eval_steps_per_second": 4.871, | |
| "step": 5130 | |
| }, | |
| { | |
| "epoch": 456.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.376447468996048, | |
| "eval_runtime": 2.0839, | |
| "eval_samples_per_second": 76.298, | |
| "eval_steps_per_second": 4.799, | |
| "step": 5141 | |
| }, | |
| { | |
| "epoch": 457.78, | |
| "grad_norm": 0.23284748196601868, | |
| "learning_rate": 1.3829545454545456e-05, | |
| "loss": 0.0099, | |
| "step": 5150 | |
| }, | |
| { | |
| "epoch": 457.96, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.3852477967739105, | |
| "eval_runtime": 2.0765, | |
| "eval_samples_per_second": 76.569, | |
| "eval_steps_per_second": 4.816, | |
| "step": 5152 | |
| }, | |
| { | |
| "epoch": 458.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.35552406311035156, | |
| "eval_runtime": 2.0834, | |
| "eval_samples_per_second": 76.318, | |
| "eval_steps_per_second": 4.8, | |
| "step": 5163 | |
| }, | |
| { | |
| "epoch": 460.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3497180640697479, | |
| "eval_runtime": 2.1727, | |
| "eval_samples_per_second": 73.182, | |
| "eval_steps_per_second": 4.603, | |
| "step": 5175 | |
| }, | |
| { | |
| "epoch": 460.98, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.3959099054336548, | |
| "eval_runtime": 2.2063, | |
| "eval_samples_per_second": 72.066, | |
| "eval_steps_per_second": 4.532, | |
| "step": 5186 | |
| }, | |
| { | |
| "epoch": 461.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3428646922111511, | |
| "eval_runtime": 2.0667, | |
| "eval_samples_per_second": 76.934, | |
| "eval_steps_per_second": 4.839, | |
| "step": 5197 | |
| }, | |
| { | |
| "epoch": 462.22, | |
| "grad_norm": 0.01973637193441391, | |
| "learning_rate": 1.3640151515151516e-05, | |
| "loss": 0.0123, | |
| "step": 5200 | |
| }, | |
| { | |
| "epoch": 462.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3277600407600403, | |
| "eval_runtime": 2.0262, | |
| "eval_samples_per_second": 78.472, | |
| "eval_steps_per_second": 4.935, | |
| "step": 5208 | |
| }, | |
| { | |
| "epoch": 464.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.307450532913208, | |
| "eval_runtime": 2.1318, | |
| "eval_samples_per_second": 74.586, | |
| "eval_steps_per_second": 4.691, | |
| "step": 5220 | |
| }, | |
| { | |
| "epoch": 464.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.30191025137901306, | |
| "eval_runtime": 2.0236, | |
| "eval_samples_per_second": 78.574, | |
| "eval_steps_per_second": 4.942, | |
| "step": 5231 | |
| }, | |
| { | |
| "epoch": 465.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3069049119949341, | |
| "eval_runtime": 1.9794, | |
| "eval_samples_per_second": 80.326, | |
| "eval_steps_per_second": 5.052, | |
| "step": 5242 | |
| }, | |
| { | |
| "epoch": 466.67, | |
| "grad_norm": 1.7077068090438843, | |
| "learning_rate": 1.3450757575757575e-05, | |
| "loss": 0.0169, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 466.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3036327362060547, | |
| "eval_runtime": 2.2515, | |
| "eval_samples_per_second": 70.62, | |
| "eval_steps_per_second": 4.442, | |
| "step": 5253 | |
| }, | |
| { | |
| "epoch": 468.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.32558977603912354, | |
| "eval_runtime": 2.0075, | |
| "eval_samples_per_second": 79.202, | |
| "eval_steps_per_second": 4.981, | |
| "step": 5265 | |
| }, | |
| { | |
| "epoch": 468.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3241185247898102, | |
| "eval_runtime": 2.079, | |
| "eval_samples_per_second": 76.48, | |
| "eval_steps_per_second": 4.81, | |
| "step": 5276 | |
| }, | |
| { | |
| "epoch": 469.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.32361313700675964, | |
| "eval_runtime": 2.2276, | |
| "eval_samples_per_second": 71.378, | |
| "eval_steps_per_second": 4.489, | |
| "step": 5287 | |
| }, | |
| { | |
| "epoch": 470.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.32213094830513, | |
| "eval_runtime": 2.0555, | |
| "eval_samples_per_second": 77.353, | |
| "eval_steps_per_second": 4.865, | |
| "step": 5298 | |
| }, | |
| { | |
| "epoch": 471.11, | |
| "grad_norm": 2.2473459243774414, | |
| "learning_rate": 1.3261363636363636e-05, | |
| "loss": 0.0114, | |
| "step": 5300 | |
| }, | |
| { | |
| "epoch": 472.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2958085536956787, | |
| "eval_runtime": 2.1042, | |
| "eval_samples_per_second": 75.563, | |
| "eval_steps_per_second": 4.752, | |
| "step": 5310 | |
| }, | |
| { | |
| "epoch": 472.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2994365692138672, | |
| "eval_runtime": 2.0536, | |
| "eval_samples_per_second": 77.424, | |
| "eval_steps_per_second": 4.869, | |
| "step": 5321 | |
| }, | |
| { | |
| "epoch": 473.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.29937687516212463, | |
| "eval_runtime": 2.0807, | |
| "eval_samples_per_second": 76.417, | |
| "eval_steps_per_second": 4.806, | |
| "step": 5332 | |
| }, | |
| { | |
| "epoch": 474.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.4239935576915741, | |
| "eval_runtime": 2.0885, | |
| "eval_samples_per_second": 76.13, | |
| "eval_steps_per_second": 4.788, | |
| "step": 5343 | |
| }, | |
| { | |
| "epoch": 475.56, | |
| "grad_norm": 0.01770736277103424, | |
| "learning_rate": 1.3071969696969698e-05, | |
| "loss": 0.0148, | |
| "step": 5350 | |
| }, | |
| { | |
| "epoch": 476.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.32858237624168396, | |
| "eval_runtime": 2.0527, | |
| "eval_samples_per_second": 77.46, | |
| "eval_steps_per_second": 4.872, | |
| "step": 5355 | |
| }, | |
| { | |
| "epoch": 476.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2954269051551819, | |
| "eval_runtime": 2.1594, | |
| "eval_samples_per_second": 73.63, | |
| "eval_steps_per_second": 4.631, | |
| "step": 5366 | |
| }, | |
| { | |
| "epoch": 477.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.29593905806541443, | |
| "eval_runtime": 2.1654, | |
| "eval_samples_per_second": 73.426, | |
| "eval_steps_per_second": 4.618, | |
| "step": 5377 | |
| }, | |
| { | |
| "epoch": 478.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2928108274936676, | |
| "eval_runtime": 2.2433, | |
| "eval_samples_per_second": 70.877, | |
| "eval_steps_per_second": 4.458, | |
| "step": 5388 | |
| }, | |
| { | |
| "epoch": 480.0, | |
| "grad_norm": 1.7406607866287231, | |
| "learning_rate": 1.2882575757575757e-05, | |
| "loss": 0.0171, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 480.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2977100610733032, | |
| "eval_runtime": 2.0243, | |
| "eval_samples_per_second": 78.544, | |
| "eval_steps_per_second": 4.94, | |
| "step": 5400 | |
| }, | |
| { | |
| "epoch": 480.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.30747535824775696, | |
| "eval_runtime": 2.0298, | |
| "eval_samples_per_second": 78.334, | |
| "eval_steps_per_second": 4.927, | |
| "step": 5411 | |
| }, | |
| { | |
| "epoch": 481.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3572753071784973, | |
| "eval_runtime": 2.0524, | |
| "eval_samples_per_second": 77.47, | |
| "eval_steps_per_second": 4.872, | |
| "step": 5422 | |
| }, | |
| { | |
| "epoch": 482.93, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.3878822326660156, | |
| "eval_runtime": 2.0986, | |
| "eval_samples_per_second": 75.766, | |
| "eval_steps_per_second": 4.765, | |
| "step": 5433 | |
| }, | |
| { | |
| "epoch": 484.0, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.3886529803276062, | |
| "eval_runtime": 2.078, | |
| "eval_samples_per_second": 76.517, | |
| "eval_steps_per_second": 4.812, | |
| "step": 5445 | |
| }, | |
| { | |
| "epoch": 484.44, | |
| "grad_norm": 0.06283226609230042, | |
| "learning_rate": 1.2693181818181818e-05, | |
| "loss": 0.0166, | |
| "step": 5450 | |
| }, | |
| { | |
| "epoch": 484.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.3698625862598419, | |
| "eval_runtime": 2.094, | |
| "eval_samples_per_second": 75.932, | |
| "eval_steps_per_second": 4.776, | |
| "step": 5456 | |
| }, | |
| { | |
| "epoch": 485.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.351385235786438, | |
| "eval_runtime": 2.0668, | |
| "eval_samples_per_second": 76.93, | |
| "eval_steps_per_second": 4.838, | |
| "step": 5467 | |
| }, | |
| { | |
| "epoch": 486.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34395086765289307, | |
| "eval_runtime": 2.12, | |
| "eval_samples_per_second": 74.999, | |
| "eval_steps_per_second": 4.717, | |
| "step": 5478 | |
| }, | |
| { | |
| "epoch": 488.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.31205570697784424, | |
| "eval_runtime": 2.2336, | |
| "eval_samples_per_second": 71.184, | |
| "eval_steps_per_second": 4.477, | |
| "step": 5490 | |
| }, | |
| { | |
| "epoch": 488.89, | |
| "grad_norm": 1.9271873235702515, | |
| "learning_rate": 1.2503787878787879e-05, | |
| "loss": 0.0169, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 488.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3185611069202423, | |
| "eval_runtime": 1.9966, | |
| "eval_samples_per_second": 79.635, | |
| "eval_steps_per_second": 5.008, | |
| "step": 5501 | |
| }, | |
| { | |
| "epoch": 489.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3383605182170868, | |
| "eval_runtime": 2.0874, | |
| "eval_samples_per_second": 76.17, | |
| "eval_steps_per_second": 4.791, | |
| "step": 5512 | |
| }, | |
| { | |
| "epoch": 490.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.35870400071144104, | |
| "eval_runtime": 2.2491, | |
| "eval_samples_per_second": 70.694, | |
| "eval_steps_per_second": 4.446, | |
| "step": 5523 | |
| }, | |
| { | |
| "epoch": 492.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3265625238418579, | |
| "eval_runtime": 2.0134, | |
| "eval_samples_per_second": 78.971, | |
| "eval_steps_per_second": 4.967, | |
| "step": 5535 | |
| }, | |
| { | |
| "epoch": 492.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3273981213569641, | |
| "eval_runtime": 2.0972, | |
| "eval_samples_per_second": 75.815, | |
| "eval_steps_per_second": 4.768, | |
| "step": 5546 | |
| }, | |
| { | |
| "epoch": 493.33, | |
| "grad_norm": 0.3140685260295868, | |
| "learning_rate": 1.2314393939393941e-05, | |
| "loss": 0.0162, | |
| "step": 5550 | |
| }, | |
| { | |
| "epoch": 493.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3433980345726013, | |
| "eval_runtime": 4.4757, | |
| "eval_samples_per_second": 35.525, | |
| "eval_steps_per_second": 2.234, | |
| "step": 5557 | |
| }, | |
| { | |
| "epoch": 494.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3295518755912781, | |
| "eval_runtime": 2.0317, | |
| "eval_samples_per_second": 78.259, | |
| "eval_steps_per_second": 4.922, | |
| "step": 5568 | |
| }, | |
| { | |
| "epoch": 496.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.31786414980888367, | |
| "eval_runtime": 2.1435, | |
| "eval_samples_per_second": 74.179, | |
| "eval_steps_per_second": 4.665, | |
| "step": 5580 | |
| }, | |
| { | |
| "epoch": 496.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.32228994369506836, | |
| "eval_runtime": 2.0036, | |
| "eval_samples_per_second": 79.357, | |
| "eval_steps_per_second": 4.991, | |
| "step": 5591 | |
| }, | |
| { | |
| "epoch": 497.78, | |
| "grad_norm": 1.7739616632461548, | |
| "learning_rate": 1.2125e-05, | |
| "loss": 0.0128, | |
| "step": 5600 | |
| }, | |
| { | |
| "epoch": 497.96, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.3525673747062683, | |
| "eval_runtime": 2.0848, | |
| "eval_samples_per_second": 76.266, | |
| "eval_steps_per_second": 4.797, | |
| "step": 5602 | |
| }, | |
| { | |
| "epoch": 498.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3345227539539337, | |
| "eval_runtime": 2.0597, | |
| "eval_samples_per_second": 77.194, | |
| "eval_steps_per_second": 4.855, | |
| "step": 5613 | |
| }, | |
| { | |
| "epoch": 500.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3081194758415222, | |
| "eval_runtime": 2.14, | |
| "eval_samples_per_second": 74.297, | |
| "eval_steps_per_second": 4.673, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 500.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3136290907859802, | |
| "eval_runtime": 2.0866, | |
| "eval_samples_per_second": 76.201, | |
| "eval_steps_per_second": 4.793, | |
| "step": 5636 | |
| }, | |
| { | |
| "epoch": 501.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.31603533029556274, | |
| "eval_runtime": 2.272, | |
| "eval_samples_per_second": 69.983, | |
| "eval_steps_per_second": 4.401, | |
| "step": 5647 | |
| }, | |
| { | |
| "epoch": 502.22, | |
| "grad_norm": 0.024508927017450333, | |
| "learning_rate": 1.193560606060606e-05, | |
| "loss": 0.0089, | |
| "step": 5650 | |
| }, | |
| { | |
| "epoch": 502.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3217502236366272, | |
| "eval_runtime": 2.2568, | |
| "eval_samples_per_second": 70.454, | |
| "eval_steps_per_second": 4.431, | |
| "step": 5658 | |
| }, | |
| { | |
| "epoch": 504.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3330020606517792, | |
| "eval_runtime": 2.1528, | |
| "eval_samples_per_second": 73.857, | |
| "eval_steps_per_second": 4.645, | |
| "step": 5670 | |
| }, | |
| { | |
| "epoch": 504.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3610976040363312, | |
| "eval_runtime": 2.1981, | |
| "eval_samples_per_second": 72.335, | |
| "eval_steps_per_second": 4.549, | |
| "step": 5681 | |
| }, | |
| { | |
| "epoch": 505.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3819771111011505, | |
| "eval_runtime": 2.0605, | |
| "eval_samples_per_second": 77.167, | |
| "eval_steps_per_second": 4.853, | |
| "step": 5692 | |
| }, | |
| { | |
| "epoch": 506.67, | |
| "grad_norm": 0.13250546157360077, | |
| "learning_rate": 1.1746212121212121e-05, | |
| "loss": 0.0168, | |
| "step": 5700 | |
| }, | |
| { | |
| "epoch": 506.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3471725881099701, | |
| "eval_runtime": 2.0816, | |
| "eval_samples_per_second": 76.384, | |
| "eval_steps_per_second": 4.804, | |
| "step": 5703 | |
| }, | |
| { | |
| "epoch": 508.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3075188100337982, | |
| "eval_runtime": 2.1057, | |
| "eval_samples_per_second": 75.51, | |
| "eval_steps_per_second": 4.749, | |
| "step": 5715 | |
| }, | |
| { | |
| "epoch": 508.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.30466988682746887, | |
| "eval_runtime": 2.1027, | |
| "eval_samples_per_second": 75.617, | |
| "eval_steps_per_second": 4.756, | |
| "step": 5726 | |
| }, | |
| { | |
| "epoch": 509.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.314418226480484, | |
| "eval_runtime": 2.1578, | |
| "eval_samples_per_second": 73.686, | |
| "eval_steps_per_second": 4.634, | |
| "step": 5737 | |
| }, | |
| { | |
| "epoch": 510.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3144315183162689, | |
| "eval_runtime": 2.2895, | |
| "eval_samples_per_second": 69.447, | |
| "eval_steps_per_second": 4.368, | |
| "step": 5748 | |
| }, | |
| { | |
| "epoch": 511.11, | |
| "grad_norm": 1.371584415435791, | |
| "learning_rate": 1.1556818181818184e-05, | |
| "loss": 0.0143, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 512.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.30977222323417664, | |
| "eval_runtime": 2.0905, | |
| "eval_samples_per_second": 76.059, | |
| "eval_steps_per_second": 4.784, | |
| "step": 5760 | |
| }, | |
| { | |
| "epoch": 512.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.31324854493141174, | |
| "eval_runtime": 2.2018, | |
| "eval_samples_per_second": 72.212, | |
| "eval_steps_per_second": 4.542, | |
| "step": 5771 | |
| }, | |
| { | |
| "epoch": 513.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3324536979198456, | |
| "eval_runtime": 2.096, | |
| "eval_samples_per_second": 75.859, | |
| "eval_steps_per_second": 4.771, | |
| "step": 5782 | |
| }, | |
| { | |
| "epoch": 514.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.32093632221221924, | |
| "eval_runtime": 2.049, | |
| "eval_samples_per_second": 77.599, | |
| "eval_steps_per_second": 4.88, | |
| "step": 5793 | |
| }, | |
| { | |
| "epoch": 515.56, | |
| "grad_norm": 1.4226562976837158, | |
| "learning_rate": 1.1367424242424243e-05, | |
| "loss": 0.014, | |
| "step": 5800 | |
| }, | |
| { | |
| "epoch": 516.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3191947937011719, | |
| "eval_runtime": 2.0898, | |
| "eval_samples_per_second": 76.083, | |
| "eval_steps_per_second": 4.785, | |
| "step": 5805 | |
| }, | |
| { | |
| "epoch": 516.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.311814546585083, | |
| "eval_runtime": 2.0315, | |
| "eval_samples_per_second": 78.269, | |
| "eval_steps_per_second": 4.923, | |
| "step": 5816 | |
| }, | |
| { | |
| "epoch": 517.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.31416967511177063, | |
| "eval_runtime": 2.0132, | |
| "eval_samples_per_second": 78.978, | |
| "eval_steps_per_second": 4.967, | |
| "step": 5827 | |
| }, | |
| { | |
| "epoch": 518.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3255424201488495, | |
| "eval_runtime": 2.4361, | |
| "eval_samples_per_second": 65.269, | |
| "eval_steps_per_second": 4.105, | |
| "step": 5838 | |
| }, | |
| { | |
| "epoch": 520.0, | |
| "grad_norm": 0.1621515154838562, | |
| "learning_rate": 1.1178030303030303e-05, | |
| "loss": 0.0111, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 520.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.32208895683288574, | |
| "eval_runtime": 2.0821, | |
| "eval_samples_per_second": 76.364, | |
| "eval_steps_per_second": 4.803, | |
| "step": 5850 | |
| }, | |
| { | |
| "epoch": 520.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3211723566055298, | |
| "eval_runtime": 2.0312, | |
| "eval_samples_per_second": 78.28, | |
| "eval_steps_per_second": 4.923, | |
| "step": 5861 | |
| }, | |
| { | |
| "epoch": 521.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.32905757427215576, | |
| "eval_runtime": 2.0294, | |
| "eval_samples_per_second": 78.349, | |
| "eval_steps_per_second": 4.928, | |
| "step": 5872 | |
| }, | |
| { | |
| "epoch": 522.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.33144110441207886, | |
| "eval_runtime": 2.032, | |
| "eval_samples_per_second": 78.249, | |
| "eval_steps_per_second": 4.921, | |
| "step": 5883 | |
| }, | |
| { | |
| "epoch": 524.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3268250823020935, | |
| "eval_runtime": 2.0687, | |
| "eval_samples_per_second": 76.859, | |
| "eval_steps_per_second": 4.834, | |
| "step": 5895 | |
| }, | |
| { | |
| "epoch": 524.44, | |
| "grad_norm": 0.008243849501013756, | |
| "learning_rate": 1.0988636363636364e-05, | |
| "loss": 0.0107, | |
| "step": 5900 | |
| }, | |
| { | |
| "epoch": 524.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3351696729660034, | |
| "eval_runtime": 2.155, | |
| "eval_samples_per_second": 73.782, | |
| "eval_steps_per_second": 4.64, | |
| "step": 5906 | |
| }, | |
| { | |
| "epoch": 525.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34242841601371765, | |
| "eval_runtime": 2.0063, | |
| "eval_samples_per_second": 79.249, | |
| "eval_steps_per_second": 4.984, | |
| "step": 5917 | |
| }, | |
| { | |
| "epoch": 526.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.33888906240463257, | |
| "eval_runtime": 2.2365, | |
| "eval_samples_per_second": 71.093, | |
| "eval_steps_per_second": 4.471, | |
| "step": 5928 | |
| }, | |
| { | |
| "epoch": 528.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3547358810901642, | |
| "eval_runtime": 2.0755, | |
| "eval_samples_per_second": 76.609, | |
| "eval_steps_per_second": 4.818, | |
| "step": 5940 | |
| }, | |
| { | |
| "epoch": 528.89, | |
| "grad_norm": 0.47511938214302063, | |
| "learning_rate": 1.0799242424242423e-05, | |
| "loss": 0.01, | |
| "step": 5950 | |
| }, | |
| { | |
| "epoch": 528.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.34747716784477234, | |
| "eval_runtime": 2.0823, | |
| "eval_samples_per_second": 76.358, | |
| "eval_steps_per_second": 4.802, | |
| "step": 5951 | |
| }, | |
| { | |
| "epoch": 529.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35945838689804077, | |
| "eval_runtime": 2.0524, | |
| "eval_samples_per_second": 77.469, | |
| "eval_steps_per_second": 4.872, | |
| "step": 5962 | |
| }, | |
| { | |
| "epoch": 530.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3673442602157593, | |
| "eval_runtime": 2.0276, | |
| "eval_samples_per_second": 78.419, | |
| "eval_steps_per_second": 4.932, | |
| "step": 5973 | |
| }, | |
| { | |
| "epoch": 532.0, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.41652363538742065, | |
| "eval_runtime": 2.0573, | |
| "eval_samples_per_second": 77.285, | |
| "eval_steps_per_second": 4.861, | |
| "step": 5985 | |
| }, | |
| { | |
| "epoch": 532.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.42472416162490845, | |
| "eval_runtime": 2.1003, | |
| "eval_samples_per_second": 75.704, | |
| "eval_steps_per_second": 4.761, | |
| "step": 5996 | |
| }, | |
| { | |
| "epoch": 533.33, | |
| "grad_norm": 0.15851238369941711, | |
| "learning_rate": 1.0609848484848485e-05, | |
| "loss": 0.0126, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 533.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.4061521589756012, | |
| "eval_runtime": 2.0889, | |
| "eval_samples_per_second": 76.116, | |
| "eval_steps_per_second": 4.787, | |
| "step": 6007 | |
| }, | |
| { | |
| "epoch": 534.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3752112090587616, | |
| "eval_runtime": 2.0476, | |
| "eval_samples_per_second": 77.651, | |
| "eval_steps_per_second": 4.884, | |
| "step": 6018 | |
| }, | |
| { | |
| "epoch": 536.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.35743284225463867, | |
| "eval_runtime": 2.2159, | |
| "eval_samples_per_second": 71.753, | |
| "eval_steps_per_second": 4.513, | |
| "step": 6030 | |
| }, | |
| { | |
| "epoch": 536.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3824201226234436, | |
| "eval_runtime": 2.0455, | |
| "eval_samples_per_second": 77.732, | |
| "eval_steps_per_second": 4.889, | |
| "step": 6041 | |
| }, | |
| { | |
| "epoch": 537.78, | |
| "grad_norm": 0.0922364741563797, | |
| "learning_rate": 1.0420454545454546e-05, | |
| "loss": 0.0126, | |
| "step": 6050 | |
| }, | |
| { | |
| "epoch": 537.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3730430006980896, | |
| "eval_runtime": 2.1192, | |
| "eval_samples_per_second": 75.028, | |
| "eval_steps_per_second": 4.719, | |
| "step": 6052 | |
| }, | |
| { | |
| "epoch": 538.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3703514337539673, | |
| "eval_runtime": 2.2056, | |
| "eval_samples_per_second": 72.091, | |
| "eval_steps_per_second": 4.534, | |
| "step": 6063 | |
| }, | |
| { | |
| "epoch": 540.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.38142630457878113, | |
| "eval_runtime": 2.0818, | |
| "eval_samples_per_second": 76.376, | |
| "eval_steps_per_second": 4.804, | |
| "step": 6075 | |
| }, | |
| { | |
| "epoch": 540.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3648853302001953, | |
| "eval_runtime": 2.2199, | |
| "eval_samples_per_second": 71.625, | |
| "eval_steps_per_second": 4.505, | |
| "step": 6086 | |
| }, | |
| { | |
| "epoch": 541.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3810517489910126, | |
| "eval_runtime": 2.0826, | |
| "eval_samples_per_second": 76.345, | |
| "eval_steps_per_second": 4.802, | |
| "step": 6097 | |
| }, | |
| { | |
| "epoch": 542.22, | |
| "grad_norm": 0.04241061210632324, | |
| "learning_rate": 1.0231060606060607e-05, | |
| "loss": 0.012, | |
| "step": 6100 | |
| }, | |
| { | |
| "epoch": 542.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3544082045555115, | |
| "eval_runtime": 2.08, | |
| "eval_samples_per_second": 76.442, | |
| "eval_steps_per_second": 4.808, | |
| "step": 6108 | |
| }, | |
| { | |
| "epoch": 544.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3614555597305298, | |
| "eval_runtime": 2.2123, | |
| "eval_samples_per_second": 71.871, | |
| "eval_steps_per_second": 4.52, | |
| "step": 6120 | |
| }, | |
| { | |
| "epoch": 544.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35575661063194275, | |
| "eval_runtime": 2.1324, | |
| "eval_samples_per_second": 74.564, | |
| "eval_steps_per_second": 4.69, | |
| "step": 6131 | |
| }, | |
| { | |
| "epoch": 545.96, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.34816914796829224, | |
| "eval_runtime": 2.0819, | |
| "eval_samples_per_second": 76.371, | |
| "eval_steps_per_second": 4.803, | |
| "step": 6142 | |
| }, | |
| { | |
| "epoch": 546.67, | |
| "grad_norm": 0.5738076567649841, | |
| "learning_rate": 1.0041666666666666e-05, | |
| "loss": 0.0135, | |
| "step": 6150 | |
| }, | |
| { | |
| "epoch": 546.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.36677080392837524, | |
| "eval_runtime": 2.1421, | |
| "eval_samples_per_second": 74.226, | |
| "eval_steps_per_second": 4.668, | |
| "step": 6153 | |
| }, | |
| { | |
| "epoch": 548.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34037116169929504, | |
| "eval_runtime": 2.0657, | |
| "eval_samples_per_second": 76.972, | |
| "eval_steps_per_second": 4.841, | |
| "step": 6165 | |
| }, | |
| { | |
| "epoch": 548.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33401021361351013, | |
| "eval_runtime": 2.0325, | |
| "eval_samples_per_second": 78.229, | |
| "eval_steps_per_second": 4.92, | |
| "step": 6176 | |
| }, | |
| { | |
| "epoch": 549.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3377488851547241, | |
| "eval_runtime": 2.1646, | |
| "eval_samples_per_second": 73.456, | |
| "eval_steps_per_second": 4.62, | |
| "step": 6187 | |
| }, | |
| { | |
| "epoch": 550.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3406839966773987, | |
| "eval_runtime": 2.1382, | |
| "eval_samples_per_second": 74.36, | |
| "eval_steps_per_second": 4.677, | |
| "step": 6198 | |
| }, | |
| { | |
| "epoch": 551.11, | |
| "grad_norm": 0.34322044253349304, | |
| "learning_rate": 9.852272727272728e-06, | |
| "loss": 0.0101, | |
| "step": 6200 | |
| }, | |
| { | |
| "epoch": 552.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.33890071511268616, | |
| "eval_runtime": 2.0917, | |
| "eval_samples_per_second": 76.015, | |
| "eval_steps_per_second": 4.781, | |
| "step": 6210 | |
| }, | |
| { | |
| "epoch": 552.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33053550124168396, | |
| "eval_runtime": 2.2779, | |
| "eval_samples_per_second": 69.8, | |
| "eval_steps_per_second": 4.39, | |
| "step": 6221 | |
| }, | |
| { | |
| "epoch": 553.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.31986501812934875, | |
| "eval_runtime": 1.9669, | |
| "eval_samples_per_second": 80.836, | |
| "eval_steps_per_second": 5.084, | |
| "step": 6232 | |
| }, | |
| { | |
| "epoch": 554.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33377256989479065, | |
| "eval_runtime": 2.0395, | |
| "eval_samples_per_second": 77.96, | |
| "eval_steps_per_second": 4.903, | |
| "step": 6243 | |
| }, | |
| { | |
| "epoch": 555.56, | |
| "grad_norm": 0.1416609138250351, | |
| "learning_rate": 9.662878787878789e-06, | |
| "loss": 0.0175, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 556.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33228349685668945, | |
| "eval_runtime": 2.1542, | |
| "eval_samples_per_second": 73.811, | |
| "eval_steps_per_second": 4.642, | |
| "step": 6255 | |
| }, | |
| { | |
| "epoch": 556.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.340250164270401, | |
| "eval_runtime": 2.0563, | |
| "eval_samples_per_second": 77.325, | |
| "eval_steps_per_second": 4.863, | |
| "step": 6266 | |
| }, | |
| { | |
| "epoch": 557.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34735485911369324, | |
| "eval_runtime": 2.0285, | |
| "eval_samples_per_second": 78.384, | |
| "eval_steps_per_second": 4.93, | |
| "step": 6277 | |
| }, | |
| { | |
| "epoch": 558.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34990042448043823, | |
| "eval_runtime": 2.1967, | |
| "eval_samples_per_second": 72.38, | |
| "eval_steps_per_second": 4.552, | |
| "step": 6288 | |
| }, | |
| { | |
| "epoch": 560.0, | |
| "grad_norm": 0.09764547646045685, | |
| "learning_rate": 9.473484848484848e-06, | |
| "loss": 0.0108, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 560.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.342894971370697, | |
| "eval_runtime": 2.026, | |
| "eval_samples_per_second": 78.479, | |
| "eval_steps_per_second": 4.936, | |
| "step": 6300 | |
| }, | |
| { | |
| "epoch": 560.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3396158218383789, | |
| "eval_runtime": 2.3052, | |
| "eval_samples_per_second": 68.976, | |
| "eval_steps_per_second": 4.338, | |
| "step": 6311 | |
| }, | |
| { | |
| "epoch": 561.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3467164933681488, | |
| "eval_runtime": 2.0425, | |
| "eval_samples_per_second": 77.846, | |
| "eval_steps_per_second": 4.896, | |
| "step": 6322 | |
| }, | |
| { | |
| "epoch": 562.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3349219858646393, | |
| "eval_runtime": 2.0651, | |
| "eval_samples_per_second": 76.992, | |
| "eval_steps_per_second": 4.842, | |
| "step": 6333 | |
| }, | |
| { | |
| "epoch": 564.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3380991518497467, | |
| "eval_runtime": 2.111, | |
| "eval_samples_per_second": 75.32, | |
| "eval_steps_per_second": 4.737, | |
| "step": 6345 | |
| }, | |
| { | |
| "epoch": 564.44, | |
| "grad_norm": 0.021107789129018784, | |
| "learning_rate": 9.284090909090908e-06, | |
| "loss": 0.0139, | |
| "step": 6350 | |
| }, | |
| { | |
| "epoch": 564.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.32741737365722656, | |
| "eval_runtime": 2.1143, | |
| "eval_samples_per_second": 75.203, | |
| "eval_steps_per_second": 4.73, | |
| "step": 6356 | |
| }, | |
| { | |
| "epoch": 565.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3318650722503662, | |
| "eval_runtime": 1.9953, | |
| "eval_samples_per_second": 79.688, | |
| "eval_steps_per_second": 5.012, | |
| "step": 6367 | |
| }, | |
| { | |
| "epoch": 566.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.33214500546455383, | |
| "eval_runtime": 2.0923, | |
| "eval_samples_per_second": 75.992, | |
| "eval_steps_per_second": 4.779, | |
| "step": 6378 | |
| }, | |
| { | |
| "epoch": 568.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3546938896179199, | |
| "eval_runtime": 2.1191, | |
| "eval_samples_per_second": 75.033, | |
| "eval_steps_per_second": 4.719, | |
| "step": 6390 | |
| }, | |
| { | |
| "epoch": 568.89, | |
| "grad_norm": 1.3278522491455078, | |
| "learning_rate": 9.09469696969697e-06, | |
| "loss": 0.0138, | |
| "step": 6400 | |
| }, | |
| { | |
| "epoch": 568.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.366202175617218, | |
| "eval_runtime": 2.0849, | |
| "eval_samples_per_second": 76.261, | |
| "eval_steps_per_second": 4.796, | |
| "step": 6401 | |
| }, | |
| { | |
| "epoch": 569.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.34554189443588257, | |
| "eval_runtime": 2.2433, | |
| "eval_samples_per_second": 70.878, | |
| "eval_steps_per_second": 4.458, | |
| "step": 6412 | |
| }, | |
| { | |
| "epoch": 570.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3477872908115387, | |
| "eval_runtime": 2.0921, | |
| "eval_samples_per_second": 76.0, | |
| "eval_steps_per_second": 4.78, | |
| "step": 6423 | |
| }, | |
| { | |
| "epoch": 572.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3400007486343384, | |
| "eval_runtime": 2.0746, | |
| "eval_samples_per_second": 76.641, | |
| "eval_steps_per_second": 4.82, | |
| "step": 6435 | |
| }, | |
| { | |
| "epoch": 572.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3512841463088989, | |
| "eval_runtime": 2.0975, | |
| "eval_samples_per_second": 75.803, | |
| "eval_steps_per_second": 4.767, | |
| "step": 6446 | |
| }, | |
| { | |
| "epoch": 573.33, | |
| "grad_norm": 0.1855485886335373, | |
| "learning_rate": 8.905303030303031e-06, | |
| "loss": 0.0095, | |
| "step": 6450 | |
| }, | |
| { | |
| "epoch": 573.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3461546301841736, | |
| "eval_runtime": 2.067, | |
| "eval_samples_per_second": 76.921, | |
| "eval_steps_per_second": 4.838, | |
| "step": 6457 | |
| }, | |
| { | |
| "epoch": 574.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.33488187193870544, | |
| "eval_runtime": 2.0691, | |
| "eval_samples_per_second": 76.846, | |
| "eval_steps_per_second": 4.833, | |
| "step": 6468 | |
| }, | |
| { | |
| "epoch": 576.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.337620347738266, | |
| "eval_runtime": 2.018, | |
| "eval_samples_per_second": 78.793, | |
| "eval_steps_per_second": 4.956, | |
| "step": 6480 | |
| }, | |
| { | |
| "epoch": 576.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.33732709288597107, | |
| "eval_runtime": 2.0922, | |
| "eval_samples_per_second": 75.996, | |
| "eval_steps_per_second": 4.78, | |
| "step": 6491 | |
| }, | |
| { | |
| "epoch": 577.78, | |
| "grad_norm": 0.9204933643341064, | |
| "learning_rate": 8.71590909090909e-06, | |
| "loss": 0.0138, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 577.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3310604989528656, | |
| "eval_runtime": 2.1334, | |
| "eval_samples_per_second": 74.528, | |
| "eval_steps_per_second": 4.687, | |
| "step": 6502 | |
| }, | |
| { | |
| "epoch": 578.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.33120694756507874, | |
| "eval_runtime": 2.1395, | |
| "eval_samples_per_second": 74.316, | |
| "eval_steps_per_second": 4.674, | |
| "step": 6513 | |
| }, | |
| { | |
| "epoch": 580.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3291258215904236, | |
| "eval_runtime": 2.1193, | |
| "eval_samples_per_second": 75.024, | |
| "eval_steps_per_second": 4.719, | |
| "step": 6525 | |
| }, | |
| { | |
| "epoch": 580.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3441867232322693, | |
| "eval_runtime": 2.081, | |
| "eval_samples_per_second": 76.405, | |
| "eval_steps_per_second": 4.805, | |
| "step": 6536 | |
| }, | |
| { | |
| "epoch": 581.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3806348145008087, | |
| "eval_runtime": 2.08, | |
| "eval_samples_per_second": 76.443, | |
| "eval_steps_per_second": 4.808, | |
| "step": 6547 | |
| }, | |
| { | |
| "epoch": 582.22, | |
| "grad_norm": 1.3162257671356201, | |
| "learning_rate": 8.526515151515151e-06, | |
| "loss": 0.0163, | |
| "step": 6550 | |
| }, | |
| { | |
| "epoch": 582.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.39340561628341675, | |
| "eval_runtime": 2.0419, | |
| "eval_samples_per_second": 77.868, | |
| "eval_steps_per_second": 4.897, | |
| "step": 6558 | |
| }, | |
| { | |
| "epoch": 584.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3990216851234436, | |
| "eval_runtime": 2.049, | |
| "eval_samples_per_second": 77.599, | |
| "eval_steps_per_second": 4.88, | |
| "step": 6570 | |
| }, | |
| { | |
| "epoch": 584.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.353302925825119, | |
| "eval_runtime": 2.1595, | |
| "eval_samples_per_second": 73.629, | |
| "eval_steps_per_second": 4.631, | |
| "step": 6581 | |
| }, | |
| { | |
| "epoch": 585.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.34103333950042725, | |
| "eval_runtime": 2.2099, | |
| "eval_samples_per_second": 71.948, | |
| "eval_steps_per_second": 4.525, | |
| "step": 6592 | |
| }, | |
| { | |
| "epoch": 586.67, | |
| "grad_norm": 0.35993504524230957, | |
| "learning_rate": 8.337121212121213e-06, | |
| "loss": 0.0152, | |
| "step": 6600 | |
| }, | |
| { | |
| "epoch": 586.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3351433575153351, | |
| "eval_runtime": 2.2699, | |
| "eval_samples_per_second": 70.046, | |
| "eval_steps_per_second": 4.405, | |
| "step": 6603 | |
| }, | |
| { | |
| "epoch": 588.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3369242250919342, | |
| "eval_runtime": 2.117, | |
| "eval_samples_per_second": 75.106, | |
| "eval_steps_per_second": 4.724, | |
| "step": 6615 | |
| }, | |
| { | |
| "epoch": 588.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35417425632476807, | |
| "eval_runtime": 2.144, | |
| "eval_samples_per_second": 74.161, | |
| "eval_steps_per_second": 4.664, | |
| "step": 6626 | |
| }, | |
| { | |
| "epoch": 589.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3728938102722168, | |
| "eval_runtime": 2.0531, | |
| "eval_samples_per_second": 77.443, | |
| "eval_steps_per_second": 4.871, | |
| "step": 6637 | |
| }, | |
| { | |
| "epoch": 590.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34069618582725525, | |
| "eval_runtime": 2.1327, | |
| "eval_samples_per_second": 74.555, | |
| "eval_steps_per_second": 4.689, | |
| "step": 6648 | |
| }, | |
| { | |
| "epoch": 591.11, | |
| "grad_norm": 0.19336657226085663, | |
| "learning_rate": 8.147727272727274e-06, | |
| "loss": 0.017, | |
| "step": 6650 | |
| }, | |
| { | |
| "epoch": 592.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3440462052822113, | |
| "eval_runtime": 2.0686, | |
| "eval_samples_per_second": 76.865, | |
| "eval_steps_per_second": 4.834, | |
| "step": 6660 | |
| }, | |
| { | |
| "epoch": 592.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3493140935897827, | |
| "eval_runtime": 2.0648, | |
| "eval_samples_per_second": 77.004, | |
| "eval_steps_per_second": 4.843, | |
| "step": 6671 | |
| }, | |
| { | |
| "epoch": 593.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.37120524048805237, | |
| "eval_runtime": 2.2033, | |
| "eval_samples_per_second": 72.165, | |
| "eval_steps_per_second": 4.539, | |
| "step": 6682 | |
| }, | |
| { | |
| "epoch": 594.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.36460721492767334, | |
| "eval_runtime": 2.2563, | |
| "eval_samples_per_second": 70.47, | |
| "eval_steps_per_second": 4.432, | |
| "step": 6693 | |
| }, | |
| { | |
| "epoch": 595.56, | |
| "grad_norm": 0.017406007274985313, | |
| "learning_rate": 7.958333333333333e-06, | |
| "loss": 0.0113, | |
| "step": 6700 | |
| }, | |
| { | |
| "epoch": 596.0, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.36630791425704956, | |
| "eval_runtime": 2.0788, | |
| "eval_samples_per_second": 76.486, | |
| "eval_steps_per_second": 4.81, | |
| "step": 6705 | |
| }, | |
| { | |
| "epoch": 596.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.3725621700286865, | |
| "eval_runtime": 2.226, | |
| "eval_samples_per_second": 71.429, | |
| "eval_steps_per_second": 4.492, | |
| "step": 6716 | |
| }, | |
| { | |
| "epoch": 597.96, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.35295018553733826, | |
| "eval_runtime": 2.16, | |
| "eval_samples_per_second": 73.611, | |
| "eval_steps_per_second": 4.63, | |
| "step": 6727 | |
| }, | |
| { | |
| "epoch": 598.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3451589047908783, | |
| "eval_runtime": 2.0598, | |
| "eval_samples_per_second": 77.193, | |
| "eval_steps_per_second": 4.855, | |
| "step": 6738 | |
| }, | |
| { | |
| "epoch": 600.0, | |
| "grad_norm": 0.1029694527387619, | |
| "learning_rate": 7.768939393939394e-06, | |
| "loss": 0.0115, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 600.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3340095281600952, | |
| "eval_runtime": 2.1945, | |
| "eval_samples_per_second": 72.455, | |
| "eval_steps_per_second": 4.557, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 600.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34892547130584717, | |
| "eval_runtime": 2.1247, | |
| "eval_samples_per_second": 74.836, | |
| "eval_steps_per_second": 4.707, | |
| "step": 6761 | |
| }, | |
| { | |
| "epoch": 601.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3408372402191162, | |
| "eval_runtime": 2.1827, | |
| "eval_samples_per_second": 72.846, | |
| "eval_steps_per_second": 4.582, | |
| "step": 6772 | |
| }, | |
| { | |
| "epoch": 602.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3423627018928528, | |
| "eval_runtime": 2.2182, | |
| "eval_samples_per_second": 71.68, | |
| "eval_steps_per_second": 4.508, | |
| "step": 6783 | |
| }, | |
| { | |
| "epoch": 604.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34804755449295044, | |
| "eval_runtime": 2.1754, | |
| "eval_samples_per_second": 73.091, | |
| "eval_steps_per_second": 4.597, | |
| "step": 6795 | |
| }, | |
| { | |
| "epoch": 604.44, | |
| "grad_norm": 0.7808576822280884, | |
| "learning_rate": 7.579545454545454e-06, | |
| "loss": 0.0132, | |
| "step": 6800 | |
| }, | |
| { | |
| "epoch": 604.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34386932849884033, | |
| "eval_runtime": 2.0311, | |
| "eval_samples_per_second": 78.283, | |
| "eval_steps_per_second": 4.923, | |
| "step": 6806 | |
| }, | |
| { | |
| "epoch": 605.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3530921936035156, | |
| "eval_runtime": 2.102, | |
| "eval_samples_per_second": 75.641, | |
| "eval_steps_per_second": 4.757, | |
| "step": 6817 | |
| }, | |
| { | |
| "epoch": 606.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3807942271232605, | |
| "eval_runtime": 2.1101, | |
| "eval_samples_per_second": 75.351, | |
| "eval_steps_per_second": 4.739, | |
| "step": 6828 | |
| }, | |
| { | |
| "epoch": 608.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3441016674041748, | |
| "eval_runtime": 2.1163, | |
| "eval_samples_per_second": 75.133, | |
| "eval_steps_per_second": 4.725, | |
| "step": 6840 | |
| }, | |
| { | |
| "epoch": 608.89, | |
| "grad_norm": 0.31322968006134033, | |
| "learning_rate": 7.390151515151515e-06, | |
| "loss": 0.014, | |
| "step": 6850 | |
| }, | |
| { | |
| "epoch": 608.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3534349203109741, | |
| "eval_runtime": 2.0731, | |
| "eval_samples_per_second": 76.696, | |
| "eval_steps_per_second": 4.824, | |
| "step": 6851 | |
| }, | |
| { | |
| "epoch": 609.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3583095371723175, | |
| "eval_runtime": 2.1365, | |
| "eval_samples_per_second": 74.419, | |
| "eval_steps_per_second": 4.68, | |
| "step": 6862 | |
| }, | |
| { | |
| "epoch": 610.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3640231490135193, | |
| "eval_runtime": 2.3226, | |
| "eval_samples_per_second": 68.457, | |
| "eval_steps_per_second": 4.305, | |
| "step": 6873 | |
| }, | |
| { | |
| "epoch": 612.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3587685227394104, | |
| "eval_runtime": 2.0532, | |
| "eval_samples_per_second": 77.44, | |
| "eval_steps_per_second": 4.87, | |
| "step": 6885 | |
| }, | |
| { | |
| "epoch": 612.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3662501275539398, | |
| "eval_runtime": 2.1672, | |
| "eval_samples_per_second": 73.368, | |
| "eval_steps_per_second": 4.614, | |
| "step": 6896 | |
| }, | |
| { | |
| "epoch": 613.33, | |
| "grad_norm": 1.508801817893982, | |
| "learning_rate": 7.200757575757576e-06, | |
| "loss": 0.0089, | |
| "step": 6900 | |
| }, | |
| { | |
| "epoch": 613.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3788923919200897, | |
| "eval_runtime": 2.0361, | |
| "eval_samples_per_second": 78.092, | |
| "eval_steps_per_second": 4.911, | |
| "step": 6907 | |
| }, | |
| { | |
| "epoch": 614.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.378842294216156, | |
| "eval_runtime": 2.0538, | |
| "eval_samples_per_second": 77.417, | |
| "eval_steps_per_second": 4.869, | |
| "step": 6918 | |
| }, | |
| { | |
| "epoch": 616.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3528358042240143, | |
| "eval_runtime": 2.0973, | |
| "eval_samples_per_second": 75.811, | |
| "eval_steps_per_second": 4.768, | |
| "step": 6930 | |
| }, | |
| { | |
| "epoch": 616.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.3626009523868561, | |
| "eval_runtime": 2.1285, | |
| "eval_samples_per_second": 74.701, | |
| "eval_steps_per_second": 4.698, | |
| "step": 6941 | |
| }, | |
| { | |
| "epoch": 617.78, | |
| "grad_norm": 0.027906352654099464, | |
| "learning_rate": 7.0113636363636365e-06, | |
| "loss": 0.0135, | |
| "step": 6950 | |
| }, | |
| { | |
| "epoch": 617.96, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.3760795593261719, | |
| "eval_runtime": 2.0573, | |
| "eval_samples_per_second": 77.285, | |
| "eval_steps_per_second": 4.861, | |
| "step": 6952 | |
| }, | |
| { | |
| "epoch": 618.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3911431133747101, | |
| "eval_runtime": 2.3187, | |
| "eval_samples_per_second": 68.573, | |
| "eval_steps_per_second": 4.313, | |
| "step": 6963 | |
| }, | |
| { | |
| "epoch": 620.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3900914192199707, | |
| "eval_runtime": 2.1186, | |
| "eval_samples_per_second": 75.049, | |
| "eval_steps_per_second": 4.72, | |
| "step": 6975 | |
| }, | |
| { | |
| "epoch": 620.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.4003194272518158, | |
| "eval_runtime": 2.1007, | |
| "eval_samples_per_second": 75.689, | |
| "eval_steps_per_second": 4.76, | |
| "step": 6986 | |
| }, | |
| { | |
| "epoch": 621.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.36526620388031006, | |
| "eval_runtime": 2.1753, | |
| "eval_samples_per_second": 73.093, | |
| "eval_steps_per_second": 4.597, | |
| "step": 6997 | |
| }, | |
| { | |
| "epoch": 622.22, | |
| "grad_norm": 0.05157339572906494, | |
| "learning_rate": 6.821969696969697e-06, | |
| "loss": 0.0071, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 622.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.33499374985694885, | |
| "eval_runtime": 2.11, | |
| "eval_samples_per_second": 75.356, | |
| "eval_steps_per_second": 4.739, | |
| "step": 7008 | |
| }, | |
| { | |
| "epoch": 624.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3353654444217682, | |
| "eval_runtime": 2.0902, | |
| "eval_samples_per_second": 76.069, | |
| "eval_steps_per_second": 4.784, | |
| "step": 7020 | |
| }, | |
| { | |
| "epoch": 624.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.37156394124031067, | |
| "eval_runtime": 2.0375, | |
| "eval_samples_per_second": 78.038, | |
| "eval_steps_per_second": 4.908, | |
| "step": 7031 | |
| }, | |
| { | |
| "epoch": 625.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3520486354827881, | |
| "eval_runtime": 2.0907, | |
| "eval_samples_per_second": 76.051, | |
| "eval_steps_per_second": 4.783, | |
| "step": 7042 | |
| }, | |
| { | |
| "epoch": 626.67, | |
| "grad_norm": 0.5914948582649231, | |
| "learning_rate": 6.632575757575758e-06, | |
| "loss": 0.0129, | |
| "step": 7050 | |
| }, | |
| { | |
| "epoch": 626.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3306971490383148, | |
| "eval_runtime": 2.0739, | |
| "eval_samples_per_second": 76.667, | |
| "eval_steps_per_second": 4.822, | |
| "step": 7053 | |
| }, | |
| { | |
| "epoch": 628.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33053889870643616, | |
| "eval_runtime": 2.2479, | |
| "eval_samples_per_second": 70.731, | |
| "eval_steps_per_second": 4.449, | |
| "step": 7065 | |
| }, | |
| { | |
| "epoch": 628.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3301643431186676, | |
| "eval_runtime": 1.9998, | |
| "eval_samples_per_second": 79.509, | |
| "eval_steps_per_second": 5.001, | |
| "step": 7076 | |
| }, | |
| { | |
| "epoch": 629.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3290785253047943, | |
| "eval_runtime": 2.0356, | |
| "eval_samples_per_second": 78.11, | |
| "eval_steps_per_second": 4.913, | |
| "step": 7087 | |
| }, | |
| { | |
| "epoch": 630.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3329908847808838, | |
| "eval_runtime": 2.0116, | |
| "eval_samples_per_second": 79.04, | |
| "eval_steps_per_second": 4.971, | |
| "step": 7098 | |
| }, | |
| { | |
| "epoch": 631.11, | |
| "grad_norm": 1.9344037771224976, | |
| "learning_rate": 6.4431818181818185e-06, | |
| "loss": 0.0091, | |
| "step": 7100 | |
| }, | |
| { | |
| "epoch": 632.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3331502079963684, | |
| "eval_runtime": 2.1322, | |
| "eval_samples_per_second": 74.572, | |
| "eval_steps_per_second": 4.69, | |
| "step": 7110 | |
| }, | |
| { | |
| "epoch": 632.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33215317130088806, | |
| "eval_runtime": 2.0089, | |
| "eval_samples_per_second": 79.146, | |
| "eval_steps_per_second": 4.978, | |
| "step": 7121 | |
| }, | |
| { | |
| "epoch": 633.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3437711000442505, | |
| "eval_runtime": 2.1614, | |
| "eval_samples_per_second": 73.562, | |
| "eval_steps_per_second": 4.627, | |
| "step": 7132 | |
| }, | |
| { | |
| "epoch": 634.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.36110153794288635, | |
| "eval_runtime": 2.1038, | |
| "eval_samples_per_second": 75.577, | |
| "eval_steps_per_second": 4.753, | |
| "step": 7143 | |
| }, | |
| { | |
| "epoch": 635.56, | |
| "grad_norm": 0.008998346514999866, | |
| "learning_rate": 6.253787878787879e-06, | |
| "loss": 0.0107, | |
| "step": 7150 | |
| }, | |
| { | |
| "epoch": 636.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34894272685050964, | |
| "eval_runtime": 2.1178, | |
| "eval_samples_per_second": 75.077, | |
| "eval_steps_per_second": 4.722, | |
| "step": 7155 | |
| }, | |
| { | |
| "epoch": 636.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3357524573802948, | |
| "eval_runtime": 2.1256, | |
| "eval_samples_per_second": 74.803, | |
| "eval_steps_per_second": 4.705, | |
| "step": 7166 | |
| }, | |
| { | |
| "epoch": 637.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3372538983821869, | |
| "eval_runtime": 2.0938, | |
| "eval_samples_per_second": 75.939, | |
| "eval_steps_per_second": 4.776, | |
| "step": 7177 | |
| }, | |
| { | |
| "epoch": 638.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3444075584411621, | |
| "eval_runtime": 2.1377, | |
| "eval_samples_per_second": 74.379, | |
| "eval_steps_per_second": 4.678, | |
| "step": 7188 | |
| }, | |
| { | |
| "epoch": 640.0, | |
| "grad_norm": 0.753413736820221, | |
| "learning_rate": 6.06439393939394e-06, | |
| "loss": 0.0125, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 640.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.36328038573265076, | |
| "eval_runtime": 2.0555, | |
| "eval_samples_per_second": 77.354, | |
| "eval_steps_per_second": 4.865, | |
| "step": 7200 | |
| }, | |
| { | |
| "epoch": 640.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3562980592250824, | |
| "eval_runtime": 2.0343, | |
| "eval_samples_per_second": 78.159, | |
| "eval_steps_per_second": 4.916, | |
| "step": 7211 | |
| }, | |
| { | |
| "epoch": 641.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.35727426409721375, | |
| "eval_runtime": 2.0513, | |
| "eval_samples_per_second": 77.513, | |
| "eval_steps_per_second": 4.875, | |
| "step": 7222 | |
| }, | |
| { | |
| "epoch": 642.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3534907400608063, | |
| "eval_runtime": 2.109, | |
| "eval_samples_per_second": 75.393, | |
| "eval_steps_per_second": 4.742, | |
| "step": 7233 | |
| }, | |
| { | |
| "epoch": 644.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34685295820236206, | |
| "eval_runtime": 2.1171, | |
| "eval_samples_per_second": 75.104, | |
| "eval_steps_per_second": 4.724, | |
| "step": 7245 | |
| }, | |
| { | |
| "epoch": 644.44, | |
| "grad_norm": 0.040267378091812134, | |
| "learning_rate": 5.8750000000000005e-06, | |
| "loss": 0.0071, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 644.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34481677412986755, | |
| "eval_runtime": 2.2433, | |
| "eval_samples_per_second": 70.878, | |
| "eval_steps_per_second": 4.458, | |
| "step": 7256 | |
| }, | |
| { | |
| "epoch": 645.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3445126414299011, | |
| "eval_runtime": 2.09, | |
| "eval_samples_per_second": 76.075, | |
| "eval_steps_per_second": 4.785, | |
| "step": 7267 | |
| }, | |
| { | |
| "epoch": 646.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3418070077896118, | |
| "eval_runtime": 2.1179, | |
| "eval_samples_per_second": 75.074, | |
| "eval_steps_per_second": 4.722, | |
| "step": 7278 | |
| }, | |
| { | |
| "epoch": 648.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3541422188282013, | |
| "eval_runtime": 2.0491, | |
| "eval_samples_per_second": 77.596, | |
| "eval_steps_per_second": 4.88, | |
| "step": 7290 | |
| }, | |
| { | |
| "epoch": 648.89, | |
| "grad_norm": 0.02006547898054123, | |
| "learning_rate": 5.685606060606061e-06, | |
| "loss": 0.0076, | |
| "step": 7300 | |
| }, | |
| { | |
| "epoch": 648.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34063196182250977, | |
| "eval_runtime": 2.1334, | |
| "eval_samples_per_second": 74.528, | |
| "eval_steps_per_second": 4.687, | |
| "step": 7301 | |
| }, | |
| { | |
| "epoch": 649.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3326892852783203, | |
| "eval_runtime": 2.0215, | |
| "eval_samples_per_second": 78.656, | |
| "eval_steps_per_second": 4.947, | |
| "step": 7312 | |
| }, | |
| { | |
| "epoch": 650.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3381519019603729, | |
| "eval_runtime": 2.1234, | |
| "eval_samples_per_second": 74.878, | |
| "eval_steps_per_second": 4.709, | |
| "step": 7323 | |
| }, | |
| { | |
| "epoch": 652.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3574288785457611, | |
| "eval_runtime": 2.2212, | |
| "eval_samples_per_second": 71.583, | |
| "eval_steps_per_second": 4.502, | |
| "step": 7335 | |
| }, | |
| { | |
| "epoch": 652.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3462476134300232, | |
| "eval_runtime": 2.3846, | |
| "eval_samples_per_second": 66.678, | |
| "eval_steps_per_second": 4.194, | |
| "step": 7346 | |
| }, | |
| { | |
| "epoch": 653.33, | |
| "grad_norm": 0.1632642298936844, | |
| "learning_rate": 5.5e-06, | |
| "loss": 0.0131, | |
| "step": 7350 | |
| }, | |
| { | |
| "epoch": 653.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33882516622543335, | |
| "eval_runtime": 2.0171, | |
| "eval_samples_per_second": 78.826, | |
| "eval_steps_per_second": 4.958, | |
| "step": 7357 | |
| }, | |
| { | |
| "epoch": 654.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.337929904460907, | |
| "eval_runtime": 2.1283, | |
| "eval_samples_per_second": 74.708, | |
| "eval_steps_per_second": 4.699, | |
| "step": 7368 | |
| }, | |
| { | |
| "epoch": 656.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3396049737930298, | |
| "eval_runtime": 2.0868, | |
| "eval_samples_per_second": 76.193, | |
| "eval_steps_per_second": 4.792, | |
| "step": 7380 | |
| }, | |
| { | |
| "epoch": 656.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3436720371246338, | |
| "eval_runtime": 2.0283, | |
| "eval_samples_per_second": 78.391, | |
| "eval_steps_per_second": 4.93, | |
| "step": 7391 | |
| }, | |
| { | |
| "epoch": 657.78, | |
| "grad_norm": 1.5342937707901, | |
| "learning_rate": 5.3106060606060605e-06, | |
| "loss": 0.0086, | |
| "step": 7400 | |
| }, | |
| { | |
| "epoch": 657.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3466395139694214, | |
| "eval_runtime": 2.1077, | |
| "eval_samples_per_second": 75.438, | |
| "eval_steps_per_second": 4.745, | |
| "step": 7402 | |
| }, | |
| { | |
| "epoch": 658.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3453463315963745, | |
| "eval_runtime": 2.0776, | |
| "eval_samples_per_second": 76.532, | |
| "eval_steps_per_second": 4.813, | |
| "step": 7413 | |
| }, | |
| { | |
| "epoch": 660.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3420422077178955, | |
| "eval_runtime": 2.0546, | |
| "eval_samples_per_second": 77.386, | |
| "eval_steps_per_second": 4.867, | |
| "step": 7425 | |
| }, | |
| { | |
| "epoch": 660.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33706873655319214, | |
| "eval_runtime": 2.1267, | |
| "eval_samples_per_second": 74.764, | |
| "eval_steps_per_second": 4.702, | |
| "step": 7436 | |
| }, | |
| { | |
| "epoch": 661.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34426021575927734, | |
| "eval_runtime": 2.0904, | |
| "eval_samples_per_second": 76.061, | |
| "eval_steps_per_second": 4.784, | |
| "step": 7447 | |
| }, | |
| { | |
| "epoch": 662.22, | |
| "grad_norm": 0.16996954381465912, | |
| "learning_rate": 5.121212121212121e-06, | |
| "loss": 0.0123, | |
| "step": 7450 | |
| }, | |
| { | |
| "epoch": 662.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3473140299320221, | |
| "eval_runtime": 2.0509, | |
| "eval_samples_per_second": 77.526, | |
| "eval_steps_per_second": 4.876, | |
| "step": 7458 | |
| }, | |
| { | |
| "epoch": 664.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3424939215183258, | |
| "eval_runtime": 2.0641, | |
| "eval_samples_per_second": 77.031, | |
| "eval_steps_per_second": 4.845, | |
| "step": 7470 | |
| }, | |
| { | |
| "epoch": 664.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.345442533493042, | |
| "eval_runtime": 2.0612, | |
| "eval_samples_per_second": 77.138, | |
| "eval_steps_per_second": 4.851, | |
| "step": 7481 | |
| }, | |
| { | |
| "epoch": 665.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3493753969669342, | |
| "eval_runtime": 1.9848, | |
| "eval_samples_per_second": 80.108, | |
| "eval_steps_per_second": 5.038, | |
| "step": 7492 | |
| }, | |
| { | |
| "epoch": 666.67, | |
| "grad_norm": 0.08370883017778397, | |
| "learning_rate": 4.931818181818182e-06, | |
| "loss": 0.0083, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 666.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35356974601745605, | |
| "eval_runtime": 2.1097, | |
| "eval_samples_per_second": 75.368, | |
| "eval_steps_per_second": 4.74, | |
| "step": 7503 | |
| }, | |
| { | |
| "epoch": 668.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34760990738868713, | |
| "eval_runtime": 2.1147, | |
| "eval_samples_per_second": 75.188, | |
| "eval_steps_per_second": 4.729, | |
| "step": 7515 | |
| }, | |
| { | |
| "epoch": 668.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34870967268943787, | |
| "eval_runtime": 2.0331, | |
| "eval_samples_per_second": 78.206, | |
| "eval_steps_per_second": 4.919, | |
| "step": 7526 | |
| }, | |
| { | |
| "epoch": 669.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35328802466392517, | |
| "eval_runtime": 2.4514, | |
| "eval_samples_per_second": 64.861, | |
| "eval_steps_per_second": 4.079, | |
| "step": 7537 | |
| }, | |
| { | |
| "epoch": 670.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35539668798446655, | |
| "eval_runtime": 2.1199, | |
| "eval_samples_per_second": 75.003, | |
| "eval_steps_per_second": 4.717, | |
| "step": 7548 | |
| }, | |
| { | |
| "epoch": 671.11, | |
| "grad_norm": 2.100541353225708, | |
| "learning_rate": 4.7424242424242426e-06, | |
| "loss": 0.0079, | |
| "step": 7550 | |
| }, | |
| { | |
| "epoch": 672.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3482361435890198, | |
| "eval_runtime": 2.1456, | |
| "eval_samples_per_second": 74.104, | |
| "eval_steps_per_second": 4.661, | |
| "step": 7560 | |
| }, | |
| { | |
| "epoch": 672.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34814804792404175, | |
| "eval_runtime": 2.0856, | |
| "eval_samples_per_second": 76.239, | |
| "eval_steps_per_second": 4.795, | |
| "step": 7571 | |
| }, | |
| { | |
| "epoch": 673.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.344621866941452, | |
| "eval_runtime": 2.2762, | |
| "eval_samples_per_second": 69.852, | |
| "eval_steps_per_second": 4.393, | |
| "step": 7582 | |
| }, | |
| { | |
| "epoch": 674.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3432255983352661, | |
| "eval_runtime": 2.0507, | |
| "eval_samples_per_second": 77.533, | |
| "eval_steps_per_second": 4.876, | |
| "step": 7593 | |
| }, | |
| { | |
| "epoch": 675.56, | |
| "grad_norm": 0.598809003829956, | |
| "learning_rate": 4.553030303030303e-06, | |
| "loss": 0.0111, | |
| "step": 7600 | |
| }, | |
| { | |
| "epoch": 676.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34696489572525024, | |
| "eval_runtime": 2.1457, | |
| "eval_samples_per_second": 74.102, | |
| "eval_steps_per_second": 4.66, | |
| "step": 7605 | |
| }, | |
| { | |
| "epoch": 676.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.33925533294677734, | |
| "eval_runtime": 2.134, | |
| "eval_samples_per_second": 74.507, | |
| "eval_steps_per_second": 4.686, | |
| "step": 7616 | |
| }, | |
| { | |
| "epoch": 677.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3386417627334595, | |
| "eval_runtime": 2.0634, | |
| "eval_samples_per_second": 77.059, | |
| "eval_steps_per_second": 4.846, | |
| "step": 7627 | |
| }, | |
| { | |
| "epoch": 678.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3310278058052063, | |
| "eval_runtime": 2.0308, | |
| "eval_samples_per_second": 78.293, | |
| "eval_steps_per_second": 4.924, | |
| "step": 7638 | |
| }, | |
| { | |
| "epoch": 680.0, | |
| "grad_norm": 0.0734761655330658, | |
| "learning_rate": 4.363636363636364e-06, | |
| "loss": 0.0107, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 680.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.329887717962265, | |
| "eval_runtime": 2.214, | |
| "eval_samples_per_second": 71.816, | |
| "eval_steps_per_second": 4.517, | |
| "step": 7650 | |
| }, | |
| { | |
| "epoch": 680.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33161696791648865, | |
| "eval_runtime": 2.0168, | |
| "eval_samples_per_second": 78.839, | |
| "eval_steps_per_second": 4.958, | |
| "step": 7661 | |
| }, | |
| { | |
| "epoch": 681.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33317527174949646, | |
| "eval_runtime": 2.1533, | |
| "eval_samples_per_second": 73.84, | |
| "eval_steps_per_second": 4.644, | |
| "step": 7672 | |
| }, | |
| { | |
| "epoch": 682.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3443678021430969, | |
| "eval_runtime": 2.1824, | |
| "eval_samples_per_second": 72.855, | |
| "eval_steps_per_second": 4.582, | |
| "step": 7683 | |
| }, | |
| { | |
| "epoch": 684.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3444632291793823, | |
| "eval_runtime": 1.9773, | |
| "eval_samples_per_second": 80.414, | |
| "eval_steps_per_second": 5.058, | |
| "step": 7695 | |
| }, | |
| { | |
| "epoch": 684.44, | |
| "grad_norm": 1.5188406705856323, | |
| "learning_rate": 4.1742424242424246e-06, | |
| "loss": 0.0091, | |
| "step": 7700 | |
| }, | |
| { | |
| "epoch": 684.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3443754017353058, | |
| "eval_runtime": 2.0477, | |
| "eval_samples_per_second": 77.647, | |
| "eval_steps_per_second": 4.883, | |
| "step": 7706 | |
| }, | |
| { | |
| "epoch": 685.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34085437655448914, | |
| "eval_runtime": 2.2252, | |
| "eval_samples_per_second": 71.453, | |
| "eval_steps_per_second": 4.494, | |
| "step": 7717 | |
| }, | |
| { | |
| "epoch": 686.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34413453936576843, | |
| "eval_runtime": 2.1451, | |
| "eval_samples_per_second": 74.121, | |
| "eval_steps_per_second": 4.662, | |
| "step": 7728 | |
| }, | |
| { | |
| "epoch": 688.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.35173678398132324, | |
| "eval_runtime": 2.0413, | |
| "eval_samples_per_second": 77.89, | |
| "eval_steps_per_second": 4.899, | |
| "step": 7740 | |
| }, | |
| { | |
| "epoch": 688.89, | |
| "grad_norm": 1.0382517576217651, | |
| "learning_rate": 3.984848484848484e-06, | |
| "loss": 0.0081, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 688.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3521307110786438, | |
| "eval_runtime": 2.0937, | |
| "eval_samples_per_second": 75.942, | |
| "eval_steps_per_second": 4.776, | |
| "step": 7751 | |
| }, | |
| { | |
| "epoch": 689.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.350664883852005, | |
| "eval_runtime": 2.1003, | |
| "eval_samples_per_second": 75.703, | |
| "eval_steps_per_second": 4.761, | |
| "step": 7762 | |
| }, | |
| { | |
| "epoch": 690.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3460524082183838, | |
| "eval_runtime": 2.0791, | |
| "eval_samples_per_second": 76.475, | |
| "eval_steps_per_second": 4.81, | |
| "step": 7773 | |
| }, | |
| { | |
| "epoch": 692.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.349832683801651, | |
| "eval_runtime": 2.0457, | |
| "eval_samples_per_second": 77.724, | |
| "eval_steps_per_second": 4.888, | |
| "step": 7785 | |
| }, | |
| { | |
| "epoch": 692.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.35444310307502747, | |
| "eval_runtime": 2.1547, | |
| "eval_samples_per_second": 73.793, | |
| "eval_steps_per_second": 4.641, | |
| "step": 7796 | |
| }, | |
| { | |
| "epoch": 693.33, | |
| "grad_norm": 0.36742502450942993, | |
| "learning_rate": 3.795454545454546e-06, | |
| "loss": 0.009, | |
| "step": 7800 | |
| }, | |
| { | |
| "epoch": 693.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.35569891333580017, | |
| "eval_runtime": 2.0236, | |
| "eval_samples_per_second": 78.575, | |
| "eval_steps_per_second": 4.942, | |
| "step": 7807 | |
| }, | |
| { | |
| "epoch": 694.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.35327550768852234, | |
| "eval_runtime": 2.057, | |
| "eval_samples_per_second": 77.297, | |
| "eval_steps_per_second": 4.861, | |
| "step": 7818 | |
| }, | |
| { | |
| "epoch": 696.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3559163212776184, | |
| "eval_runtime": 2.203, | |
| "eval_samples_per_second": 72.173, | |
| "eval_steps_per_second": 4.539, | |
| "step": 7830 | |
| }, | |
| { | |
| "epoch": 696.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35951152443885803, | |
| "eval_runtime": 2.0835, | |
| "eval_samples_per_second": 76.315, | |
| "eval_steps_per_second": 4.8, | |
| "step": 7841 | |
| }, | |
| { | |
| "epoch": 697.78, | |
| "grad_norm": 0.10021142661571503, | |
| "learning_rate": 3.606060606060606e-06, | |
| "loss": 0.0078, | |
| "step": 7850 | |
| }, | |
| { | |
| "epoch": 697.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3617425560951233, | |
| "eval_runtime": 2.0937, | |
| "eval_samples_per_second": 75.941, | |
| "eval_steps_per_second": 4.776, | |
| "step": 7852 | |
| }, | |
| { | |
| "epoch": 698.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3614467978477478, | |
| "eval_runtime": 2.2589, | |
| "eval_samples_per_second": 70.389, | |
| "eval_steps_per_second": 4.427, | |
| "step": 7863 | |
| }, | |
| { | |
| "epoch": 700.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34519079327583313, | |
| "eval_runtime": 2.046, | |
| "eval_samples_per_second": 77.712, | |
| "eval_steps_per_second": 4.888, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 700.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34312644600868225, | |
| "eval_runtime": 2.143, | |
| "eval_samples_per_second": 74.196, | |
| "eval_steps_per_second": 4.666, | |
| "step": 7886 | |
| }, | |
| { | |
| "epoch": 701.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34687530994415283, | |
| "eval_runtime": 2.1317, | |
| "eval_samples_per_second": 74.59, | |
| "eval_steps_per_second": 4.691, | |
| "step": 7897 | |
| }, | |
| { | |
| "epoch": 702.22, | |
| "grad_norm": 0.013305970467627048, | |
| "learning_rate": 3.416666666666667e-06, | |
| "loss": 0.0102, | |
| "step": 7900 | |
| }, | |
| { | |
| "epoch": 702.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3564489483833313, | |
| "eval_runtime": 2.0468, | |
| "eval_samples_per_second": 77.682, | |
| "eval_steps_per_second": 4.886, | |
| "step": 7908 | |
| }, | |
| { | |
| "epoch": 704.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35935157537460327, | |
| "eval_runtime": 2.0233, | |
| "eval_samples_per_second": 78.584, | |
| "eval_steps_per_second": 4.942, | |
| "step": 7920 | |
| }, | |
| { | |
| "epoch": 704.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3517804443836212, | |
| "eval_runtime": 2.2107, | |
| "eval_samples_per_second": 71.924, | |
| "eval_steps_per_second": 4.524, | |
| "step": 7931 | |
| }, | |
| { | |
| "epoch": 705.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3444287180900574, | |
| "eval_runtime": 2.0396, | |
| "eval_samples_per_second": 77.958, | |
| "eval_steps_per_second": 4.903, | |
| "step": 7942 | |
| }, | |
| { | |
| "epoch": 706.67, | |
| "grad_norm": 1.1949517726898193, | |
| "learning_rate": 3.2272727272727275e-06, | |
| "loss": 0.008, | |
| "step": 7950 | |
| }, | |
| { | |
| "epoch": 706.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34264177083969116, | |
| "eval_runtime": 2.0811, | |
| "eval_samples_per_second": 76.402, | |
| "eval_steps_per_second": 4.805, | |
| "step": 7953 | |
| }, | |
| { | |
| "epoch": 708.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34593525528907776, | |
| "eval_runtime": 2.1049, | |
| "eval_samples_per_second": 75.537, | |
| "eval_steps_per_second": 4.751, | |
| "step": 7965 | |
| }, | |
| { | |
| "epoch": 708.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3511156439781189, | |
| "eval_runtime": 2.0385, | |
| "eval_samples_per_second": 77.999, | |
| "eval_steps_per_second": 4.906, | |
| "step": 7976 | |
| }, | |
| { | |
| "epoch": 709.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.35437288880348206, | |
| "eval_runtime": 2.0421, | |
| "eval_samples_per_second": 77.862, | |
| "eval_steps_per_second": 4.897, | |
| "step": 7987 | |
| }, | |
| { | |
| "epoch": 710.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3566732704639435, | |
| "eval_runtime": 2.2624, | |
| "eval_samples_per_second": 70.28, | |
| "eval_steps_per_second": 4.42, | |
| "step": 7998 | |
| }, | |
| { | |
| "epoch": 711.11, | |
| "grad_norm": 0.8354963660240173, | |
| "learning_rate": 3.0378787878787878e-06, | |
| "loss": 0.0053, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 712.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3673837184906006, | |
| "eval_runtime": 2.0161, | |
| "eval_samples_per_second": 78.866, | |
| "eval_steps_per_second": 4.96, | |
| "step": 8010 | |
| }, | |
| { | |
| "epoch": 712.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3630300760269165, | |
| "eval_runtime": 2.0691, | |
| "eval_samples_per_second": 76.844, | |
| "eval_steps_per_second": 4.833, | |
| "step": 8021 | |
| }, | |
| { | |
| "epoch": 713.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3602018654346466, | |
| "eval_runtime": 2.0814, | |
| "eval_samples_per_second": 76.389, | |
| "eval_steps_per_second": 4.804, | |
| "step": 8032 | |
| }, | |
| { | |
| "epoch": 714.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35657405853271484, | |
| "eval_runtime": 2.0547, | |
| "eval_samples_per_second": 77.384, | |
| "eval_steps_per_second": 4.867, | |
| "step": 8043 | |
| }, | |
| { | |
| "epoch": 715.56, | |
| "grad_norm": 0.17041368782520294, | |
| "learning_rate": 2.8484848484848484e-06, | |
| "loss": 0.0071, | |
| "step": 8050 | |
| }, | |
| { | |
| "epoch": 716.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3645796477794647, | |
| "eval_runtime": 2.0104, | |
| "eval_samples_per_second": 79.087, | |
| "eval_steps_per_second": 4.974, | |
| "step": 8055 | |
| }, | |
| { | |
| "epoch": 716.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.364641398191452, | |
| "eval_runtime": 2.0723, | |
| "eval_samples_per_second": 76.725, | |
| "eval_steps_per_second": 4.825, | |
| "step": 8066 | |
| }, | |
| { | |
| "epoch": 717.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3593458831310272, | |
| "eval_runtime": 2.017, | |
| "eval_samples_per_second": 78.83, | |
| "eval_steps_per_second": 4.958, | |
| "step": 8077 | |
| }, | |
| { | |
| "epoch": 718.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3625403344631195, | |
| "eval_runtime": 2.1034, | |
| "eval_samples_per_second": 75.591, | |
| "eval_steps_per_second": 4.754, | |
| "step": 8088 | |
| }, | |
| { | |
| "epoch": 720.0, | |
| "grad_norm": 0.7891609072685242, | |
| "learning_rate": 2.659090909090909e-06, | |
| "loss": 0.0071, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 720.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.36099299788475037, | |
| "eval_runtime": 2.0137, | |
| "eval_samples_per_second": 78.958, | |
| "eval_steps_per_second": 4.966, | |
| "step": 8100 | |
| }, | |
| { | |
| "epoch": 720.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.35885581374168396, | |
| "eval_runtime": 2.0236, | |
| "eval_samples_per_second": 78.572, | |
| "eval_steps_per_second": 4.942, | |
| "step": 8111 | |
| }, | |
| { | |
| "epoch": 721.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3528722822666168, | |
| "eval_runtime": 2.0499, | |
| "eval_samples_per_second": 77.565, | |
| "eval_steps_per_second": 4.878, | |
| "step": 8122 | |
| }, | |
| { | |
| "epoch": 722.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34843915700912476, | |
| "eval_runtime": 2.0515, | |
| "eval_samples_per_second": 77.504, | |
| "eval_steps_per_second": 4.874, | |
| "step": 8133 | |
| }, | |
| { | |
| "epoch": 724.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3468559682369232, | |
| "eval_runtime": 2.0267, | |
| "eval_samples_per_second": 78.452, | |
| "eval_steps_per_second": 4.934, | |
| "step": 8145 | |
| }, | |
| { | |
| "epoch": 724.44, | |
| "grad_norm": 0.013204416260123253, | |
| "learning_rate": 2.46969696969697e-06, | |
| "loss": 0.0098, | |
| "step": 8150 | |
| }, | |
| { | |
| "epoch": 724.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34806957840919495, | |
| "eval_runtime": 2.0094, | |
| "eval_samples_per_second": 79.126, | |
| "eval_steps_per_second": 4.976, | |
| "step": 8156 | |
| }, | |
| { | |
| "epoch": 725.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34636813402175903, | |
| "eval_runtime": 2.1662, | |
| "eval_samples_per_second": 73.4, | |
| "eval_steps_per_second": 4.616, | |
| "step": 8167 | |
| }, | |
| { | |
| "epoch": 726.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34824779629707336, | |
| "eval_runtime": 2.0311, | |
| "eval_samples_per_second": 78.282, | |
| "eval_steps_per_second": 4.923, | |
| "step": 8178 | |
| }, | |
| { | |
| "epoch": 728.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34667864441871643, | |
| "eval_runtime": 2.2582, | |
| "eval_samples_per_second": 70.411, | |
| "eval_steps_per_second": 4.428, | |
| "step": 8190 | |
| }, | |
| { | |
| "epoch": 728.89, | |
| "grad_norm": 1.7239004373550415, | |
| "learning_rate": 2.2803030303030305e-06, | |
| "loss": 0.0159, | |
| "step": 8200 | |
| }, | |
| { | |
| "epoch": 728.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.346113383769989, | |
| "eval_runtime": 2.0824, | |
| "eval_samples_per_second": 76.353, | |
| "eval_steps_per_second": 4.802, | |
| "step": 8201 | |
| }, | |
| { | |
| "epoch": 729.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3437664210796356, | |
| "eval_runtime": 2.0394, | |
| "eval_samples_per_second": 77.966, | |
| "eval_steps_per_second": 4.904, | |
| "step": 8212 | |
| }, | |
| { | |
| "epoch": 730.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33936139941215515, | |
| "eval_runtime": 2.0701, | |
| "eval_samples_per_second": 76.809, | |
| "eval_steps_per_second": 4.831, | |
| "step": 8223 | |
| }, | |
| { | |
| "epoch": 732.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3355594277381897, | |
| "eval_runtime": 2.1359, | |
| "eval_samples_per_second": 74.442, | |
| "eval_steps_per_second": 4.682, | |
| "step": 8235 | |
| }, | |
| { | |
| "epoch": 732.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3355758488178253, | |
| "eval_runtime": 2.0241, | |
| "eval_samples_per_second": 78.553, | |
| "eval_steps_per_second": 4.94, | |
| "step": 8246 | |
| }, | |
| { | |
| "epoch": 733.33, | |
| "grad_norm": 1.1134917736053467, | |
| "learning_rate": 2.090909090909091e-06, | |
| "loss": 0.0128, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 733.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.337179034948349, | |
| "eval_runtime": 2.162, | |
| "eval_samples_per_second": 73.543, | |
| "eval_steps_per_second": 4.625, | |
| "step": 8257 | |
| }, | |
| { | |
| "epoch": 734.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3391708731651306, | |
| "eval_runtime": 2.0183, | |
| "eval_samples_per_second": 78.778, | |
| "eval_steps_per_second": 4.955, | |
| "step": 8268 | |
| }, | |
| { | |
| "epoch": 736.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3454706072807312, | |
| "eval_runtime": 2.037, | |
| "eval_samples_per_second": 78.056, | |
| "eval_steps_per_second": 4.909, | |
| "step": 8280 | |
| }, | |
| { | |
| "epoch": 736.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34865179657936096, | |
| "eval_runtime": 2.1268, | |
| "eval_samples_per_second": 74.76, | |
| "eval_steps_per_second": 4.702, | |
| "step": 8291 | |
| }, | |
| { | |
| "epoch": 737.78, | |
| "grad_norm": 0.008208476938307285, | |
| "learning_rate": 1.9015151515151518e-06, | |
| "loss": 0.0086, | |
| "step": 8300 | |
| }, | |
| { | |
| "epoch": 737.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3467850983142853, | |
| "eval_runtime": 2.1854, | |
| "eval_samples_per_second": 72.756, | |
| "eval_steps_per_second": 4.576, | |
| "step": 8302 | |
| }, | |
| { | |
| "epoch": 738.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.344488263130188, | |
| "eval_runtime": 2.0623, | |
| "eval_samples_per_second": 77.099, | |
| "eval_steps_per_second": 4.849, | |
| "step": 8313 | |
| }, | |
| { | |
| "epoch": 740.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.34248578548431396, | |
| "eval_runtime": 2.0582, | |
| "eval_samples_per_second": 77.254, | |
| "eval_steps_per_second": 4.859, | |
| "step": 8325 | |
| }, | |
| { | |
| "epoch": 740.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3452531397342682, | |
| "eval_runtime": 2.1556, | |
| "eval_samples_per_second": 73.762, | |
| "eval_steps_per_second": 4.639, | |
| "step": 8336 | |
| }, | |
| { | |
| "epoch": 741.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34475868940353394, | |
| "eval_runtime": 2.0516, | |
| "eval_samples_per_second": 77.5, | |
| "eval_steps_per_second": 4.874, | |
| "step": 8347 | |
| }, | |
| { | |
| "epoch": 742.22, | |
| "grad_norm": 0.2444353848695755, | |
| "learning_rate": 1.712121212121212e-06, | |
| "loss": 0.011, | |
| "step": 8350 | |
| }, | |
| { | |
| "epoch": 742.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34120240807533264, | |
| "eval_runtime": 2.0936, | |
| "eval_samples_per_second": 75.945, | |
| "eval_steps_per_second": 4.776, | |
| "step": 8358 | |
| }, | |
| { | |
| "epoch": 744.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.33924660086631775, | |
| "eval_runtime": 2.2099, | |
| "eval_samples_per_second": 71.948, | |
| "eval_steps_per_second": 4.525, | |
| "step": 8370 | |
| }, | |
| { | |
| "epoch": 744.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3390309512615204, | |
| "eval_runtime": 1.9925, | |
| "eval_samples_per_second": 79.801, | |
| "eval_steps_per_second": 5.019, | |
| "step": 8381 | |
| }, | |
| { | |
| "epoch": 745.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3395291268825531, | |
| "eval_runtime": 1.9807, | |
| "eval_samples_per_second": 80.274, | |
| "eval_steps_per_second": 5.049, | |
| "step": 8392 | |
| }, | |
| { | |
| "epoch": 746.67, | |
| "grad_norm": 0.8103430867195129, | |
| "learning_rate": 1.5227272727272727e-06, | |
| "loss": 0.0074, | |
| "step": 8400 | |
| }, | |
| { | |
| "epoch": 746.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3383350074291229, | |
| "eval_runtime": 2.2625, | |
| "eval_samples_per_second": 70.276, | |
| "eval_steps_per_second": 4.42, | |
| "step": 8403 | |
| }, | |
| { | |
| "epoch": 748.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33776676654815674, | |
| "eval_runtime": 2.0087, | |
| "eval_samples_per_second": 79.157, | |
| "eval_steps_per_second": 4.978, | |
| "step": 8415 | |
| }, | |
| { | |
| "epoch": 748.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3348415195941925, | |
| "eval_runtime": 2.0796, | |
| "eval_samples_per_second": 76.457, | |
| "eval_steps_per_second": 4.809, | |
| "step": 8426 | |
| }, | |
| { | |
| "epoch": 749.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33347979187965393, | |
| "eval_runtime": 2.1871, | |
| "eval_samples_per_second": 72.698, | |
| "eval_steps_per_second": 4.572, | |
| "step": 8437 | |
| }, | |
| { | |
| "epoch": 750.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33422428369522095, | |
| "eval_runtime": 2.069, | |
| "eval_samples_per_second": 76.849, | |
| "eval_steps_per_second": 4.833, | |
| "step": 8448 | |
| }, | |
| { | |
| "epoch": 751.11, | |
| "grad_norm": 1.5617446899414062, | |
| "learning_rate": 1.3333333333333334e-06, | |
| "loss": 0.0087, | |
| "step": 8450 | |
| }, | |
| { | |
| "epoch": 752.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33466464281082153, | |
| "eval_runtime": 2.0175, | |
| "eval_samples_per_second": 78.81, | |
| "eval_steps_per_second": 4.957, | |
| "step": 8460 | |
| }, | |
| { | |
| "epoch": 752.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.33632901310920715, | |
| "eval_runtime": 2.2613, | |
| "eval_samples_per_second": 70.315, | |
| "eval_steps_per_second": 4.422, | |
| "step": 8471 | |
| }, | |
| { | |
| "epoch": 753.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3377835154533386, | |
| "eval_runtime": 2.0093, | |
| "eval_samples_per_second": 79.131, | |
| "eval_steps_per_second": 4.977, | |
| "step": 8482 | |
| }, | |
| { | |
| "epoch": 754.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.3383637070655823, | |
| "eval_runtime": 2.0348, | |
| "eval_samples_per_second": 78.139, | |
| "eval_steps_per_second": 4.914, | |
| "step": 8493 | |
| }, | |
| { | |
| "epoch": 755.56, | |
| "grad_norm": 1.2671109437942505, | |
| "learning_rate": 1.143939393939394e-06, | |
| "loss": 0.0061, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 756.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3406466245651245, | |
| "eval_runtime": 2.0595, | |
| "eval_samples_per_second": 77.203, | |
| "eval_steps_per_second": 4.856, | |
| "step": 8505 | |
| }, | |
| { | |
| "epoch": 756.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.34400761127471924, | |
| "eval_runtime": 1.9798, | |
| "eval_samples_per_second": 80.313, | |
| "eval_steps_per_second": 5.051, | |
| "step": 8516 | |
| }, | |
| { | |
| "epoch": 757.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34409239888191223, | |
| "eval_runtime": 2.0569, | |
| "eval_samples_per_second": 77.301, | |
| "eval_steps_per_second": 4.862, | |
| "step": 8527 | |
| }, | |
| { | |
| "epoch": 758.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34241315722465515, | |
| "eval_runtime": 2.0733, | |
| "eval_samples_per_second": 76.691, | |
| "eval_steps_per_second": 4.823, | |
| "step": 8538 | |
| }, | |
| { | |
| "epoch": 760.0, | |
| "grad_norm": 2.0512726306915283, | |
| "learning_rate": 9.545454545454546e-07, | |
| "loss": 0.0119, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 760.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3426421582698822, | |
| "eval_runtime": 2.0315, | |
| "eval_samples_per_second": 78.268, | |
| "eval_steps_per_second": 4.922, | |
| "step": 8550 | |
| }, | |
| { | |
| "epoch": 760.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.3427829444408417, | |
| "eval_runtime": 2.1633, | |
| "eval_samples_per_second": 73.499, | |
| "eval_steps_per_second": 4.623, | |
| "step": 8561 | |
| }, | |
| { | |
| "epoch": 761.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34399789571762085, | |
| "eval_runtime": 2.1363, | |
| "eval_samples_per_second": 74.428, | |
| "eval_steps_per_second": 4.681, | |
| "step": 8572 | |
| }, | |
| { | |
| "epoch": 762.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3443286418914795, | |
| "eval_runtime": 2.0533, | |
| "eval_samples_per_second": 77.437, | |
| "eval_steps_per_second": 4.87, | |
| "step": 8583 | |
| }, | |
| { | |
| "epoch": 764.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.345469206571579, | |
| "eval_runtime": 1.9651, | |
| "eval_samples_per_second": 80.911, | |
| "eval_steps_per_second": 5.089, | |
| "step": 8595 | |
| }, | |
| { | |
| "epoch": 764.44, | |
| "grad_norm": 0.15614187717437744, | |
| "learning_rate": 7.651515151515152e-07, | |
| "loss": 0.0056, | |
| "step": 8600 | |
| }, | |
| { | |
| "epoch": 764.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34602606296539307, | |
| "eval_runtime": 2.0712, | |
| "eval_samples_per_second": 76.769, | |
| "eval_steps_per_second": 4.828, | |
| "step": 8606 | |
| }, | |
| { | |
| "epoch": 765.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3463137745857239, | |
| "eval_runtime": 1.9634, | |
| "eval_samples_per_second": 80.983, | |
| "eval_steps_per_second": 5.093, | |
| "step": 8617 | |
| }, | |
| { | |
| "epoch": 766.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34662124514579773, | |
| "eval_runtime": 2.0264, | |
| "eval_samples_per_second": 78.466, | |
| "eval_steps_per_second": 4.935, | |
| "step": 8628 | |
| }, | |
| { | |
| "epoch": 768.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3465888202190399, | |
| "eval_runtime": 2.1276, | |
| "eval_samples_per_second": 74.732, | |
| "eval_steps_per_second": 4.7, | |
| "step": 8640 | |
| }, | |
| { | |
| "epoch": 768.89, | |
| "grad_norm": 0.13273529708385468, | |
| "learning_rate": 5.757575757575757e-07, | |
| "loss": 0.0094, | |
| "step": 8650 | |
| }, | |
| { | |
| "epoch": 768.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34740516543388367, | |
| "eval_runtime": 1.986, | |
| "eval_samples_per_second": 80.062, | |
| "eval_steps_per_second": 5.035, | |
| "step": 8651 | |
| }, | |
| { | |
| "epoch": 769.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3476426601409912, | |
| "eval_runtime": 2.2993, | |
| "eval_samples_per_second": 69.152, | |
| "eval_steps_per_second": 4.349, | |
| "step": 8662 | |
| }, | |
| { | |
| "epoch": 770.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34822559356689453, | |
| "eval_runtime": 2.054, | |
| "eval_samples_per_second": 77.411, | |
| "eval_steps_per_second": 4.869, | |
| "step": 8673 | |
| }, | |
| { | |
| "epoch": 772.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.348609060049057, | |
| "eval_runtime": 2.0775, | |
| "eval_samples_per_second": 76.533, | |
| "eval_steps_per_second": 4.813, | |
| "step": 8685 | |
| }, | |
| { | |
| "epoch": 772.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34849491715431213, | |
| "eval_runtime": 1.9848, | |
| "eval_samples_per_second": 80.11, | |
| "eval_steps_per_second": 5.038, | |
| "step": 8696 | |
| }, | |
| { | |
| "epoch": 773.33, | |
| "grad_norm": 2.092862606048584, | |
| "learning_rate": 3.8636363636363636e-07, | |
| "loss": 0.014, | |
| "step": 8700 | |
| }, | |
| { | |
| "epoch": 773.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3478315770626068, | |
| "eval_runtime": 2.0318, | |
| "eval_samples_per_second": 78.257, | |
| "eval_steps_per_second": 4.922, | |
| "step": 8707 | |
| }, | |
| { | |
| "epoch": 774.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.347221702337265, | |
| "eval_runtime": 2.0723, | |
| "eval_samples_per_second": 76.726, | |
| "eval_steps_per_second": 4.826, | |
| "step": 8718 | |
| }, | |
| { | |
| "epoch": 776.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34651002287864685, | |
| "eval_runtime": 1.9895, | |
| "eval_samples_per_second": 79.92, | |
| "eval_steps_per_second": 5.026, | |
| "step": 8730 | |
| }, | |
| { | |
| "epoch": 776.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3461478352546692, | |
| "eval_runtime": 2.0438, | |
| "eval_samples_per_second": 77.796, | |
| "eval_steps_per_second": 4.893, | |
| "step": 8741 | |
| }, | |
| { | |
| "epoch": 777.78, | |
| "grad_norm": 0.42866629362106323, | |
| "learning_rate": 1.9696969696969696e-07, | |
| "loss": 0.0126, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 777.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3467194736003876, | |
| "eval_runtime": 2.0767, | |
| "eval_samples_per_second": 76.564, | |
| "eval_steps_per_second": 4.815, | |
| "step": 8752 | |
| }, | |
| { | |
| "epoch": 778.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3471050262451172, | |
| "eval_runtime": 2.0846, | |
| "eval_samples_per_second": 76.272, | |
| "eval_steps_per_second": 4.797, | |
| "step": 8763 | |
| }, | |
| { | |
| "epoch": 780.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.34714454412460327, | |
| "eval_runtime": 2.1516, | |
| "eval_samples_per_second": 73.897, | |
| "eval_steps_per_second": 4.648, | |
| "step": 8775 | |
| }, | |
| { | |
| "epoch": 780.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3471665382385254, | |
| "eval_runtime": 2.0781, | |
| "eval_samples_per_second": 76.511, | |
| "eval_steps_per_second": 4.812, | |
| "step": 8786 | |
| }, | |
| { | |
| "epoch": 781.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3471100628376007, | |
| "eval_runtime": 2.0029, | |
| "eval_samples_per_second": 79.386, | |
| "eval_steps_per_second": 4.993, | |
| "step": 8797 | |
| }, | |
| { | |
| "epoch": 782.22, | |
| "grad_norm": 0.060126595199108124, | |
| "learning_rate": 7.575757575757576e-09, | |
| "loss": 0.0048, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 782.22, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.3471885025501251, | |
| "eval_runtime": 2.0337, | |
| "eval_samples_per_second": 78.181, | |
| "eval_steps_per_second": 4.917, | |
| "step": 8800 | |
| }, | |
| { | |
| "epoch": 782.22, | |
| "step": 8800, | |
| "total_flos": 4.912188447589224e+18, | |
| "train_loss": 0.0709631282125007, | |
| "train_runtime": 5794.2307, | |
| "train_samples_per_second": 98.995, | |
| "train_steps_per_second": 1.519 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 8800, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 800, | |
| "save_steps": 500, | |
| "total_flos": 4.912188447589224e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |