{
  "best_metric": 0.9622641509433962,
  "best_model_checkpoint": "wav2vec2-2Class-easy-train-test-large/checkpoint-2520",
  "epoch": 224.0,
  "eval_steps": 500,
  "global_step": 2520,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.98,
      "eval_accuracy": 0.4088050314465409,
      "eval_loss": 0.7003181576728821,
      "eval_runtime": 1.8048,
      "eval_samples_per_second": 88.1,
      "eval_steps_per_second": 5.541,
      "step": 11
    },
    {
      "epoch": 1.96,
      "eval_accuracy": 0.4088050314465409,
      "eval_loss": 0.7001124620437622,
      "eval_runtime": 1.7728,
      "eval_samples_per_second": 89.69,
      "eval_steps_per_second": 5.641,
      "step": 22
    },
    {
      "epoch": 2.93,
      "eval_accuracy": 0.41509433962264153,
      "eval_loss": 0.69970703125,
      "eval_runtime": 1.7593,
      "eval_samples_per_second": 90.375,
      "eval_steps_per_second": 5.684,
      "step": 33
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.42138364779874216,
      "eval_loss": 0.6991450786590576,
      "eval_runtime": 1.7582,
      "eval_samples_per_second": 90.433,
      "eval_steps_per_second": 5.688,
      "step": 45
    },
    {
      "epoch": 4.44,
      "grad_norm": 0.8353477716445923,
      "learning_rate": 1.7045454545454546e-06,
      "loss": 0.6976,
      "step": 50
    },
    {
      "epoch": 4.98,
      "eval_accuracy": 0.4276729559748428,
      "eval_loss": 0.6984724998474121,
      "eval_runtime": 1.7849,
      "eval_samples_per_second": 89.08,
      "eval_steps_per_second": 5.603,
      "step": 56
    },
    {
      "epoch": 5.96,
      "eval_accuracy": 0.44025157232704404,
      "eval_loss": 0.697744607925415,
      "eval_runtime": 2.127,
      "eval_samples_per_second": 74.753,
      "eval_steps_per_second": 4.701,
      "step": 67
    },
    {
      "epoch": 6.93,
      "eval_accuracy": 0.44654088050314467,
      "eval_loss": 0.6968724727630615,
      "eval_runtime": 2.2513,
      "eval_samples_per_second": 70.624,
      "eval_steps_per_second": 4.442,
      "step": 78
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.46540880503144655,
      "eval_loss": 0.6957085728645325,
      "eval_runtime": 2.1194,
      "eval_samples_per_second": 75.021,
      "eval_steps_per_second": 4.718,
      "step": 90
    },
    {
      "epoch": 8.89,
      "grad_norm": 0.45805710554122925,
      "learning_rate": 3.409090909090909e-06,
      "loss": 0.6952,
      "step": 100
    },
    {
      "epoch": 8.98,
      "eval_accuracy": 0.46540880503144655,
      "eval_loss": 0.6945385932922363,
      "eval_runtime": 2.2918,
      "eval_samples_per_second": 69.378,
      "eval_steps_per_second": 4.363,
      "step": 101
    },
    {
      "epoch": 9.96,
      "eval_accuracy": 0.4779874213836478,
      "eval_loss": 0.6933900117874146,
      "eval_runtime": 2.2504,
      "eval_samples_per_second": 70.654,
      "eval_steps_per_second": 4.444,
      "step": 112
    },
    {
      "epoch": 10.93,
      "eval_accuracy": 0.49056603773584906,
      "eval_loss": 0.692146360874176,
      "eval_runtime": 2.1543,
      "eval_samples_per_second": 73.804,
      "eval_steps_per_second": 4.642,
      "step": 123
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.5471698113207547,
      "eval_loss": 0.6906170845031738,
      "eval_runtime": 2.0832,
      "eval_samples_per_second": 76.326,
      "eval_steps_per_second": 4.8,
      "step": 135
    },
    {
      "epoch": 12.98,
      "eval_accuracy": 0.610062893081761,
      "eval_loss": 0.6892228722572327,
      "eval_runtime": 2.0269,
      "eval_samples_per_second": 78.443,
      "eval_steps_per_second": 4.934,
      "step": 146
    },
    {
      "epoch": 13.33,
      "grad_norm": 0.6493268609046936,
      "learning_rate": 5.1136363636363635e-06,
      "loss": 0.6911,
      "step": 150
    },
    {
      "epoch": 13.96,
      "eval_accuracy": 0.6037735849056604,
      "eval_loss": 0.6878040432929993,
      "eval_runtime": 2.1502,
      "eval_samples_per_second": 73.946,
      "eval_steps_per_second": 4.651,
      "step": 157
    },
    {
      "epoch": 14.93,
      "eval_accuracy": 0.5911949685534591,
      "eval_loss": 0.6863483190536499,
      "eval_runtime": 2.0844,
      "eval_samples_per_second": 76.279,
      "eval_steps_per_second": 4.797,
      "step": 168
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.5911949685534591,
      "eval_loss": 0.6847361326217651,
      "eval_runtime": 2.1372,
      "eval_samples_per_second": 74.395,
      "eval_steps_per_second": 4.679,
      "step": 180
    },
    {
      "epoch": 16.98,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6830993294715881,
      "eval_runtime": 2.3473,
      "eval_samples_per_second": 67.739,
      "eval_steps_per_second": 4.26,
      "step": 191
    },
    {
      "epoch": 17.78,
      "grad_norm": 0.5862739086151123,
      "learning_rate": 6.818181818181818e-06,
      "loss": 0.6852,
      "step": 200
    },
    {
      "epoch": 17.96,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6815393567085266,
      "eval_runtime": 2.1307,
      "eval_samples_per_second": 74.623,
      "eval_steps_per_second": 4.693,
      "step": 202
    },
    {
      "epoch": 18.93,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.679994523525238,
      "eval_runtime": 2.082,
      "eval_samples_per_second": 76.37,
      "eval_steps_per_second": 4.803,
      "step": 213
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6782289147377014,
      "eval_runtime": 2.1302,
      "eval_samples_per_second": 74.641,
      "eval_steps_per_second": 4.694,
      "step": 225
    },
    {
      "epoch": 20.98,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6765275001525879,
      "eval_runtime": 2.0229,
      "eval_samples_per_second": 78.601,
      "eval_steps_per_second": 4.943,
      "step": 236
    },
    {
      "epoch": 21.96,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6749551892280579,
      "eval_runtime": 2.0505,
      "eval_samples_per_second": 77.542,
      "eval_steps_per_second": 4.877,
      "step": 247
    },
    {
      "epoch": 22.22,
      "grad_norm": 0.10243403911590576,
      "learning_rate": 8.522727272727273e-06,
      "loss": 0.6783,
      "step": 250
    },
    {
      "epoch": 22.93,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6732170581817627,
      "eval_runtime": 2.0616,
      "eval_samples_per_second": 77.125,
      "eval_steps_per_second": 4.851,
      "step": 258
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6713252067565918,
      "eval_runtime": 2.1605,
      "eval_samples_per_second": 73.595,
      "eval_steps_per_second": 4.629,
      "step": 270
    },
    {
      "epoch": 24.98,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6694673895835876,
      "eval_runtime": 2.0526,
      "eval_samples_per_second": 77.462,
      "eval_steps_per_second": 4.872,
      "step": 281
    },
    {
      "epoch": 25.96,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6674391031265259,
      "eval_runtime": 2.1284,
      "eval_samples_per_second": 74.704,
      "eval_steps_per_second": 4.698,
      "step": 292
    },
    {
      "epoch": 26.67,
      "grad_norm": 0.3114006221294403,
      "learning_rate": 1.0227272727272727e-05,
      "loss": 0.6676,
      "step": 300
    },
    {
      "epoch": 26.93,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6654335856437683,
      "eval_runtime": 1.9991,
      "eval_samples_per_second": 79.535,
      "eval_steps_per_second": 5.002,
      "step": 303
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6630644202232361,
      "eval_runtime": 2.0451,
      "eval_samples_per_second": 77.745,
      "eval_steps_per_second": 4.89,
      "step": 315
    },
    {
      "epoch": 28.98,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6605831980705261,
      "eval_runtime": 2.0625,
      "eval_samples_per_second": 77.092,
      "eval_steps_per_second": 4.849,
      "step": 326
    },
    {
      "epoch": 29.96,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6578991413116455,
      "eval_runtime": 2.0381,
      "eval_samples_per_second": 78.014,
      "eval_steps_per_second": 4.907,
      "step": 337
    },
    {
      "epoch": 30.93,
      "eval_accuracy": 0.5849056603773585,
      "eval_loss": 0.6539114713668823,
      "eval_runtime": 1.9774,
      "eval_samples_per_second": 80.407,
      "eval_steps_per_second": 5.057,
      "step": 348
    },
    {
      "epoch": 31.11,
      "grad_norm": 0.2134709656238556,
      "learning_rate": 1.1931818181818181e-05,
      "loss": 0.6516,
      "step": 350
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.5974842767295597,
      "eval_loss": 0.6492742896080017,
      "eval_runtime": 2.0601,
      "eval_samples_per_second": 77.182,
      "eval_steps_per_second": 4.854,
      "step": 360
    },
    {
      "epoch": 32.98,
      "eval_accuracy": 0.610062893081761,
      "eval_loss": 0.6441397070884705,
      "eval_runtime": 2.0739,
      "eval_samples_per_second": 76.667,
      "eval_steps_per_second": 4.822,
      "step": 371
    },
    {
      "epoch": 33.96,
      "eval_accuracy": 0.6226415094339622,
      "eval_loss": 0.6348815560340881,
      "eval_runtime": 2.1526,
      "eval_samples_per_second": 73.865,
      "eval_steps_per_second": 4.646,
      "step": 382
    },
    {
      "epoch": 34.93,
      "eval_accuracy": 0.6289308176100629,
      "eval_loss": 0.6257140040397644,
      "eval_runtime": 2.0081,
      "eval_samples_per_second": 79.179,
      "eval_steps_per_second": 4.98,
      "step": 393
    },
    {
      "epoch": 35.56,
      "grad_norm": 0.8974349498748779,
      "learning_rate": 1.3636363636363637e-05,
      "loss": 0.6124,
      "step": 400
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.6415094339622641,
      "eval_loss": 0.611738920211792,
      "eval_runtime": 1.9854,
      "eval_samples_per_second": 80.083,
      "eval_steps_per_second": 5.037,
      "step": 405
    },
    {
      "epoch": 36.98,
      "eval_accuracy": 0.6666666666666666,
      "eval_loss": 0.5910706520080566,
      "eval_runtime": 2.0618,
      "eval_samples_per_second": 77.117,
      "eval_steps_per_second": 4.85,
      "step": 416
    },
    {
      "epoch": 37.96,
      "eval_accuracy": 0.6918238993710691,
      "eval_loss": 0.5672016143798828,
      "eval_runtime": 2.0402,
      "eval_samples_per_second": 77.932,
      "eval_steps_per_second": 4.901,
      "step": 427
    },
    {
      "epoch": 38.93,
      "eval_accuracy": 0.7232704402515723,
      "eval_loss": 0.5392354130744934,
      "eval_runtime": 2.2936,
      "eval_samples_per_second": 69.324,
      "eval_steps_per_second": 4.36,
      "step": 438
    },
    {
      "epoch": 40.0,
      "grad_norm": 0.7736309170722961,
      "learning_rate": 1.534090909090909e-05,
      "loss": 0.5073,
      "step": 450
    },
    {
      "epoch": 40.0,
      "eval_accuracy": 0.7547169811320755,
      "eval_loss": 0.5041937232017517,
      "eval_runtime": 2.1247,
      "eval_samples_per_second": 74.835,
      "eval_steps_per_second": 4.707,
      "step": 450
    },
    {
      "epoch": 40.98,
      "eval_accuracy": 0.7672955974842768,
      "eval_loss": 0.47902750968933105,
      "eval_runtime": 2.163,
      "eval_samples_per_second": 73.509,
      "eval_steps_per_second": 4.623,
      "step": 461
    },
    {
      "epoch": 41.96,
      "eval_accuracy": 0.779874213836478,
      "eval_loss": 0.47594940662384033,
      "eval_runtime": 2.1321,
      "eval_samples_per_second": 74.574,
      "eval_steps_per_second": 4.69,
      "step": 472
    },
    {
      "epoch": 42.93,
      "eval_accuracy": 0.7987421383647799,
      "eval_loss": 0.4369964003562927,
      "eval_runtime": 2.1555,
      "eval_samples_per_second": 73.765,
      "eval_steps_per_second": 4.639,
      "step": 483
    },
    {
      "epoch": 44.0,
      "eval_accuracy": 0.7987421383647799,
      "eval_loss": 0.43516698479652405,
      "eval_runtime": 2.032,
      "eval_samples_per_second": 78.249,
      "eval_steps_per_second": 4.921,
      "step": 495
    },
    {
      "epoch": 44.44,
      "grad_norm": 0.4976819157600403,
      "learning_rate": 1.7045454545454546e-05,
      "loss": 0.3489,
      "step": 500
    },
    {
      "epoch": 44.98,
      "eval_accuracy": 0.7987421383647799,
      "eval_loss": 0.4422326385974884,
      "eval_runtime": 2.1135,
      "eval_samples_per_second": 75.231,
      "eval_steps_per_second": 4.732,
      "step": 506
    },
    {
      "epoch": 45.96,
      "eval_accuracy": 0.8050314465408805,
      "eval_loss": 0.41540881991386414,
      "eval_runtime": 2.0847,
      "eval_samples_per_second": 76.27,
      "eval_steps_per_second": 4.797,
      "step": 517
    },
    {
      "epoch": 46.93,
      "eval_accuracy": 0.8050314465408805,
      "eval_loss": 0.4131433367729187,
      "eval_runtime": 1.9752,
      "eval_samples_per_second": 80.498,
      "eval_steps_per_second": 5.063,
      "step": 528
    },
    {
      "epoch": 48.0,
      "eval_accuracy": 0.8113207547169812,
      "eval_loss": 0.3975575864315033,
      "eval_runtime": 2.01,
      "eval_samples_per_second": 79.104,
      "eval_steps_per_second": 4.975,
      "step": 540
    },
    {
      "epoch": 48.89,
      "grad_norm": 0.5197520852088928,
      "learning_rate": 1.8750000000000002e-05,
      "loss": 0.2962,
      "step": 550
    },
    {
      "epoch": 48.98,
      "eval_accuracy": 0.8113207547169812,
      "eval_loss": 0.39397454261779785,
      "eval_runtime": 2.0261,
      "eval_samples_per_second": 78.474,
      "eval_steps_per_second": 4.935,
      "step": 551
    },
    {
      "epoch": 49.96,
      "eval_accuracy": 0.8238993710691824,
      "eval_loss": 0.371494859457016,
      "eval_runtime": 2.0246,
      "eval_samples_per_second": 78.535,
      "eval_steps_per_second": 4.939,
      "step": 562
    },
    {
      "epoch": 50.93,
      "eval_accuracy": 0.8427672955974843,
      "eval_loss": 0.34951409697532654,
      "eval_runtime": 2.3286,
      "eval_samples_per_second": 68.281,
      "eval_steps_per_second": 4.294,
      "step": 573
    },
    {
      "epoch": 52.0,
      "eval_accuracy": 0.8364779874213837,
      "eval_loss": 0.3481156826019287,
      "eval_runtime": 1.9542,
      "eval_samples_per_second": 81.362,
      "eval_steps_per_second": 5.117,
      "step": 585
    },
    {
      "epoch": 52.98,
      "eval_accuracy": 0.8176100628930818,
      "eval_loss": 0.3817409873008728,
      "eval_runtime": 2.0789,
      "eval_samples_per_second": 76.484,
      "eval_steps_per_second": 4.81,
      "step": 596
    },
    {
      "epoch": 53.33,
      "grad_norm": 0.5608111023902893,
      "learning_rate": 2.0454545454545454e-05,
      "loss": 0.2573,
      "step": 600
    },
    {
      "epoch": 53.96,
      "eval_accuracy": 0.8490566037735849,
      "eval_loss": 0.3412492871284485,
      "eval_runtime": 2.0746,
      "eval_samples_per_second": 76.642,
      "eval_steps_per_second": 4.82,
      "step": 607
    },
    {
      "epoch": 54.93,
      "eval_accuracy": 0.8490566037735849,
      "eval_loss": 0.32929155230522156,
      "eval_runtime": 1.9991,
      "eval_samples_per_second": 79.538,
      "eval_steps_per_second": 5.002,
      "step": 618
    },
    {
      "epoch": 56.0,
      "eval_accuracy": 0.8427672955974843,
      "eval_loss": 0.3547687232494354,
      "eval_runtime": 2.1242,
      "eval_samples_per_second": 74.851,
      "eval_steps_per_second": 4.708,
      "step": 630
    },
    {
      "epoch": 56.98,
      "eval_accuracy": 0.8427672955974843,
      "eval_loss": 0.3044220209121704,
      "eval_runtime": 2.0508,
      "eval_samples_per_second": 77.532,
      "eval_steps_per_second": 4.876,
      "step": 641
    },
    {
      "epoch": 57.78,
      "grad_norm": 0.894092321395874,
      "learning_rate": 2.215909090909091e-05,
      "loss": 0.2279,
      "step": 650
    },
    {
      "epoch": 57.96,
      "eval_accuracy": 0.8490566037735849,
      "eval_loss": 0.32347577810287476,
      "eval_runtime": 2.2095,
      "eval_samples_per_second": 71.963,
      "eval_steps_per_second": 4.526,
      "step": 652
    },
    {
      "epoch": 58.93,
      "eval_accuracy": 0.8490566037735849,
      "eval_loss": 0.3371436297893524,
      "eval_runtime": 2.1055,
      "eval_samples_per_second": 75.518,
      "eval_steps_per_second": 4.75,
      "step": 663
    },
    {
      "epoch": 60.0,
      "eval_accuracy": 0.8490566037735849,
      "eval_loss": 0.31275492906570435,
      "eval_runtime": 2.1311,
      "eval_samples_per_second": 74.61,
      "eval_steps_per_second": 4.692,
      "step": 675
    },
    {
      "epoch": 60.98,
      "eval_accuracy": 0.8553459119496856,
      "eval_loss": 0.32111966609954834,
      "eval_runtime": 2.0639,
      "eval_samples_per_second": 77.038,
      "eval_steps_per_second": 4.845,
      "step": 686
    },
    {
      "epoch": 61.96,
      "eval_accuracy": 0.8616352201257862,
      "eval_loss": 0.302960604429245,
      "eval_runtime": 2.0241,
      "eval_samples_per_second": 78.552,
      "eval_steps_per_second": 4.94,
      "step": 697
    },
    {
      "epoch": 62.22,
      "grad_norm": 0.4315973222255707,
      "learning_rate": 2.3863636363636362e-05,
      "loss": 0.2167,
      "step": 700
    },
    {
      "epoch": 62.93,
      "eval_accuracy": 0.8616352201257862,
      "eval_loss": 0.29696550965309143,
      "eval_runtime": 2.034,
      "eval_samples_per_second": 78.169,
      "eval_steps_per_second": 4.916,
      "step": 708
    },
    {
      "epoch": 64.0,
      "eval_accuracy": 0.8679245283018868,
      "eval_loss": 0.29949402809143066,
      "eval_runtime": 2.095,
      "eval_samples_per_second": 75.897,
      "eval_steps_per_second": 4.773,
      "step": 720
    },
    {
      "epoch": 64.98,
      "eval_accuracy": 0.8742138364779874,
      "eval_loss": 0.2867083251476288,
      "eval_runtime": 2.0417,
      "eval_samples_per_second": 77.876,
      "eval_steps_per_second": 4.898,
      "step": 731
    },
    {
      "epoch": 65.96,
      "eval_accuracy": 0.8930817610062893,
      "eval_loss": 0.26363295316696167,
      "eval_runtime": 2.1382,
      "eval_samples_per_second": 74.363,
      "eval_steps_per_second": 4.677,
      "step": 742
    },
    {
      "epoch": 66.67,
      "grad_norm": 0.37665870785713196,
      "learning_rate": 2.556818181818182e-05,
      "loss": 0.207,
      "step": 750
    },
    {
      "epoch": 66.93,
      "eval_accuracy": 0.8805031446540881,
      "eval_loss": 0.28482353687286377,
      "eval_runtime": 2.1166,
      "eval_samples_per_second": 75.119,
      "eval_steps_per_second": 4.724,
      "step": 753
    },
    {
      "epoch": 68.0,
      "eval_accuracy": 0.8867924528301887,
      "eval_loss": 0.2750767767429352,
      "eval_runtime": 2.1981,
      "eval_samples_per_second": 72.336,
      "eval_steps_per_second": 4.549,
      "step": 765
    },
    {
      "epoch": 68.98,
      "eval_accuracy": 0.8930817610062893,
      "eval_loss": 0.256393700838089,
      "eval_runtime": 2.033,
      "eval_samples_per_second": 78.211,
      "eval_steps_per_second": 4.919,
      "step": 776
    },
    {
      "epoch": 69.96,
      "eval_accuracy": 0.8930817610062893,
      "eval_loss": 0.25443732738494873,
      "eval_runtime": 2.0096,
      "eval_samples_per_second": 79.121,
      "eval_steps_per_second": 4.976,
      "step": 787
    },
    {
      "epoch": 70.93,
      "eval_accuracy": 0.8742138364779874,
      "eval_loss": 0.2954423129558563,
      "eval_runtime": 2.1018,
      "eval_samples_per_second": 75.649,
      "eval_steps_per_second": 4.758,
      "step": 798
    },
    {
      "epoch": 71.11,
      "grad_norm": 0.7302255630493164,
      "learning_rate": 2.7272727272727273e-05,
      "loss": 0.1899,
      "step": 800
    },
    {
      "epoch": 72.0,
      "eval_accuracy": 0.8930817610062893,
      "eval_loss": 0.25169771909713745,
      "eval_runtime": 2.041,
      "eval_samples_per_second": 77.904,
      "eval_steps_per_second": 4.9,
      "step": 810
    },
    {
      "epoch": 72.98,
      "eval_accuracy": 0.8930817610062893,
      "eval_loss": 0.2506076693534851,
      "eval_runtime": 2.0257,
      "eval_samples_per_second": 78.49,
      "eval_steps_per_second": 4.936,
      "step": 821
    },
    {
      "epoch": 73.96,
      "eval_accuracy": 0.8930817610062893,
      "eval_loss": 0.2434261441230774,
      "eval_runtime": 2.0325,
      "eval_samples_per_second": 78.23,
      "eval_steps_per_second": 4.92,
      "step": 832
    },
    {
      "epoch": 74.93,
      "eval_accuracy": 0.89937106918239,
      "eval_loss": 0.23832084238529205,
      "eval_runtime": 2.1871,
      "eval_samples_per_second": 72.699,
      "eval_steps_per_second": 4.572,
      "step": 843
    },
    {
      "epoch": 75.56,
      "grad_norm": 0.5180615186691284,
      "learning_rate": 2.897727272727273e-05,
      "loss": 0.1801,
      "step": 850
    },
    {
      "epoch": 76.0,
      "eval_accuracy": 0.89937106918239,
      "eval_loss": 0.23464229702949524,
      "eval_runtime": 2.026,
      "eval_samples_per_second": 78.48,
      "eval_steps_per_second": 4.936,
      "step": 855
    },
    {
      "epoch": 76.98,
      "eval_accuracy": 0.89937106918239,
      "eval_loss": 0.22975026071071625,
      "eval_runtime": 2.0881,
      "eval_samples_per_second": 76.147,
      "eval_steps_per_second": 4.789,
      "step": 866
    },
    {
      "epoch": 77.96,
      "eval_accuracy": 0.9056603773584906,
      "eval_loss": 0.2403678596019745,
      "eval_runtime": 2.075,
      "eval_samples_per_second": 76.626,
      "eval_steps_per_second": 4.819,
      "step": 877
    },
    {
      "epoch": 78.93,
      "eval_accuracy": 0.8930817610062893,
      "eval_loss": 0.2674010097980499,
      "eval_runtime": 2.037,
      "eval_samples_per_second": 78.057,
      "eval_steps_per_second": 4.909,
      "step": 888
    },
    {
      "epoch": 80.0,
      "grad_norm": 1.2135472297668457,
      "learning_rate": 2.9924242424242427e-05,
      "loss": 0.1692,
      "step": 900
    },
    {
      "epoch": 80.0,
      "eval_accuracy": 0.89937106918239,
      "eval_loss": 0.2231501042842865,
      "eval_runtime": 2.0398,
      "eval_samples_per_second": 77.949,
      "eval_steps_per_second": 4.902,
      "step": 900
    },
    {
      "epoch": 80.98,
      "eval_accuracy": 0.89937106918239,
      "eval_loss": 0.2390480935573578,
      "eval_runtime": 1.9822,
      "eval_samples_per_second": 80.213,
      "eval_steps_per_second": 5.045,
      "step": 911
    },
    {
      "epoch": 81.96,
      "eval_accuracy": 0.8930817610062893,
      "eval_loss": 0.20583955943584442,
      "eval_runtime": 2.0665,
      "eval_samples_per_second": 76.94,
      "eval_steps_per_second": 4.839,
      "step": 922
    },
    {
      "epoch": 82.93,
      "eval_accuracy": 0.9056603773584906,
      "eval_loss": 0.2114023119211197,
      "eval_runtime": 2.0736,
      "eval_samples_per_second": 76.678,
      "eval_steps_per_second": 4.823,
      "step": 933
    },
    {
      "epoch": 84.0,
      "eval_accuracy": 0.89937106918239,
      "eval_loss": 0.24830691516399384,
      "eval_runtime": 2.0148,
      "eval_samples_per_second": 78.915,
      "eval_steps_per_second": 4.963,
      "step": 945
    },
    {
      "epoch": 84.44,
      "grad_norm": 0.5111488103866577,
      "learning_rate": 2.9734848484848486e-05,
      "loss": 0.1691,
      "step": 950
    },
    {
      "epoch": 84.98,
      "eval_accuracy": 0.9119496855345912,
      "eval_loss": 0.2259017676115036,
      "eval_runtime": 2.2201,
      "eval_samples_per_second": 71.618,
      "eval_steps_per_second": 4.504,
      "step": 956
    },
    {
      "epoch": 85.96,
      "eval_accuracy": 0.9119496855345912,
      "eval_loss": 0.20239894092082977,
      "eval_runtime": 2.0671,
      "eval_samples_per_second": 76.918,
      "eval_steps_per_second": 4.838,
      "step": 967
    },
    {
      "epoch": 86.93,
      "eval_accuracy": 0.89937106918239,
      "eval_loss": 0.20193150639533997,
      "eval_runtime": 2.0416,
      "eval_samples_per_second": 77.879,
      "eval_steps_per_second": 4.898,
      "step": 978
    },
    {
      "epoch": 88.0,
      "eval_accuracy": 0.9245283018867925,
      "eval_loss": 0.19625458121299744,
      "eval_runtime": 2.0196,
      "eval_samples_per_second": 78.73,
      "eval_steps_per_second": 4.952,
      "step": 990
    },
    {
      "epoch": 88.89,
      "grad_norm": 0.4683234989643097,
      "learning_rate": 2.9545454545454545e-05,
      "loss": 0.1609,
      "step": 1000
    },
    {
      "epoch": 88.98,
      "eval_accuracy": 0.9119496855345912,
      "eval_loss": 0.21583892405033112,
      "eval_runtime": 2.0254,
      "eval_samples_per_second": 78.503,
      "eval_steps_per_second": 4.937,
      "step": 1001
    },
    {
      "epoch": 89.96,
      "eval_accuracy": 0.9119496855345912,
      "eval_loss": 0.197691410779953,
      "eval_runtime": 1.9978,
      "eval_samples_per_second": 79.586,
      "eval_steps_per_second": 5.005,
      "step": 1012
    },
    {
      "epoch": 90.93,
      "eval_accuracy": 0.9182389937106918,
      "eval_loss": 0.19791610538959503,
      "eval_runtime": 2.0853,
      "eval_samples_per_second": 76.248,
      "eval_steps_per_second": 4.795,
      "step": 1023
    },
    {
      "epoch": 92.0,
      "eval_accuracy": 0.9119496855345912,
      "eval_loss": 0.20358721911907196,
      "eval_runtime": 2.1963,
      "eval_samples_per_second": 72.393,
      "eval_steps_per_second": 4.553,
      "step": 1035
    },
    {
      "epoch": 92.98,
      "eval_accuracy": 0.9245283018867925,
      "eval_loss": 0.19769711792469025,
      "eval_runtime": 2.0089,
      "eval_samples_per_second": 79.146,
      "eval_steps_per_second": 4.978,
      "step": 1046
    },
    {
      "epoch": 93.33,
      "grad_norm": 0.6099847555160522,
      "learning_rate": 2.9356060606060604e-05,
      "loss": 0.1516,
      "step": 1050
    },
    {
      "epoch": 93.96,
      "eval_accuracy": 0.9182389937106918,
      "eval_loss": 0.1974458247423172,
      "eval_runtime": 2.1182,
      "eval_samples_per_second": 75.065,
      "eval_steps_per_second": 4.721,
      "step": 1057
    },
    {
      "epoch": 94.93,
      "eval_accuracy": 0.9245283018867925,
      "eval_loss": 0.1993919163942337,
      "eval_runtime": 2.0707,
      "eval_samples_per_second": 76.787,
      "eval_steps_per_second": 4.829,
      "step": 1068
    },
    {
      "epoch": 96.0,
      "eval_accuracy": 0.9119496855345912,
      "eval_loss": 0.1955273449420929,
      "eval_runtime": 2.0163,
      "eval_samples_per_second": 78.858,
      "eval_steps_per_second": 4.96,
      "step": 1080
    },
    {
      "epoch": 96.98,
      "eval_accuracy": 0.9119496855345912,
      "eval_loss": 0.19483698904514313,
      "eval_runtime": 2.0495,
      "eval_samples_per_second": 77.581,
      "eval_steps_per_second": 4.879,
      "step": 1091
    },
    {
      "epoch": 97.78,
      "grad_norm": 1.0578981637954712,
      "learning_rate": 2.9166666666666666e-05,
      "loss": 0.1386,
      "step": 1100
    },
    {
      "epoch": 97.96,
      "eval_accuracy": 0.9245283018867925,
      "eval_loss": 0.19463855028152466,
      "eval_runtime": 2.0625,
      "eval_samples_per_second": 77.091,
      "eval_steps_per_second": 4.849,
      "step": 1102
    },
    {
      "epoch": 98.93,
      "eval_accuracy": 0.9245283018867925,
      "eval_loss": 0.19323910772800446,
      "eval_runtime": 2.0028,
      "eval_samples_per_second": 79.389,
      "eval_steps_per_second": 4.993,
      "step": 1113
    },
    {
      "epoch": 100.0,
      "eval_accuracy": 0.9371069182389937,
      "eval_loss": 0.1841806173324585,
      "eval_runtime": 2.1056,
      "eval_samples_per_second": 75.512,
      "eval_steps_per_second": 4.749,
      "step": 1125
    },
    {
      "epoch": 100.98,
      "eval_accuracy": 0.9308176100628931,
      "eval_loss": 0.18839451670646667,
      "eval_runtime": 1.9858,
      "eval_samples_per_second": 80.07,
      "eval_steps_per_second": 5.036,
      "step": 1136
    },
    {
      "epoch": 101.96,
      "eval_accuracy": 0.9371069182389937,
      "eval_loss": 0.1899903267621994,
      "eval_runtime": 2.2196,
      "eval_samples_per_second": 71.635,
      "eval_steps_per_second": 4.505,
      "step": 1147
    },
    {
      "epoch": 102.22,
      "grad_norm": 0.6229210495948792,
      "learning_rate": 2.897727272727273e-05,
      "loss": 0.1279,
      "step": 1150
    },
    {
      "epoch": 102.93,
      "eval_accuracy": 0.9308176100628931,
      "eval_loss": 0.184115469455719,
      "eval_runtime": 2.0229,
      "eval_samples_per_second": 78.602,
      "eval_steps_per_second": 4.944,
      "step": 1158
    },
    {
      "epoch": 104.0,
      "eval_accuracy": 0.9245283018867925,
      "eval_loss": 0.19207227230072021,
      "eval_runtime": 1.9639,
      "eval_samples_per_second": 80.962,
      "eval_steps_per_second": 5.092,
      "step": 1170
    },
    {
      "epoch": 104.98,
      "eval_accuracy": 0.9245283018867925,
      "eval_loss": 0.19926591217517853,
      "eval_runtime": 2.0509,
      "eval_samples_per_second": 77.526,
      "eval_steps_per_second": 4.876,
      "step": 1181
    },
    {
      "epoch": 105.96,
      "eval_accuracy": 0.9308176100628931,
      "eval_loss": 0.19455212354660034,
      "eval_runtime": 2.0496,
      "eval_samples_per_second": 77.577,
      "eval_steps_per_second": 4.879,
      "step": 1192
    },
    {
      "epoch": 106.67,
      "grad_norm": 1.2741256952285767,
      "learning_rate": 2.8787878787878788e-05,
      "loss": 0.1258,
      "step": 1200
    },
    {
      "epoch": 106.93,
      "eval_accuracy": 0.9308176100628931,
      "eval_loss": 0.18963727355003357,
      "eval_runtime": 2.0026,
      "eval_samples_per_second": 79.395,
      "eval_steps_per_second": 4.993,
      "step": 1203
    },
    {
      "epoch": 108.0,
      "eval_accuracy": 0.9308176100628931,
      "eval_loss": 0.1884273737668991,
      "eval_runtime": 2.0343,
      "eval_samples_per_second": 78.16,
      "eval_steps_per_second": 4.916,
      "step": 1215
    },
    {
      "epoch": 108.98,
      "eval_accuracy": 0.9433962264150944,
      "eval_loss": 0.17940251529216766,
      "eval_runtime": 2.1734,
      "eval_samples_per_second": 73.156,
      "eval_steps_per_second": 4.601,
      "step": 1226
    },
    {
      "epoch": 109.96,
      "eval_accuracy": 0.9245283018867925,
      "eval_loss": 0.18589730560779572,
      "eval_runtime": 2.0874,
      "eval_samples_per_second": 76.17,
      "eval_steps_per_second": 4.791,
      "step": 1237
    },
    {
      "epoch": 110.93,
      "eval_accuracy": 0.9119496855345912,
      "eval_loss": 0.2194768339395523,
      "eval_runtime": 2.0717,
      "eval_samples_per_second": 76.747,
      "eval_steps_per_second": 4.827,
      "step": 1248
    },
    {
      "epoch": 111.11,
      "grad_norm": 0.3613344430923462,
      "learning_rate": 2.859848484848485e-05,
      "loss": 0.1258,
      "step": 1250
    },
    {
      "epoch": 112.0,
      "eval_accuracy": 0.9182389937106918,
      "eval_loss": 0.20826272666454315,
      "eval_runtime": 1.9861,
      "eval_samples_per_second": 80.057,
      "eval_steps_per_second": 5.035,
      "step": 1260
    },
    {
      "epoch": 112.98,
      "eval_accuracy": 0.9245283018867925,
      "eval_loss": 0.21202689409255981,
      "eval_runtime": 2.0132,
      "eval_samples_per_second": 78.98,
      "eval_steps_per_second": 4.967,
      "step": 1271
    },
    {
      "epoch": 113.96,
      "eval_accuracy": 0.9308176100628931,
      "eval_loss": 0.20663346350193024,
      "eval_runtime": 2.02,
      "eval_samples_per_second": 78.711,
      "eval_steps_per_second": 4.95,
      "step": 1282
    },
    {
      "epoch": 114.93,
      "eval_accuracy": 0.9308176100628931,
      "eval_loss": 0.1931203156709671,
      "eval_runtime": 2.033,
      "eval_samples_per_second": 78.208,
      "eval_steps_per_second": 4.919,
      "step": 1293
    },
    {
      "epoch": 115.56,
      "grad_norm": 0.7503376007080078,
      "learning_rate": 2.8409090909090912e-05,
      "loss": 0.1023,
      "step": 1300
    },
    {
      "epoch": 116.0,
      "eval_accuracy": 0.9308176100628931,
      "eval_loss": 0.19000084698200226,
      "eval_runtime": 2.0014,
      "eval_samples_per_second": 79.446,
      "eval_steps_per_second": 4.997,
      "step": 1305
    },
    {
      "epoch": 116.98,
      "eval_accuracy": 0.9308176100628931,
      "eval_loss": 0.20288796722888947,
      "eval_runtime": 2.0774,
      "eval_samples_per_second": 76.539,
      "eval_steps_per_second": 4.814,
      "step": 1316
    },
    {
      "epoch": 117.96,
      "eval_accuracy": 0.9245283018867925,
      "eval_loss": 0.19505923986434937,
      "eval_runtime": 2.0552,
      "eval_samples_per_second": 77.366,
      "eval_steps_per_second": 4.866,
      "step": 1327
    },
    {
      "epoch": 118.93,
      "eval_accuracy": 0.9119496855345912,
      "eval_loss": 0.20838169753551483,
      "eval_runtime": 2.2371,
      "eval_samples_per_second": 71.074,
| "eval_steps_per_second": 4.47, | |
| "step": 1338 | |
| }, | |
| { | |
| "epoch": 120.0, | |
| "grad_norm": 0.2376416176557541, | |
| "learning_rate": 2.821969696969697e-05, | |
| "loss": 0.0997, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 120.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.2159019112586975, | |
| "eval_runtime": 2.0579, | |
| "eval_samples_per_second": 77.264, | |
| "eval_steps_per_second": 4.859, | |
| "step": 1350 | |
| }, | |
| { | |
| "epoch": 120.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.21662545204162598, | |
| "eval_runtime": 2.0756, | |
| "eval_samples_per_second": 76.605, | |
| "eval_steps_per_second": 4.818, | |
| "step": 1361 | |
| }, | |
| { | |
| "epoch": 121.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.197323277592659, | |
| "eval_runtime": 2.0227, | |
| "eval_samples_per_second": 78.607, | |
| "eval_steps_per_second": 4.944, | |
| "step": 1372 | |
| }, | |
| { | |
| "epoch": 122.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.18507684767246246, | |
| "eval_runtime": 2.0728, | |
| "eval_samples_per_second": 76.706, | |
| "eval_steps_per_second": 4.824, | |
| "step": 1383 | |
| }, | |
| { | |
| "epoch": 124.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.20666691660881042, | |
| "eval_runtime": 1.9717, | |
| "eval_samples_per_second": 80.642, | |
| "eval_steps_per_second": 5.072, | |
| "step": 1395 | |
| }, | |
| { | |
| "epoch": 124.44, | |
| "grad_norm": 0.3115290403366089, | |
| "learning_rate": 2.803030303030303e-05, | |
| "loss": 0.1021, | |
| "step": 1400 | |
| }, | |
| { | |
| "epoch": 124.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.19534242153167725, | |
| "eval_runtime": 2.0497, | |
| "eval_samples_per_second": 77.571, | |
| "eval_steps_per_second": 4.879, | |
| "step": 1406 | |
| }, | |
| { | |
| "epoch": 125.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.17650572955608368, | |
| "eval_runtime": 2.239, | |
| "eval_samples_per_second": 71.015, | |
| "eval_steps_per_second": 4.466, | |
| "step": 1417 | |
| }, | |
| { | |
| "epoch": 126.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.18782062828540802, | |
| "eval_runtime": 2.0533, | |
| "eval_samples_per_second": 77.437, | |
| "eval_steps_per_second": 4.87, | |
| "step": 1428 | |
| }, | |
| { | |
| "epoch": 128.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.20708344876766205, | |
| "eval_runtime": 2.0414, | |
| "eval_samples_per_second": 77.887, | |
| "eval_steps_per_second": 4.899, | |
| "step": 1440 | |
| }, | |
| { | |
| "epoch": 128.89, | |
| "grad_norm": 1.2413551807403564, | |
| "learning_rate": 2.784090909090909e-05, | |
| "loss": 0.0883, | |
| "step": 1450 | |
| }, | |
| { | |
| "epoch": 128.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.2241077572107315, | |
| "eval_runtime": 1.9826, | |
| "eval_samples_per_second": 80.197, | |
| "eval_steps_per_second": 5.044, | |
| "step": 1451 | |
| }, | |
| { | |
| "epoch": 129.96, | |
| "eval_accuracy": 0.9119496855345912, | |
| "eval_loss": 0.23481474816799164, | |
| "eval_runtime": 1.9747, | |
| "eval_samples_per_second": 80.518, | |
| "eval_steps_per_second": 5.064, | |
| "step": 1462 | |
| }, | |
| { | |
| "epoch": 130.93, | |
| "eval_accuracy": 0.9056603773584906, | |
| "eval_loss": 0.24748335778713226, | |
| "eval_runtime": 1.9737, | |
| "eval_samples_per_second": 80.559, | |
| "eval_steps_per_second": 5.067, | |
| "step": 1473 | |
| }, | |
| { | |
| "epoch": 132.0, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.21596243977546692, | |
| "eval_runtime": 2.0455, | |
| "eval_samples_per_second": 77.733, | |
| "eval_steps_per_second": 4.889, | |
| "step": 1485 | |
| }, | |
| { | |
| "epoch": 132.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.20896825194358826, | |
| "eval_runtime": 2.047, | |
| "eval_samples_per_second": 77.675, | |
| "eval_steps_per_second": 4.885, | |
| "step": 1496 | |
| }, | |
| { | |
| "epoch": 133.33, | |
| "grad_norm": 0.56540846824646, | |
| "learning_rate": 2.7651515151515152e-05, | |
| "loss": 0.0769, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 133.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.21468934416770935, | |
| "eval_runtime": 1.9936, | |
| "eval_samples_per_second": 79.754, | |
| "eval_steps_per_second": 5.016, | |
| "step": 1507 | |
| }, | |
| { | |
| "epoch": 134.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.22008037567138672, | |
| "eval_runtime": 2.0857, | |
| "eval_samples_per_second": 76.234, | |
| "eval_steps_per_second": 4.795, | |
| "step": 1518 | |
| }, | |
| { | |
| "epoch": 136.0, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.23723578453063965, | |
| "eval_runtime": 2.1872, | |
| "eval_samples_per_second": 72.695, | |
| "eval_steps_per_second": 4.572, | |
| "step": 1530 | |
| }, | |
| { | |
| "epoch": 136.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.21990692615509033, | |
| "eval_runtime": 2.0473, | |
| "eval_samples_per_second": 77.664, | |
| "eval_steps_per_second": 4.885, | |
| "step": 1541 | |
| }, | |
| { | |
| "epoch": 137.78, | |
| "grad_norm": 1.0245180130004883, | |
| "learning_rate": 2.7462121212121214e-05, | |
| "loss": 0.0786, | |
| "step": 1550 | |
| }, | |
| { | |
| "epoch": 137.96, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.2087443619966507, | |
| "eval_runtime": 2.0577, | |
| "eval_samples_per_second": 77.271, | |
| "eval_steps_per_second": 4.86, | |
| "step": 1552 | |
| }, | |
| { | |
| "epoch": 138.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.18779344856739044, | |
| "eval_runtime": 2.0799, | |
| "eval_samples_per_second": 76.447, | |
| "eval_steps_per_second": 4.808, | |
| "step": 1563 | |
| }, | |
| { | |
| "epoch": 140.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.1914655864238739, | |
| "eval_runtime": 2.043, | |
| "eval_samples_per_second": 77.827, | |
| "eval_steps_per_second": 4.895, | |
| "step": 1575 | |
| }, | |
| { | |
| "epoch": 140.98, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.23168283700942993, | |
| "eval_runtime": 2.0313, | |
| "eval_samples_per_second": 78.277, | |
| "eval_steps_per_second": 4.923, | |
| "step": 1586 | |
| }, | |
| { | |
| "epoch": 141.96, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.2865447700023651, | |
| "eval_runtime": 2.0095, | |
| "eval_samples_per_second": 79.125, | |
| "eval_steps_per_second": 4.976, | |
| "step": 1597 | |
| }, | |
| { | |
| "epoch": 142.22, | |
| "grad_norm": 1.393044352531433, | |
| "learning_rate": 2.7272727272727273e-05, | |
| "loss": 0.0714, | |
| "step": 1600 | |
| }, | |
| { | |
| "epoch": 142.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.22998519241809845, | |
| "eval_runtime": 2.1842, | |
| "eval_samples_per_second": 72.794, | |
| "eval_steps_per_second": 4.578, | |
| "step": 1608 | |
| }, | |
| { | |
| "epoch": 144.0, | |
| "eval_accuracy": 0.9056603773584906, | |
| "eval_loss": 0.27265357971191406, | |
| "eval_runtime": 2.0318, | |
| "eval_samples_per_second": 78.258, | |
| "eval_steps_per_second": 4.922, | |
| "step": 1620 | |
| }, | |
| { | |
| "epoch": 144.98, | |
| "eval_accuracy": 0.9056603773584906, | |
| "eval_loss": 0.28114742040634155, | |
| "eval_runtime": 2.0949, | |
| "eval_samples_per_second": 75.9, | |
| "eval_steps_per_second": 4.774, | |
| "step": 1631 | |
| }, | |
| { | |
| "epoch": 145.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.21014899015426636, | |
| "eval_runtime": 2.0829, | |
| "eval_samples_per_second": 76.335, | |
| "eval_steps_per_second": 4.801, | |
| "step": 1642 | |
| }, | |
| { | |
| "epoch": 146.67, | |
| "grad_norm": 1.1527929306030273, | |
| "learning_rate": 2.7083333333333335e-05, | |
| "loss": 0.0702, | |
| "step": 1650 | |
| }, | |
| { | |
| "epoch": 146.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.20363318920135498, | |
| "eval_runtime": 2.0224, | |
| "eval_samples_per_second": 78.618, | |
| "eval_steps_per_second": 4.945, | |
| "step": 1653 | |
| }, | |
| { | |
| "epoch": 148.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22154641151428223, | |
| "eval_runtime": 2.0286, | |
| "eval_samples_per_second": 78.378, | |
| "eval_steps_per_second": 4.929, | |
| "step": 1665 | |
| }, | |
| { | |
| "epoch": 148.98, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.21356013417243958, | |
| "eval_runtime": 1.9745, | |
| "eval_samples_per_second": 80.526, | |
| "eval_steps_per_second": 5.065, | |
| "step": 1676 | |
| }, | |
| { | |
| "epoch": 149.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.20560431480407715, | |
| "eval_runtime": 2.0343, | |
| "eval_samples_per_second": 78.161, | |
| "eval_steps_per_second": 4.916, | |
| "step": 1687 | |
| }, | |
| { | |
| "epoch": 150.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.20028233528137207, | |
| "eval_runtime": 2.0476, | |
| "eval_samples_per_second": 77.65, | |
| "eval_steps_per_second": 4.884, | |
| "step": 1698 | |
| }, | |
| { | |
| "epoch": 151.11, | |
| "grad_norm": 0.6037131547927856, | |
| "learning_rate": 2.6893939393939398e-05, | |
| "loss": 0.0676, | |
| "step": 1700 | |
| }, | |
| { | |
| "epoch": 152.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.22495229542255402, | |
| "eval_runtime": 2.0653, | |
| "eval_samples_per_second": 76.985, | |
| "eval_steps_per_second": 4.842, | |
| "step": 1710 | |
| }, | |
| { | |
| "epoch": 152.98, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.1910940259695053, | |
| "eval_runtime": 2.2097, | |
| "eval_samples_per_second": 71.955, | |
| "eval_steps_per_second": 4.525, | |
| "step": 1721 | |
| }, | |
| { | |
| "epoch": 153.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.2189728170633316, | |
| "eval_runtime": 2.049, | |
| "eval_samples_per_second": 77.598, | |
| "eval_steps_per_second": 4.88, | |
| "step": 1732 | |
| }, | |
| { | |
| "epoch": 154.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.1975589245557785, | |
| "eval_runtime": 2.0536, | |
| "eval_samples_per_second": 77.426, | |
| "eval_steps_per_second": 4.87, | |
| "step": 1743 | |
| }, | |
| { | |
| "epoch": 155.56, | |
| "grad_norm": 0.9841188788414001, | |
| "learning_rate": 2.6704545454545453e-05, | |
| "loss": 0.0674, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 156.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.18743836879730225, | |
| "eval_runtime": 2.0593, | |
| "eval_samples_per_second": 77.211, | |
| "eval_steps_per_second": 4.856, | |
| "step": 1755 | |
| }, | |
| { | |
| "epoch": 156.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2022770792245865, | |
| "eval_runtime": 2.0432, | |
| "eval_samples_per_second": 77.821, | |
| "eval_steps_per_second": 4.894, | |
| "step": 1766 | |
| }, | |
| { | |
| "epoch": 157.96, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.21527531743049622, | |
| "eval_runtime": 1.9951, | |
| "eval_samples_per_second": 79.694, | |
| "eval_steps_per_second": 5.012, | |
| "step": 1777 | |
| }, | |
| { | |
| "epoch": 158.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.22451625764369965, | |
| "eval_runtime": 2.1442, | |
| "eval_samples_per_second": 74.155, | |
| "eval_steps_per_second": 4.664, | |
| "step": 1788 | |
| }, | |
| { | |
| "epoch": 160.0, | |
| "grad_norm": 0.5377254486083984, | |
| "learning_rate": 2.6515151515151516e-05, | |
| "loss": 0.0548, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 160.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.2431740015745163, | |
| "eval_runtime": 2.2699, | |
| "eval_samples_per_second": 70.046, | |
| "eval_steps_per_second": 4.405, | |
| "step": 1800 | |
| }, | |
| { | |
| "epoch": 160.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2071038782596588, | |
| "eval_runtime": 2.0506, | |
| "eval_samples_per_second": 77.538, | |
| "eval_steps_per_second": 4.877, | |
| "step": 1811 | |
| }, | |
| { | |
| "epoch": 161.96, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.18368059396743774, | |
| "eval_runtime": 2.2081, | |
| "eval_samples_per_second": 72.006, | |
| "eval_steps_per_second": 4.529, | |
| "step": 1822 | |
| }, | |
| { | |
| "epoch": 162.93, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.19161438941955566, | |
| "eval_runtime": 1.9999, | |
| "eval_samples_per_second": 79.505, | |
| "eval_steps_per_second": 5.0, | |
| "step": 1833 | |
| }, | |
| { | |
| "epoch": 164.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.22212089598178864, | |
| "eval_runtime": 2.0001, | |
| "eval_samples_per_second": 79.497, | |
| "eval_steps_per_second": 5.0, | |
| "step": 1845 | |
| }, | |
| { | |
| "epoch": 164.44, | |
| "grad_norm": 0.5433365702629089, | |
| "learning_rate": 2.6325757575757575e-05, | |
| "loss": 0.0616, | |
| "step": 1850 | |
| }, | |
| { | |
| "epoch": 164.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.21204246580600739, | |
| "eval_runtime": 2.035, | |
| "eval_samples_per_second": 78.132, | |
| "eval_steps_per_second": 4.914, | |
| "step": 1856 | |
| }, | |
| { | |
| "epoch": 165.96, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.18882697820663452, | |
| "eval_runtime": 2.0581, | |
| "eval_samples_per_second": 77.256, | |
| "eval_steps_per_second": 4.859, | |
| "step": 1867 | |
| }, | |
| { | |
| "epoch": 166.93, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.19714578986167908, | |
| "eval_runtime": 2.002, | |
| "eval_samples_per_second": 79.422, | |
| "eval_steps_per_second": 4.995, | |
| "step": 1878 | |
| }, | |
| { | |
| "epoch": 168.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.21613995730876923, | |
| "eval_runtime": 2.0979, | |
| "eval_samples_per_second": 75.789, | |
| "eval_steps_per_second": 4.767, | |
| "step": 1890 | |
| }, | |
| { | |
| "epoch": 168.89, | |
| "grad_norm": 0.4616011083126068, | |
| "learning_rate": 2.6136363636363637e-05, | |
| "loss": 0.0467, | |
| "step": 1900 | |
| }, | |
| { | |
| "epoch": 168.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22824302315711975, | |
| "eval_runtime": 2.0023, | |
| "eval_samples_per_second": 79.407, | |
| "eval_steps_per_second": 4.994, | |
| "step": 1901 | |
| }, | |
| { | |
| "epoch": 169.96, | |
| "eval_accuracy": 0.9056603773584906, | |
| "eval_loss": 0.31181007623672485, | |
| "eval_runtime": 2.2272, | |
| "eval_samples_per_second": 71.39, | |
| "eval_steps_per_second": 4.49, | |
| "step": 1912 | |
| }, | |
| { | |
| "epoch": 170.93, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.23191651701927185, | |
| "eval_runtime": 2.0759, | |
| "eval_samples_per_second": 76.592, | |
| "eval_steps_per_second": 4.817, | |
| "step": 1923 | |
| }, | |
| { | |
| "epoch": 172.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.27404358983039856, | |
| "eval_runtime": 2.0769, | |
| "eval_samples_per_second": 76.555, | |
| "eval_steps_per_second": 4.815, | |
| "step": 1935 | |
| }, | |
| { | |
| "epoch": 172.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2666384279727936, | |
| "eval_runtime": 2.1046, | |
| "eval_samples_per_second": 75.548, | |
| "eval_steps_per_second": 4.751, | |
| "step": 1946 | |
| }, | |
| { | |
| "epoch": 173.33, | |
| "grad_norm": 1.0961925983428955, | |
| "learning_rate": 2.59469696969697e-05, | |
| "loss": 0.0609, | |
| "step": 1950 | |
| }, | |
| { | |
| "epoch": 173.96, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23152852058410645, | |
| "eval_runtime": 2.0323, | |
| "eval_samples_per_second": 78.237, | |
| "eval_steps_per_second": 4.921, | |
| "step": 1957 | |
| }, | |
| { | |
| "epoch": 174.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22292692959308624, | |
| "eval_runtime": 2.0749, | |
| "eval_samples_per_second": 76.629, | |
| "eval_steps_per_second": 4.819, | |
| "step": 1968 | |
| }, | |
| { | |
| "epoch": 176.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.21578945219516754, | |
| "eval_runtime": 2.0472, | |
| "eval_samples_per_second": 77.668, | |
| "eval_steps_per_second": 4.885, | |
| "step": 1980 | |
| }, | |
| { | |
| "epoch": 176.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.22257991135120392, | |
| "eval_runtime": 2.1698, | |
| "eval_samples_per_second": 73.278, | |
| "eval_steps_per_second": 4.609, | |
| "step": 1991 | |
| }, | |
| { | |
| "epoch": 177.78, | |
| "grad_norm": 1.6022953987121582, | |
| "learning_rate": 2.575757575757576e-05, | |
| "loss": 0.0522, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 177.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.22241446375846863, | |
| "eval_runtime": 2.0341, | |
| "eval_samples_per_second": 78.167, | |
| "eval_steps_per_second": 4.916, | |
| "step": 2002 | |
| }, | |
| { | |
| "epoch": 178.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.21375904977321625, | |
| "eval_runtime": 2.1094, | |
| "eval_samples_per_second": 75.377, | |
| "eval_steps_per_second": 4.741, | |
| "step": 2013 | |
| }, | |
| { | |
| "epoch": 180.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.21769364178180695, | |
| "eval_runtime": 1.9898, | |
| "eval_samples_per_second": 79.909, | |
| "eval_steps_per_second": 5.026, | |
| "step": 2025 | |
| }, | |
| { | |
| "epoch": 180.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.19169649481773376, | |
| "eval_runtime": 2.1326, | |
| "eval_samples_per_second": 74.558, | |
| "eval_steps_per_second": 4.689, | |
| "step": 2036 | |
| }, | |
| { | |
| "epoch": 181.96, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.19741381704807281, | |
| "eval_runtime": 2.1931, | |
| "eval_samples_per_second": 72.5, | |
| "eval_steps_per_second": 4.56, | |
| "step": 2047 | |
| }, | |
| { | |
| "epoch": 182.22, | |
| "grad_norm": 0.7399430274963379, | |
| "learning_rate": 2.556818181818182e-05, | |
| "loss": 0.0515, | |
| "step": 2050 | |
| }, | |
| { | |
| "epoch": 182.93, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.21981187164783478, | |
| "eval_runtime": 2.0417, | |
| "eval_samples_per_second": 77.878, | |
| "eval_steps_per_second": 4.898, | |
| "step": 2058 | |
| }, | |
| { | |
| "epoch": 184.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.24247391521930695, | |
| "eval_runtime": 2.1999, | |
| "eval_samples_per_second": 72.278, | |
| "eval_steps_per_second": 4.546, | |
| "step": 2070 | |
| }, | |
| { | |
| "epoch": 184.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.24488882720470428, | |
| "eval_runtime": 2.0767, | |
| "eval_samples_per_second": 76.565, | |
| "eval_steps_per_second": 4.815, | |
| "step": 2081 | |
| }, | |
| { | |
| "epoch": 185.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.23463451862335205, | |
| "eval_runtime": 2.0674, | |
| "eval_samples_per_second": 76.907, | |
| "eval_steps_per_second": 4.837, | |
| "step": 2092 | |
| }, | |
| { | |
| "epoch": 186.67, | |
| "grad_norm": 0.67291659116745, | |
| "learning_rate": 2.5378787878787876e-05, | |
| "loss": 0.045, | |
| "step": 2100 | |
| }, | |
| { | |
| "epoch": 186.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23308323323726654, | |
| "eval_runtime": 2.2603, | |
| "eval_samples_per_second": 70.346, | |
| "eval_steps_per_second": 4.424, | |
| "step": 2103 | |
| }, | |
| { | |
| "epoch": 188.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2660614252090454, | |
| "eval_runtime": 2.0509, | |
| "eval_samples_per_second": 77.527, | |
| "eval_steps_per_second": 4.876, | |
| "step": 2115 | |
| }, | |
| { | |
| "epoch": 188.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22910529375076294, | |
| "eval_runtime": 2.0536, | |
| "eval_samples_per_second": 77.423, | |
| "eval_steps_per_second": 4.869, | |
| "step": 2126 | |
| }, | |
| { | |
| "epoch": 189.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.23477251827716827, | |
| "eval_runtime": 2.0092, | |
| "eval_samples_per_second": 79.134, | |
| "eval_steps_per_second": 4.977, | |
| "step": 2137 | |
| }, | |
| { | |
| "epoch": 190.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23087622225284576, | |
| "eval_runtime": 2.0403, | |
| "eval_samples_per_second": 77.929, | |
| "eval_steps_per_second": 4.901, | |
| "step": 2148 | |
| }, | |
| { | |
| "epoch": 191.11, | |
| "grad_norm": 0.11660194396972656, | |
| "learning_rate": 2.518939393939394e-05, | |
| "loss": 0.0403, | |
| "step": 2150 | |
| }, | |
| { | |
| "epoch": 192.0, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.27889564633369446, | |
| "eval_runtime": 2.0147, | |
| "eval_samples_per_second": 78.921, | |
| "eval_steps_per_second": 4.964, | |
| "step": 2160 | |
| }, | |
| { | |
| "epoch": 192.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2540048658847809, | |
| "eval_runtime": 2.1082, | |
| "eval_samples_per_second": 75.42, | |
| "eval_steps_per_second": 4.743, | |
| "step": 2171 | |
| }, | |
| { | |
| "epoch": 193.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.23720349371433258, | |
| "eval_runtime": 2.1791, | |
| "eval_samples_per_second": 72.966, | |
| "eval_steps_per_second": 4.589, | |
| "step": 2182 | |
| }, | |
| { | |
| "epoch": 194.93, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2507873773574829, | |
| "eval_runtime": 1.986, | |
| "eval_samples_per_second": 80.061, | |
| "eval_steps_per_second": 5.035, | |
| "step": 2193 | |
| }, | |
| { | |
| "epoch": 195.56, | |
| "grad_norm": 0.8518453240394592, | |
| "learning_rate": 2.5e-05, | |
| "loss": 0.0476, | |
| "step": 2200 | |
| }, | |
| { | |
| "epoch": 196.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2193620353937149, | |
| "eval_runtime": 2.1819, | |
| "eval_samples_per_second": 72.874, | |
| "eval_steps_per_second": 4.583, | |
| "step": 2205 | |
| }, | |
| { | |
| "epoch": 196.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.23066306114196777, | |
| "eval_runtime": 2.0482, | |
| "eval_samples_per_second": 77.628, | |
| "eval_steps_per_second": 4.882, | |
| "step": 2216 | |
| }, | |
| { | |
| "epoch": 197.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2719472646713257, | |
| "eval_runtime": 1.9901, | |
| "eval_samples_per_second": 79.896, | |
| "eval_steps_per_second": 5.025, | |
| "step": 2227 | |
| }, | |
| { | |
| "epoch": 198.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.28040099143981934, | |
| "eval_runtime": 2.0617, | |
| "eval_samples_per_second": 77.122, | |
| "eval_steps_per_second": 4.85, | |
| "step": 2238 | |
| }, | |
| { | |
| "epoch": 200.0, | |
| "grad_norm": 0.09039253741502762, | |
| "learning_rate": 2.481060606060606e-05, | |
| "loss": 0.0457, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 200.0, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2755438983440399, | |
| "eval_runtime": 2.0773, | |
| "eval_samples_per_second": 76.541, | |
| "eval_steps_per_second": 4.814, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 200.98, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.2353052794933319, | |
| "eval_runtime": 1.9899, | |
| "eval_samples_per_second": 79.904, | |
| "eval_steps_per_second": 5.025, | |
| "step": 2261 | |
| }, | |
| { | |
| "epoch": 201.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.21893078088760376, | |
| "eval_runtime": 2.1045, | |
| "eval_samples_per_second": 75.552, | |
| "eval_steps_per_second": 4.752, | |
| "step": 2272 | |
| }, | |
| { | |
| "epoch": 202.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.21625204384326935, | |
| "eval_runtime": 2.0731, | |
| "eval_samples_per_second": 76.697, | |
| "eval_steps_per_second": 4.824, | |
| "step": 2283 | |
| }, | |
| { | |
| "epoch": 204.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.2110479772090912, | |
| "eval_runtime": 2.1463, | |
| "eval_samples_per_second": 74.079, | |
| "eval_steps_per_second": 4.659, | |
| "step": 2295 | |
| }, | |
| { | |
| "epoch": 204.44, | |
| "grad_norm": 0.9943685531616211, | |
| "learning_rate": 2.4621212121212123e-05, | |
| "loss": 0.0393, | |
| "step": 2300 | |
| }, | |
| { | |
| "epoch": 204.98, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23164410889148712, | |
| "eval_runtime": 2.0606, | |
| "eval_samples_per_second": 77.162, | |
| "eval_steps_per_second": 4.853, | |
| "step": 2306 | |
| }, | |
| { | |
| "epoch": 205.96, | |
| "eval_accuracy": 0.9308176100628931, | |
| "eval_loss": 0.24650876224040985, | |
| "eval_runtime": 2.0011, | |
| "eval_samples_per_second": 79.455, | |
| "eval_steps_per_second": 4.997, | |
| "step": 2317 | |
| }, | |
| { | |
| "epoch": 206.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.23763243854045868, | |
| "eval_runtime": 2.0999, | |
| "eval_samples_per_second": 75.719, | |
| "eval_steps_per_second": 4.762, | |
| "step": 2328 | |
| }, | |
| { | |
| "epoch": 208.0, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.2170635461807251, | |
| "eval_runtime": 2.1575, | |
| "eval_samples_per_second": 73.697, | |
| "eval_steps_per_second": 4.635, | |
| "step": 2340 | |
| }, | |
| { | |
| "epoch": 208.89, | |
| "grad_norm": 0.46173095703125, | |
| "learning_rate": 2.4431818181818185e-05, | |
| "loss": 0.0443, | |
| "step": 2350 | |
| }, | |
| { | |
| "epoch": 208.98, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.23952844738960266, | |
| "eval_runtime": 2.0014, | |
| "eval_samples_per_second": 79.445, | |
| "eval_steps_per_second": 4.997, | |
| "step": 2351 | |
| }, | |
| { | |
| "epoch": 209.96, | |
| "eval_accuracy": 0.8930817610062893, | |
| "eval_loss": 0.2906019687652588, | |
| "eval_runtime": 2.0133, | |
| "eval_samples_per_second": 78.977, | |
| "eval_steps_per_second": 4.967, | |
| "step": 2362 | |
| }, | |
| { | |
| "epoch": 210.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2608316242694855, | |
| "eval_runtime": 2.1558, | |
| "eval_samples_per_second": 73.755, | |
| "eval_steps_per_second": 4.639, | |
| "step": 2373 | |
| }, | |
| { | |
| "epoch": 212.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.23210321366786957, | |
| "eval_runtime": 2.0606, | |
| "eval_samples_per_second": 77.161, | |
| "eval_steps_per_second": 4.853, | |
| "step": 2385 | |
| }, | |
| { | |
| "epoch": 212.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.24640053510665894, | |
| "eval_runtime": 2.2148, | |
| "eval_samples_per_second": 71.79, | |
| "eval_steps_per_second": 4.515, | |
| "step": 2396 | |
| }, | |
| { | |
| "epoch": 213.33, | |
| "grad_norm": 0.94215327501297, | |
| "learning_rate": 2.4242424242424244e-05, | |
| "loss": 0.0539, | |
| "step": 2400 | |
| }, | |
| { | |
| "epoch": 213.96, | |
| "eval_accuracy": 0.9182389937106918, | |
| "eval_loss": 0.2441636025905609, | |
| "eval_runtime": 2.172, | |
| "eval_samples_per_second": 73.203, | |
| "eval_steps_per_second": 4.604, | |
| "step": 2407 | |
| }, | |
| { | |
| "epoch": 214.93, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.2511676847934723, | |
| "eval_runtime": 2.0176, | |
| "eval_samples_per_second": 78.806, | |
| "eval_steps_per_second": 4.956, | |
| "step": 2418 | |
| }, | |
| { | |
| "epoch": 216.0, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.22649481892585754, | |
| "eval_runtime": 2.0103, | |
| "eval_samples_per_second": 79.091, | |
| "eval_steps_per_second": 4.974, | |
| "step": 2430 | |
| }, | |
| { | |
| "epoch": 216.98, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.21274729073047638, | |
| "eval_runtime": 2.0508, | |
| "eval_samples_per_second": 77.529, | |
| "eval_steps_per_second": 4.876, | |
| "step": 2441 | |
| }, | |
| { | |
| "epoch": 217.78, | |
| "grad_norm": 0.7381362318992615, | |
| "learning_rate": 2.4053030303030303e-05, | |
| "loss": 0.0415, | |
| "step": 2450 | |
| }, | |
| { | |
| "epoch": 217.96, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.284365177154541, | |
| "eval_runtime": 2.0321, | |
| "eval_samples_per_second": 78.244, | |
| "eval_steps_per_second": 4.921, | |
| "step": 2452 | |
| }, | |
| { | |
| "epoch": 218.93, | |
| "eval_accuracy": 0.9433962264150944, | |
| "eval_loss": 0.24891048669815063, | |
| "eval_runtime": 2.0843, | |
| "eval_samples_per_second": 76.285, | |
| "eval_steps_per_second": 4.798, | |
| "step": 2463 | |
| }, | |
| { | |
| "epoch": 220.0, | |
| "eval_accuracy": 0.949685534591195, | |
| "eval_loss": 0.21200108528137207, | |
| "eval_runtime": 1.9938, | |
| "eval_samples_per_second": 79.748, | |
| "eval_steps_per_second": 5.016, | |
| "step": 2475 | |
| }, | |
| { | |
| "epoch": 220.98, | |
| "eval_accuracy": 0.9559748427672956, | |
| "eval_loss": 0.2015109807252884, | |
| "eval_runtime": 2.2098, | |
| "eval_samples_per_second": 71.951, | |
| "eval_steps_per_second": 4.525, | |
| "step": 2486 | |
| }, | |
| { | |
| "epoch": 221.96, | |
| "eval_accuracy": 0.9245283018867925, | |
| "eval_loss": 0.25095799565315247, | |
| "eval_runtime": 2.0817, | |
| "eval_samples_per_second": 76.381, | |
| "eval_steps_per_second": 4.804, | |
| "step": 2497 | |
| }, | |
| { | |
| "epoch": 222.22, | |
| "grad_norm": 0.3756774961948395, | |
| "learning_rate": 2.3863636363636362e-05, | |
| "loss": 0.0325, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 222.93, | |
| "eval_accuracy": 0.9371069182389937, | |
| "eval_loss": 0.2875436246395111, | |
| "eval_runtime": 2.0148, | |
| "eval_samples_per_second": 78.915, | |
| "eval_steps_per_second": 4.963, | |
| "step": 2508 | |
| }, | |
| { | |
| "epoch": 224.0, | |
| "eval_accuracy": 0.9622641509433962, | |
| "eval_loss": 0.19936275482177734, | |
| "eval_runtime": 2.0208, | |
| "eval_samples_per_second": 78.682, | |
| "eval_steps_per_second": 4.949, | |
| "step": 2520 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 8800, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 800, | |
| "save_steps": 500, | |
| "total_flos": 1.406670474295296e+18, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |