| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 100.0, | |
| "eval_steps": 500, | |
| "global_step": 900, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.1111111111111111, | |
| "eval_loss": 1.3930704593658447, | |
| "eval_runtime": 0.7497, | |
| "eval_samples_per_second": 24.01, | |
| "eval_steps_per_second": 4.002, | |
| "step": 9 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.16666666666666666, | |
| "eval_loss": 1.376913070678711, | |
| "eval_runtime": 0.5755, | |
| "eval_samples_per_second": 31.277, | |
| "eval_steps_per_second": 5.213, | |
| "step": 18 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.5555555555555556, | |
| "eval_loss": 1.3498488664627075, | |
| "eval_runtime": 0.5792, | |
| "eval_samples_per_second": 31.078, | |
| "eval_steps_per_second": 5.18, | |
| "step": 27 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6666666666666666, | |
| "eval_loss": 1.3331588506698608, | |
| "eval_runtime": 0.5535, | |
| "eval_samples_per_second": 32.518, | |
| "eval_steps_per_second": 5.42, | |
| "step": 36 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.3144757747650146, | |
| "eval_runtime": 0.5818, | |
| "eval_samples_per_second": 30.94, | |
| "eval_steps_per_second": 5.157, | |
| "step": 45 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.300213098526001, | |
| "eval_runtime": 0.5539, | |
| "eval_samples_per_second": 32.496, | |
| "eval_steps_per_second": 5.416, | |
| "step": 54 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.2916626930236816, | |
| "eval_runtime": 0.5489, | |
| "eval_samples_per_second": 32.791, | |
| "eval_steps_per_second": 5.465, | |
| "step": 63 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.270052194595337, | |
| "eval_runtime": 0.5746, | |
| "eval_samples_per_second": 31.328, | |
| "eval_steps_per_second": 5.221, | |
| "step": 72 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.2555011510849, | |
| "eval_runtime": 0.5489, | |
| "eval_samples_per_second": 32.791, | |
| "eval_steps_per_second": 5.465, | |
| "step": 81 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.2477619647979736, | |
| "eval_runtime": 0.5687, | |
| "eval_samples_per_second": 31.651, | |
| "eval_steps_per_second": 5.275, | |
| "step": 90 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.2382668256759644, | |
| "eval_runtime": 0.5502, | |
| "eval_samples_per_second": 32.716, | |
| "eval_steps_per_second": 5.453, | |
| "step": 99 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 1.223071575164795, | |
| "eval_runtime": 0.5476, | |
| "eval_samples_per_second": 32.873, | |
| "eval_steps_per_second": 5.479, | |
| "step": 108 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 1.2033451795578003, | |
| "eval_runtime": 0.5538, | |
| "eval_samples_per_second": 32.503, | |
| "eval_steps_per_second": 5.417, | |
| "step": 117 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.186124563217163, | |
| "eval_runtime": 0.5442, | |
| "eval_samples_per_second": 33.075, | |
| "eval_steps_per_second": 5.512, | |
| "step": 126 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.182234287261963, | |
| "eval_runtime": 0.5735, | |
| "eval_samples_per_second": 31.387, | |
| "eval_steps_per_second": 5.231, | |
| "step": 135 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.1588956117630005, | |
| "eval_runtime": 0.5533, | |
| "eval_samples_per_second": 32.534, | |
| "eval_steps_per_second": 5.422, | |
| "step": 144 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.1478044986724854, | |
| "eval_runtime": 0.5826, | |
| "eval_samples_per_second": 30.897, | |
| "eval_steps_per_second": 5.15, | |
| "step": 153 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.1322474479675293, | |
| "eval_runtime": 0.5883, | |
| "eval_samples_per_second": 30.598, | |
| "eval_steps_per_second": 5.1, | |
| "step": 162 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.110813021659851, | |
| "eval_runtime": 0.5594, | |
| "eval_samples_per_second": 32.177, | |
| "eval_steps_per_second": 5.363, | |
| "step": 171 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 1.1011286973953247, | |
| "eval_runtime": 0.5793, | |
| "eval_samples_per_second": 31.073, | |
| "eval_steps_per_second": 5.179, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 1.093163251876831, | |
| "eval_runtime": 0.5654, | |
| "eval_samples_per_second": 31.836, | |
| "eval_steps_per_second": 5.306, | |
| "step": 189 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 1.0637242794036865, | |
| "eval_runtime": 0.5679, | |
| "eval_samples_per_second": 31.695, | |
| "eval_steps_per_second": 5.283, | |
| "step": 198 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 1.0390856266021729, | |
| "eval_runtime": 0.574, | |
| "eval_samples_per_second": 31.358, | |
| "eval_steps_per_second": 5.226, | |
| "step": 207 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 1.0522559881210327, | |
| "eval_runtime": 0.6203, | |
| "eval_samples_per_second": 29.02, | |
| "eval_steps_per_second": 4.837, | |
| "step": 216 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 1.0473227500915527, | |
| "eval_runtime": 0.5963, | |
| "eval_samples_per_second": 30.186, | |
| "eval_steps_per_second": 5.031, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 0.9998855590820312, | |
| "eval_runtime": 0.6467, | |
| "eval_samples_per_second": 27.834, | |
| "eval_steps_per_second": 4.639, | |
| "step": 234 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 1.0170878171920776, | |
| "eval_runtime": 0.5994, | |
| "eval_samples_per_second": 30.032, | |
| "eval_steps_per_second": 5.005, | |
| "step": 243 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 1.028573989868164, | |
| "eval_runtime": 0.6332, | |
| "eval_samples_per_second": 28.427, | |
| "eval_steps_per_second": 4.738, | |
| "step": 252 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.7222222222222222, | |
| "eval_loss": 1.0290330648422241, | |
| "eval_runtime": 0.5676, | |
| "eval_samples_per_second": 31.712, | |
| "eval_steps_per_second": 5.285, | |
| "step": 261 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.9571393132209778, | |
| "eval_runtime": 0.5753, | |
| "eval_samples_per_second": 31.286, | |
| "eval_steps_per_second": 5.214, | |
| "step": 270 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.9450912475585938, | |
| "eval_runtime": 0.5556, | |
| "eval_samples_per_second": 32.4, | |
| "eval_steps_per_second": 5.4, | |
| "step": 279 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.85059654712677, | |
| "eval_runtime": 0.5543, | |
| "eval_samples_per_second": 32.473, | |
| "eval_steps_per_second": 5.412, | |
| "step": 288 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.8480208516120911, | |
| "eval_runtime": 0.5762, | |
| "eval_samples_per_second": 31.241, | |
| "eval_steps_per_second": 5.207, | |
| "step": 297 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.8499312400817871, | |
| "eval_runtime": 0.555, | |
| "eval_samples_per_second": 32.433, | |
| "eval_steps_per_second": 5.405, | |
| "step": 306 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.8403282165527344, | |
| "eval_runtime": 0.5709, | |
| "eval_samples_per_second": 31.532, | |
| "eval_steps_per_second": 5.255, | |
| "step": 315 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.7771400809288025, | |
| "eval_runtime": 0.5569, | |
| "eval_samples_per_second": 32.319, | |
| "eval_steps_per_second": 5.387, | |
| "step": 324 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.7591123580932617, | |
| "eval_runtime": 0.6487, | |
| "eval_samples_per_second": 27.747, | |
| "eval_steps_per_second": 4.625, | |
| "step": 333 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.7476389408111572, | |
| "eval_runtime": 0.5654, | |
| "eval_samples_per_second": 31.835, | |
| "eval_steps_per_second": 5.306, | |
| "step": 342 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.7831047773361206, | |
| "eval_runtime": 0.5458, | |
| "eval_samples_per_second": 32.977, | |
| "eval_steps_per_second": 5.496, | |
| "step": 351 | |
| }, | |
| { | |
| "epoch": 40.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.7049207091331482, | |
| "eval_runtime": 0.5872, | |
| "eval_samples_per_second": 30.653, | |
| "eval_steps_per_second": 5.109, | |
| "step": 360 | |
| }, | |
| { | |
| "epoch": 41.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.6811972856521606, | |
| "eval_runtime": 0.5667, | |
| "eval_samples_per_second": 31.762, | |
| "eval_steps_per_second": 5.294, | |
| "step": 369 | |
| }, | |
| { | |
| "epoch": 42.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.6736953258514404, | |
| "eval_runtime": 0.5538, | |
| "eval_samples_per_second": 32.502, | |
| "eval_steps_per_second": 5.417, | |
| "step": 378 | |
| }, | |
| { | |
| "epoch": 43.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.6515324711799622, | |
| "eval_runtime": 0.5632, | |
| "eval_samples_per_second": 31.958, | |
| "eval_steps_per_second": 5.326, | |
| "step": 387 | |
| }, | |
| { | |
| "epoch": 44.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.6634184122085571, | |
| "eval_runtime": 1.1806, | |
| "eval_samples_per_second": 15.246, | |
| "eval_steps_per_second": 2.541, | |
| "step": 396 | |
| }, | |
| { | |
| "epoch": 45.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.6234365105628967, | |
| "eval_runtime": 0.6418, | |
| "eval_samples_per_second": 28.044, | |
| "eval_steps_per_second": 4.674, | |
| "step": 405 | |
| }, | |
| { | |
| "epoch": 46.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.8482791185379028, | |
| "eval_runtime": 0.5859, | |
| "eval_samples_per_second": 30.723, | |
| "eval_steps_per_second": 5.121, | |
| "step": 414 | |
| }, | |
| { | |
| "epoch": 47.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.7264916896820068, | |
| "eval_runtime": 0.5835, | |
| "eval_samples_per_second": 30.847, | |
| "eval_steps_per_second": 5.141, | |
| "step": 423 | |
| }, | |
| { | |
| "epoch": 48.0, | |
| "eval_accuracy": 0.7777777777777778, | |
| "eval_loss": 0.7383356094360352, | |
| "eval_runtime": 0.5549, | |
| "eval_samples_per_second": 32.437, | |
| "eval_steps_per_second": 5.406, | |
| "step": 432 | |
| }, | |
| { | |
| "epoch": 49.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.7005217671394348, | |
| "eval_runtime": 0.5608, | |
| "eval_samples_per_second": 32.095, | |
| "eval_steps_per_second": 5.349, | |
| "step": 441 | |
| }, | |
| { | |
| "epoch": 50.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.5740242004394531, | |
| "eval_runtime": 0.5536, | |
| "eval_samples_per_second": 32.517, | |
| "eval_steps_per_second": 5.42, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 51.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.5622536540031433, | |
| "eval_runtime": 0.5935, | |
| "eval_samples_per_second": 30.329, | |
| "eval_steps_per_second": 5.055, | |
| "step": 459 | |
| }, | |
| { | |
| "epoch": 52.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.557184636592865, | |
| "eval_runtime": 0.5545, | |
| "eval_samples_per_second": 32.463, | |
| "eval_steps_per_second": 5.41, | |
| "step": 468 | |
| }, | |
| { | |
| "epoch": 53.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.5769361853599548, | |
| "eval_runtime": 0.5844, | |
| "eval_samples_per_second": 30.802, | |
| "eval_steps_per_second": 5.134, | |
| "step": 477 | |
| }, | |
| { | |
| "epoch": 54.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.550247311592102, | |
| "eval_runtime": 0.615, | |
| "eval_samples_per_second": 29.266, | |
| "eval_steps_per_second": 4.878, | |
| "step": 486 | |
| }, | |
| { | |
| "epoch": 55.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.6281833052635193, | |
| "eval_runtime": 0.5457, | |
| "eval_samples_per_second": 32.988, | |
| "eval_steps_per_second": 5.498, | |
| "step": 495 | |
| }, | |
| { | |
| "epoch": 55.56, | |
| "learning_rate": 2.2222222222222223e-05, | |
| "loss": 1.1157, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 56.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.5906974077224731, | |
| "eval_runtime": 0.5821, | |
| "eval_samples_per_second": 30.92, | |
| "eval_steps_per_second": 5.153, | |
| "step": 504 | |
| }, | |
| { | |
| "epoch": 57.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.6346855163574219, | |
| "eval_runtime": 0.5542, | |
| "eval_samples_per_second": 32.481, | |
| "eval_steps_per_second": 5.414, | |
| "step": 513 | |
| }, | |
| { | |
| "epoch": 58.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.6413730382919312, | |
| "eval_runtime": 0.5829, | |
| "eval_samples_per_second": 30.882, | |
| "eval_steps_per_second": 5.147, | |
| "step": 522 | |
| }, | |
| { | |
| "epoch": 59.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.465614378452301, | |
| "eval_runtime": 0.5965, | |
| "eval_samples_per_second": 30.178, | |
| "eval_steps_per_second": 5.03, | |
| "step": 531 | |
| }, | |
| { | |
| "epoch": 60.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.48488152027130127, | |
| "eval_runtime": 0.5621, | |
| "eval_samples_per_second": 32.02, | |
| "eval_steps_per_second": 5.337, | |
| "step": 540 | |
| }, | |
| { | |
| "epoch": 61.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.8426976799964905, | |
| "eval_runtime": 0.5798, | |
| "eval_samples_per_second": 31.045, | |
| "eval_steps_per_second": 5.174, | |
| "step": 549 | |
| }, | |
| { | |
| "epoch": 62.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.5708574652671814, | |
| "eval_runtime": 0.5531, | |
| "eval_samples_per_second": 32.544, | |
| "eval_steps_per_second": 5.424, | |
| "step": 558 | |
| }, | |
| { | |
| "epoch": 63.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.5026788115501404, | |
| "eval_runtime": 0.5575, | |
| "eval_samples_per_second": 32.289, | |
| "eval_steps_per_second": 5.381, | |
| "step": 567 | |
| }, | |
| { | |
| "epoch": 64.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.5724208354949951, | |
| "eval_runtime": 0.5481, | |
| "eval_samples_per_second": 32.844, | |
| "eval_steps_per_second": 5.474, | |
| "step": 576 | |
| }, | |
| { | |
| "epoch": 65.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.5301716327667236, | |
| "eval_runtime": 0.553, | |
| "eval_samples_per_second": 32.551, | |
| "eval_steps_per_second": 5.425, | |
| "step": 585 | |
| }, | |
| { | |
| "epoch": 66.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.5272272825241089, | |
| "eval_runtime": 0.5566, | |
| "eval_samples_per_second": 32.34, | |
| "eval_steps_per_second": 5.39, | |
| "step": 594 | |
| }, | |
| { | |
| "epoch": 67.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.5444329380989075, | |
| "eval_runtime": 0.555, | |
| "eval_samples_per_second": 32.435, | |
| "eval_steps_per_second": 5.406, | |
| "step": 603 | |
| }, | |
| { | |
| "epoch": 68.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.3936518132686615, | |
| "eval_runtime": 0.6001, | |
| "eval_samples_per_second": 29.994, | |
| "eval_steps_per_second": 4.999, | |
| "step": 612 | |
| }, | |
| { | |
| "epoch": 69.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.41802236437797546, | |
| "eval_runtime": 0.5658, | |
| "eval_samples_per_second": 31.816, | |
| "eval_steps_per_second": 5.303, | |
| "step": 621 | |
| }, | |
| { | |
| "epoch": 70.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.5185115337371826, | |
| "eval_runtime": 0.5619, | |
| "eval_samples_per_second": 32.036, | |
| "eval_steps_per_second": 5.339, | |
| "step": 630 | |
| }, | |
| { | |
| "epoch": 71.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.39606520533561707, | |
| "eval_runtime": 0.5983, | |
| "eval_samples_per_second": 30.085, | |
| "eval_steps_per_second": 5.014, | |
| "step": 639 | |
| }, | |
| { | |
| "epoch": 72.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.3859682083129883, | |
| "eval_runtime": 0.5757, | |
| "eval_samples_per_second": 31.268, | |
| "eval_steps_per_second": 5.211, | |
| "step": 648 | |
| }, | |
| { | |
| "epoch": 73.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.39656686782836914, | |
| "eval_runtime": 0.6158, | |
| "eval_samples_per_second": 29.228, | |
| "eval_steps_per_second": 4.871, | |
| "step": 657 | |
| }, | |
| { | |
| "epoch": 74.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.39676183462142944, | |
| "eval_runtime": 0.6324, | |
| "eval_samples_per_second": 28.462, | |
| "eval_steps_per_second": 4.744, | |
| "step": 666 | |
| }, | |
| { | |
| "epoch": 75.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.4546321630477905, | |
| "eval_runtime": 0.5605, | |
| "eval_samples_per_second": 32.114, | |
| "eval_steps_per_second": 5.352, | |
| "step": 675 | |
| }, | |
| { | |
| "epoch": 76.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.4021334648132324, | |
| "eval_runtime": 0.553, | |
| "eval_samples_per_second": 32.55, | |
| "eval_steps_per_second": 5.425, | |
| "step": 684 | |
| }, | |
| { | |
| "epoch": 77.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.414422869682312, | |
| "eval_runtime": 0.5759, | |
| "eval_samples_per_second": 31.256, | |
| "eval_steps_per_second": 5.209, | |
| "step": 693 | |
| }, | |
| { | |
| "epoch": 78.0, | |
| "eval_accuracy": 1.0, | |
| "eval_loss": 0.35500773787498474, | |
| "eval_runtime": 0.5802, | |
| "eval_samples_per_second": 31.024, | |
| "eval_steps_per_second": 5.171, | |
| "step": 702 | |
| }, | |
| { | |
| "epoch": 79.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.3838707506656647, | |
| "eval_runtime": 0.5616, | |
| "eval_samples_per_second": 32.052, | |
| "eval_steps_per_second": 5.342, | |
| "step": 711 | |
| }, | |
| { | |
| "epoch": 80.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.437086820602417, | |
| "eval_runtime": 0.554, | |
| "eval_samples_per_second": 32.49, | |
| "eval_steps_per_second": 5.415, | |
| "step": 720 | |
| }, | |
| { | |
| "epoch": 81.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.36943355202674866, | |
| "eval_runtime": 0.5894, | |
| "eval_samples_per_second": 30.537, | |
| "eval_steps_per_second": 5.09, | |
| "step": 729 | |
| }, | |
| { | |
| "epoch": 82.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.36648380756378174, | |
| "eval_runtime": 0.5615, | |
| "eval_samples_per_second": 32.058, | |
| "eval_steps_per_second": 5.343, | |
| "step": 738 | |
| }, | |
| { | |
| "epoch": 83.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.4732191264629364, | |
| "eval_runtime": 0.5632, | |
| "eval_samples_per_second": 31.959, | |
| "eval_steps_per_second": 5.327, | |
| "step": 747 | |
| }, | |
| { | |
| "epoch": 84.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.4652000665664673, | |
| "eval_runtime": 0.592, | |
| "eval_samples_per_second": 30.406, | |
| "eval_steps_per_second": 5.068, | |
| "step": 756 | |
| }, | |
| { | |
| "epoch": 85.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.670432448387146, | |
| "eval_runtime": 0.5801, | |
| "eval_samples_per_second": 31.028, | |
| "eval_steps_per_second": 5.171, | |
| "step": 765 | |
| }, | |
| { | |
| "epoch": 86.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.7354382872581482, | |
| "eval_runtime": 0.5599, | |
| "eval_samples_per_second": 32.151, | |
| "eval_steps_per_second": 5.359, | |
| "step": 774 | |
| }, | |
| { | |
| "epoch": 87.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.5439589023590088, | |
| "eval_runtime": 0.5486, | |
| "eval_samples_per_second": 32.811, | |
| "eval_steps_per_second": 5.468, | |
| "step": 783 | |
| }, | |
| { | |
| "epoch": 88.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.4809061586856842, | |
| "eval_runtime": 0.6101, | |
| "eval_samples_per_second": 29.505, | |
| "eval_steps_per_second": 4.917, | |
| "step": 792 | |
| }, | |
| { | |
| "epoch": 89.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.6704312562942505, | |
| "eval_runtime": 0.5954, | |
| "eval_samples_per_second": 30.229, | |
| "eval_steps_per_second": 5.038, | |
| "step": 801 | |
| }, | |
| { | |
| "epoch": 90.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.41327810287475586, | |
| "eval_runtime": 0.5783, | |
| "eval_samples_per_second": 31.124, | |
| "eval_steps_per_second": 5.187, | |
| "step": 810 | |
| }, | |
| { | |
| "epoch": 91.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.350969523191452, | |
| "eval_runtime": 0.5507, | |
| "eval_samples_per_second": 32.686, | |
| "eval_steps_per_second": 5.448, | |
| "step": 819 | |
| }, | |
| { | |
| "epoch": 92.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.39819759130477905, | |
| "eval_runtime": 0.5465, | |
| "eval_samples_per_second": 32.939, | |
| "eval_steps_per_second": 5.49, | |
| "step": 828 | |
| }, | |
| { | |
| "epoch": 93.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.34932640194892883, | |
| "eval_runtime": 0.5635, | |
| "eval_samples_per_second": 31.941, | |
| "eval_steps_per_second": 5.323, | |
| "step": 837 | |
| }, | |
| { | |
| "epoch": 94.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.48363664746284485, | |
| "eval_runtime": 0.5411, | |
| "eval_samples_per_second": 33.263, | |
| "eval_steps_per_second": 5.544, | |
| "step": 846 | |
| }, | |
| { | |
| "epoch": 95.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.4434005320072174, | |
| "eval_runtime": 0.5964, | |
| "eval_samples_per_second": 30.181, | |
| "eval_steps_per_second": 5.03, | |
| "step": 855 | |
| }, | |
| { | |
| "epoch": 96.0, | |
| "eval_accuracy": 0.8333333333333334, | |
| "eval_loss": 0.4290742874145508, | |
| "eval_runtime": 0.5584, | |
| "eval_samples_per_second": 32.235, | |
| "eval_steps_per_second": 5.373, | |
| "step": 864 | |
| }, | |
| { | |
| "epoch": 97.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.34131091833114624, | |
| "eval_runtime": 0.5715, | |
| "eval_samples_per_second": 31.493, | |
| "eval_steps_per_second": 5.249, | |
| "step": 873 | |
| }, | |
| { | |
| "epoch": 98.0, | |
| "eval_accuracy": 0.8888888888888888, | |
| "eval_loss": 0.3645610511302948, | |
| "eval_runtime": 0.5506, | |
| "eval_samples_per_second": 32.692, | |
| "eval_steps_per_second": 5.449, | |
| "step": 882 | |
| }, | |
| { | |
| "epoch": 99.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.5591509938240051, | |
| "eval_runtime": 0.5908, | |
| "eval_samples_per_second": 30.468, | |
| "eval_steps_per_second": 5.078, | |
| "step": 891 | |
| }, | |
| { | |
| "epoch": 100.0, | |
| "eval_accuracy": 0.9444444444444444, | |
| "eval_loss": 0.48491573333740234, | |
| "eval_runtime": 0.5689, | |
| "eval_samples_per_second": 31.643, | |
| "eval_steps_per_second": 5.274, | |
| "step": 900 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 900, | |
| "num_train_epochs": 100, | |
| "save_steps": 500, | |
| "total_flos": 1.507976427331584e+17, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |