| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 37.0, | |
| "global_step": 3922, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.3832186408159307e-05, | |
| "loss": 3.0358, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.543347000166316, | |
| "eval_loss": 2.7585761547088623, | |
| "eval_runtime": 25.1028, | |
| "eval_samples_per_second": 70.51, | |
| "eval_steps_per_second": 0.239, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.5888124272106204e-05, | |
| "loss": 2.7114, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.55558049451544, | |
| "eval_loss": 2.6144614219665527, | |
| "eval_runtime": 24.4673, | |
| "eval_samples_per_second": 72.341, | |
| "eval_steps_per_second": 0.245, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.7090770826327895e-05, | |
| "loss": 2.4868, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.6212061200839091, | |
| "eval_loss": 2.034228563308716, | |
| "eval_runtime": 24.2933, | |
| "eval_samples_per_second": 72.86, | |
| "eval_steps_per_second": 0.247, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.7944062136053104e-05, | |
| "loss": 2.1388, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.6854297952501626, | |
| "eval_loss": 1.5866005420684814, | |
| "eval_runtime": 24.3894, | |
| "eval_samples_per_second": 72.572, | |
| "eval_steps_per_second": 0.246, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 1.860592629580032e-05, | |
| "loss": 1.8311, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7191362268359381, | |
| "eval_loss": 1.3652117252349854, | |
| "eval_runtime": 24.3545, | |
| "eval_samples_per_second": 72.677, | |
| "eval_steps_per_second": 0.246, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 1.9146708690274792e-05, | |
| "loss": 1.5704, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7248188353074707, | |
| "eval_loss": 1.3337750434875488, | |
| "eval_runtime": 24.3217, | |
| "eval_samples_per_second": 72.775, | |
| "eval_steps_per_second": 0.247, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.9603933689955228e-05, | |
| "loss": 1.4498, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7381350781490347, | |
| "eval_loss": 1.2443161010742188, | |
| "eval_runtime": 24.5188, | |
| "eval_samples_per_second": 72.189, | |
| "eval_steps_per_second": 0.245, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3744, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7413300423856433, | |
| "eval_loss": 1.2314091920852661, | |
| "eval_runtime": 24.3764, | |
| "eval_samples_per_second": 72.611, | |
| "eval_steps_per_second": 0.246, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3207, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7439575072718211, | |
| "eval_loss": 1.2015495300292969, | |
| "eval_runtime": 24.5167, | |
| "eval_samples_per_second": 72.196, | |
| "eval_steps_per_second": 0.245, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2892, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7558549767302207, | |
| "eval_loss": 1.1335448026657104, | |
| "eval_runtime": 24.3508, | |
| "eval_samples_per_second": 72.688, | |
| "eval_steps_per_second": 0.246, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.25, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7594773916347587, | |
| "eval_loss": 1.1179267168045044, | |
| "eval_runtime": 24.3594, | |
| "eval_samples_per_second": 72.662, | |
| "eval_steps_per_second": 0.246, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2274, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7580528411147304, | |
| "eval_loss": 1.1233829259872437, | |
| "eval_runtime": 24.3279, | |
| "eval_samples_per_second": 72.756, | |
| "eval_steps_per_second": 0.247, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2027, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.7637997528180235, | |
| "eval_loss": 1.0827549695968628, | |
| "eval_runtime": 24.4052, | |
| "eval_samples_per_second": 72.525, | |
| "eval_steps_per_second": 0.246, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1838, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7666054329273585, | |
| "eval_loss": 1.0769394636154175, | |
| "eval_runtime": 24.4017, | |
| "eval_samples_per_second": 72.536, | |
| "eval_steps_per_second": 0.246, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1736, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.7698640416703452, | |
| "eval_loss": 1.0607359409332275, | |
| "eval_runtime": 24.3587, | |
| "eval_samples_per_second": 72.664, | |
| "eval_steps_per_second": 0.246, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1534, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7714122091985482, | |
| "eval_loss": 1.0373061895370483, | |
| "eval_runtime": 24.4342, | |
| "eval_samples_per_second": 72.439, | |
| "eval_steps_per_second": 0.246, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1388, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.7687794470619611, | |
| "eval_loss": 1.0571210384368896, | |
| "eval_runtime": 24.3831, | |
| "eval_samples_per_second": 72.591, | |
| "eval_steps_per_second": 0.246, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1273, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7713278633378673, | |
| "eval_loss": 1.0348763465881348, | |
| "eval_runtime": 24.3437, | |
| "eval_samples_per_second": 72.709, | |
| "eval_steps_per_second": 0.246, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1161, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.777816642254917, | |
| "eval_loss": 1.0058482885360718, | |
| "eval_runtime": 24.086, | |
| "eval_samples_per_second": 73.487, | |
| "eval_steps_per_second": 0.249, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1037, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.778376203991329, | |
| "eval_loss": 1.0106089115142822, | |
| "eval_runtime": 24.3319, | |
| "eval_samples_per_second": 72.744, | |
| "eval_steps_per_second": 0.247, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0897, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.7792838874680307, | |
| "eval_loss": 0.9971184730529785, | |
| "eval_runtime": 24.2829, | |
| "eval_samples_per_second": 72.891, | |
| "eval_steps_per_second": 0.247, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0833, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.7836239126411253, | |
| "eval_loss": 0.9799665212631226, | |
| "eval_runtime": 24.3154, | |
| "eval_samples_per_second": 72.793, | |
| "eval_steps_per_second": 0.247, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0747, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.7836160467857196, | |
| "eval_loss": 0.9719156622886658, | |
| "eval_runtime": 24.3874, | |
| "eval_samples_per_second": 72.578, | |
| "eval_steps_per_second": 0.246, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0708, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7880468638591382, | |
| "eval_loss": 0.9512822031974792, | |
| "eval_runtime": 26.1688, | |
| "eval_samples_per_second": 67.638, | |
| "eval_steps_per_second": 0.229, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0577, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.7839817403876498, | |
| "eval_loss": 0.9685712456703186, | |
| "eval_runtime": 24.2295, | |
| "eval_samples_per_second": 73.052, | |
| "eval_steps_per_second": 0.248, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0551, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.7858663548179133, | |
| "eval_loss": 0.9629907608032227, | |
| "eval_runtime": 24.4249, | |
| "eval_samples_per_second": 72.467, | |
| "eval_steps_per_second": 0.246, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0489, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.7881803814914183, | |
| "eval_loss": 0.9433470964431763, | |
| "eval_runtime": 24.4582, | |
| "eval_samples_per_second": 72.368, | |
| "eval_steps_per_second": 0.245, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0483, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7872097449562239, | |
| "eval_loss": 0.9574456214904785, | |
| "eval_runtime": 24.3523, | |
| "eval_samples_per_second": 72.683, | |
| "eval_steps_per_second": 0.246, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0327, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.7909898637829464, | |
| "eval_loss": 0.9401029348373413, | |
| "eval_runtime": 24.3913, | |
| "eval_samples_per_second": 72.567, | |
| "eval_steps_per_second": 0.246, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0362, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.7919005554242496, | |
| "eval_loss": 0.9270448684692383, | |
| "eval_runtime": 24.4335, | |
| "eval_samples_per_second": 72.441, | |
| "eval_steps_per_second": 0.246, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0243, | |
| "step": 3286 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.7986601387439376, | |
| "eval_loss": 0.8908094167709351, | |
| "eval_runtime": 24.1448, | |
| "eval_samples_per_second": 73.308, | |
| "eval_steps_per_second": 0.249, | |
| "step": 3286 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0189, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.7921468329354021, | |
| "eval_loss": 0.9229845404624939, | |
| "eval_runtime": 24.3268, | |
| "eval_samples_per_second": 72.759, | |
| "eval_steps_per_second": 0.247, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0079, | |
| "step": 3498 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.7888655988210297, | |
| "eval_loss": 0.9461591243743896, | |
| "eval_runtime": 24.4302, | |
| "eval_samples_per_second": 72.451, | |
| "eval_steps_per_second": 0.246, | |
| "step": 3498 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0018, | |
| "step": 3604 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.7890031028955456, | |
| "eval_loss": 0.9435957074165344, | |
| "eval_runtime": 24.4529, | |
| "eval_samples_per_second": 72.384, | |
| "eval_steps_per_second": 0.245, | |
| "step": 3604 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0063, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.7977425395315658, | |
| "eval_loss": 0.8957004547119141, | |
| "eval_runtime": 24.4025, | |
| "eval_samples_per_second": 72.534, | |
| "eval_steps_per_second": 0.246, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9982, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.7993480649855951, | |
| "eval_loss": 0.8885225057601929, | |
| "eval_runtime": 24.4063, | |
| "eval_samples_per_second": 72.522, | |
| "eval_steps_per_second": 0.246, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "learning_rate": 2e-05, | |
| "loss": 0.9911, | |
| "step": 3922 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.8001705662056435, | |
| "eval_loss": 0.8866317868232727, | |
| "eval_runtime": 24.3478, | |
| "eval_samples_per_second": 72.697, | |
| "eval_steps_per_second": 0.246, | |
| "step": 3922 | |
| } | |
| ], | |
| "max_steps": 4240, | |
| "num_train_epochs": 40, | |
| "total_flos": 472663961108480.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |