{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 38.0,
  "global_step": 4028,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.3832186408159307e-05,
      "loss": 1.7921,
      "step": 106
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.7072680181157848,
      "eval_loss": 1.4908801317214966,
      "eval_runtime": 35.2427,
      "eval_samples_per_second": 50.223,
      "eval_steps_per_second": 0.17,
      "step": 106
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.5888124272106204e-05,
      "loss": 1.4864,
      "step": 212
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.7269994857792617,
      "eval_loss": 1.3473752737045288,
      "eval_runtime": 34.9927,
      "eval_samples_per_second": 50.582,
      "eval_steps_per_second": 0.171,
      "step": 212
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.7090770826327895e-05,
      "loss": 1.3756,
      "step": 318
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7408165661368673,
      "eval_loss": 1.2464056015014648,
      "eval_runtime": 34.6698,
      "eval_samples_per_second": 51.053,
      "eval_steps_per_second": 0.173,
      "step": 318
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.7944062136053104e-05,
      "loss": 1.3032,
      "step": 424
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.7535117507143178,
      "eval_loss": 1.1721432209014893,
      "eval_runtime": 34.7274,
      "eval_samples_per_second": 50.968,
      "eval_steps_per_second": 0.173,
      "step": 424
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.860592629580032e-05,
      "loss": 1.2584,
      "step": 530
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.7601500829124033,
      "eval_loss": 1.1279706954956055,
      "eval_runtime": 34.8917,
      "eval_samples_per_second": 50.728,
      "eval_steps_per_second": 0.172,
      "step": 530
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.9146708690274792e-05,
      "loss": 1.2103,
      "step": 636
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.7582043276054062,
      "eval_loss": 1.1379011869430542,
      "eval_runtime": 34.6334,
      "eval_samples_per_second": 51.107,
      "eval_steps_per_second": 0.173,
      "step": 636
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.9603933689955228e-05,
      "loss": 1.183,
      "step": 742
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.7704412195286752,
      "eval_loss": 1.062961220741272,
      "eval_runtime": 34.3317,
      "eval_samples_per_second": 51.556,
      "eval_steps_per_second": 0.175,
      "step": 742
    },
    {
      "epoch": 8.0,
      "learning_rate": 2e-05,
      "loss": 1.1546,
      "step": 848
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.7686654368803604,
      "eval_loss": 1.0676072835922241,
      "eval_runtime": 34.7431,
      "eval_samples_per_second": 50.945,
      "eval_steps_per_second": 0.173,
      "step": 848
    },
    {
      "epoch": 9.0,
      "learning_rate": 2e-05,
      "loss": 1.1263,
      "step": 954
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.7708787569856306,
      "eval_loss": 1.0571365356445312,
      "eval_runtime": 34.8256,
      "eval_samples_per_second": 50.825,
      "eval_steps_per_second": 0.172,
      "step": 954
    },
    {
      "epoch": 10.0,
      "learning_rate": 2e-05,
      "loss": 1.1191,
      "step": 1060
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.781194386249465,
      "eval_loss": 0.9872472286224365,
      "eval_runtime": 34.7655,
      "eval_samples_per_second": 50.913,
      "eval_steps_per_second": 0.173,
      "step": 1060
    },
    {
      "epoch": 11.0,
      "learning_rate": 2e-05,
      "loss": 1.0948,
      "step": 1166
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.7812378898109896,
      "eval_loss": 0.9977697134017944,
      "eval_runtime": 34.7432,
      "eval_samples_per_second": 50.945,
      "eval_steps_per_second": 0.173,
      "step": 1166
    },
    {
      "epoch": 12.0,
      "learning_rate": 2e-05,
      "loss": 1.0841,
      "step": 1272
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.7803615513259536,
      "eval_loss": 0.9978513717651367,
      "eval_runtime": 34.6545,
      "eval_samples_per_second": 51.076,
      "eval_steps_per_second": 0.173,
      "step": 1272
    },
    {
      "epoch": 13.0,
      "learning_rate": 2e-05,
      "loss": 1.0688,
      "step": 1378
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.7858584354172097,
      "eval_loss": 0.9791596531867981,
      "eval_runtime": 34.9372,
      "eval_samples_per_second": 50.662,
      "eval_steps_per_second": 0.172,
      "step": 1378
    },
    {
      "epoch": 14.0,
      "learning_rate": 2e-05,
      "loss": 1.0605,
      "step": 1484
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.7874798359386868,
      "eval_loss": 0.9556354284286499,
      "eval_runtime": 34.6859,
      "eval_samples_per_second": 51.029,
      "eval_steps_per_second": 0.173,
      "step": 1484
    },
    {
      "epoch": 15.0,
      "learning_rate": 2e-05,
      "loss": 1.0499,
      "step": 1590
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.7894171406706451,
      "eval_loss": 0.9544544816017151,
      "eval_runtime": 34.7413,
      "eval_samples_per_second": 50.948,
      "eval_steps_per_second": 0.173,
      "step": 1590
    },
    {
      "epoch": 16.0,
      "learning_rate": 2e-05,
      "loss": 1.0351,
      "step": 1696
    },
    {
      "epoch": 16.0,
      "eval_accuracy": 0.7894932603326688,
      "eval_loss": 0.9460939168930054,
      "eval_runtime": 34.704,
      "eval_samples_per_second": 51.003,
      "eval_steps_per_second": 0.173,
      "step": 1696
    },
    {
      "epoch": 17.0,
      "learning_rate": 2e-05,
      "loss": 1.0286,
      "step": 1802
    },
    {
      "epoch": 17.0,
      "eval_accuracy": 0.7885454720602293,
      "eval_loss": 0.9521207809448242,
      "eval_runtime": 34.7606,
      "eval_samples_per_second": 50.92,
      "eval_steps_per_second": 0.173,
      "step": 1802
    },
    {
      "epoch": 18.0,
      "learning_rate": 2e-05,
      "loss": 1.0173,
      "step": 1908
    },
    {
      "epoch": 18.0,
      "eval_accuracy": 0.7890063270306843,
      "eval_loss": 0.9481790661811829,
      "eval_runtime": 34.6779,
      "eval_samples_per_second": 51.041,
      "eval_steps_per_second": 0.173,
      "step": 1908
    },
    {
      "epoch": 19.0,
      "learning_rate": 2e-05,
      "loss": 1.0079,
      "step": 2014
    },
    {
      "epoch": 19.0,
      "eval_accuracy": 0.7932532825907861,
      "eval_loss": 0.9254797101020813,
      "eval_runtime": 34.6588,
      "eval_samples_per_second": 51.069,
      "eval_steps_per_second": 0.173,
      "step": 2014
    },
    {
      "epoch": 20.0,
      "learning_rate": 2e-05,
      "loss": 1.0,
      "step": 2120
    },
    {
      "epoch": 20.0,
      "eval_accuracy": 0.7949952307142005,
      "eval_loss": 0.9181823134422302,
      "eval_runtime": 34.812,
      "eval_samples_per_second": 50.845,
      "eval_steps_per_second": 0.172,
      "step": 2120
    },
    {
      "epoch": 21.0,
      "learning_rate": 2e-05,
      "loss": 0.993,
      "step": 2226
    },
    {
      "epoch": 21.0,
      "eval_accuracy": 0.7947981315379723,
      "eval_loss": 0.9146238565444946,
      "eval_runtime": 34.6312,
      "eval_samples_per_second": 51.11,
      "eval_steps_per_second": 0.173,
      "step": 2226
    },
    {
      "epoch": 22.0,
      "learning_rate": 2e-05,
      "loss": 0.9814,
      "step": 2332
    },
    {
      "epoch": 22.0,
      "eval_accuracy": 0.7955541733309638,
      "eval_loss": 0.9044105410575867,
      "eval_runtime": 35.6117,
      "eval_samples_per_second": 49.703,
      "eval_steps_per_second": 0.168,
      "step": 2332
    },
    {
      "epoch": 23.0,
      "learning_rate": 2e-05,
      "loss": 0.9733,
      "step": 2438
    },
    {
      "epoch": 23.0,
      "eval_accuracy": 0.7970804770891298,
      "eval_loss": 0.8979274034500122,
      "eval_runtime": 35.0401,
      "eval_samples_per_second": 50.514,
      "eval_steps_per_second": 0.171,
      "step": 2438
    },
    {
      "epoch": 24.0,
      "learning_rate": 2e-05,
      "loss": 0.9725,
      "step": 2544
    },
    {
      "epoch": 24.0,
      "eval_accuracy": 0.8038572415242785,
      "eval_loss": 0.867423415184021,
      "eval_runtime": 34.3534,
      "eval_samples_per_second": 51.523,
      "eval_steps_per_second": 0.175,
      "step": 2544
    },
    {
      "epoch": 25.0,
      "learning_rate": 2e-05,
      "loss": 0.963,
      "step": 2650
    },
    {
      "epoch": 25.0,
      "eval_accuracy": 0.7997166910097103,
      "eval_loss": 0.8823758959770203,
      "eval_runtime": 34.7324,
      "eval_samples_per_second": 50.961,
      "eval_steps_per_second": 0.173,
      "step": 2650
    },
    {
      "epoch": 26.0,
      "learning_rate": 2e-05,
      "loss": 0.9587,
      "step": 2756
    },
    {
      "epoch": 26.0,
      "eval_accuracy": 0.8008366256100395,
      "eval_loss": 0.8787974119186401,
      "eval_runtime": 34.68,
      "eval_samples_per_second": 51.038,
      "eval_steps_per_second": 0.173,
      "step": 2756
    },
    {
      "epoch": 27.0,
      "learning_rate": 2e-05,
      "loss": 0.9523,
      "step": 2862
    },
    {
      "epoch": 27.0,
      "eval_accuracy": 0.802576527423399,
      "eval_loss": 0.8776472806930542,
      "eval_runtime": 34.5847,
      "eval_samples_per_second": 51.179,
      "eval_steps_per_second": 0.173,
      "step": 2862
    },
    {
      "epoch": 28.0,
      "learning_rate": 2e-05,
      "loss": 0.9503,
      "step": 2968
    },
    {
      "epoch": 28.0,
      "eval_accuracy": 0.8046389019572702,
      "eval_loss": 0.8658241629600525,
      "eval_runtime": 34.7046,
      "eval_samples_per_second": 51.002,
      "eval_steps_per_second": 0.173,
      "step": 2968
    },
    {
      "epoch": 29.0,
      "learning_rate": 2e-05,
      "loss": 0.9357,
      "step": 3074
    },
    {
      "epoch": 29.0,
      "eval_accuracy": 0.8068694155258628,
      "eval_loss": 0.8559547066688538,
      "eval_runtime": 34.6364,
      "eval_samples_per_second": 51.102,
      "eval_steps_per_second": 0.173,
      "step": 3074
    },
    {
      "epoch": 30.0,
      "learning_rate": 2e-05,
      "loss": 0.9404,
      "step": 3180
    },
    {
      "epoch": 30.0,
      "eval_accuracy": 0.8065936664154513,
      "eval_loss": 0.8534895777702332,
      "eval_runtime": 34.816,
      "eval_samples_per_second": 50.839,
      "eval_steps_per_second": 0.172,
      "step": 3180
    },
    {
      "epoch": 31.0,
      "learning_rate": 2e-05,
      "loss": 0.931,
      "step": 3286
    },
    {
      "epoch": 31.0,
      "eval_accuracy": 0.8132200449776263,
      "eval_loss": 0.8150569796562195,
      "eval_runtime": 34.836,
      "eval_samples_per_second": 50.81,
      "eval_steps_per_second": 0.172,
      "step": 3286
    },
    {
      "epoch": 32.0,
      "learning_rate": 2e-05,
      "loss": 0.925,
      "step": 3392
    },
    {
      "epoch": 32.0,
      "eval_accuracy": 0.8086711486831303,
      "eval_loss": 0.8455161452293396,
      "eval_runtime": 35.717,
      "eval_samples_per_second": 49.556,
      "eval_steps_per_second": 0.168,
      "step": 3392
    },
    {
      "epoch": 33.0,
      "learning_rate": 2e-05,
      "loss": 0.9159,
      "step": 3498
    },
    {
      "epoch": 33.0,
      "eval_accuracy": 0.8050653384287288,
      "eval_loss": 0.8572449088096619,
      "eval_runtime": 34.373,
      "eval_samples_per_second": 51.494,
      "eval_steps_per_second": 0.175,
      "step": 3498
    },
    {
      "epoch": 34.0,
      "learning_rate": 2e-05,
      "loss": 0.9102,
      "step": 3604
    },
    {
      "epoch": 34.0,
      "eval_accuracy": 0.8064940892606243,
      "eval_loss": 0.8592977523803711,
      "eval_runtime": 34.6394,
      "eval_samples_per_second": 51.098,
      "eval_steps_per_second": 0.173,
      "step": 3604
    },
    {
      "epoch": 35.0,
      "learning_rate": 2e-05,
      "loss": 0.9128,
      "step": 3710
    },
    {
      "epoch": 35.0,
      "eval_accuracy": 0.8136768783591909,
      "eval_loss": 0.813452959060669,
      "eval_runtime": 34.7174,
      "eval_samples_per_second": 50.983,
      "eval_steps_per_second": 0.173,
      "step": 3710
    },
    {
      "epoch": 36.0,
      "learning_rate": 2e-05,
      "loss": 0.9067,
      "step": 3816
    },
    {
      "epoch": 36.0,
      "eval_accuracy": 0.8131441385250527,
      "eval_loss": 0.821983277797699,
      "eval_runtime": 34.8233,
      "eval_samples_per_second": 50.828,
      "eval_steps_per_second": 0.172,
      "step": 3816
    },
    {
      "epoch": 37.0,
      "learning_rate": 2e-05,
      "loss": 0.8989,
      "step": 3922
    },
    {
      "epoch": 37.0,
      "eval_accuracy": 0.8117314414207155,
      "eval_loss": 0.827382504940033,
      "eval_runtime": 34.7538,
      "eval_samples_per_second": 50.93,
      "eval_steps_per_second": 0.173,
      "step": 3922
    },
    {
      "epoch": 38.0,
      "learning_rate": 2e-05,
      "loss": 0.8928,
      "step": 4028
    },
    {
      "epoch": 38.0,
      "eval_accuracy": 0.8077465845969074,
      "eval_loss": 0.8441253900527954,
      "eval_runtime": 34.7385,
      "eval_samples_per_second": 50.952,
      "eval_steps_per_second": 0.173,
      "step": 4028
    }
  ],
  "max_steps": 4240,
  "num_train_epochs": 40,
  "total_flos": 1274410698801152.0,
  "trial_name": null,
  "trial_params": null
}