| { | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 39.0, | |
| "global_step": 4134, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 1.0, | |
| "learning_rate": 1.3832186408159307e-05, | |
| "loss": 2.0685, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.6720276077614272, | |
| "eval_loss": 1.7388017177581787, | |
| "eval_runtime": 36.8297, | |
| "eval_samples_per_second": 48.059, | |
| "eval_steps_per_second": 0.163, | |
| "step": 106 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "learning_rate": 1.5888124272106204e-05, | |
| "loss": 1.7284, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.6931985254177139, | |
| "eval_loss": 1.5560609102249146, | |
| "eval_runtime": 37.6763, | |
| "eval_samples_per_second": 46.979, | |
| "eval_steps_per_second": 0.159, | |
| "step": 212 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "learning_rate": 1.7090770826327895e-05, | |
| "loss": 1.5997, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.7099605218744038, | |
| "eval_loss": 1.4386627674102783, | |
| "eval_runtime": 36.094, | |
| "eval_samples_per_second": 49.039, | |
| "eval_steps_per_second": 0.166, | |
| "step": 318 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "learning_rate": 1.7944062136053104e-05, | |
| "loss": 1.5195, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.7235852020285128, | |
| "eval_loss": 1.3606762886047363, | |
| "eval_runtime": 37.6742, | |
| "eval_samples_per_second": 46.982, | |
| "eval_steps_per_second": 0.159, | |
| "step": 424 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "learning_rate": 1.860592629580032e-05, | |
| "loss": 1.4706, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.7306072855931103, | |
| "eval_loss": 1.3053652048110962, | |
| "eval_runtime": 36.8913, | |
| "eval_samples_per_second": 47.979, | |
| "eval_steps_per_second": 0.163, | |
| "step": 530 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "learning_rate": 1.9146708690274792e-05, | |
| "loss": 1.4153, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.7296665866066945, | |
| "eval_loss": 1.3213350772857666, | |
| "eval_runtime": 37.6502, | |
| "eval_samples_per_second": 47.012, | |
| "eval_steps_per_second": 0.159, | |
| "step": 636 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "learning_rate": 1.9603933689955228e-05, | |
| "loss": 1.3838, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.7412067658165602, | |
| "eval_loss": 1.2423616647720337, | |
| "eval_runtime": 36.7828, | |
| "eval_samples_per_second": 48.12, | |
| "eval_steps_per_second": 0.163, | |
| "step": 742 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3512, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 8.0, | |
| "eval_accuracy": 0.7385993664885682, | |
| "eval_loss": 1.2403146028518677, | |
| "eval_runtime": 37.6141, | |
| "eval_samples_per_second": 47.057, | |
| "eval_steps_per_second": 0.16, | |
| "step": 848 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3188, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 9.0, | |
| "eval_accuracy": 0.7413660954138545, | |
| "eval_loss": 1.2292009592056274, | |
| "eval_runtime": 36.8608, | |
| "eval_samples_per_second": 48.019, | |
| "eval_steps_per_second": 0.163, | |
| "step": 954 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.3098, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 10.0, | |
| "eval_accuracy": 0.7530054891006436, | |
| "eval_loss": 1.1540861129760742, | |
| "eval_runtime": 36.7518, | |
| "eval_samples_per_second": 48.161, | |
| "eval_steps_per_second": 0.163, | |
| "step": 1060 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2827, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 11.0, | |
| "eval_accuracy": 0.7532555843965927, | |
| "eval_loss": 1.1605820655822754, | |
| "eval_runtime": 37.6556, | |
| "eval_samples_per_second": 47.005, | |
| "eval_steps_per_second": 0.159, | |
| "step": 1166 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2693, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 12.0, | |
| "eval_accuracy": 0.7515547286193142, | |
| "eval_loss": 1.167082667350769, | |
| "eval_runtime": 36.7311, | |
| "eval_samples_per_second": 48.188, | |
| "eval_steps_per_second": 0.163, | |
| "step": 1272 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2521, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 13.0, | |
| "eval_accuracy": 0.7581785009494731, | |
| "eval_loss": 1.134334921836853, | |
| "eval_runtime": 37.5802, | |
| "eval_samples_per_second": 47.099, | |
| "eval_steps_per_second": 0.16, | |
| "step": 1378 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2421, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 14.0, | |
| "eval_accuracy": 0.7592175982523275, | |
| "eval_loss": 1.1171754598617554, | |
| "eval_runtime": 36.9701, | |
| "eval_samples_per_second": 47.877, | |
| "eval_steps_per_second": 0.162, | |
| "step": 1484 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2308, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 15.0, | |
| "eval_accuracy": 0.7617716097984533, | |
| "eval_loss": 1.1091045141220093, | |
| "eval_runtime": 37.8672, | |
| "eval_samples_per_second": 46.742, | |
| "eval_steps_per_second": 0.158, | |
| "step": 1590 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2132, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 16.0, | |
| "eval_accuracy": 0.7612774124438361, | |
| "eval_loss": 1.1064728498458862, | |
| "eval_runtime": 36.7747, | |
| "eval_samples_per_second": 48.131, | |
| "eval_steps_per_second": 0.163, | |
| "step": 1696 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.2055, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 17.0, | |
| "eval_accuracy": 0.761372646367941, | |
| "eval_loss": 1.1087865829467773, | |
| "eval_runtime": 36.8483, | |
| "eval_samples_per_second": 48.035, | |
| "eval_steps_per_second": 0.163, | |
| "step": 1802 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1931, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 18.0, | |
| "eval_accuracy": 0.7599678162601031, | |
| "eval_loss": 1.1089411973953247, | |
| "eval_runtime": 35.9217, | |
| "eval_samples_per_second": 49.274, | |
| "eval_steps_per_second": 0.167, | |
| "step": 1908 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1815, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 19.0, | |
| "eval_accuracy": 0.7659135481142874, | |
| "eval_loss": 1.0751391649246216, | |
| "eval_runtime": 36.8871, | |
| "eval_samples_per_second": 47.984, | |
| "eval_steps_per_second": 0.163, | |
| "step": 2014 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1728, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 20.0, | |
| "eval_accuracy": 0.768600214617861, | |
| "eval_loss": 1.069868803024292, | |
| "eval_runtime": 36.7387, | |
| "eval_samples_per_second": 48.178, | |
| "eval_steps_per_second": 0.163, | |
| "step": 2120 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.164, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 21.0, | |
| "eval_accuracy": 0.7675022113058035, | |
| "eval_loss": 1.065330147743225, | |
| "eval_runtime": 36.8047, | |
| "eval_samples_per_second": 48.092, | |
| "eval_steps_per_second": 0.163, | |
| "step": 2226 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1524, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 22.0, | |
| "eval_accuracy": 0.7698299863752147, | |
| "eval_loss": 1.0548479557037354, | |
| "eval_runtime": 36.7052, | |
| "eval_samples_per_second": 48.222, | |
| "eval_steps_per_second": 0.163, | |
| "step": 2332 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1425, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 23.0, | |
| "eval_accuracy": 0.769697828632807, | |
| "eval_loss": 1.047703504562378, | |
| "eval_runtime": 36.8466, | |
| "eval_samples_per_second": 48.037, | |
| "eval_steps_per_second": 0.163, | |
| "step": 2438 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.143, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 24.0, | |
| "eval_accuracy": 0.7772214047626197, | |
| "eval_loss": 1.0133404731750488, | |
| "eval_runtime": 36.8371, | |
| "eval_samples_per_second": 48.049, | |
| "eval_steps_per_second": 0.163, | |
| "step": 2544 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1308, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 25.0, | |
| "eval_accuracy": 0.7741821838279008, | |
| "eval_loss": 1.0260401964187622, | |
| "eval_runtime": 36.8437, | |
| "eval_samples_per_second": 48.041, | |
| "eval_steps_per_second": 0.163, | |
| "step": 2650 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1271, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 26.0, | |
| "eval_accuracy": 0.773661286574258, | |
| "eval_loss": 1.0230038166046143, | |
| "eval_runtime": 36.8925, | |
| "eval_samples_per_second": 47.977, | |
| "eval_steps_per_second": 0.163, | |
| "step": 2756 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1202, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 27.0, | |
| "eval_accuracy": 0.7754215166511026, | |
| "eval_loss": 1.0241199731826782, | |
| "eval_runtime": 36.7992, | |
| "eval_samples_per_second": 48.099, | |
| "eval_steps_per_second": 0.163, | |
| "step": 2862 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1168, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 28.0, | |
| "eval_accuracy": 0.7775761513243871, | |
| "eval_loss": 1.0062930583953857, | |
| "eval_runtime": 38.045, | |
| "eval_samples_per_second": 46.524, | |
| "eval_steps_per_second": 0.158, | |
| "step": 2968 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1019, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 29.0, | |
| "eval_accuracy": 0.7796242613030875, | |
| "eval_loss": 0.9990780353546143, | |
| "eval_runtime": 37.599, | |
| "eval_samples_per_second": 47.076, | |
| "eval_steps_per_second": 0.16, | |
| "step": 3074 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.1071, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 30.0, | |
| "eval_accuracy": 0.7805559249900252, | |
| "eval_loss": 0.991283655166626, | |
| "eval_runtime": 36.7892, | |
| "eval_samples_per_second": 48.112, | |
| "eval_steps_per_second": 0.163, | |
| "step": 3180 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0963, | |
| "step": 3286 | |
| }, | |
| { | |
| "epoch": 31.0, | |
| "eval_accuracy": 0.7864944315242504, | |
| "eval_loss": 0.9553370475769043, | |
| "eval_runtime": 36.7173, | |
| "eval_samples_per_second": 48.206, | |
| "eval_steps_per_second": 0.163, | |
| "step": 3286 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.089, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 32.0, | |
| "eval_accuracy": 0.783273055389144, | |
| "eval_loss": 0.9850459098815918, | |
| "eval_runtime": 36.7845, | |
| "eval_samples_per_second": 48.118, | |
| "eval_steps_per_second": 0.163, | |
| "step": 3392 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0807, | |
| "step": 3498 | |
| }, | |
| { | |
| "epoch": 33.0, | |
| "eval_accuracy": 0.7774915196015286, | |
| "eval_loss": 0.9987505674362183, | |
| "eval_runtime": 36.8264, | |
| "eval_samples_per_second": 48.063, | |
| "eval_steps_per_second": 0.163, | |
| "step": 3498 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0735, | |
| "step": 3604 | |
| }, | |
| { | |
| "epoch": 34.0, | |
| "eval_accuracy": 0.7789523242042387, | |
| "eval_loss": 0.9969209432601929, | |
| "eval_runtime": 37.6346, | |
| "eval_samples_per_second": 47.031, | |
| "eval_steps_per_second": 0.159, | |
| "step": 3604 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0766, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 35.0, | |
| "eval_accuracy": 0.7880373776195184, | |
| "eval_loss": 0.9506202936172485, | |
| "eval_runtime": 36.9744, | |
| "eval_samples_per_second": 47.871, | |
| "eval_steps_per_second": 0.162, | |
| "step": 3710 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0698, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 36.0, | |
| "eval_accuracy": 0.7880023166711217, | |
| "eval_loss": 0.9565942883491516, | |
| "eval_runtime": 38.7958, | |
| "eval_samples_per_second": 45.623, | |
| "eval_steps_per_second": 0.155, | |
| "step": 3816 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0608, | |
| "step": 3922 | |
| }, | |
| { | |
| "epoch": 37.0, | |
| "eval_accuracy": 0.7855791137596568, | |
| "eval_loss": 0.9620457291603088, | |
| "eval_runtime": 37.7795, | |
| "eval_samples_per_second": 46.851, | |
| "eval_steps_per_second": 0.159, | |
| "step": 3922 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0543, | |
| "step": 4028 | |
| }, | |
| { | |
| "epoch": 38.0, | |
| "eval_accuracy": 0.7822399039183305, | |
| "eval_loss": 0.9812787175178528, | |
| "eval_runtime": 36.8483, | |
| "eval_samples_per_second": 48.035, | |
| "eval_steps_per_second": 0.163, | |
| "step": 4028 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "learning_rate": 2e-05, | |
| "loss": 1.0521, | |
| "step": 4134 | |
| }, | |
| { | |
| "epoch": 39.0, | |
| "eval_accuracy": 0.7884438270952546, | |
| "eval_loss": 0.9478756785392761, | |
| "eval_runtime": 36.7861, | |
| "eval_samples_per_second": 48.116, | |
| "eval_steps_per_second": 0.163, | |
| "step": 4134 | |
| } | |
| ], | |
| "max_steps": 4240, | |
| "num_train_epochs": 40, | |
| "total_flos": 498219970723840.0, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |