{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 15.0,
  "global_step": 19800,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.38,
      "learning_rate": 9.747474747474748e-06,
      "loss": 4.7208,
      "step": 500
    },
    {
      "epoch": 0.76,
      "learning_rate": 9.494949494949497e-06,
      "loss": 3.9925,
      "step": 1000
    },
    {
      "epoch": 1.0,
      "eval_accuracy": 0.4223034679889679,
      "eval_loss": 3.095407724380493,
      "eval_runtime": 2.1093,
      "eval_samples_per_second": 518.654,
      "eval_steps_per_second": 64.95,
      "step": 1320
    },
    {
      "epoch": 1.14,
      "learning_rate": 9.242424242424244e-06,
      "loss": 3.4313,
      "step": 1500
    },
    {
      "epoch": 1.52,
      "learning_rate": 8.98989898989899e-06,
      "loss": 2.8522,
      "step": 2000
    },
    {
      "epoch": 1.89,
      "learning_rate": 8.737373737373738e-06,
      "loss": 2.5041,
      "step": 2500
    },
    {
      "epoch": 2.0,
      "eval_accuracy": 0.6563071012496948,
      "eval_loss": 1.976155400276184,
      "eval_runtime": 2.2997,
      "eval_samples_per_second": 475.72,
      "eval_steps_per_second": 59.574,
      "step": 2640
    },
    {
      "epoch": 2.27,
      "learning_rate": 8.484848484848486e-06,
      "loss": 2.084,
      "step": 3000
    },
    {
      "epoch": 2.65,
      "learning_rate": 8.232323232323233e-06,
      "loss": 1.8061,
      "step": 3500
    },
    {
      "epoch": 3.0,
      "eval_accuracy": 0.7952467799186707,
      "eval_loss": 1.3196039199829102,
      "eval_runtime": 2.1005,
      "eval_samples_per_second": 520.832,
      "eval_steps_per_second": 65.223,
      "step": 3960
    },
    {
      "epoch": 3.03,
      "learning_rate": 7.97979797979798e-06,
      "loss": 1.5077,
      "step": 4000
    },
    {
      "epoch": 3.41,
      "learning_rate": 7.727272727272727e-06,
      "loss": 1.2211,
      "step": 4500
    },
    {
      "epoch": 3.79,
      "learning_rate": 7.474747474747476e-06,
      "loss": 1.0694,
      "step": 5000
    },
    {
      "epoch": 4.0,
      "eval_accuracy": 0.851005494594574,
      "eval_loss": 0.9303537011146545,
      "eval_runtime": 2.0914,
      "eval_samples_per_second": 523.095,
      "eval_steps_per_second": 65.506,
      "step": 5280
    },
    {
      "epoch": 4.17,
      "learning_rate": 7.222222222222223e-06,
      "loss": 0.9075,
      "step": 5500
    },
    {
      "epoch": 4.55,
      "learning_rate": 6.969696969696971e-06,
      "loss": 0.7228,
      "step": 6000
    },
    {
      "epoch": 4.92,
      "learning_rate": 6.717171717171718e-06,
      "loss": 0.6479,
      "step": 6500
    },
    {
      "epoch": 5.0,
      "eval_accuracy": 0.8820840716362,
      "eval_loss": 0.6875461339950562,
      "eval_runtime": 2.109,
      "eval_samples_per_second": 518.717,
      "eval_steps_per_second": 64.958,
      "step": 6600
    },
    {
      "epoch": 5.3,
      "learning_rate": 6.464646464646466e-06,
      "loss": 0.4916,
      "step": 7000
    },
    {
      "epoch": 5.68,
      "learning_rate": 6.212121212121213e-06,
      "loss": 0.4408,
      "step": 7500
    },
    {
      "epoch": 6.0,
      "eval_accuracy": 0.8976234197616577,
      "eval_loss": 0.5691984295845032,
      "eval_runtime": 2.091,
      "eval_samples_per_second": 523.186,
      "eval_steps_per_second": 65.518,
      "step": 7920
    },
    {
      "epoch": 6.06,
      "learning_rate": 5.95959595959596e-06,
      "loss": 0.3744,
      "step": 8000
    },
    {
      "epoch": 6.44,
      "learning_rate": 5.7070707070707075e-06,
      "loss": 0.2804,
      "step": 8500
    },
    {
      "epoch": 6.82,
      "learning_rate": 5.4545454545454545e-06,
      "loss": 0.2542,
      "step": 9000
    },
    {
      "epoch": 7.0,
      "eval_accuracy": 0.8948811888694763,
      "eval_loss": 0.5291352868080139,
      "eval_runtime": 2.1086,
      "eval_samples_per_second": 518.837,
      "eval_steps_per_second": 64.973,
      "step": 9240
    },
    {
      "epoch": 7.2,
      "learning_rate": 5.202020202020202e-06,
      "loss": 0.2309,
      "step": 9500
    },
    {
      "epoch": 7.58,
      "learning_rate": 4.94949494949495e-06,
      "loss": 0.175,
      "step": 10000
    },
    {
      "epoch": 7.95,
      "learning_rate": 4.696969696969698e-06,
      "loss": 0.1709,
      "step": 10500
    },
    {
      "epoch": 8.0,
      "eval_accuracy": 0.9058501124382019,
      "eval_loss": 0.5037628412246704,
      "eval_runtime": 2.0979,
      "eval_samples_per_second": 521.468,
      "eval_steps_per_second": 65.303,
      "step": 10560
    },
    {
      "epoch": 8.33,
      "learning_rate": 4.444444444444444e-06,
      "loss": 0.1309,
      "step": 11000
    },
    {
      "epoch": 8.71,
      "learning_rate": 4.191919191919192e-06,
      "loss": 0.1181,
      "step": 11500
    },
    {
      "epoch": 9.0,
      "eval_accuracy": 0.9049360156059265,
      "eval_loss": 0.48846620321273804,
      "eval_runtime": 2.0923,
      "eval_samples_per_second": 522.864,
      "eval_steps_per_second": 65.478,
      "step": 11880
    },
    {
      "epoch": 9.09,
      "learning_rate": 3.93939393939394e-06,
      "loss": 0.1117,
      "step": 12000
    },
    {
      "epoch": 9.47,
      "learning_rate": 3.686868686868687e-06,
      "loss": 0.0835,
      "step": 12500
    },
    {
      "epoch": 9.85,
      "learning_rate": 3.4343434343434347e-06,
      "loss": 0.0878,
      "step": 13000
    },
    {
      "epoch": 10.0,
      "eval_accuracy": 0.9049360156059265,
      "eval_loss": 0.49004697799682617,
      "eval_runtime": 2.1177,
      "eval_samples_per_second": 516.602,
      "eval_steps_per_second": 64.693,
      "step": 13200
    },
    {
      "epoch": 10.23,
      "learning_rate": 3.181818181818182e-06,
      "loss": 0.069,
      "step": 13500
    },
    {
      "epoch": 10.61,
      "learning_rate": 2.9292929292929295e-06,
      "loss": 0.0703,
      "step": 14000
    },
    {
      "epoch": 10.98,
      "learning_rate": 2.676767676767677e-06,
      "loss": 0.0702,
      "step": 14500
    },
    {
      "epoch": 11.0,
      "eval_accuracy": 0.9085923433303833,
      "eval_loss": 0.49296098947525024,
      "eval_runtime": 2.3138,
      "eval_samples_per_second": 472.81,
      "eval_steps_per_second": 59.209,
      "step": 14520
    },
    {
      "epoch": 11.36,
      "learning_rate": 2.4242424242424244e-06,
      "loss": 0.0497,
      "step": 15000
    },
    {
      "epoch": 11.74,
      "learning_rate": 2.171717171717172e-06,
      "loss": 0.0528,
      "step": 15500
    },
    {
      "epoch": 12.0,
      "eval_accuracy": 0.9113345742225647,
      "eval_loss": 0.49874168634414673,
      "eval_runtime": 2.5428,
      "eval_samples_per_second": 430.243,
      "eval_steps_per_second": 53.879,
      "step": 15840
    },
    {
      "epoch": 12.12,
      "learning_rate": 1.9191919191919192e-06,
      "loss": 0.0504,
      "step": 16000
    },
    {
      "epoch": 12.5,
      "learning_rate": 1.6666666666666667e-06,
      "loss": 0.048,
      "step": 16500
    },
    {
      "epoch": 12.88,
      "learning_rate": 1.4141414141414143e-06,
      "loss": 0.0406,
      "step": 17000
    },
    {
      "epoch": 13.0,
      "eval_accuracy": 0.9113345742225647,
      "eval_loss": 0.5008538961410522,
      "eval_runtime": 2.4539,
      "eval_samples_per_second": 445.826,
      "eval_steps_per_second": 55.83,
      "step": 17160
    },
    {
      "epoch": 13.26,
      "learning_rate": 1.1616161616161617e-06,
      "loss": 0.0453,
      "step": 17500
    },
    {
      "epoch": 13.64,
      "learning_rate": 9.090909090909091e-07,
      "loss": 0.0321,
      "step": 18000
    },
    {
      "epoch": 14.0,
      "eval_accuracy": 0.9104204773902893,
      "eval_loss": 0.5017415881156921,
      "eval_runtime": 2.4256,
      "eval_samples_per_second": 451.027,
      "eval_steps_per_second": 56.481,
      "step": 18480
    },
    {
      "epoch": 14.02,
      "learning_rate": 6.565656565656567e-07,
      "loss": 0.0436,
      "step": 18500
    },
    {
      "epoch": 14.39,
      "learning_rate": 4.040404040404041e-07,
      "loss": 0.037,
      "step": 19000
    },
    {
      "epoch": 14.77,
      "learning_rate": 1.5151515151515152e-07,
      "loss": 0.0308,
      "step": 19500
    },
    {
      "epoch": 15.0,
      "eval_accuracy": 0.9076782464981079,
      "eval_loss": 0.5025216341018677,
      "eval_runtime": 2.4844,
      "eval_samples_per_second": 440.345,
      "eval_steps_per_second": 55.144,
      "step": 19800
    },
    {
      "epoch": 15.0,
      "step": 19800,
      "total_flos": 5215995096399360.0,
      "train_loss": 0.7848883512285021,
      "train_runtime": 2097.012,
      "train_samples_per_second": 75.507,
      "train_steps_per_second": 9.442
    }
  ],
  "max_steps": 19800,
  "num_train_epochs": 15,
  "total_flos": 5215995096399360.0,
  "trial_name": null,
  "trial_params": null
}