{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 30.0,
  "global_step": 47460,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.32,
      "learning_rate": 4.94732406236831e-05,
      "loss": 2.451,
      "step": 500
    },
    {
      "epoch": 0.63,
      "learning_rate": 4.8946481247366205e-05,
      "loss": 2.2767,
      "step": 1000
    },
    {
      "epoch": 0.95,
      "learning_rate": 4.8419721871049303e-05,
      "loss": 2.2059,
      "step": 1500
    },
    {
      "epoch": 1.26,
      "learning_rate": 4.789296249473241e-05,
      "loss": 2.1348,
      "step": 2000
    },
    {
      "epoch": 1.58,
      "learning_rate": 4.7366203118415506e-05,
      "loss": 2.0683,
      "step": 2500
    },
    {
      "epoch": 1.9,
      "learning_rate": 4.683944374209861e-05,
      "loss": 2.0449,
      "step": 3000
    },
    {
      "epoch": 2.21,
      "learning_rate": 4.631268436578171e-05,
      "loss": 1.9823,
      "step": 3500
    },
    {
      "epoch": 2.53,
      "learning_rate": 4.5785924989464814e-05,
      "loss": 1.9737,
      "step": 4000
    },
    {
      "epoch": 2.84,
      "learning_rate": 4.525916561314791e-05,
      "loss": 1.9638,
      "step": 4500
    },
    {
      "epoch": 3.16,
      "learning_rate": 4.473240623683102e-05,
      "loss": 1.8954,
      "step": 5000
    },
    {
      "epoch": 3.48,
      "learning_rate": 4.420564686051412e-05,
      "loss": 1.8848,
      "step": 5500
    },
    {
      "epoch": 3.79,
      "learning_rate": 4.367888748419722e-05,
      "loss": 1.8964,
      "step": 6000
    },
    {
      "epoch": 4.11,
      "learning_rate": 4.3152128107880325e-05,
      "loss": 1.8572,
      "step": 6500
    },
    {
      "epoch": 4.42,
      "learning_rate": 4.262536873156342e-05,
      "loss": 1.8132,
      "step": 7000
    },
    {
      "epoch": 4.74,
      "learning_rate": 4.209860935524653e-05,
      "loss": 1.8463,
      "step": 7500
    },
    {
      "epoch": 5.06,
      "learning_rate": 4.1571849978929626e-05,
      "loss": 1.8012,
      "step": 8000
    },
    {
      "epoch": 5.37,
      "learning_rate": 4.104509060261273e-05,
      "loss": 1.7557,
      "step": 8500
    },
    {
      "epoch": 5.69,
      "learning_rate": 4.051833122629583e-05,
      "loss": 1.7647,
      "step": 9000
    },
    {
      "epoch": 6.01,
      "learning_rate": 3.9991571849978934e-05,
      "loss": 1.7886,
      "step": 9500
    },
    {
      "epoch": 6.32,
      "learning_rate": 3.946481247366203e-05,
      "loss": 1.7078,
      "step": 10000
    },
    {
      "epoch": 6.64,
      "learning_rate": 3.893805309734514e-05,
      "loss": 1.7211,
      "step": 10500
    },
    {
      "epoch": 6.95,
      "learning_rate": 3.8411293721028235e-05,
      "loss": 1.7393,
      "step": 11000
    },
    {
      "epoch": 7.27,
      "learning_rate": 3.788453434471134e-05,
      "loss": 1.6734,
      "step": 11500
    },
    {
      "epoch": 7.59,
      "learning_rate": 3.735777496839444e-05,
      "loss": 1.6889,
      "step": 12000
    },
    {
      "epoch": 7.9,
      "learning_rate": 3.6831015592077536e-05,
      "loss": 1.6733,
      "step": 12500
    },
    {
      "epoch": 8.22,
      "learning_rate": 3.630425621576064e-05,
      "loss": 1.6575,
      "step": 13000
    },
    {
      "epoch": 8.53,
      "learning_rate": 3.577749683944374e-05,
      "loss": 1.6172,
      "step": 13500
    },
    {
      "epoch": 8.85,
      "learning_rate": 3.5250737463126844e-05,
      "loss": 1.6498,
      "step": 14000
    },
    {
      "epoch": 9.17,
      "learning_rate": 3.472397808680995e-05,
      "loss": 1.6185,
      "step": 14500
    },
    {
      "epoch": 9.48,
      "learning_rate": 3.419721871049305e-05,
      "loss": 1.5877,
      "step": 15000
    },
    {
      "epoch": 9.8,
      "learning_rate": 3.367045933417615e-05,
      "loss": 1.6174,
      "step": 15500
    },
    {
      "epoch": 10.11,
      "learning_rate": 3.3143699957859256e-05,
      "loss": 1.6148,
      "step": 16000
    },
    {
      "epoch": 10.43,
      "learning_rate": 3.2616940581542354e-05,
      "loss": 1.5563,
      "step": 16500
    },
    {
      "epoch": 10.75,
      "learning_rate": 3.209018120522546e-05,
      "loss": 1.5754,
      "step": 17000
    },
    {
      "epoch": 11.06,
      "learning_rate": 3.156342182890856e-05,
      "loss": 1.5547,
      "step": 17500
    },
    {
      "epoch": 11.38,
      "learning_rate": 3.1036662452591655e-05,
      "loss": 1.5282,
      "step": 18000
    },
    {
      "epoch": 11.69,
      "learning_rate": 3.050990307627476e-05,
      "loss": 1.5312,
      "step": 18500
    },
    {
      "epoch": 12.01,
      "learning_rate": 2.998314369995786e-05,
      "loss": 1.5508,
      "step": 19000
    },
    {
      "epoch": 12.33,
      "learning_rate": 2.9456384323640963e-05,
      "loss": 1.4954,
      "step": 19500
    },
    {
      "epoch": 12.64,
      "learning_rate": 2.8929624947324065e-05,
      "loss": 1.5208,
      "step": 20000
    },
    {
      "epoch": 12.96,
      "learning_rate": 2.8402865571007166e-05,
      "loss": 1.5115,
      "step": 20500
    },
    {
      "epoch": 13.27,
      "learning_rate": 2.7876106194690264e-05,
      "loss": 1.4648,
      "step": 21000
    },
    {
      "epoch": 13.59,
      "learning_rate": 2.7349346818373366e-05,
      "loss": 1.4778,
      "step": 21500
    },
    {
      "epoch": 13.91,
      "learning_rate": 2.6822587442056467e-05,
      "loss": 1.4992,
      "step": 22000
    },
    {
      "epoch": 14.22,
      "learning_rate": 2.629582806573957e-05,
      "loss": 1.4505,
      "step": 22500
    },
    {
      "epoch": 14.54,
      "learning_rate": 2.576906868942267e-05,
      "loss": 1.4585,
      "step": 23000
    },
    {
      "epoch": 14.85,
      "learning_rate": 2.524230931310577e-05,
      "loss": 1.4571,
      "step": 23500
    },
    {
      "epoch": 15.17,
      "learning_rate": 2.4715549936788876e-05,
      "loss": 1.441,
      "step": 24000
    },
    {
      "epoch": 15.49,
      "learning_rate": 2.4188790560471978e-05,
      "loss": 1.4268,
      "step": 24500
    },
    {
      "epoch": 15.8,
      "learning_rate": 2.366203118415508e-05,
      "loss": 1.4248,
      "step": 25000
    },
    {
      "epoch": 16.12,
      "learning_rate": 2.313527180783818e-05,
      "loss": 1.4385,
      "step": 25500
    },
    {
      "epoch": 16.43,
      "learning_rate": 2.2608512431521282e-05,
      "loss": 1.3802,
      "step": 26000
    },
    {
      "epoch": 16.75,
      "learning_rate": 2.2081753055204384e-05,
      "loss": 1.4195,
      "step": 26500
    },
    {
      "epoch": 17.07,
      "learning_rate": 2.1554993678887485e-05,
      "loss": 1.4127,
      "step": 27000
    },
    {
      "epoch": 17.38,
      "learning_rate": 2.1028234302570587e-05,
      "loss": 1.3683,
      "step": 27500
    },
    {
      "epoch": 17.7,
      "learning_rate": 2.0501474926253688e-05,
      "loss": 1.3864,
      "step": 28000
    },
    {
      "epoch": 18.02,
      "learning_rate": 1.997471554993679e-05,
      "loss": 1.405,
      "step": 28500
    },
    {
      "epoch": 18.33,
      "learning_rate": 1.944795617361989e-05,
      "loss": 1.3668,
      "step": 29000
    },
    {
      "epoch": 18.65,
      "learning_rate": 1.8921196797302992e-05,
      "loss": 1.3738,
      "step": 29500
    },
    {
      "epoch": 18.96,
      "learning_rate": 1.8394437420986094e-05,
      "loss": 1.3662,
      "step": 30000
    },
    {
      "epoch": 19.28,
      "learning_rate": 1.7867678044669195e-05,
      "loss": 1.3227,
      "step": 30500
    },
    {
      "epoch": 19.6,
      "learning_rate": 1.7340918668352297e-05,
      "loss": 1.3413,
      "step": 31000
    },
    {
      "epoch": 19.91,
      "learning_rate": 1.6814159292035402e-05,
      "loss": 1.3815,
      "step": 31500
    },
    {
      "epoch": 20.23,
      "learning_rate": 1.62873999157185e-05,
      "loss": 1.3393,
      "step": 32000
    },
    {
      "epoch": 20.54,
      "learning_rate": 1.57606405394016e-05,
      "loss": 1.3243,
      "step": 32500
    },
    {
      "epoch": 20.86,
      "learning_rate": 1.5233881163084704e-05,
      "loss": 1.3235,
      "step": 33000
    },
    {
      "epoch": 21.18,
      "learning_rate": 1.4707121786767806e-05,
      "loss": 1.3202,
      "step": 33500
    },
    {
      "epoch": 21.49,
      "learning_rate": 1.4180362410450906e-05,
      "loss": 1.3038,
      "step": 34000
    },
    {
      "epoch": 21.81,
      "learning_rate": 1.3653603034134007e-05,
      "loss": 1.3142,
      "step": 34500
    },
    {
      "epoch": 22.12,
      "learning_rate": 1.3126843657817109e-05,
      "loss": 1.3412,
      "step": 35000
    },
    {
      "epoch": 22.44,
      "learning_rate": 1.260008428150021e-05,
      "loss": 1.293,
      "step": 35500
    },
    {
      "epoch": 22.76,
      "learning_rate": 1.2073324905183313e-05,
      "loss": 1.3082,
      "step": 36000
    },
    {
      "epoch": 23.07,
      "learning_rate": 1.1546565528866415e-05,
      "loss": 1.3009,
      "step": 36500
    },
    {
      "epoch": 23.39,
      "learning_rate": 1.1019806152549515e-05,
      "loss": 1.3034,
      "step": 37000
    },
    {
      "epoch": 23.7,
      "learning_rate": 1.0493046776232618e-05,
      "loss": 1.2953,
      "step": 37500
    },
    {
      "epoch": 24.02,
      "learning_rate": 9.96628739991572e-06,
      "loss": 1.2674,
      "step": 38000
    },
    {
      "epoch": 24.34,
      "learning_rate": 9.43952802359882e-06,
      "loss": 1.2845,
      "step": 38500
    },
    {
      "epoch": 24.65,
      "learning_rate": 8.912768647281922e-06,
      "loss": 1.2762,
      "step": 39000
    },
    {
      "epoch": 24.97,
      "learning_rate": 8.386009270965024e-06,
      "loss": 1.2756,
      "step": 39500
    },
    {
      "epoch": 25.28,
      "learning_rate": 7.859249894648125e-06,
      "loss": 1.2749,
      "step": 40000
    },
    {
      "epoch": 25.6,
      "learning_rate": 7.3324905183312265e-06,
      "loss": 1.2762,
      "step": 40500
    },
    {
      "epoch": 25.92,
      "learning_rate": 6.805731142014328e-06,
      "loss": 1.2609,
      "step": 41000
    },
    {
      "epoch": 26.23,
      "learning_rate": 6.2789717656974295e-06,
      "loss": 1.2401,
      "step": 41500
    },
    {
      "epoch": 26.55,
      "learning_rate": 5.752212389380531e-06,
      "loss": 1.2583,
      "step": 42000
    },
    {
      "epoch": 26.86,
      "learning_rate": 5.225453013063633e-06,
      "loss": 1.2767,
      "step": 42500
    },
    {
      "epoch": 27.18,
      "learning_rate": 4.698693636746735e-06,
      "loss": 1.2532,
      "step": 43000
    },
    {
      "epoch": 27.5,
      "learning_rate": 4.171934260429835e-06,
      "loss": 1.2522,
      "step": 43500
    },
    {
      "epoch": 27.81,
      "learning_rate": 3.6451748841129377e-06,
      "loss": 1.2567,
      "step": 44000
    },
    {
      "epoch": 28.13,
      "learning_rate": 3.1184155077960387e-06,
      "loss": 1.2599,
      "step": 44500
    },
    {
      "epoch": 28.45,
      "learning_rate": 2.59165613147914e-06,
      "loss": 1.2493,
      "step": 45000
    },
    {
      "epoch": 28.76,
      "learning_rate": 2.064896755162242e-06,
      "loss": 1.245,
      "step": 45500
    },
    {
      "epoch": 29.08,
      "learning_rate": 1.5381373788453435e-06,
      "loss": 1.2396,
      "step": 46000
    },
    {
      "epoch": 29.39,
      "learning_rate": 1.0113780025284452e-06,
      "loss": 1.2441,
      "step": 46500
    },
    {
      "epoch": 29.71,
      "learning_rate": 4.846186262115466e-07,
      "loss": 1.2251,
      "step": 47000
    },
    {
      "epoch": 30.0,
      "step": 47460,
      "total_flos": 4.958800183296e+16,
      "train_loss": 1.5244493435467037,
      "train_runtime": 20965.488,
      "train_samples_per_second": 4.526,
      "train_steps_per_second": 2.264
    }
  ],
  "max_steps": 47460,
  "num_train_epochs": 30,
  "total_flos": 4.958800183296e+16,
  "trial_name": null,
  "trial_params": null
}