{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 18.0,
  "global_step": 94050,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0,
      "learning_rate": 5e-09,
      "loss": 10.4865,
      "step": 1
    },
    {
      "epoch": 0.1,
      "learning_rate": 2.5e-06,
      "loss": 9.4439,
      "step": 500
    },
    {
      "epoch": 0.19,
      "learning_rate": 5e-06,
      "loss": 7.6179,
      "step": 1000
    },
    {
      "epoch": 0.29,
      "learning_rate": 7.5e-06,
      "loss": 6.3619,
      "step": 1500
    },
    {
      "epoch": 0.38,
      "learning_rate": 1e-05,
      "loss": 6.0809,
      "step": 2000
    },
    {
      "epoch": 0.48,
      "learning_rate": 1.25e-05,
      "loss": 5.957,
      "step": 2500
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.5e-05,
      "loss": 5.8758,
      "step": 3000
    },
    {
      "epoch": 0.67,
      "learning_rate": 1.75e-05,
      "loss": 5.8102,
      "step": 3500
    },
    {
      "epoch": 0.77,
      "learning_rate": 2e-05,
      "loss": 5.7625,
      "step": 4000
    },
    {
      "epoch": 0.86,
      "learning_rate": 2.25e-05,
      "loss": 5.7195,
      "step": 4500
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.5e-05,
      "loss": 5.6801,
      "step": 5000
    },
    {
      "epoch": 1.05,
      "learning_rate": 2.7500000000000004e-05,
      "loss": 5.6449,
      "step": 5500
    },
    {
      "epoch": 1.15,
      "learning_rate": 3e-05,
      "loss": 5.6134,
      "step": 6000
    },
    {
      "epoch": 1.24,
      "learning_rate": 3.2500000000000004e-05,
      "loss": 5.591,
      "step": 6500
    },
    {
      "epoch": 1.34,
      "learning_rate": 3.5e-05,
      "loss": 5.5683,
      "step": 7000
    },
    {
      "epoch": 1.44,
      "learning_rate": 3.7500000000000003e-05,
      "loss": 5.5419,
      "step": 7500
    },
    {
      "epoch": 1.53,
      "learning_rate": 4e-05,
      "loss": 5.5231,
      "step": 8000
    },
    {
      "epoch": 1.63,
      "learning_rate": 4.25e-05,
      "loss": 5.506,
      "step": 8500
    },
    {
      "epoch": 1.72,
      "learning_rate": 4.4995000000000005e-05,
      "loss": 5.4871,
      "step": 9000
    },
    {
      "epoch": 1.82,
      "learning_rate": 4.7495e-05,
      "loss": 5.4763,
      "step": 9500
    },
    {
      "epoch": 1.91,
      "learning_rate": 4.9995000000000005e-05,
      "loss": 5.4615,
      "step": 10000
    },
    {
      "epoch": 2.01,
      "learning_rate": 4.998800480769231e-05,
      "loss": 5.4468,
      "step": 10500
    },
    {
      "epoch": 2.11,
      "learning_rate": 4.997600961538462e-05,
      "loss": 5.4341,
      "step": 11000
    },
    {
      "epoch": 2.2,
      "learning_rate": 4.996399038461539e-05,
      "loss": 5.4224,
      "step": 11500
    },
    {
      "epoch": 2.3,
      "learning_rate": 4.995197115384615e-05,
      "loss": 5.4099,
      "step": 12000
    },
    {
      "epoch": 2.39,
      "learning_rate": 4.993995192307693e-05,
      "loss": 5.3978,
      "step": 12500
    },
    {
      "epoch": 2.49,
      "learning_rate": 4.992795673076923e-05,
      "loss": 5.3897,
      "step": 13000
    },
    {
      "epoch": 2.58,
      "learning_rate": 4.991593750000001e-05,
      "loss": 5.3836,
      "step": 13500
    },
    {
      "epoch": 2.68,
      "learning_rate": 4.990391826923077e-05,
      "loss": 5.3737,
      "step": 14000
    },
    {
      "epoch": 2.78,
      "learning_rate": 4.989189903846154e-05,
      "loss": 5.3668,
      "step": 14500
    },
    {
      "epoch": 2.87,
      "learning_rate": 4.987990384615385e-05,
      "loss": 5.3597,
      "step": 15000
    },
    {
      "epoch": 2.97,
      "learning_rate": 4.986788461538462e-05,
      "loss": 5.3485,
      "step": 15500
    },
    {
      "epoch": 3.06,
      "learning_rate": 4.9855889423076926e-05,
      "loss": 5.3413,
      "step": 16000
    },
    {
      "epoch": 3.16,
      "learning_rate": 4.9843870192307694e-05,
      "loss": 5.338,
      "step": 16500
    },
    {
      "epoch": 3.25,
      "learning_rate": 4.983185096153846e-05,
      "loss": 5.3304,
      "step": 17000
    },
    {
      "epoch": 3.35,
      "learning_rate": 4.981983173076924e-05,
      "loss": 5.3258,
      "step": 17500
    },
    {
      "epoch": 3.44,
      "learning_rate": 4.98078125e-05,
      "loss": 5.317,
      "step": 18000
    },
    {
      "epoch": 3.54,
      "learning_rate": 4.9795793269230774e-05,
      "loss": 5.3134,
      "step": 18500
    },
    {
      "epoch": 3.64,
      "learning_rate": 4.978377403846154e-05,
      "loss": 5.3097,
      "step": 19000
    },
    {
      "epoch": 3.73,
      "learning_rate": 4.977175480769231e-05,
      "loss": 5.3019,
      "step": 19500
    },
    {
      "epoch": 3.83,
      "learning_rate": 4.9759759615384614e-05,
      "loss": 5.2985,
      "step": 20000
    },
    {
      "epoch": 3.92,
      "learning_rate": 4.974774038461539e-05,
      "loss": 5.2942,
      "step": 20500
    },
    {
      "epoch": 4.02,
      "learning_rate": 4.973572115384616e-05,
      "loss": 5.2893,
      "step": 21000
    },
    {
      "epoch": 4.11,
      "learning_rate": 4.9723701923076925e-05,
      "loss": 5.2843,
      "step": 21500
    },
    {
      "epoch": 4.21,
      "learning_rate": 4.971170673076923e-05,
      "loss": 5.2784,
      "step": 22000
    },
    {
      "epoch": 4.31,
      "learning_rate": 4.9699687500000004e-05,
      "loss": 5.2732,
      "step": 22500
    },
    {
      "epoch": 4.4,
      "learning_rate": 4.968766826923077e-05,
      "loss": 5.2701,
      "step": 23000
    },
    {
      "epoch": 4.5,
      "learning_rate": 4.967564903846154e-05,
      "loss": 5.2677,
      "step": 23500
    },
    {
      "epoch": 4.59,
      "learning_rate": 4.9663653846153844e-05,
      "loss": 5.2644,
      "step": 24000
    },
    {
      "epoch": 4.69,
      "learning_rate": 4.965163461538462e-05,
      "loss": 5.2562,
      "step": 24500
    },
    {
      "epoch": 4.78,
      "learning_rate": 4.963963942307693e-05,
      "loss": 5.2557,
      "step": 25000
    },
    {
      "epoch": 4.88,
      "learning_rate": 4.96276201923077e-05,
      "loss": 5.2529,
      "step": 25500
    },
    {
      "epoch": 4.98,
      "learning_rate": 4.9615600961538466e-05,
      "loss": 5.2504,
      "step": 26000
    },
    {
      "epoch": 5.07,
      "learning_rate": 4.9603581730769234e-05,
      "loss": 5.1431,
      "step": 26500
    },
    {
      "epoch": 5.17,
      "learning_rate": 4.95915625e-05,
      "loss": 4.8144,
      "step": 27000
    },
    {
      "epoch": 5.26,
      "learning_rate": 4.957954326923077e-05,
      "loss": 4.5379,
      "step": 27500
    },
    {
      "epoch": 5.36,
      "learning_rate": 4.9567524038461545e-05,
      "loss": 4.282,
      "step": 28000
    },
    {
      "epoch": 5.45,
      "learning_rate": 4.955550480769231e-05,
      "loss": 3.7642,
      "step": 28500
    },
    {
      "epoch": 5.55,
      "learning_rate": 4.9543485576923075e-05,
      "loss": 3.0854,
      "step": 29000
    },
    {
      "epoch": 5.65,
      "learning_rate": 4.9531490384615385e-05,
      "loss": 2.6674,
      "step": 29500
    },
    {
      "epoch": 5.74,
      "learning_rate": 4.951947115384616e-05,
      "loss": 2.2254,
      "step": 30000
    },
    {
      "epoch": 5.84,
      "learning_rate": 4.950745192307693e-05,
      "loss": 1.9446,
      "step": 30500
    },
    {
      "epoch": 5.93,
      "learning_rate": 4.94954326923077e-05,
      "loss": 1.7693,
      "step": 31000
    },
    {
      "epoch": 6.03,
      "learning_rate": 4.94834375e-05,
      "loss": 1.6527,
      "step": 31500
    },
    {
      "epoch": 6.12,
      "learning_rate": 4.9471418269230775e-05,
      "loss": 1.5696,
      "step": 32000
    },
    {
      "epoch": 6.22,
      "learning_rate": 4.9459399038461544e-05,
      "loss": 1.5054,
      "step": 32500
    },
    {
      "epoch": 6.32,
      "learning_rate": 4.944737980769231e-05,
      "loss": 1.4447,
      "step": 33000
    },
    {
      "epoch": 6.41,
      "learning_rate": 4.9435384615384616e-05,
      "loss": 1.3901,
      "step": 33500
    },
    {
      "epoch": 6.51,
      "learning_rate": 4.942336538461539e-05,
      "loss": 1.3332,
      "step": 34000
    },
    {
      "epoch": 6.6,
      "learning_rate": 4.941134615384615e-05,
      "loss": 1.2692,
      "step": 34500
    },
    {
      "epoch": 6.7,
      "learning_rate": 4.939932692307693e-05,
      "loss": 1.217,
      "step": 35000
    },
    {
      "epoch": 6.79,
      "learning_rate": 4.938733173076923e-05,
      "loss": 1.178,
      "step": 35500
    },
    {
      "epoch": 6.89,
      "learning_rate": 4.9375312500000006e-05,
      "loss": 1.143,
      "step": 36000
    },
    {
      "epoch": 6.99,
      "learning_rate": 4.936329326923077e-05,
      "loss": 1.1109,
      "step": 36500
    },
    {
      "epoch": 7.08,
      "learning_rate": 4.935129807692308e-05,
      "loss": 1.0859,
      "step": 37000
    },
    {
      "epoch": 7.18,
      "learning_rate": 4.9339278846153846e-05,
      "loss": 1.0619,
      "step": 37500
    },
    {
      "epoch": 7.27,
      "learning_rate": 4.932725961538462e-05,
      "loss": 1.0387,
      "step": 38000
    },
    {
      "epoch": 7.37,
      "learning_rate": 4.931524038461538e-05,
      "loss": 1.0205,
      "step": 38500
    },
    {
      "epoch": 7.46,
      "learning_rate": 4.930322115384616e-05,
      "loss": 1.0017,
      "step": 39000
    },
    {
      "epoch": 7.56,
      "learning_rate": 4.929122596153846e-05,
      "loss": 0.9856,
      "step": 39500
    },
    {
      "epoch": 7.66,
      "learning_rate": 4.9279206730769236e-05,
      "loss": 0.9707,
      "step": 40000
    },
    {
      "epoch": 7.75,
      "learning_rate": 4.92671875e-05,
      "loss": 0.9574,
      "step": 40500
    },
    {
      "epoch": 7.85,
      "learning_rate": 4.925516826923077e-05,
      "loss": 0.9455,
      "step": 41000
    },
    {
      "epoch": 7.94,
      "learning_rate": 4.924314903846154e-05,
      "loss": 0.9323,
      "step": 41500
    },
    {
      "epoch": 8.04,
      "learning_rate": 4.923112980769231e-05,
      "loss": 0.9199,
      "step": 42000
    },
    {
      "epoch": 8.13,
      "learning_rate": 4.921913461538461e-05,
      "loss": 0.9113,
      "step": 42500
    },
    {
      "epoch": 8.23,
      "learning_rate": 4.920713942307692e-05,
      "loss": 0.9012,
      "step": 43000
    },
    {
      "epoch": 8.33,
      "learning_rate": 4.919512019230769e-05,
      "loss": 0.8939,
      "step": 43500
    },
    {
      "epoch": 8.42,
      "learning_rate": 4.9183100961538466e-05,
      "loss": 0.8851,
      "step": 44000
    },
    {
      "epoch": 8.52,
      "learning_rate": 4.917108173076923e-05,
      "loss": 0.8745,
      "step": 44500
    },
    {
      "epoch": 8.61,
      "learning_rate": 4.91590625e-05,
      "loss": 0.8651,
      "step": 45000
    },
    {
      "epoch": 8.71,
      "learning_rate": 4.914704326923077e-05,
      "loss": 0.8578,
      "step": 45500
    },
    {
      "epoch": 8.8,
      "learning_rate": 4.913502403846154e-05,
      "loss": 0.8519,
      "step": 46000
    },
    {
      "epoch": 8.9,
      "learning_rate": 4.912300480769231e-05,
      "loss": 0.8457,
      "step": 46500
    },
    {
      "epoch": 9.0,
      "learning_rate": 4.911098557692308e-05,
      "loss": 0.8389,
      "step": 47000
    },
    {
      "epoch": 9.09,
      "learning_rate": 4.9098990384615386e-05,
      "loss": 0.8305,
      "step": 47500
    },
    {
      "epoch": 9.19,
      "learning_rate": 4.9086971153846154e-05,
      "loss": 0.8233,
      "step": 48000
    },
    {
      "epoch": 9.28,
      "learning_rate": 4.907495192307692e-05,
      "loss": 0.8189,
      "step": 48500
    },
    {
      "epoch": 9.38,
      "learning_rate": 4.90629326923077e-05,
      "loss": 0.8129,
      "step": 49000
    },
    {
      "epoch": 9.47,
      "learning_rate": 4.905093750000001e-05,
      "loss": 0.8076,
      "step": 49500
    },
    {
      "epoch": 9.57,
      "learning_rate": 4.903894230769231e-05,
      "loss": 0.8019,
      "step": 50000
    },
    {
      "epoch": 9.67,
      "learning_rate": 4.902692307692308e-05,
      "loss": 0.7962,
      "step": 50500
    },
    {
      "epoch": 9.76,
      "learning_rate": 4.901490384615385e-05,
      "loss": 0.7904,
      "step": 51000
    },
    {
      "epoch": 9.86,
      "learning_rate": 4.900288461538462e-05,
      "loss": 0.7879,
      "step": 51500
    },
    {
      "epoch": 9.95,
      "learning_rate": 4.8990865384615384e-05,
      "loss": 0.7811,
      "step": 52000
    },
    {
      "epoch": 10.05,
      "learning_rate": 4.897884615384616e-05,
      "loss": 0.7781,
      "step": 52500
    },
    {
      "epoch": 10.14,
      "learning_rate": 4.896682692307693e-05,
      "loss": 0.7724,
      "step": 53000
    },
    {
      "epoch": 10.24,
      "learning_rate": 4.8954807692307695e-05,
      "loss": 0.7682,
      "step": 53500
    },
    {
      "epoch": 10.33,
      "learning_rate": 4.89428125e-05,
      "loss": 0.7637,
      "step": 54000
    },
    {
      "epoch": 10.43,
      "learning_rate": 4.893081730769231e-05,
      "loss": 0.7592,
      "step": 54500
    },
    {
      "epoch": 10.53,
      "learning_rate": 4.891879807692308e-05,
      "loss": 0.7541,
      "step": 55000
    },
    {
      "epoch": 10.62,
      "learning_rate": 4.890677884615385e-05,
      "loss": 0.75,
      "step": 55500
    },
    {
      "epoch": 10.72,
      "learning_rate": 4.8894759615384614e-05,
      "loss": 0.749,
      "step": 56000
    },
    {
      "epoch": 10.81,
      "learning_rate": 4.888274038461539e-05,
      "loss": 0.7434,
      "step": 56500
    },
    {
      "epoch": 10.91,
      "learning_rate": 4.887074519230769e-05,
      "loss": 0.7407,
      "step": 57000
    },
    {
      "epoch": 11.0,
      "learning_rate": 4.885872596153847e-05,
      "loss": 0.7381,
      "step": 57500
    },
    {
      "epoch": 11.1,
      "learning_rate": 4.884670673076923e-05,
      "loss": 0.7344,
      "step": 58000
    },
    {
      "epoch": 11.2,
      "learning_rate": 4.8834687500000004e-05,
      "loss": 0.7282,
      "step": 58500
    },
    {
      "epoch": 11.29,
      "learning_rate": 4.882269230769231e-05,
      "loss": 0.7272,
      "step": 59000
    },
    {
      "epoch": 11.39,
      "learning_rate": 4.881067307692308e-05,
      "loss": 0.7236,
      "step": 59500
    },
    {
      "epoch": 11.48,
      "learning_rate": 4.8798653846153845e-05,
      "loss": 0.7196,
      "step": 60000
    },
    {
      "epoch": 11.58,
      "learning_rate": 4.878663461538462e-05,
      "loss": 0.7164,
      "step": 60500
    },
    {
      "epoch": 11.67,
      "learning_rate": 4.877461538461539e-05,
      "loss": 0.7129,
      "step": 61000
    },
    {
      "epoch": 11.77,
      "learning_rate": 4.87626201923077e-05,
      "loss": 0.71,
      "step": 61500
    },
    {
      "epoch": 11.87,
      "learning_rate": 4.875060096153846e-05,
      "loss": 0.7088,
      "step": 62000
    },
    {
      "epoch": 11.96,
      "learning_rate": 4.8738581730769235e-05,
      "loss": 0.7057,
      "step": 62500
    },
    {
      "epoch": 12.06,
      "learning_rate": 4.87265625e-05,
      "loss": 0.7022,
      "step": 63000
    },
    {
      "epoch": 12.15,
      "learning_rate": 4.8714567307692313e-05,
      "loss": 0.6977,
      "step": 63500
    },
    {
      "epoch": 12.25,
      "learning_rate": 4.8702548076923075e-05,
      "loss": 0.6988,
      "step": 64000
    },
    {
      "epoch": 12.34,
      "learning_rate": 4.869052884615385e-05,
      "loss": 0.6943,
      "step": 64500
    },
    {
      "epoch": 12.44,
      "learning_rate": 4.867850961538462e-05,
      "loss": 0.6919,
      "step": 65000
    },
    {
      "epoch": 12.54,
      "learning_rate": 4.8666490384615386e-05,
      "loss": 0.6888,
      "step": 65500
    },
    {
      "epoch": 12.63,
      "learning_rate": 4.865449519230769e-05,
      "loss": 0.686,
      "step": 66000
    },
    {
      "epoch": 12.73,
      "learning_rate": 4.8642475961538465e-05,
      "loss": 0.6843,
      "step": 66500
    },
    {
      "epoch": 12.82,
      "learning_rate": 4.863045673076923e-05,
      "loss": 0.681,
      "step": 67000
    },
    {
      "epoch": 12.92,
      "learning_rate": 4.86184375e-05,
      "loss": 0.68,
      "step": 67500
    },
    {
      "epoch": 13.01,
      "learning_rate": 4.860644230769231e-05,
      "loss": 0.6775,
      "step": 68000
    },
    {
      "epoch": 13.11,
      "learning_rate": 4.859442307692308e-05,
      "loss": 0.6745,
      "step": 68500
    },
    {
      "epoch": 13.21,
      "learning_rate": 4.858240384615385e-05,
      "loss": 0.6726,
      "step": 69000
    },
    {
      "epoch": 13.3,
      "learning_rate": 4.8570384615384616e-05,
      "loss": 0.6716,
      "step": 69500
    },
    {
      "epoch": 13.4,
      "learning_rate": 4.855836538461539e-05,
      "loss": 0.6691,
      "step": 70000
    },
    {
      "epoch": 13.49,
      "learning_rate": 4.8546370192307695e-05,
      "loss": 0.6665,
      "step": 70500
    },
    {
      "epoch": 13.59,
      "learning_rate": 4.853435096153846e-05,
      "loss": 0.6625,
      "step": 71000
    },
    {
      "epoch": 13.68,
      "learning_rate": 4.852233173076923e-05,
      "loss": 0.6609,
      "step": 71500
    },
    {
      "epoch": 13.78,
      "learning_rate": 4.8510312500000006e-05,
      "loss": 0.66,
      "step": 72000
    },
    {
      "epoch": 13.88,
      "learning_rate": 4.8498293269230774e-05,
      "loss": 0.6566,
      "step": 72500
    },
    {
      "epoch": 13.97,
      "learning_rate": 4.848627403846154e-05,
      "loss": 0.6561,
      "step": 73000
    },
    {
      "epoch": 14.07,
      "learning_rate": 4.8474278846153847e-05,
      "loss": 0.6536,
      "step": 73500
    },
    {
      "epoch": 14.16,
      "learning_rate": 4.846225961538462e-05,
      "loss": 0.6509,
      "step": 74000
    },
    {
      "epoch": 14.26,
      "learning_rate": 4.845024038461539e-05,
      "loss": 0.6509,
      "step": 74500
    },
    {
      "epoch": 14.35,
      "learning_rate": 4.843822115384616e-05,
      "loss": 0.6471,
      "step": 75000
    },
    {
      "epoch": 14.45,
      "learning_rate": 4.8426201923076926e-05,
      "loss": 0.6459,
      "step": 75500
    },
    {
      "epoch": 14.55,
      "learning_rate": 4.8414206730769237e-05,
      "loss": 0.6452,
      "step": 76000
    },
    {
      "epoch": 14.64,
      "learning_rate": 4.840221153846154e-05,
      "loss": 0.6425,
      "step": 76500
    },
    {
      "epoch": 14.74,
      "learning_rate": 4.839019230769231e-05,
      "loss": 0.6406,
      "step": 77000
    },
    {
      "epoch": 14.83,
      "learning_rate": 4.837817307692308e-05,
      "loss": 0.6393,
      "step": 77500
    },
    {
      "epoch": 14.93,
      "learning_rate": 4.836615384615385e-05,
      "loss": 0.6362,
      "step": 78000
    },
    {
      "epoch": 15.02,
      "learning_rate": 4.835413461538461e-05,
      "loss": 0.6368,
      "step": 78500
    },
    {
      "epoch": 15.12,
      "learning_rate": 4.834211538461539e-05,
      "loss": 0.634,
      "step": 79000
    },
    {
      "epoch": 15.22,
      "learning_rate": 4.8330096153846156e-05,
      "loss": 0.6324,
      "step": 79500
    },
    {
      "epoch": 15.31,
      "learning_rate": 4.831810096153847e-05,
      "loss": 0.6287,
      "step": 80000
    },
    {
      "epoch": 15.41,
      "learning_rate": 4.830608173076923e-05,
      "loss": 0.6286,
      "step": 80500
    },
    {
      "epoch": 15.5,
      "learning_rate": 4.82940625e-05,
      "loss": 0.6267,
      "step": 81000
    },
    {
      "epoch": 15.6,
      "learning_rate": 4.828204326923077e-05,
      "loss": 0.6257,
      "step": 81500
    },
    {
      "epoch": 15.69,
      "learning_rate": 4.827002403846154e-05,
      "loss": 0.6245,
      "step": 82000
    },
    {
      "epoch": 15.79,
      "learning_rate": 4.825800480769231e-05,
      "loss": 0.6216,
      "step": 82500
    },
    {
      "epoch": 15.89,
      "learning_rate": 4.824598557692308e-05,
      "loss": 0.6208,
      "step": 83000
    },
    {
      "epoch": 15.98,
      "learning_rate": 4.8233966346153844e-05,
      "loss": 0.621,
      "step": 83500
    },
    {
      "epoch": 16.08,
      "learning_rate": 4.822194711538462e-05,
      "loss": 0.617,
      "step": 84000
    },
    {
      "epoch": 16.17,
      "learning_rate": 4.820995192307692e-05,
      "loss": 0.6158,
      "step": 84500
    },
    {
      "epoch": 16.27,
      "learning_rate": 4.819795673076923e-05,
      "loss": 0.617,
      "step": 85000
    },
    {
      "epoch": 16.36,
      "learning_rate": 4.81859375e-05,
      "loss": 0.6149,
      "step": 85500
    },
    {
      "epoch": 16.46,
      "learning_rate": 4.817391826923077e-05,
      "loss": 0.6128,
      "step": 86000
    },
    {
      "epoch": 16.56,
      "learning_rate": 4.816189903846154e-05,
      "loss": 0.6119,
      "step": 86500
    },
    {
      "epoch": 16.65,
      "learning_rate": 4.814987980769231e-05,
      "loss": 0.6104,
      "step": 87000
    },
    {
      "epoch": 16.75,
      "learning_rate": 4.8137860576923074e-05,
      "loss": 0.6082,
      "step": 87500
    },
    {
      "epoch": 16.84,
      "learning_rate": 4.812584134615385e-05,
      "loss": 0.6077,
      "step": 88000
    },
    {
      "epoch": 16.94,
      "learning_rate": 4.811382211538462e-05,
      "loss": 0.6066,
      "step": 88500
    },
    {
      "epoch": 17.03,
      "learning_rate": 4.810182692307693e-05,
      "loss": 0.6051,
      "step": 89000
    },
    {
      "epoch": 17.13,
      "learning_rate": 4.808980769230769e-05,
      "loss": 0.6035,
      "step": 89500
    },
    {
      "epoch": 17.22,
      "learning_rate": 4.8077788461538464e-05,
      "loss": 0.6032,
      "step": 90000
    },
    {
      "epoch": 17.32,
      "learning_rate": 4.806576923076923e-05,
      "loss": 0.6001,
      "step": 90500
    },
    {
      "epoch": 17.42,
      "learning_rate": 4.805375e-05,
      "loss": 0.598,
      "step": 91000
    },
    {
      "epoch": 17.51,
      "learning_rate": 4.8041754807692304e-05,
      "loss": 0.5996,
      "step": 91500
    },
    {
      "epoch": 17.61,
      "learning_rate": 4.802973557692308e-05,
      "loss": 0.5987,
      "step": 92000
    },
    {
      "epoch": 17.7,
      "learning_rate": 4.801771634615385e-05,
      "loss": 0.5973,
      "step": 92500
    },
    {
      "epoch": 17.8,
      "learning_rate": 4.800572115384616e-05,
      "loss": 0.5957,
      "step": 93000
    },
    {
      "epoch": 17.89,
      "learning_rate": 4.7993701923076926e-05,
      "loss": 0.5938,
      "step": 93500
    },
    {
      "epoch": 17.99,
      "learning_rate": 4.7981682692307694e-05,
      "loss": 0.593,
      "step": 94000
    }
  ],
  "max_steps": 2090000,
  "num_train_epochs": 400,
  "total_flos": 2.5346531711380357e+19,
  "trial_name": null,
  "trial_params": null
}