{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.2270624795289868,
  "global_step": 2400,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.03,
      "learning_rate": 8.47457627118644e-06,
      "loss": 1.695,
      "step": 50
    },
    {
      "epoch": 0.05,
      "learning_rate": 1.694915254237288e-05,
      "loss": 1.6613,
      "step": 100
    },
    {
      "epoch": 0.08,
      "learning_rate": 1.9996485952627554e-05,
      "loss": 1.5992,
      "step": 150
    },
    {
      "epoch": 0.1,
      "learning_rate": 1.99769328594951e-05,
      "loss": 1.5037,
      "step": 200
    },
    {
      "epoch": 0.13,
      "learning_rate": 1.9940262356746553e-05,
      "loss": 1.4537,
      "step": 250
    },
    {
      "epoch": 0.15,
      "learning_rate": 1.9886537359911694e-05,
      "loss": 1.3935,
      "step": 300
    },
    {
      "epoch": 0.18,
      "learning_rate": 1.981585004489171e-05,
      "loss": 1.4331,
      "step": 350
    },
    {
      "epoch": 0.2,
      "learning_rate": 1.9728321689813142e-05,
      "loss": 1.3732,
      "step": 400
    },
    {
      "epoch": 0.23,
      "learning_rate": 1.962410246695118e-05,
      "loss": 1.3806,
      "step": 450
    },
    {
      "epoch": 0.26,
      "learning_rate": 1.9503371185079295e-05,
      "loss": 1.3911,
      "step": 500
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.936633498268728e-05,
      "loss": 1.3657,
      "step": 550
    },
    {
      "epoch": 0.31,
      "learning_rate": 1.9213228972594032e-05,
      "loss": 1.3487,
      "step": 600
    },
    {
      "epoch": 0.33,
      "learning_rate": 1.9044315838564835e-05,
      "loss": 1.3528,
      "step": 650
    },
    {
      "epoch": 0.36,
      "learning_rate": 1.885988538462517e-05,
      "loss": 1.366,
      "step": 700
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.866025403784439e-05,
      "loss": 1.348,
      "step": 750
    },
    {
      "epoch": 0.41,
      "learning_rate": 1.8445764305442205e-05,
      "loss": 1.334,
      "step": 800
    },
    {
      "epoch": 0.43,
      "learning_rate": 1.821678418714957e-05,
      "loss": 1.3501,
      "step": 850
    },
    {
      "epoch": 0.46,
      "learning_rate": 1.797370654383204e-05,
      "loss": 1.3119,
      "step": 900
    },
    {
      "epoch": 0.49,
      "learning_rate": 1.771694842345894e-05,
      "loss": 1.3515,
      "step": 950
    },
    {
      "epoch": 0.51,
      "learning_rate": 1.7446950345574762e-05,
      "loss": 1.3176,
      "step": 1000
    },
    {
      "epoch": 0.54,
      "learning_rate": 1.71641755455004e-05,
      "loss": 1.3265,
      "step": 1050
    },
    {
      "epoch": 0.56,
      "learning_rate": 1.686910917956096e-05,
      "loss": 1.3298,
      "step": 1100
    },
    {
      "epoch": 0.59,
      "learning_rate": 1.6562257492703756e-05,
      "loss": 1.3303,
      "step": 1150
    },
    {
      "epoch": 0.61,
      "learning_rate": 1.624414694993454e-05,
      "loss": 1.3136,
      "step": 1200
    },
    {
      "epoch": 0.64,
      "learning_rate": 1.5915323333062255e-05,
      "loss": 1.282,
      "step": 1250
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.557635080430196e-05,
      "loss": 1.3079,
      "step": 1300
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.5227810938342493e-05,
      "loss": 1.3088,
      "step": 1350
    },
    {
      "epoch": 0.72,
      "learning_rate": 1.4870301724539627e-05,
      "loss": 1.3084,
      "step": 1400
    },
    {
      "epoch": 0.74,
      "learning_rate": 1.4504436540946548e-05,
      "loss": 1.3018,
      "step": 1450
    },
    {
      "epoch": 0.77,
      "learning_rate": 1.4130843101942017e-05,
      "loss": 1.2903,
      "step": 1500
    },
    {
      "epoch": 0.79,
      "learning_rate": 1.3750162381261693e-05,
      "loss": 1.3041,
      "step": 1550
    },
    {
      "epoch": 0.82,
      "learning_rate": 1.3363047512280391e-05,
      "loss": 1.2849,
      "step": 1600
    },
    {
      "epoch": 0.84,
      "learning_rate": 1.2970162667432075e-05,
      "loss": 1.2975,
      "step": 1650
    },
    {
      "epoch": 0.87,
      "learning_rate": 1.2572181918690162e-05,
      "loss": 1.2865,
      "step": 1700
    },
    {
      "epoch": 0.89,
      "learning_rate": 1.2169788081063181e-05,
      "loss": 1.3047,
      "step": 1750
    },
    {
      "epoch": 0.92,
      "learning_rate": 1.1763671541090027e-05,
      "loss": 1.3033,
      "step": 1800
    },
    {
      "epoch": 0.95,
      "learning_rate": 1.1354529072344749e-05,
      "loss": 1.2714,
      "step": 1850
    },
    {
      "epoch": 0.97,
      "learning_rate": 1.0943062639983119e-05,
      "loss": 1.3111,
      "step": 1900
    },
    {
      "epoch": 1.0,
      "learning_rate": 1.0529978196382011e-05,
      "loss": 1.2791,
      "step": 1950
    },
    {
      "epoch": 1.02,
      "learning_rate": 1.0115984469937883e-05,
      "loss": 1.2748,
      "step": 2000
    },
    {
      "epoch": 1.05,
      "learning_rate": 9.701791749102496e-06,
      "loss": 1.2642,
      "step": 2050
    },
    {
      "epoch": 1.07,
      "learning_rate": 9.288110663742001e-06,
      "loss": 1.2482,
      "step": 2100
    },
    {
      "epoch": 1.1,
      "learning_rate": 8.87565096591028e-06,
      "loss": 1.2742,
      "step": 2150
    },
    {
      "epoch": 1.12,
      "learning_rate": 8.465120312128371e-06,
      "loss": 1.2865,
      "step": 2200
    },
    {
      "epoch": 1.15,
      "learning_rate": 8.057223049259155e-06,
      "loss": 1.2748,
      "step": 2250
    },
    {
      "epoch": 1.18,
      "learning_rate": 7.652659006060436e-06,
      "loss": 1.3144,
      "step": 2300
    },
    {
      "epoch": 1.2,
      "learning_rate": 7.252122292489747e-06,
      "loss": 1.2646,
      "step": 2350
    },
    {
      "epoch": 1.23,
      "learning_rate": 6.856300108820865e-06,
      "loss": 1.3096,
      "step": 2400
    }
  ],
  "max_steps": 3910,
  "num_train_epochs": 2,
  "total_flos": 1604270627487744.0,
  "trial_name": null,
  "trial_params": null
}