{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.2270624795289868, "global_step": 2400, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 8.47457627118644e-06, "loss": 1.695, "step": 50 }, { "epoch": 0.05, "learning_rate": 1.694915254237288e-05, "loss": 1.6613, "step": 100 }, { "epoch": 0.08, "learning_rate": 1.9996485952627554e-05, "loss": 1.5992, "step": 150 }, { "epoch": 0.1, "learning_rate": 1.99769328594951e-05, "loss": 1.5037, "step": 200 }, { "epoch": 0.13, "learning_rate": 1.9940262356746553e-05, "loss": 1.4537, "step": 250 }, { "epoch": 0.15, "learning_rate": 1.9886537359911694e-05, "loss": 1.3935, "step": 300 }, { "epoch": 0.18, "learning_rate": 1.981585004489171e-05, "loss": 1.4331, "step": 350 }, { "epoch": 0.2, "learning_rate": 1.9728321689813142e-05, "loss": 1.3732, "step": 400 }, { "epoch": 0.23, "learning_rate": 1.962410246695118e-05, "loss": 1.3806, "step": 450 }, { "epoch": 0.26, "learning_rate": 1.9503371185079295e-05, "loss": 1.3911, "step": 500 }, { "epoch": 0.28, "learning_rate": 1.936633498268728e-05, "loss": 1.3657, "step": 550 }, { "epoch": 0.31, "learning_rate": 1.9213228972594032e-05, "loss": 1.3487, "step": 600 }, { "epoch": 0.33, "learning_rate": 1.9044315838564835e-05, "loss": 1.3528, "step": 650 }, { "epoch": 0.36, "learning_rate": 1.885988538462517e-05, "loss": 1.366, "step": 700 }, { "epoch": 0.38, "learning_rate": 1.866025403784439e-05, "loss": 1.348, "step": 750 }, { "epoch": 0.41, "learning_rate": 1.8445764305442205e-05, "loss": 1.334, "step": 800 }, { "epoch": 0.43, "learning_rate": 1.821678418714957e-05, "loss": 1.3501, "step": 850 }, { "epoch": 0.46, "learning_rate": 1.797370654383204e-05, "loss": 1.3119, "step": 900 }, { "epoch": 0.49, "learning_rate": 1.771694842345894e-05, "loss": 1.3515, "step": 950 }, { "epoch": 0.51, "learning_rate": 1.7446950345574762e-05, "loss": 1.3176, "step": 1000 }, { "epoch": 0.54, "learning_rate": 1.71641755455004e-05, "loss": 1.3265, "step": 1050 }, { "epoch": 0.56, "learning_rate": 1.686910917956096e-05, "loss": 1.3298, "step": 1100 }, { "epoch": 0.59, "learning_rate": 1.6562257492703756e-05, "loss": 1.3303, "step": 1150 }, { "epoch": 0.61, "learning_rate": 1.624414694993454e-05, "loss": 1.3136, "step": 1200 }, { "epoch": 0.64, "learning_rate": 1.5915323333062255e-05, "loss": 1.282, "step": 1250 }, { "epoch": 0.66, "learning_rate": 1.557635080430196e-05, "loss": 1.3079, "step": 1300 }, { "epoch": 0.69, "learning_rate": 1.5227810938342493e-05, "loss": 1.3088, "step": 1350 }, { "epoch": 0.72, "learning_rate": 1.4870301724539627e-05, "loss": 1.3084, "step": 1400 }, { "epoch": 0.74, "learning_rate": 1.4504436540946548e-05, "loss": 1.3018, "step": 1450 }, { "epoch": 0.77, "learning_rate": 1.4130843101942017e-05, "loss": 1.2903, "step": 1500 }, { "epoch": 0.79, "learning_rate": 1.3750162381261693e-05, "loss": 1.3041, "step": 1550 }, { "epoch": 0.82, "learning_rate": 1.3363047512280391e-05, "loss": 1.2849, "step": 1600 }, { "epoch": 0.84, "learning_rate": 1.2970162667432075e-05, "loss": 1.2975, "step": 1650 }, { "epoch": 0.87, "learning_rate": 1.2572181918690162e-05, "loss": 1.2865, "step": 1700 }, { "epoch": 0.89, "learning_rate": 1.2169788081063181e-05, "loss": 1.3047, "step": 1750 }, { "epoch": 0.92, "learning_rate": 1.1763671541090027e-05, "loss": 1.3033, "step": 1800 }, { "epoch": 0.95, "learning_rate": 1.1354529072344749e-05, "loss": 1.2714, "step": 1850 }, { "epoch": 0.97, "learning_rate": 1.0943062639983119e-05, "loss": 1.3111, "step": 1900 }, { "epoch": 1.0, "learning_rate": 1.0529978196382011e-05, "loss": 1.2791, "step": 1950 }, { "epoch": 1.02, "learning_rate": 1.0115984469937883e-05, "loss": 1.2748, "step": 2000 }, { "epoch": 1.05, "learning_rate": 9.701791749102496e-06, "loss": 1.2642, "step": 2050 }, { "epoch": 1.07, "learning_rate": 9.288110663742001e-06, "loss": 1.2482, "step": 2100 }, { "epoch": 1.1, "learning_rate": 8.87565096591028e-06, "loss": 1.2742, "step": 2150 }, { "epoch": 1.12, "learning_rate": 8.465120312128371e-06, "loss": 1.2865, "step": 2200 }, { "epoch": 1.15, "learning_rate": 8.057223049259155e-06, "loss": 1.2748, "step": 2250 }, { "epoch": 1.18, "learning_rate": 7.652659006060436e-06, "loss": 1.3144, "step": 2300 }, { "epoch": 1.2, "learning_rate": 7.252122292489747e-06, "loss": 1.2646, "step": 2350 }, { "epoch": 1.23, "learning_rate": 6.856300108820865e-06, "loss": 1.3096, "step": 2400 } ], "max_steps": 3910, "num_train_epochs": 2, "total_flos": 1604270627487744.0, "trial_name": null, "trial_params": null }