{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.960769520935496, "global_step": 21000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09, "learning_rate": 1.952847981893625e-05, "loss": 1.3455, "step": 500 }, { "epoch": 0.19, "learning_rate": 1.90569596378725e-05, "loss": 0.6266, "step": 1000 }, { "epoch": 0.28, "learning_rate": 1.8585439456808755e-05, "loss": 0.6164, "step": 1500 }, { "epoch": 0.38, "learning_rate": 1.8113919275745004e-05, "loss": 0.6065, "step": 2000 }, { "epoch": 0.47, "learning_rate": 1.7642399094681254e-05, "loss": 0.6048, "step": 2500 }, { "epoch": 0.57, "learning_rate": 1.7170878913617504e-05, "loss": 0.602, "step": 3000 }, { "epoch": 0.66, "learning_rate": 1.6699358732553754e-05, "loss": 0.5918, "step": 3500 }, { "epoch": 0.75, "learning_rate": 1.6227838551490004e-05, "loss": 0.5913, "step": 4000 }, { "epoch": 0.85, "learning_rate": 1.5756318370426257e-05, "loss": 0.5869, "step": 4500 }, { "epoch": 0.94, "learning_rate": 1.5284798189362507e-05, "loss": 0.5885, "step": 5000 }, { "epoch": 1.04, "learning_rate": 1.4813278008298757e-05, "loss": 0.5795, "step": 5500 }, { "epoch": 1.13, "learning_rate": 1.4341757827235009e-05, "loss": 0.5732, "step": 6000 }, { "epoch": 1.23, "learning_rate": 1.3870237646171257e-05, "loss": 0.5744, "step": 6500 }, { "epoch": 1.32, "learning_rate": 1.3398717465107507e-05, "loss": 0.5726, "step": 7000 }, { "epoch": 1.41, "learning_rate": 1.2927197284043758e-05, "loss": 0.5695, "step": 7500 }, { "epoch": 1.51, "learning_rate": 1.2455677102980008e-05, "loss": 0.5672, "step": 8000 }, { "epoch": 1.6, "learning_rate": 1.198415692191626e-05, "loss": 0.5706, "step": 8500 }, { "epoch": 1.7, "learning_rate": 1.151263674085251e-05, "loss": 0.5673, "step": 9000 }, { "epoch": 1.79, "learning_rate": 1.1041116559788761e-05, "loss": 0.5702, "step": 9500 }, { "epoch": 1.89, "learning_rate": 1.056959637872501e-05, "loss": 0.5596, "step": 10000 }, { "epoch": 1.98, "learning_rate": 1.0098076197661262e-05, "loss": 0.5589, "step": 10500 }, { "epoch": 2.07, "learning_rate": 9.62655601659751e-06, "loss": 0.5565, "step": 11000 }, { "epoch": 2.17, "learning_rate": 9.155035835533762e-06, "loss": 0.5544, "step": 11500 }, { "epoch": 2.26, "learning_rate": 8.683515654470012e-06, "loss": 0.5525, "step": 12000 }, { "epoch": 2.36, "learning_rate": 8.211995473406262e-06, "loss": 0.5543, "step": 12500 }, { "epoch": 2.45, "learning_rate": 7.740475292342513e-06, "loss": 0.5525, "step": 13000 }, { "epoch": 2.55, "learning_rate": 7.268955111278764e-06, "loss": 0.5532, "step": 13500 }, { "epoch": 2.64, "learning_rate": 6.797434930215013e-06, "loss": 0.5512, "step": 14000 }, { "epoch": 2.73, "learning_rate": 6.325914749151264e-06, "loss": 0.5517, "step": 14500 }, { "epoch": 2.83, "learning_rate": 5.854394568087515e-06, "loss": 0.5506, "step": 15000 }, { "epoch": 2.92, "learning_rate": 5.382874387023765e-06, "loss": 0.5508, "step": 15500 }, { "epoch": 3.02, "learning_rate": 4.911354205960015e-06, "loss": 0.5508, "step": 16000 }, { "epoch": 3.11, "learning_rate": 4.439834024896266e-06, "loss": 0.5435, "step": 16500 }, { "epoch": 3.21, "learning_rate": 3.968313843832517e-06, "loss": 0.5448, "step": 17000 }, { "epoch": 3.3, "learning_rate": 3.4967936627687665e-06, "loss": 0.5431, "step": 17500 }, { "epoch": 3.39, "learning_rate": 3.025273481705017e-06, "loss": 0.5436, "step": 18000 }, { "epoch": 3.49, "learning_rate": 2.553753300641268e-06, "loss": 0.5407, "step": 18500 }, { "epoch": 3.58, "learning_rate": 2.082233119577518e-06, "loss": 0.5449, "step": 19000 }, { "epoch": 3.68, "learning_rate": 1.6107129385137685e-06, "loss": 0.5436, "step": 19500 }, { "epoch": 3.77, "learning_rate": 1.139192757450019e-06, "loss": 0.5468, "step": 20000 }, { "epoch": 3.87, "learning_rate": 6.676725763862695e-07, "loss": 0.5423, "step": 20500 }, { "epoch": 3.96, "learning_rate": 1.9615239532251984e-07, "loss": 0.5506, "step": 21000 } ], "max_steps": 21208, "num_train_epochs": 4, "total_flos": 3.75634128609792e+16, "trial_name": null, "trial_params": null }