{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.960769520935496,
  "global_step": 21000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.09,
      "learning_rate": 1.952847981893625e-05,
      "loss": 1.3455,
      "step": 500
    },
    {
      "epoch": 0.19,
      "learning_rate": 1.90569596378725e-05,
      "loss": 0.6266,
      "step": 1000
    },
    {
      "epoch": 0.28,
      "learning_rate": 1.8585439456808755e-05,
      "loss": 0.6164,
      "step": 1500
    },
    {
      "epoch": 0.38,
      "learning_rate": 1.8113919275745004e-05,
      "loss": 0.6065,
      "step": 2000
    },
    {
      "epoch": 0.47,
      "learning_rate": 1.7642399094681254e-05,
      "loss": 0.6048,
      "step": 2500
    },
    {
      "epoch": 0.57,
      "learning_rate": 1.7170878913617504e-05,
      "loss": 0.602,
      "step": 3000
    },
    {
      "epoch": 0.66,
      "learning_rate": 1.6699358732553754e-05,
      "loss": 0.5918,
      "step": 3500
    },
    {
      "epoch": 0.75,
      "learning_rate": 1.6227838551490004e-05,
      "loss": 0.5913,
      "step": 4000
    },
    {
      "epoch": 0.85,
      "learning_rate": 1.5756318370426257e-05,
      "loss": 0.5869,
      "step": 4500
    },
    {
      "epoch": 0.94,
      "learning_rate": 1.5284798189362507e-05,
      "loss": 0.5885,
      "step": 5000
    },
    {
      "epoch": 1.04,
      "learning_rate": 1.4813278008298757e-05,
      "loss": 0.5795,
      "step": 5500
    },
    {
      "epoch": 1.13,
      "learning_rate": 1.4341757827235009e-05,
      "loss": 0.5732,
      "step": 6000
    },
    {
      "epoch": 1.23,
      "learning_rate": 1.3870237646171257e-05,
      "loss": 0.5744,
      "step": 6500
    },
    {
      "epoch": 1.32,
      "learning_rate": 1.3398717465107507e-05,
      "loss": 0.5726,
      "step": 7000
    },
    {
      "epoch": 1.41,
      "learning_rate": 1.2927197284043758e-05,
      "loss": 0.5695,
      "step": 7500
    },
    {
      "epoch": 1.51,
      "learning_rate": 1.2455677102980008e-05,
      "loss": 0.5672,
      "step": 8000
    },
    {
      "epoch": 1.6,
      "learning_rate": 1.198415692191626e-05,
      "loss": 0.5706,
      "step": 8500
    },
    {
      "epoch": 1.7,
      "learning_rate": 1.151263674085251e-05,
      "loss": 0.5673,
      "step": 9000
    },
    {
      "epoch": 1.79,
      "learning_rate": 1.1041116559788761e-05,
      "loss": 0.5702,
      "step": 9500
    },
    {
      "epoch": 1.89,
      "learning_rate": 1.056959637872501e-05,
      "loss": 0.5596,
      "step": 10000
    },
    {
      "epoch": 1.98,
      "learning_rate": 1.0098076197661262e-05,
      "loss": 0.5589,
      "step": 10500
    },
    {
      "epoch": 2.07,
      "learning_rate": 9.62655601659751e-06,
      "loss": 0.5565,
      "step": 11000
    },
    {
      "epoch": 2.17,
      "learning_rate": 9.155035835533762e-06,
      "loss": 0.5544,
      "step": 11500
    },
    {
      "epoch": 2.26,
      "learning_rate": 8.683515654470012e-06,
      "loss": 0.5525,
      "step": 12000
    },
    {
      "epoch": 2.36,
      "learning_rate": 8.211995473406262e-06,
      "loss": 0.5543,
      "step": 12500
    },
    {
      "epoch": 2.45,
      "learning_rate": 7.740475292342513e-06,
      "loss": 0.5525,
      "step": 13000
    },
    {
      "epoch": 2.55,
      "learning_rate": 7.268955111278764e-06,
      "loss": 0.5532,
      "step": 13500
    },
    {
      "epoch": 2.64,
      "learning_rate": 6.797434930215013e-06,
      "loss": 0.5512,
      "step": 14000
    },
    {
      "epoch": 2.73,
      "learning_rate": 6.325914749151264e-06,
      "loss": 0.5517,
      "step": 14500
    },
    {
      "epoch": 2.83,
      "learning_rate": 5.854394568087515e-06,
      "loss": 0.5506,
      "step": 15000
    },
    {
      "epoch": 2.92,
      "learning_rate": 5.382874387023765e-06,
      "loss": 0.5508,
      "step": 15500
    },
    {
      "epoch": 3.02,
      "learning_rate": 4.911354205960015e-06,
      "loss": 0.5508,
      "step": 16000
    },
    {
      "epoch": 3.11,
      "learning_rate": 4.439834024896266e-06,
      "loss": 0.5435,
      "step": 16500
    },
    {
      "epoch": 3.21,
      "learning_rate": 3.968313843832517e-06,
      "loss": 0.5448,
      "step": 17000
    },
    {
      "epoch": 3.3,
      "learning_rate": 3.4967936627687665e-06,
      "loss": 0.5431,
      "step": 17500
    },
    {
      "epoch": 3.39,
      "learning_rate": 3.025273481705017e-06,
      "loss": 0.5436,
      "step": 18000
    },
    {
      "epoch": 3.49,
      "learning_rate": 2.553753300641268e-06,
      "loss": 0.5407,
      "step": 18500
    },
    {
      "epoch": 3.58,
      "learning_rate": 2.082233119577518e-06,
      "loss": 0.5449,
      "step": 19000
    },
    {
      "epoch": 3.68,
      "learning_rate": 1.6107129385137685e-06,
      "loss": 0.5436,
      "step": 19500
    },
    {
      "epoch": 3.77,
      "learning_rate": 1.139192757450019e-06,
      "loss": 0.5468,
      "step": 20000
    },
    {
      "epoch": 3.87,
      "learning_rate": 6.676725763862695e-07,
      "loss": 0.5423,
      "step": 20500
    },
    {
      "epoch": 3.96,
      "learning_rate": 1.9615239532251984e-07,
      "loss": 0.5506,
      "step": 21000
    }
  ],
  "max_steps": 21208,
  "num_train_epochs": 4,
  "total_flos": 3.75634128609792e+16,
  "trial_name": null,
  "trial_params": null
}