{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 10146, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14784151389710232, "grad_norm": 3.7569432258605957, "learning_rate": 1.9016361127537947e-05, "loss": 0.156098388671875, "step": 500 }, { "epoch": 0.29568302779420463, "grad_norm": 0.009413833729922771, "learning_rate": 1.8030751034890598e-05, "loss": 0.09492064666748047, "step": 1000 }, { "epoch": 0.4435245416913069, "grad_norm": 0.07881546020507812, "learning_rate": 1.704514094224325e-05, "loss": 0.07839873504638672, "step": 1500 }, { "epoch": 0.5913660555884093, "grad_norm": 0.015427447855472565, "learning_rate": 1.6059530849595903e-05, "loss": 0.07814859008789063, "step": 2000 }, { "epoch": 0.7392075694855116, "grad_norm": 14.57061767578125, "learning_rate": 1.5073920756948552e-05, "loss": 0.08136223602294922, "step": 2500 }, { "epoch": 0.8870490833826138, "grad_norm": 1.0001031160354614, "learning_rate": 1.4088310664301204e-05, "loss": 0.07243869018554687, "step": 3000 }, { "epoch": 1.0348905972797162, "grad_norm": 30.364643096923828, "learning_rate": 1.3102700571653855e-05, "loss": 0.04469930648803711, "step": 3500 }, { "epoch": 1.1827321111768185, "grad_norm": 0.0182269848883152, "learning_rate": 1.2117090479006506e-05, "loss": 0.03440779495239258, "step": 4000 }, { "epoch": 1.3305736250739209, "grad_norm": 0.0005539056146517396, "learning_rate": 1.1131480386359156e-05, "loss": 0.0230482120513916, "step": 4500 }, { "epoch": 1.4784151389710232, "grad_norm": 0.739719033241272, "learning_rate": 1.0145870293711809e-05, "loss": 0.028042703628540038, "step": 5000 }, { "epoch": 1.6262566528681255, "grad_norm": 0.0063209934160113335, "learning_rate": 9.16026020106446e-06, "loss": 0.02548642349243164, "step": 5500 }, { "epoch": 1.7740981667652278, "grad_norm": 0.0015545282512903214, "learning_rate": 8.17465010841711e-06, "loss": 0.023311178207397462, "step": 6000 }, { "epoch": 1.9219396806623301, "grad_norm": 0.0024629898834973574, "learning_rate": 7.189040015769762e-06, "loss": 0.019577335357666016, "step": 6500 }, { "epoch": 2.0697811945594324, "grad_norm": 0.0016239744145423174, "learning_rate": 6.203429923122414e-06, "loss": 0.016429786682128907, "step": 7000 }, { "epoch": 2.2176227084565348, "grad_norm": 33.945587158203125, "learning_rate": 5.217819830475065e-06, "loss": 0.00910054111480713, "step": 7500 }, { "epoch": 2.365464222353637, "grad_norm": 0.0004062611551489681, "learning_rate": 4.2322097378277155e-06, "loss": 0.008913342475891112, "step": 8000 }, { "epoch": 2.5133057362507394, "grad_norm": 0.00030476730898953974, "learning_rate": 3.246599645180367e-06, "loss": 0.006344354152679444, "step": 8500 }, { "epoch": 2.6611472501478417, "grad_norm": 0.0006314264028333127, "learning_rate": 2.260989552533018e-06, "loss": 0.0064224090576171875, "step": 9000 }, { "epoch": 2.808988764044944, "grad_norm": 0.0005876660579815507, "learning_rate": 1.2753794598856695e-06, "loss": 0.007262358665466309, "step": 9500 }, { "epoch": 2.9568302779420463, "grad_norm": 0.0005308115505613387, "learning_rate": 2.897693672383205e-07, "loss": 0.004996685981750488, "step": 10000 }, { "epoch": 3.0, "step": 10146, "total_flos": 9.454816453507891e+16, "train_loss": 0.040564874114779687, "train_runtime": 20432.9457, "train_samples_per_second": 4.965, "train_steps_per_second": 0.497 } ], "logging_steps": 500, "max_steps": 10146, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9.454816453507891e+16, "train_batch_size": 10, "trial_name": null, "trial_params": null }