{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.0, "global_step": 31725, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.8e-05, "loss": 0.0026, "step": 3525 }, { "epoch": 1.0, "eval_EG_f1": 0.9988826815642459, "eval_ET_f1": 0.9997551420176297, "eval_TE_f1": 0.9980463980463979, "eval_loss": 0.000998740317299962, "eval_overall_accuracy": 0.9997666459240822, "eval_overall_f1": 0.9988922736084187, "eval_overall_precision": 0.9988231221876082, "eval_overall_recall": 0.998961434604999, "eval_runtime": 142.8836, "eval_samples_per_second": 43.854, "eval_steps_per_second": 2.743, "step": 3525 }, { "epoch": 2.0, "learning_rate": 1.6000000000000003e-05, "loss": 0.0012, "step": 7050 }, { "epoch": 2.0, "eval_EG_f1": 0.998084902649218, "eval_ET_f1": 0.9993876301285977, "eval_TE_f1": 0.9978038067349927, "eval_loss": 0.001965524861589074, "eval_overall_accuracy": 0.9996499688861232, "eval_overall_f1": 0.9983733084138026, "eval_overall_precision": 0.9981314878892733, "eval_overall_recall": 0.9986152461399986, "eval_runtime": 140.6221, "eval_samples_per_second": 44.559, "eval_steps_per_second": 2.788, "step": 7050 }, { "epoch": 3.0, "learning_rate": 1.4e-05, "loss": 0.0007, "step": 10575 }, { "epoch": 3.0, "eval_EG_f1": 0.9988031596585016, "eval_ET_f1": 0.998775710088149, "eval_TE_f1": 0.9981682745145928, "eval_loss": 0.0016128044808283448, "eval_overall_accuracy": 0.9997666459240822, "eval_overall_f1": 0.9986154378677742, "eval_overall_precision": 0.9984771924967121, "eval_overall_recall": 0.9987537215259987, "eval_runtime": 141.5414, "eval_samples_per_second": 44.27, "eval_steps_per_second": 2.77, "step": 10575 }, { "epoch": 4.0, "learning_rate": 1.2e-05, "loss": 0.0008, "step": 14100 }, { "epoch": 4.0, "eval_EG_f1": 0.9992020427705075, "eval_ET_f1": 0.9997550820475141, "eval_TE_f1": 0.9992673992673993, "eval_loss": 0.0006879049469716847, "eval_overall_accuracy": 0.9999092511926986, "eval_overall_f1": 0.9993769039047354, "eval_overall_precision": 0.9993077189338871, "eval_overall_recall": 0.9994460984559994, "eval_runtime": 141.9363, "eval_samples_per_second": 44.147, "eval_steps_per_second": 2.762, "step": 14100 }, { "epoch": 5.0, "learning_rate": 1e-05, "loss": 0.0004, "step": 17625 }, { "epoch": 5.0, "eval_EG_f1": 0.9988828598787105, "eval_ET_f1": 0.9996326680543651, "eval_TE_f1": 0.9989012330606765, "eval_loss": 0.0006999427569098771, "eval_overall_accuracy": 0.9998055382700685, "eval_overall_f1": 0.9991000346140533, "eval_overall_precision": 0.9989617221568492, "eval_overall_recall": 0.9992383853769993, "eval_runtime": 233.5972, "eval_samples_per_second": 26.824, "eval_steps_per_second": 1.678, "step": 17625 }, { "epoch": 6.0, "learning_rate": 8.000000000000001e-06, "loss": 0.0004, "step": 21150 }, { "epoch": 6.0, "eval_EG_f1": 0.999680817108203, "eval_ET_f1": 0.9995101640950281, "eval_TE_f1": 0.9996336549029186, "eval_loss": 0.0005218852893449366, "eval_overall_accuracy": 0.9998962870773699, "eval_overall_f1": 0.9996192058711532, "eval_overall_precision": 0.9995846026031571, "eval_overall_recall": 0.9996538115349997, "eval_runtime": 141.8762, "eval_samples_per_second": 44.165, "eval_steps_per_second": 2.763, "step": 21150 }, { "epoch": 7.0, "learning_rate": 6e-06, "loss": 0.0, "step": 24675 }, { "epoch": 7.0, "eval_EG_f1": 0.999361634216406, "eval_ET_f1": 0.9997550820475141, "eval_TE_f1": 0.9993892756809576, "eval_loss": 0.0003737392835319042, "eval_overall_accuracy": 0.9999351794233562, "eval_overall_f1": 0.9994806993249091, "eval_overall_precision": 0.999515302589669, "eval_overall_recall": 0.9994460984559994, "eval_runtime": 244.7883, "eval_samples_per_second": 25.598, "eval_steps_per_second": 1.601, "step": 24675 }, { "epoch": 8.0, "learning_rate": 4.000000000000001e-06, "loss": 0.0001, "step": 28200 }, { "epoch": 8.0, "eval_EG_f1": 0.9995212256623045, "eval_ET_f1": 0.9997550820475141, "eval_TE_f1": 0.9997557997557998, "eval_loss": 0.00023818030604161322, "eval_overall_accuracy": 0.9999481435386849, "eval_overall_f1": 0.9996538355026309, "eval_overall_precision": 0.9995846313603323, "eval_overall_recall": 0.9997230492279997, "eval_runtime": 365.8307, "eval_samples_per_second": 17.128, "eval_steps_per_second": 1.072, "step": 28200 }, { "epoch": 9.0, "learning_rate": 2.0000000000000003e-06, "loss": 0.0001, "step": 31725 }, { "epoch": 9.0, "eval_EG_f1": 0.9988828598787105, "eval_ET_f1": 0.9997550820475141, "eval_TE_f1": 0.9992677568952892, "eval_loss": 0.001098370412364602, "eval_overall_accuracy": 0.9997925741547397, "eval_overall_f1": 0.9992385435414648, "eval_overall_precision": 0.9990310748148661, "eval_overall_recall": 0.9994460984559994, "eval_runtime": 243.6202, "eval_samples_per_second": 25.72, "eval_steps_per_second": 1.609, "step": 31725 } ], "max_steps": 35250, "num_train_epochs": 10, "total_flos": 7174982911185900.0, "trial_name": null, "trial_params": null }