{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 7.0, "eval_steps": 500, "global_step": 5453, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.6418485237483954, "grad_norm": 9.85666561126709, "learning_rate": 1.8169814780854577e-05, "loss": 0.5294, "step": 500 }, { "epoch": 1.0, "eval_accuracy": 0.8421842184218422, "eval_f1": 0.7176287051482059, "eval_loss": 0.42669883370399475, "eval_precision": 0.6764705882352942, "eval_recall": 0.7641196013289037, "eval_runtime": 0.5577, "eval_samples_per_second": 421.34, "eval_steps_per_second": 53.788, "step": 779 }, { "epoch": 1.2836970474967908, "grad_norm": 14.279178619384766, "learning_rate": 1.633596185585916e-05, "loss": 0.3251, "step": 1000 }, { "epoch": 1.925545571245186, "grad_norm": 3.289396047592163, "learning_rate": 1.4502108930863745e-05, "loss": 0.2522, "step": 1500 }, { "epoch": 2.0, "eval_accuracy": 0.882988298829883, "eval_f1": 0.7681549220010758, "eval_loss": 0.34702780842781067, "eval_precision": 0.7468619246861925, "eval_recall": 0.7906976744186046, "eval_runtime": 0.4659, "eval_samples_per_second": 504.441, "eval_steps_per_second": 64.397, "step": 1558 }, { "epoch": 2.5673940949935816, "grad_norm": 2.985898733139038, "learning_rate": 1.266825600586833e-05, "loss": 0.1615, "step": 2000 }, { "epoch": 3.0, "eval_accuracy": 0.8852385238523852, "eval_f1": 0.7855183763027976, "eval_loss": 0.40044641494750977, "eval_precision": 0.7782608695652173, "eval_recall": 0.7929125138427464, "eval_runtime": 0.5454, "eval_samples_per_second": 430.843, "eval_steps_per_second": 55.001, "step": 2337 }, { "epoch": 3.209242618741977, "grad_norm": 3.132018804550171, "learning_rate": 1.0834403080872916e-05, "loss": 0.1346, "step": 2500 }, { "epoch": 3.851091142490372, "grad_norm": 23.86431121826172, "learning_rate": 9.0005501558775e-06, "loss": 0.093, "step": 3000 }, { "epoch": 4.0, "eval_accuracy": 0.8876387638763876, "eval_f1": 0.7800875273522976, "eval_loss": 0.4634242355823517, "eval_precision": 0.7708108108108108, "eval_recall": 0.7895902547065338, "eval_runtime": 0.5241, "eval_samples_per_second": 448.366, "eval_steps_per_second": 57.238, "step": 3116 }, { "epoch": 4.492939666238768, "grad_norm": 10.69092845916748, "learning_rate": 7.166697230882084e-06, "loss": 0.0695, "step": 3500 }, { "epoch": 5.0, "eval_accuracy": 0.8912391239123912, "eval_f1": 0.7937024972855592, "eval_loss": 0.512829601764679, "eval_precision": 0.7784877529286475, "eval_recall": 0.8095238095238095, "eval_runtime": 0.5735, "eval_samples_per_second": 409.733, "eval_steps_per_second": 52.306, "step": 3895 }, { "epoch": 5.134788189987163, "grad_norm": 0.8048956990242004, "learning_rate": 5.332844305886668e-06, "loss": 0.0601, "step": 4000 }, { "epoch": 5.7766367137355585, "grad_norm": 2.4991579055786133, "learning_rate": 3.498991380891253e-06, "loss": 0.0413, "step": 4500 }, { "epoch": 6.0, "eval_accuracy": 0.8915391539153915, "eval_f1": 0.7760702524698133, "eval_loss": 0.5404914021492004, "eval_precision": 0.7693144722524483, "eval_recall": 0.7829457364341085, "eval_runtime": 0.5644, "eval_samples_per_second": 416.369, "eval_steps_per_second": 53.154, "step": 4674 }, { "epoch": 6.418485237483954, "grad_norm": 18.92781639099121, "learning_rate": 1.6651384558958373e-06, "loss": 0.0341, "step": 5000 }, { "epoch": 7.0, "eval_accuracy": 0.8921392139213922, "eval_f1": 0.7840349917987972, "eval_loss": 0.5803054571151733, "eval_precision": 0.7742980561555075, "eval_recall": 0.7940199335548173, "eval_runtime": 0.5542, "eval_samples_per_second": 424.041, "eval_steps_per_second": 54.133, "step": 5453 } ], "logging_steps": 500, "max_steps": 5453, "num_input_tokens_seen": 0, "num_train_epochs": 7, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1159775675802900.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }