| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 7.0, | |
| "eval_steps": 500, | |
| "global_step": 5453, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.6418485237483954, | |
| "grad_norm": 9.85666561126709, | |
| "learning_rate": 1.8169814780854577e-05, | |
| "loss": 0.5294, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_accuracy": 0.8421842184218422, | |
| "eval_f1": 0.7176287051482059, | |
| "eval_loss": 0.42669883370399475, | |
| "eval_precision": 0.6764705882352942, | |
| "eval_recall": 0.7641196013289037, | |
| "eval_runtime": 0.5577, | |
| "eval_samples_per_second": 421.34, | |
| "eval_steps_per_second": 53.788, | |
| "step": 779 | |
| }, | |
| { | |
| "epoch": 1.2836970474967908, | |
| "grad_norm": 14.279178619384766, | |
| "learning_rate": 1.633596185585916e-05, | |
| "loss": 0.3251, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 1.925545571245186, | |
| "grad_norm": 3.289396047592163, | |
| "learning_rate": 1.4502108930863745e-05, | |
| "loss": 0.2522, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 2.0, | |
| "eval_accuracy": 0.882988298829883, | |
| "eval_f1": 0.7681549220010758, | |
| "eval_loss": 0.34702780842781067, | |
| "eval_precision": 0.7468619246861925, | |
| "eval_recall": 0.7906976744186046, | |
| "eval_runtime": 0.4659, | |
| "eval_samples_per_second": 504.441, | |
| "eval_steps_per_second": 64.397, | |
| "step": 1558 | |
| }, | |
| { | |
| "epoch": 2.5673940949935816, | |
| "grad_norm": 2.985898733139038, | |
| "learning_rate": 1.266825600586833e-05, | |
| "loss": 0.1615, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 3.0, | |
| "eval_accuracy": 0.8852385238523852, | |
| "eval_f1": 0.7855183763027976, | |
| "eval_loss": 0.40044641494750977, | |
| "eval_precision": 0.7782608695652173, | |
| "eval_recall": 0.7929125138427464, | |
| "eval_runtime": 0.5454, | |
| "eval_samples_per_second": 430.843, | |
| "eval_steps_per_second": 55.001, | |
| "step": 2337 | |
| }, | |
| { | |
| "epoch": 3.209242618741977, | |
| "grad_norm": 3.132018804550171, | |
| "learning_rate": 1.0834403080872916e-05, | |
| "loss": 0.1346, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 3.851091142490372, | |
| "grad_norm": 23.86431121826172, | |
| "learning_rate": 9.0005501558775e-06, | |
| "loss": 0.093, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 4.0, | |
| "eval_accuracy": 0.8876387638763876, | |
| "eval_f1": 0.7800875273522976, | |
| "eval_loss": 0.4634242355823517, | |
| "eval_precision": 0.7708108108108108, | |
| "eval_recall": 0.7895902547065338, | |
| "eval_runtime": 0.5241, | |
| "eval_samples_per_second": 448.366, | |
| "eval_steps_per_second": 57.238, | |
| "step": 3116 | |
| }, | |
| { | |
| "epoch": 4.492939666238768, | |
| "grad_norm": 10.69092845916748, | |
| "learning_rate": 7.166697230882084e-06, | |
| "loss": 0.0695, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 5.0, | |
| "eval_accuracy": 0.8912391239123912, | |
| "eval_f1": 0.7937024972855592, | |
| "eval_loss": 0.512829601764679, | |
| "eval_precision": 0.7784877529286475, | |
| "eval_recall": 0.8095238095238095, | |
| "eval_runtime": 0.5735, | |
| "eval_samples_per_second": 409.733, | |
| "eval_steps_per_second": 52.306, | |
| "step": 3895 | |
| }, | |
| { | |
| "epoch": 5.134788189987163, | |
| "grad_norm": 0.8048956990242004, | |
| "learning_rate": 5.332844305886668e-06, | |
| "loss": 0.0601, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 5.7766367137355585, | |
| "grad_norm": 2.4991579055786133, | |
| "learning_rate": 3.498991380891253e-06, | |
| "loss": 0.0413, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 6.0, | |
| "eval_accuracy": 0.8915391539153915, | |
| "eval_f1": 0.7760702524698133, | |
| "eval_loss": 0.5404914021492004, | |
| "eval_precision": 0.7693144722524483, | |
| "eval_recall": 0.7829457364341085, | |
| "eval_runtime": 0.5644, | |
| "eval_samples_per_second": 416.369, | |
| "eval_steps_per_second": 53.154, | |
| "step": 4674 | |
| }, | |
| { | |
| "epoch": 6.418485237483954, | |
| "grad_norm": 18.92781639099121, | |
| "learning_rate": 1.6651384558958373e-06, | |
| "loss": 0.0341, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 7.0, | |
| "eval_accuracy": 0.8921392139213922, | |
| "eval_f1": 0.7840349917987972, | |
| "eval_loss": 0.5803054571151733, | |
| "eval_precision": 0.7742980561555075, | |
| "eval_recall": 0.7940199335548173, | |
| "eval_runtime": 0.5542, | |
| "eval_samples_per_second": 424.041, | |
| "eval_steps_per_second": 54.133, | |
| "step": 5453 | |
| } | |
| ], | |
| "logging_steps": 500, | |
| "max_steps": 5453, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 7, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 1159775675802900.0, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |