{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 8.0, "eval_steps": 500, "global_step": 248, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "grad_norm": 4.933078765869141, "learning_rate": 1.7580645161290325e-05, "loss": 0.4086, "step": 31 }, { "epoch": 1.0, "eval_f1": 0.783539089652777, "eval_loss": 0.16160552203655243, "eval_precision": 0.7601809954730898, "eval_recall": 0.808378148879682, "eval_runtime": 1.4708, "eval_samples_per_second": 686.035, "eval_steps_per_second": 5.439, "step": 31 }, { "epoch": 2.0, "grad_norm": 2.215329170227051, "learning_rate": 1.5080645161290324e-05, "loss": 0.1213, "step": 62 }, { "epoch": 2.0, "eval_f1": 0.8489169130173646, "eval_loss": 0.11775331199169159, "eval_precision": 0.823185323050191, "eval_recall": 0.8763090857603275, "eval_runtime": 1.3955, "eval_samples_per_second": 723.05, "eval_steps_per_second": 5.733, "step": 62 }, { "epoch": 3.0, "grad_norm": 1.6429105997085571, "learning_rate": 1.2580645161290324e-05, "loss": 0.0765, "step": 93 }, { "epoch": 3.0, "eval_f1": 0.8752420420264311, "eval_loss": 0.11041084676980972, "eval_precision": 0.8558290505792377, "eval_recall": 0.8955561845431771, "eval_runtime": 1.5773, "eval_samples_per_second": 639.714, "eval_steps_per_second": 5.072, "step": 93 }, { "epoch": 4.0, "grad_norm": 1.440000057220459, "learning_rate": 1.0080645161290323e-05, "loss": 0.0551, "step": 124 }, { "epoch": 4.0, "eval_f1": 0.8823774425760322, "eval_loss": 0.10791593044996262, "eval_precision": 0.8661395856028732, "eval_recall": 0.8992357769575453, "eval_runtime": 1.4615, "eval_samples_per_second": 690.409, "eval_steps_per_second": 5.474, "step": 124 }, { "epoch": 5.0, "grad_norm": 1.1584373712539673, "learning_rate": 7.580645161290323e-06, "loss": 0.042, "step": 155 }, { "epoch": 5.0, "eval_f1": 0.8873842702525155, "eval_loss": 0.11261641979217529, "eval_precision": 0.8669006479458238, "eval_recall": 0.9088593263489702, "eval_runtime": 1.5207, "eval_samples_per_second": 663.502, "eval_steps_per_second": 5.261, "step": 155 }, { "epoch": 6.0, "grad_norm": 1.7214231491088867, "learning_rate": 5.080645161290323e-06, "loss": 0.035, "step": 186 }, { "epoch": 6.0, "eval_f1": 0.8884892036331468, "eval_loss": 0.11356285214424133, "eval_precision": 0.8690121786174045, "eval_recall": 0.9088593263489702, "eval_runtime": 1.4456, "eval_samples_per_second": 697.982, "eval_steps_per_second": 5.534, "step": 186 }, { "epoch": 7.0, "grad_norm": 1.0617001056671143, "learning_rate": 2.580645161290323e-06, "loss": 0.0303, "step": 217 }, { "epoch": 7.0, "eval_f1": 0.8905534698226346, "eval_loss": 0.11567464470863342, "eval_precision": 0.8732317736646539, "eval_recall": 0.9085762807786342, "eval_runtime": 1.4347, "eval_samples_per_second": 703.278, "eval_steps_per_second": 5.576, "step": 217 }, { "epoch": 8.0, "grad_norm": 0.7687620520591736, "learning_rate": 8.064516129032259e-08, "loss": 0.028, "step": 248 }, { "epoch": 8.0, "eval_f1": 0.8886423469618369, "eval_loss": 0.11542723327875137, "eval_precision": 0.8711256117431454, "eval_recall": 0.9068780073566179, "eval_runtime": 1.5699, "eval_samples_per_second": 642.7, "eval_steps_per_second": 5.096, "step": 248 } ], "logging_steps": 500, "max_steps": 248, "num_input_tokens_seen": 0, "num_train_epochs": 8, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1813582770980094.0, "train_batch_size": 128, "trial_name": null, "trial_params": null }