{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 500, "global_step": 375, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 2.1940720081329346, "learning_rate": 2e-05, "loss": 1.0435, "step": 10 }, { "epoch": 0.16, "grad_norm": 2.7920234203338623, "learning_rate": 2e-05, "loss": 0.6718, "step": 20 }, { "epoch": 0.24, "grad_norm": 2.9994211196899414, "learning_rate": 2e-05, "loss": 0.5008, "step": 30 }, { "epoch": 0.32, "grad_norm": 7.913583755493164, "learning_rate": 2e-05, "loss": 0.4437, "step": 40 }, { "epoch": 0.4, "grad_norm": 3.560945987701416, "learning_rate": 2e-05, "loss": 0.3628, "step": 50 }, { "epoch": 0.48, "grad_norm": 6.318229675292969, "learning_rate": 2e-05, "loss": 0.3767, "step": 60 }, { "epoch": 0.56, "grad_norm": 2.496079444885254, "learning_rate": 2e-05, "loss": 0.3398, "step": 70 }, { "epoch": 0.64, "grad_norm": 4.720012664794922, "learning_rate": 2e-05, "loss": 0.3219, "step": 80 }, { "epoch": 0.72, "grad_norm": 4.714549541473389, "learning_rate": 2e-05, "loss": 0.3112, "step": 90 }, { "epoch": 0.8, "grad_norm": 4.124231338500977, "learning_rate": 2e-05, "loss": 0.3029, "step": 100 }, { "epoch": 0.88, "grad_norm": 5.186863899230957, "learning_rate": 2e-05, "loss": 0.2997, "step": 110 }, { "epoch": 0.96, "grad_norm": 4.2964582443237305, "learning_rate": 2e-05, "loss": 0.2656, "step": 120 }, { "epoch": 1.0, "eval_accuracy": 0.8880294192239412, "eval_f1": 0.8036882807852468, "eval_loss": 0.3132246732711792, "eval_precision": 0.7543271915131211, "eval_recall": 0.8599618077657543, "eval_runtime": 0.4252, "eval_samples_per_second": 352.805, "eval_steps_per_second": 23.52, "step": 125 }, { "epoch": 1.04, "grad_norm": 3.693039655685425, "learning_rate": 2e-05, "loss": 0.2821, "step": 130 }, { "epoch": 1.12, "grad_norm": 5.225029468536377, "learning_rate": 2e-05, "loss": 0.2464, "step": 140 }, { "epoch": 1.2, "grad_norm": 7.333633899688721, "learning_rate": 2e-05, "loss": 0.2913, "step": 150 }, { "epoch": 1.28, "grad_norm": 3.361424446105957, "learning_rate": 2e-05, "loss": 0.2501, "step": 160 }, { "epoch": 1.3599999999999999, "grad_norm": 3.7213943004608154, "learning_rate": 2e-05, "loss": 0.2563, "step": 170 }, { "epoch": 1.44, "grad_norm": 4.98430061340332, "learning_rate": 2e-05, "loss": 0.234, "step": 180 }, { "epoch": 1.52, "grad_norm": 5.262845993041992, "learning_rate": 2e-05, "loss": 0.2414, "step": 190 }, { "epoch": 1.6, "grad_norm": 6.322588920593262, "learning_rate": 2e-05, "loss": 0.2807, "step": 200 }, { "epoch": 1.6800000000000002, "grad_norm": 3.358001470565796, "learning_rate": 2e-05, "loss": 0.2247, "step": 210 }, { "epoch": 1.76, "grad_norm": 3.0363667011260986, "learning_rate": 2e-05, "loss": 0.2259, "step": 220 }, { "epoch": 1.8399999999999999, "grad_norm": 3.240709066390991, "learning_rate": 2e-05, "loss": 0.2361, "step": 230 }, { "epoch": 1.92, "grad_norm": 3.994675874710083, "learning_rate": 2e-05, "loss": 0.2121, "step": 240 }, { "epoch": 2.0, "grad_norm": 2.988025665283203, "learning_rate": 2e-05, "loss": 0.2175, "step": 250 }, { "epoch": 2.0, "eval_accuracy": 0.898173979203652, "eval_f1": 0.8382264829239066, "eval_loss": 0.2903146743774414, "eval_precision": 0.7917374080362196, "eval_recall": 0.890515595162317, "eval_runtime": 0.4109, "eval_samples_per_second": 365.087, "eval_steps_per_second": 24.339, "step": 250 }, { "epoch": 2.08, "grad_norm": 2.051712989807129, "learning_rate": 2e-05, "loss": 0.1549, "step": 260 }, { "epoch": 2.16, "grad_norm": 4.846020698547363, "learning_rate": 2e-05, "loss": 0.2409, "step": 270 }, { "epoch": 2.24, "grad_norm": 2.303395986557007, "learning_rate": 2e-05, "loss": 0.1975, "step": 280 }, { "epoch": 2.32, "grad_norm": 3.298722743988037, "learning_rate": 2e-05, "loss": 0.1776, "step": 290 }, { "epoch": 2.4, "grad_norm": 2.484005928039551, "learning_rate": 2e-05, "loss": 0.189, "step": 300 }, { "epoch": 2.48, "grad_norm": 3.962454319000244, "learning_rate": 2e-05, "loss": 0.1937, "step": 310 }, { "epoch": 2.56, "grad_norm": 4.414576053619385, "learning_rate": 2e-05, "loss": 0.1799, "step": 320 }, { "epoch": 2.64, "grad_norm": 4.953629970550537, "learning_rate": 2e-05, "loss": 0.2084, "step": 330 }, { "epoch": 2.7199999999999998, "grad_norm": 4.017173767089844, "learning_rate": 2e-05, "loss": 0.1637, "step": 340 }, { "epoch": 2.8, "grad_norm": 3.566938638687134, "learning_rate": 2e-05, "loss": 0.2825, "step": 350 }, { "epoch": 2.88, "grad_norm": 4.922001838684082, "learning_rate": 2e-05, "loss": 0.1963, "step": 360 }, { "epoch": 2.96, "grad_norm": 5.1235270500183105, "learning_rate": 2e-05, "loss": 0.2167, "step": 370 } ], "logging_steps": 10, "max_steps": 375, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 473453787413376.0, "train_batch_size": 16, "trial_name": null, "trial_params": null }