{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 5.0, "eval_steps": 500, "global_step": 80, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.31746031746031744, "grad_norm": 3.1160410477431384, "learning_rate": 2e-05, "loss": 0.743, "loss_nan_ranks": 0, "loss_rank_avg": 0.18026351928710938, "step": 5, "valid_targets_mean": 4387.2, "valid_targets_min": 740 }, { "epoch": 0.6349206349206349, "grad_norm": 0.9623025342413126, "learning_rate": 3.998096443163716e-05, "loss": 0.6426, "loss_nan_ranks": 0, "loss_rank_avg": 0.15755076706409454, "step": 10, "valid_targets_mean": 3791.7, "valid_targets_min": 695 }, { "epoch": 0.9523809523809523, "grad_norm": 0.5400417708688194, "learning_rate": 3.931851652578137e-05, "loss": 0.5922, "loss_nan_ranks": 0, "loss_rank_avg": 0.1631372570991516, "step": 15, "valid_targets_mean": 4048.3, "valid_targets_min": 1151 }, { "epoch": 1.253968253968254, "grad_norm": 0.4346375139896992, "learning_rate": 3.774021666356444e-05, "loss": 0.5513, "loss_nan_ranks": 0, "loss_rank_avg": 0.13641172647476196, "step": 20, "valid_targets_mean": 3890.5, "valid_targets_min": 1004 }, { "epoch": 1.5714285714285714, "grad_norm": 0.3583194659422831, "learning_rate": 3.532088886237956e-05, "loss": 0.5268, "loss_nan_ranks": 0, "loss_rank_avg": 0.157705619931221, "step": 25, "valid_targets_mean": 4477.9, "valid_targets_min": 656 }, { "epoch": 1.8888888888888888, "grad_norm": 0.2998700734251974, "learning_rate": 3.217522858017442e-05, "loss": 0.5194, "loss_nan_ranks": 0, "loss_rank_avg": 0.12757813930511475, "step": 30, "valid_targets_mean": 4071.6, "valid_targets_min": 773 }, { "epoch": 2.1904761904761907, "grad_norm": 0.354322163099566, "learning_rate": 2.8452365234813992e-05, "loss": 0.4999, "loss_nan_ranks": 0, "loss_rank_avg": 0.1116921454668045, "step": 35, "valid_targets_mean": 3261.8, "valid_targets_min": 695 }, { "epoch": 2.507936507936508, "grad_norm": 0.2736861113965854, "learning_rate": 2.4328792278762058e-05, "loss": 0.476, "loss_nan_ranks": 0, "loss_rank_avg": 0.11490871757268906, "step": 40, "valid_targets_mean": 3865.1, "valid_targets_min": 746 }, { "epoch": 2.825396825396825, "grad_norm": 0.26810971560154995, "learning_rate": 2e-05, "loss": 0.4743, "loss_nan_ranks": 0, "loss_rank_avg": 0.09590233117341995, "step": 45, "valid_targets_mean": 3056.7, "valid_targets_min": 726 }, { "epoch": 3.126984126984127, "grad_norm": 0.26045065338871903, "learning_rate": 1.5671207721237945e-05, "loss": 0.4803, "loss_nan_ranks": 0, "loss_rank_avg": 0.1086278185248375, "step": 50, "valid_targets_mean": 3327.7, "valid_targets_min": 746 }, { "epoch": 3.4444444444444446, "grad_norm": 0.2646056254599043, "learning_rate": 1.1547634765186016e-05, "loss": 0.4741, "loss_nan_ranks": 0, "loss_rank_avg": 0.10994982719421387, "step": 55, "valid_targets_mean": 3503.4, "valid_targets_min": 569 }, { "epoch": 3.761904761904762, "grad_norm": 0.23864309413170337, "learning_rate": 7.824771419825588e-06, "loss": 0.4612, "loss_nan_ranks": 0, "loss_rank_avg": 0.09653226286172867, "step": 60, "valid_targets_mean": 3644.4, "valid_targets_min": 1042 }, { "epoch": 4.063492063492063, "grad_norm": 0.2652889879375569, "learning_rate": 4.679111137620442e-06, "loss": 0.4606, "loss_nan_ranks": 0, "loss_rank_avg": 0.1475735604763031, "step": 65, "valid_targets_mean": 4244.4, "valid_targets_min": 2068 }, { "epoch": 4.380952380952381, "grad_norm": 0.28736207202393793, "learning_rate": 2.259783336435566e-06, "loss": 0.467, "loss_nan_ranks": 0, "loss_rank_avg": 0.10715197026729584, "step": 70, "valid_targets_mean": 3558.8, "valid_targets_min": 733 }, { "epoch": 4.698412698412699, "grad_norm": 0.26387185108781563, "learning_rate": 6.814834742186361e-07, "loss": 0.459, "loss_nan_ranks": 0, "loss_rank_avg": 0.10606744885444641, "step": 75, "valid_targets_mean": 3355.8, "valid_targets_min": 761 }, { "epoch": 5.0, "grad_norm": 0.2624385775505968, "learning_rate": 1.9035568362844037e-08, "loss": 0.4442, "loss_nan_ranks": 0, "loss_rank_avg": 0.11343716084957123, "step": 80, "valid_targets_mean": 3113.4, "valid_targets_min": 587 }, { "epoch": 5.0, "loss_nan_ranks": 0, "loss_rank_avg": 0.11343716084957123, "step": 80, "total_flos": 1.3290499486829773e+17, "train_loss": 0.5170090794563293, "train_runtime": 1718.7633, "train_samples_per_second": 2.909, "train_steps_per_second": 0.047, "valid_targets_mean": 3113.4, "valid_targets_min": 587 } ], "logging_steps": 5, "max_steps": 80, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": false, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.3290499486829773e+17, "train_batch_size": 1, "trial_name": null, "trial_params": null }