{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 0.7092198581560284, "eval_steps": 20, "global_step": 200, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.07092198581560284, "grad_norm": 0.4411725103855133, "learning_rate": 9.775413711583924e-05, "loss": 0.4587, "step": 20 }, { "epoch": 0.07092198581560284, "eval_valid_loss": 0.5579081773757935, "eval_valid_runtime": 0.5769, "eval_valid_samples_per_second": 26.003, "eval_valid_steps_per_second": 3.467, "step": 20 }, { "epoch": 0.07092198581560284, "eval_test_loss": 0.4375748336315155, "eval_test_runtime": 17.9126, "eval_test_samples_per_second": 26.797, "eval_test_steps_per_second": 3.35, "step": 20 }, { "epoch": 0.14184397163120568, "grad_norm": 0.3396584689617157, "learning_rate": 9.539007092198582e-05, "loss": 0.39, "step": 40 }, { "epoch": 0.14184397163120568, "eval_valid_loss": 0.5398206114768982, "eval_valid_runtime": 0.576, "eval_valid_samples_per_second": 26.044, "eval_valid_steps_per_second": 3.472, "step": 40 }, { "epoch": 0.14184397163120568, "eval_test_loss": 0.4187872111797333, "eval_test_runtime": 17.9258, "eval_test_samples_per_second": 26.777, "eval_test_steps_per_second": 3.347, "step": 40 }, { "epoch": 0.2127659574468085, "grad_norm": 0.40809884667396545, "learning_rate": 9.302600472813238e-05, "loss": 0.3838, "step": 60 }, { "epoch": 0.2127659574468085, "eval_valid_loss": 0.5342534184455872, "eval_valid_runtime": 0.576, "eval_valid_samples_per_second": 26.04, "eval_valid_steps_per_second": 3.472, "step": 60 }, { "epoch": 0.2127659574468085, "eval_test_loss": 0.4113999009132385, "eval_test_runtime": 17.9356, "eval_test_samples_per_second": 26.762, "eval_test_steps_per_second": 3.345, "step": 60 }, { "epoch": 0.28368794326241137, "grad_norm": 0.3752501904964447, "learning_rate": 9.066193853427897e-05, "loss": 0.3455, "step": 80 }, { "epoch": 0.28368794326241137, "eval_valid_loss": 0.5170475244522095, "eval_valid_runtime": 0.5767, "eval_valid_samples_per_second": 26.01, "eval_valid_steps_per_second": 3.468, "step": 80 }, { "epoch": 0.28368794326241137, "eval_test_loss": 0.40657365322113037, "eval_test_runtime": 17.9295, "eval_test_samples_per_second": 26.772, "eval_test_steps_per_second": 3.346, "step": 80 }, { "epoch": 0.3546099290780142, "grad_norm": 0.399515300989151, "learning_rate": 8.829787234042553e-05, "loss": 0.3501, "step": 100 }, { "epoch": 0.3546099290780142, "eval_valid_loss": 0.5137313604354858, "eval_valid_runtime": 0.5779, "eval_valid_samples_per_second": 25.958, "eval_valid_steps_per_second": 3.461, "step": 100 }, { "epoch": 0.3546099290780142, "eval_test_loss": 0.40371188521385193, "eval_test_runtime": 17.9374, "eval_test_samples_per_second": 26.76, "eval_test_steps_per_second": 3.345, "step": 100 }, { "epoch": 0.425531914893617, "grad_norm": 0.3764987885951996, "learning_rate": 8.593380614657211e-05, "loss": 0.3521, "step": 120 }, { "epoch": 0.425531914893617, "eval_valid_loss": 0.5120474100112915, "eval_valid_runtime": 0.577, "eval_valid_samples_per_second": 25.996, "eval_valid_steps_per_second": 3.466, "step": 120 }, { "epoch": 0.425531914893617, "eval_test_loss": 0.40052852034568787, "eval_test_runtime": 17.9379, "eval_test_samples_per_second": 26.759, "eval_test_steps_per_second": 3.345, "step": 120 }, { "epoch": 0.49645390070921985, "grad_norm": 0.4307573437690735, "learning_rate": 8.356973995271869e-05, "loss": 0.3523, "step": 140 }, { "epoch": 0.49645390070921985, "eval_valid_loss": 0.5145517587661743, "eval_valid_runtime": 0.5771, "eval_valid_samples_per_second": 25.992, "eval_valid_steps_per_second": 3.466, "step": 140 }, { "epoch": 0.49645390070921985, "eval_test_loss": 0.3979322016239166, "eval_test_runtime": 17.9342, "eval_test_samples_per_second": 26.765, "eval_test_steps_per_second": 3.346, "step": 140 }, { "epoch": 0.5673758865248227, "grad_norm": 0.42571648955345154, "learning_rate": 8.120567375886525e-05, "loss": 0.3463, "step": 160 }, { "epoch": 0.5673758865248227, "eval_valid_loss": 0.5077592134475708, "eval_valid_runtime": 0.5775, "eval_valid_samples_per_second": 25.972, "eval_valid_steps_per_second": 3.463, "step": 160 }, { "epoch": 0.5673758865248227, "eval_test_loss": 0.39705216884613037, "eval_test_runtime": 17.9327, "eval_test_samples_per_second": 26.767, "eval_test_steps_per_second": 3.346, "step": 160 }, { "epoch": 0.6382978723404256, "grad_norm": 0.43494898080825806, "learning_rate": 7.884160756501182e-05, "loss": 0.3345, "step": 180 }, { "epoch": 0.6382978723404256, "eval_valid_loss": 0.5126659274101257, "eval_valid_runtime": 0.577, "eval_valid_samples_per_second": 25.999, "eval_valid_steps_per_second": 3.466, "step": 180 }, { "epoch": 0.6382978723404256, "eval_test_loss": 0.39509347081184387, "eval_test_runtime": 17.9456, "eval_test_samples_per_second": 26.747, "eval_test_steps_per_second": 3.343, "step": 180 }, { "epoch": 0.7092198581560284, "grad_norm": 0.4940604269504547, "learning_rate": 7.64775413711584e-05, "loss": 0.3194, "step": 200 }, { "epoch": 0.7092198581560284, "eval_valid_loss": 0.5195654034614563, "eval_valid_runtime": 0.5773, "eval_valid_samples_per_second": 25.984, "eval_valid_steps_per_second": 3.465, "step": 200 }, { "epoch": 0.7092198581560284, "eval_test_loss": 0.39300256967544556, "eval_test_runtime": 17.9386, "eval_test_samples_per_second": 26.758, "eval_test_steps_per_second": 3.345, "step": 200 } ], "logging_steps": 20, "max_steps": 846, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.64884088471552e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }