| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 0.7092198581560284, | |
| "eval_steps": 20, | |
| "global_step": 200, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07092198581560284, | |
| "grad_norm": 0.4411725103855133, | |
| "learning_rate": 9.775413711583924e-05, | |
| "loss": 0.4587, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07092198581560284, | |
| "eval_valid_loss": 0.5579081773757935, | |
| "eval_valid_runtime": 0.5769, | |
| "eval_valid_samples_per_second": 26.003, | |
| "eval_valid_steps_per_second": 3.467, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.07092198581560284, | |
| "eval_test_loss": 0.4375748336315155, | |
| "eval_test_runtime": 17.9126, | |
| "eval_test_samples_per_second": 26.797, | |
| "eval_test_steps_per_second": 3.35, | |
| "step": 20 | |
| }, | |
| { | |
| "epoch": 0.14184397163120568, | |
| "grad_norm": 0.3396584689617157, | |
| "learning_rate": 9.539007092198582e-05, | |
| "loss": 0.39, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.14184397163120568, | |
| "eval_valid_loss": 0.5398206114768982, | |
| "eval_valid_runtime": 0.576, | |
| "eval_valid_samples_per_second": 26.044, | |
| "eval_valid_steps_per_second": 3.472, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.14184397163120568, | |
| "eval_test_loss": 0.4187872111797333, | |
| "eval_test_runtime": 17.9258, | |
| "eval_test_samples_per_second": 26.777, | |
| "eval_test_steps_per_second": 3.347, | |
| "step": 40 | |
| }, | |
| { | |
| "epoch": 0.2127659574468085, | |
| "grad_norm": 0.40809884667396545, | |
| "learning_rate": 9.302600472813238e-05, | |
| "loss": 0.3838, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2127659574468085, | |
| "eval_valid_loss": 0.5342534184455872, | |
| "eval_valid_runtime": 0.576, | |
| "eval_valid_samples_per_second": 26.04, | |
| "eval_valid_steps_per_second": 3.472, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.2127659574468085, | |
| "eval_test_loss": 0.4113999009132385, | |
| "eval_test_runtime": 17.9356, | |
| "eval_test_samples_per_second": 26.762, | |
| "eval_test_steps_per_second": 3.345, | |
| "step": 60 | |
| }, | |
| { | |
| "epoch": 0.28368794326241137, | |
| "grad_norm": 0.3752501904964447, | |
| "learning_rate": 9.066193853427897e-05, | |
| "loss": 0.3455, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.28368794326241137, | |
| "eval_valid_loss": 0.5170475244522095, | |
| "eval_valid_runtime": 0.5767, | |
| "eval_valid_samples_per_second": 26.01, | |
| "eval_valid_steps_per_second": 3.468, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.28368794326241137, | |
| "eval_test_loss": 0.40657365322113037, | |
| "eval_test_runtime": 17.9295, | |
| "eval_test_samples_per_second": 26.772, | |
| "eval_test_steps_per_second": 3.346, | |
| "step": 80 | |
| }, | |
| { | |
| "epoch": 0.3546099290780142, | |
| "grad_norm": 0.399515300989151, | |
| "learning_rate": 8.829787234042553e-05, | |
| "loss": 0.3501, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3546099290780142, | |
| "eval_valid_loss": 0.5137313604354858, | |
| "eval_valid_runtime": 0.5779, | |
| "eval_valid_samples_per_second": 25.958, | |
| "eval_valid_steps_per_second": 3.461, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.3546099290780142, | |
| "eval_test_loss": 0.40371188521385193, | |
| "eval_test_runtime": 17.9374, | |
| "eval_test_samples_per_second": 26.76, | |
| "eval_test_steps_per_second": 3.345, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.425531914893617, | |
| "grad_norm": 0.3764987885951996, | |
| "learning_rate": 8.593380614657211e-05, | |
| "loss": 0.3521, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.425531914893617, | |
| "eval_valid_loss": 0.5120474100112915, | |
| "eval_valid_runtime": 0.577, | |
| "eval_valid_samples_per_second": 25.996, | |
| "eval_valid_steps_per_second": 3.466, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.425531914893617, | |
| "eval_test_loss": 0.40052852034568787, | |
| "eval_test_runtime": 17.9379, | |
| "eval_test_samples_per_second": 26.759, | |
| "eval_test_steps_per_second": 3.345, | |
| "step": 120 | |
| }, | |
| { | |
| "epoch": 0.49645390070921985, | |
| "grad_norm": 0.4307573437690735, | |
| "learning_rate": 8.356973995271869e-05, | |
| "loss": 0.3523, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.49645390070921985, | |
| "eval_valid_loss": 0.5145517587661743, | |
| "eval_valid_runtime": 0.5771, | |
| "eval_valid_samples_per_second": 25.992, | |
| "eval_valid_steps_per_second": 3.466, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.49645390070921985, | |
| "eval_test_loss": 0.3979322016239166, | |
| "eval_test_runtime": 17.9342, | |
| "eval_test_samples_per_second": 26.765, | |
| "eval_test_steps_per_second": 3.346, | |
| "step": 140 | |
| }, | |
| { | |
| "epoch": 0.5673758865248227, | |
| "grad_norm": 0.42571648955345154, | |
| "learning_rate": 8.120567375886525e-05, | |
| "loss": 0.3463, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.5673758865248227, | |
| "eval_valid_loss": 0.5077592134475708, | |
| "eval_valid_runtime": 0.5775, | |
| "eval_valid_samples_per_second": 25.972, | |
| "eval_valid_steps_per_second": 3.463, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.5673758865248227, | |
| "eval_test_loss": 0.39705216884613037, | |
| "eval_test_runtime": 17.9327, | |
| "eval_test_samples_per_second": 26.767, | |
| "eval_test_steps_per_second": 3.346, | |
| "step": 160 | |
| }, | |
| { | |
| "epoch": 0.6382978723404256, | |
| "grad_norm": 0.43494898080825806, | |
| "learning_rate": 7.884160756501182e-05, | |
| "loss": 0.3345, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.6382978723404256, | |
| "eval_valid_loss": 0.5126659274101257, | |
| "eval_valid_runtime": 0.577, | |
| "eval_valid_samples_per_second": 25.999, | |
| "eval_valid_steps_per_second": 3.466, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.6382978723404256, | |
| "eval_test_loss": 0.39509347081184387, | |
| "eval_test_runtime": 17.9456, | |
| "eval_test_samples_per_second": 26.747, | |
| "eval_test_steps_per_second": 3.343, | |
| "step": 180 | |
| }, | |
| { | |
| "epoch": 0.7092198581560284, | |
| "grad_norm": 0.4940604269504547, | |
| "learning_rate": 7.64775413711584e-05, | |
| "loss": 0.3194, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.7092198581560284, | |
| "eval_valid_loss": 0.5195654034614563, | |
| "eval_valid_runtime": 0.5773, | |
| "eval_valid_samples_per_second": 25.984, | |
| "eval_valid_steps_per_second": 3.465, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.7092198581560284, | |
| "eval_test_loss": 0.39300256967544556, | |
| "eval_test_runtime": 17.9386, | |
| "eval_test_samples_per_second": 26.758, | |
| "eval_test_steps_per_second": 3.345, | |
| "step": 200 | |
| } | |
| ], | |
| "logging_steps": 20, | |
| "max_steps": 846, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 100, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 2.64884088471552e+16, | |
| "train_batch_size": 16, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |