| { | |
| "best_metric": 4.273556709289551, | |
| "best_model_checkpoint": "/opt/ml/model/checkpoint-400", | |
| "epoch": 0.9988439306358381, | |
| "eval_steps": 200, | |
| "global_step": 648, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.07707129094412331, | |
| "grad_norm": 2.1371634006500244, | |
| "learning_rate": 4.6219135802469136e-05, | |
| "loss": 7.0635, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.15414258188824662, | |
| "grad_norm": 11.696390151977539, | |
| "learning_rate": 4.236111111111111e-05, | |
| "loss": 4.747, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.23121387283236994, | |
| "grad_norm": 5.832604885101318, | |
| "learning_rate": 3.850308641975309e-05, | |
| "loss": 4.4816, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 0.30828516377649323, | |
| "grad_norm": 2.8921449184417725, | |
| "learning_rate": 3.4645061728395064e-05, | |
| "loss": 4.444, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.30828516377649323, | |
| "eval_loss": 4.3647613525390625, | |
| "eval_runtime": 1152.4316, | |
| "eval_samples_per_second": 4.504, | |
| "eval_steps_per_second": 0.563, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 0.3853564547206166, | |
| "grad_norm": 4.978172302246094, | |
| "learning_rate": 3.0787037037037034e-05, | |
| "loss": 4.3124, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.4624277456647399, | |
| "grad_norm": 2.087311029434204, | |
| "learning_rate": 2.692901234567901e-05, | |
| "loss": 4.309, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 0.5394990366088632, | |
| "grad_norm": 4.583073616027832, | |
| "learning_rate": 2.307098765432099e-05, | |
| "loss": 4.3329, | |
| "step": 350 | |
| }, | |
| { | |
| "epoch": 0.6165703275529865, | |
| "grad_norm": 2.860360622406006, | |
| "learning_rate": 1.9212962962962962e-05, | |
| "loss": 4.3039, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6165703275529865, | |
| "eval_loss": 4.273556709289551, | |
| "eval_runtime": 1152.4004, | |
| "eval_samples_per_second": 4.505, | |
| "eval_steps_per_second": 0.563, | |
| "step": 400 | |
| }, | |
| { | |
| "epoch": 0.6936416184971098, | |
| "grad_norm": 3.887697219848633, | |
| "learning_rate": 1.5354938271604938e-05, | |
| "loss": 4.332, | |
| "step": 450 | |
| }, | |
| { | |
| "epoch": 0.7707129094412332, | |
| "grad_norm": 2.878767728805542, | |
| "learning_rate": 1.1496913580246914e-05, | |
| "loss": 4.2523, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.8477842003853564, | |
| "grad_norm": 4.9191155433654785, | |
| "learning_rate": 7.63888888888889e-06, | |
| "loss": 4.2528, | |
| "step": 550 | |
| }, | |
| { | |
| "epoch": 0.9248554913294798, | |
| "grad_norm": 6.933360576629639, | |
| "learning_rate": 3.7808641975308645e-06, | |
| "loss": 4.2671, | |
| "step": 600 | |
| }, | |
| { | |
| "epoch": 0.9248554913294798, | |
| "eval_loss": 4.235105037689209, | |
| "eval_runtime": 1152.402, | |
| "eval_samples_per_second": 4.505, | |
| "eval_steps_per_second": 0.563, | |
| "step": 600 | |
| } | |
| ], | |
| "logging_steps": 50, | |
| "max_steps": 648, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 1, | |
| "save_steps": 400, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": true | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 4.785053161456927e+17, | |
| "train_batch_size": 8, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |