| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 80, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.125, |
| "grad_norm": 0.3209748864173889, |
| "learning_rate": 0.0001999167799344583, |
| "loss": 1.9879, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.25, |
| "grad_norm": 0.7329066395759583, |
| "learning_rate": 0.00019701859555740648, |
| "loss": 1.1746, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.375, |
| "grad_norm": 0.42199471592903137, |
| "learning_rate": 0.0001900968867902419, |
| "loss": 0.4361, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.5, |
| "grad_norm": 0.23607657849788666, |
| "learning_rate": 0.00017943870854121124, |
| "loss": 0.2133, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.625, |
| "grad_norm": 0.17693307995796204, |
| "learning_rate": 0.00016548607339452853, |
| "loss": 0.1347, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.75, |
| "grad_norm": 0.1544232815504074, |
| "learning_rate": 0.00014881762060482814, |
| "loss": 0.1494, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.875, |
| "grad_norm": 0.0979604721069336, |
| "learning_rate": 0.00013012461895372344, |
| "loss": 0.1249, |
| "step": 35 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.15202400088310242, |
| "learning_rate": 0.00011018229867038356, |
| "loss": 0.12, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.125, |
| "grad_norm": 0.09728869050741196, |
| "learning_rate": 8.981770132961649e-05, |
| "loss": 0.1115, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.25, |
| "grad_norm": 0.10440118610858917, |
| "learning_rate": 6.98753810462766e-05, |
| "loss": 0.1045, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.375, |
| "grad_norm": 0.14337652921676636, |
| "learning_rate": 5.11823793951719e-05, |
| "loss": 0.0951, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.5, |
| "grad_norm": 0.0896371454000473, |
| "learning_rate": 3.45139266054715e-05, |
| "loss": 0.101, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.625, |
| "grad_norm": 0.1519363820552826, |
| "learning_rate": 2.0561291458788733e-05, |
| "loss": 0.0989, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.75, |
| "grad_norm": 0.10926564782857895, |
| "learning_rate": 9.903113209758096e-06, |
| "loss": 0.1062, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.875, |
| "grad_norm": 0.08237078040838242, |
| "learning_rate": 2.9814044425935606e-06, |
| "loss": 0.0951, |
| "step": 75 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.09666696935892105, |
| "learning_rate": 8.322006554171146e-08, |
| "loss": 0.1037, |
| "step": 80 |
| } |
| ], |
| "logging_steps": 5, |
| "max_steps": 80, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 100, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.5494316836978688e+16, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|