| { | |
| "best_global_step": null, | |
| "best_metric": null, | |
| "best_model_checkpoint": null, | |
| "epoch": 2.0, | |
| "eval_steps": 500, | |
| "global_step": 340, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.14781966001478197, | |
| "grad_norm": 0.20523984730243683, | |
| "learning_rate": 0.00019137254901960786, | |
| "loss": 1.0818, | |
| "step": 25 | |
| }, | |
| { | |
| "epoch": 0.29563932002956395, | |
| "grad_norm": 0.1347881406545639, | |
| "learning_rate": 0.0001815686274509804, | |
| "loss": 0.2846, | |
| "step": 50 | |
| }, | |
| { | |
| "epoch": 0.4434589800443459, | |
| "grad_norm": 0.11665232479572296, | |
| "learning_rate": 0.00017176470588235293, | |
| "loss": 0.2523, | |
| "step": 75 | |
| }, | |
| { | |
| "epoch": 0.5912786400591279, | |
| "grad_norm": 0.1508069485425949, | |
| "learning_rate": 0.0001619607843137255, | |
| "loss": 0.239, | |
| "step": 100 | |
| }, | |
| { | |
| "epoch": 0.7390983000739099, | |
| "grad_norm": 0.17151322960853577, | |
| "learning_rate": 0.00015215686274509804, | |
| "loss": 0.2397, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.8869179600886918, | |
| "grad_norm": 0.21252259612083435, | |
| "learning_rate": 0.0001423529411764706, | |
| "loss": 0.2447, | |
| "step": 150 | |
| }, | |
| { | |
| "epoch": 1.0295639320029564, | |
| "grad_norm": 0.1654823124408722, | |
| "learning_rate": 0.00013254901960784313, | |
| "loss": 0.2216, | |
| "step": 175 | |
| }, | |
| { | |
| "epoch": 1.1773835920177385, | |
| "grad_norm": 0.16431653499603271, | |
| "learning_rate": 0.0001227450980392157, | |
| "loss": 0.2117, | |
| "step": 200 | |
| }, | |
| { | |
| "epoch": 1.3252032520325203, | |
| "grad_norm": 0.21554099023342133, | |
| "learning_rate": 0.00011294117647058824, | |
| "loss": 0.2034, | |
| "step": 225 | |
| }, | |
| { | |
| "epoch": 1.4730229120473024, | |
| "grad_norm": 0.2417294681072235, | |
| "learning_rate": 0.0001031372549019608, | |
| "loss": 0.2137, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 1.6208425720620843, | |
| "grad_norm": 0.20341823995113373, | |
| "learning_rate": 9.333333333333334e-05, | |
| "loss": 0.2122, | |
| "step": 275 | |
| }, | |
| { | |
| "epoch": 1.7686622320768661, | |
| "grad_norm": 0.18581856787204742, | |
| "learning_rate": 8.352941176470589e-05, | |
| "loss": 0.2259, | |
| "step": 300 | |
| }, | |
| { | |
| "epoch": 1.9164818920916482, | |
| "grad_norm": 0.23839442431926727, | |
| "learning_rate": 7.372549019607843e-05, | |
| "loss": 0.2113, | |
| "step": 325 | |
| } | |
| ], | |
| "logging_steps": 25, | |
| "max_steps": 510, | |
| "num_input_tokens_seen": 0, | |
| "num_train_epochs": 3, | |
| "save_steps": 500, | |
| "stateful_callbacks": { | |
| "TrainerControl": { | |
| "args": { | |
| "should_epoch_stop": false, | |
| "should_evaluate": false, | |
| "should_log": false, | |
| "should_save": true, | |
| "should_training_stop": false | |
| }, | |
| "attributes": {} | |
| } | |
| }, | |
| "total_flos": 7406961549115392.0, | |
| "train_batch_size": 1, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |