| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 237, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12658227848101267, |
| "grad_norm": 921.959716796875, |
| "learning_rate": 2.0833333333333334e-06, |
| "loss": 21.4814, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.25316455696202533, |
| "grad_norm": 141.31649780273438, |
| "learning_rate": 4.166666666666667e-06, |
| "loss": 11.7353, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.379746835443038, |
| "grad_norm": 82.57194519042969, |
| "learning_rate": 4.990217055187363e-06, |
| "loss": 8.0963, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.5063291139240507, |
| "grad_norm": 123.3320541381836, |
| "learning_rate": 4.930709439074528e-06, |
| "loss": 6.9544, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.6329113924050633, |
| "grad_norm": 40.069461822509766, |
| "learning_rate": 4.8184197871363115e-06, |
| "loss": 5.6903, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.759493670886076, |
| "grad_norm": 35.33430480957031, |
| "learning_rate": 4.65578643130007e-06, |
| "loss": 5.1445, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8860759493670886, |
| "grad_norm": 39.58875274658203, |
| "learning_rate": 4.446340900047224e-06, |
| "loss": 4.8598, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.0126582278481013, |
| "grad_norm": 45.1424674987793, |
| "learning_rate": 4.194631232463128e-06, |
| "loss": 4.3863, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.139240506329114, |
| "grad_norm": 37.524261474609375, |
| "learning_rate": 3.9061232191019525e-06, |
| "loss": 4.4847, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.2658227848101267, |
| "grad_norm": 23.08551025390625, |
| "learning_rate": 3.587081714187874e-06, |
| "loss": 4.0467, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.3924050632911391, |
| "grad_norm": 19.070663452148438, |
| "learning_rate": 3.2444345964181394e-06, |
| "loss": 4.0029, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.518987341772152, |
| "grad_norm": 22.315204620361328, |
| "learning_rate": 2.885622332413256e-06, |
| "loss": 3.9223, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.6455696202531644, |
| "grad_norm": 17.018888473510742, |
| "learning_rate": 2.5184364094932812e-06, |
| "loss": 3.7371, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.7721518987341773, |
| "grad_norm": 11.365253448486328, |
| "learning_rate": 2.150850146157985e-06, |
| "loss": 3.6745, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.8987341772151898, |
| "grad_norm": 11.076000213623047, |
| "learning_rate": 1.7908455541642583e-06, |
| "loss": 3.5277, |
| "step": 150 |
| }, |
| { |
| "epoch": 2.0253164556962027, |
| "grad_norm": 11.306219100952148, |
| "learning_rate": 1.44624001183238e-06, |
| "loss": 3.3714, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.151898734177215, |
| "grad_norm": 12.529305458068848, |
| "learning_rate": 1.124516512311836e-06, |
| "loss": 3.3842, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.278481012658228, |
| "grad_norm": 11.05534839630127, |
| "learning_rate": 8.32661172908373e-07, |
| "loss": 3.5323, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.4050632911392404, |
| "grad_norm": 19.704994201660156, |
| "learning_rate": 5.770115339024484e-07, |
| "loss": 3.4788, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.5316455696202533, |
| "grad_norm": 25.78263282775879, |
| "learning_rate": 3.631189409990815e-07, |
| "loss": 3.332, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.6582278481012658, |
| "grad_norm": 20.7712345123291, |
| "learning_rate": 1.9562799972780433e-07, |
| "loss": 3.2865, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.7848101265822782, |
| "grad_norm": 14.475357055664062, |
| "learning_rate": 7.817571939976288e-08, |
| "loss": 3.2964, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.911392405063291, |
| "grad_norm": 14.711614608764648, |
| "learning_rate": 1.3312536676942378e-08, |
| "loss": 3.1604, |
| "step": 230 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 237, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 200, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 1.637837742782546e+17, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|