| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 0.8421052631578947, |
| "eval_steps": 500, |
| "global_step": 30, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.028070175438596492, |
| "grad_norm": 74.61073303222656, |
| "learning_rate": 0.0, |
| "loss": 11.0904, |
| "step": 1 |
| }, |
| { |
| "epoch": 0.056140350877192984, |
| "grad_norm": 67.27781677246094, |
| "learning_rate": 2.5e-05, |
| "loss": 11.0904, |
| "step": 2 |
| }, |
| { |
| "epoch": 0.08421052631578947, |
| "grad_norm": 60.308128356933594, |
| "learning_rate": 5e-05, |
| "loss": 11.0125, |
| "step": 3 |
| }, |
| { |
| "epoch": 0.11228070175438597, |
| "grad_norm": 67.1789779663086, |
| "learning_rate": 7.500000000000001e-05, |
| "loss": 9.7828, |
| "step": 4 |
| }, |
| { |
| "epoch": 0.14035087719298245, |
| "grad_norm": 53.04948806762695, |
| "learning_rate": 0.0001, |
| "loss": 6.5555, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.16842105263157894, |
| "grad_norm": 19.973880767822266, |
| "learning_rate": 9.848484848484849e-05, |
| "loss": 2.1775, |
| "step": 6 |
| }, |
| { |
| "epoch": 0.19649122807017544, |
| "grad_norm": 10.050884246826172, |
| "learning_rate": 9.696969696969698e-05, |
| "loss": 0.9945, |
| "step": 7 |
| }, |
| { |
| "epoch": 0.22456140350877193, |
| "grad_norm": 2.9846487045288086, |
| "learning_rate": 9.545454545454546e-05, |
| "loss": 0.317, |
| "step": 8 |
| }, |
| { |
| "epoch": 0.25263157894736843, |
| "grad_norm": 1.7284069061279297, |
| "learning_rate": 9.393939393939395e-05, |
| "loss": 0.2077, |
| "step": 9 |
| }, |
| { |
| "epoch": 0.2807017543859649, |
| "grad_norm": 5.534728050231934, |
| "learning_rate": 9.242424242424242e-05, |
| "loss": 0.4104, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3087719298245614, |
| "grad_norm": 5.460150718688965, |
| "learning_rate": 9.090909090909092e-05, |
| "loss": 0.527, |
| "step": 11 |
| }, |
| { |
| "epoch": 0.3368421052631579, |
| "grad_norm": 3.3341681957244873, |
| "learning_rate": 8.93939393939394e-05, |
| "loss": 0.3478, |
| "step": 12 |
| }, |
| { |
| "epoch": 0.3649122807017544, |
| "grad_norm": 3.0879838466644287, |
| "learning_rate": 8.787878787878789e-05, |
| "loss": 0.3544, |
| "step": 13 |
| }, |
| { |
| "epoch": 0.3929824561403509, |
| "grad_norm": 2.3083503246307373, |
| "learning_rate": 8.636363636363637e-05, |
| "loss": 0.3322, |
| "step": 14 |
| }, |
| { |
| "epoch": 0.42105263157894735, |
| "grad_norm": 2.012421131134033, |
| "learning_rate": 8.484848484848486e-05, |
| "loss": 0.338, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.44912280701754387, |
| "grad_norm": 3.5650596618652344, |
| "learning_rate": 8.333333333333334e-05, |
| "loss": 0.8368, |
| "step": 16 |
| }, |
| { |
| "epoch": 0.47719298245614034, |
| "grad_norm": 9.133766174316406, |
| "learning_rate": 8.181818181818183e-05, |
| "loss": 3.2869, |
| "step": 17 |
| }, |
| { |
| "epoch": 0.5052631578947369, |
| "grad_norm": 11.395151138305664, |
| "learning_rate": 8.03030303030303e-05, |
| "loss": 3.484, |
| "step": 18 |
| }, |
| { |
| "epoch": 0.5333333333333333, |
| "grad_norm": 14.406392097473145, |
| "learning_rate": 7.878787878787879e-05, |
| "loss": 4.5278, |
| "step": 19 |
| }, |
| { |
| "epoch": 0.5614035087719298, |
| "grad_norm": 22.699094772338867, |
| "learning_rate": 7.727272727272727e-05, |
| "loss": 6.4349, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5894736842105263, |
| "grad_norm": 25.127201080322266, |
| "learning_rate": 7.575757575757576e-05, |
| "loss": 7.1115, |
| "step": 21 |
| }, |
| { |
| "epoch": 0.6175438596491228, |
| "grad_norm": 11.415943145751953, |
| "learning_rate": 7.424242424242424e-05, |
| "loss": 4.9154, |
| "step": 22 |
| }, |
| { |
| "epoch": 0.6456140350877193, |
| "grad_norm": 14.724751472473145, |
| "learning_rate": 7.272727272727273e-05, |
| "loss": 6.0858, |
| "step": 23 |
| }, |
| { |
| "epoch": 0.6736842105263158, |
| "grad_norm": 10.968608856201172, |
| "learning_rate": 7.121212121212121e-05, |
| "loss": 4.6718, |
| "step": 24 |
| }, |
| { |
| "epoch": 0.7017543859649122, |
| "grad_norm": 8.286094665527344, |
| "learning_rate": 6.96969696969697e-05, |
| "loss": 5.1874, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.7298245614035088, |
| "grad_norm": 31.533506393432617, |
| "learning_rate": 6.818181818181818e-05, |
| "loss": 6.5974, |
| "step": 26 |
| }, |
| { |
| "epoch": 0.7578947368421053, |
| "grad_norm": 21.552640914916992, |
| "learning_rate": 6.666666666666667e-05, |
| "loss": 5.9696, |
| "step": 27 |
| }, |
| { |
| "epoch": 0.7859649122807018, |
| "grad_norm": 5.780311107635498, |
| "learning_rate": 6.515151515151516e-05, |
| "loss": 4.7733, |
| "step": 28 |
| }, |
| { |
| "epoch": 0.8140350877192982, |
| "grad_norm": 7.345734596252441, |
| "learning_rate": 6.363636363636364e-05, |
| "loss": 4.9007, |
| "step": 29 |
| }, |
| { |
| "epoch": 0.8421052631578947, |
| "grad_norm": 6.4748358726501465, |
| "learning_rate": 6.212121212121213e-05, |
| "loss": 3.5462, |
| "step": 30 |
| } |
| ], |
| "logging_steps": 1, |
| "max_steps": 70, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 2, |
| "save_steps": 30, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|