| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 243, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.12422360248447205, |
| "grad_norm": 0.45446592569351196, |
| "learning_rate": 7.2000000000000005e-06, |
| "loss": 2.634676933288574, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.2484472049689441, |
| "grad_norm": 0.23934991657733917, |
| "learning_rate": 1.5200000000000002e-05, |
| "loss": 2.372325325012207, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.37267080745341613, |
| "grad_norm": 0.13367633521556854, |
| "learning_rate": 1.9983390502829168e-05, |
| "loss": 2.020255470275879, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.4968944099378882, |
| "grad_norm": 0.16358281672000885, |
| "learning_rate": 1.9797166732215078e-05, |
| "loss": 1.7821327209472657, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.6211180124223602, |
| "grad_norm": 0.1420416533946991, |
| "learning_rate": 1.940783098998643e-05, |
| "loss": 1.5877119064331056, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.7453416149068323, |
| "grad_norm": 0.1504518836736679, |
| "learning_rate": 1.8823454869940243e-05, |
| "loss": 1.4695100784301758, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8695652173913043, |
| "grad_norm": 0.14376819133758545, |
| "learning_rate": 1.8056153485471167e-05, |
| "loss": 1.289743995666504, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.9937888198757764, |
| "grad_norm": 0.11340057104825974, |
| "learning_rate": 1.712183430261319e-05, |
| "loss": 1.292655849456787, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.1118012422360248, |
| "grad_norm": 0.12464317679405212, |
| "learning_rate": 1.6039867351144778e-05, |
| "loss": 1.1228573799133301, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.236024844720497, |
| "grad_norm": 0.15469282865524292, |
| "learning_rate": 1.483268365084351e-05, |
| "loss": 1.075217342376709, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.360248447204969, |
| "grad_norm": 0.2216130495071411, |
| "learning_rate": 1.3525310178198707e-05, |
| "loss": 1.010305690765381, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.484472049689441, |
| "grad_norm": 0.1635051667690277, |
| "learning_rate": 1.2144851014515055e-05, |
| "loss": 1.0296391487121581, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.608695652173913, |
| "grad_norm": 0.1360178142786026, |
| "learning_rate": 1.0719925432091671e-05, |
| "loss": 0.9926811218261719, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.7329192546583851, |
| "grad_norm": 0.1589968353509903, |
| "learning_rate": 9.28007456790833e-06, |
| "loss": 0.9096580505371094, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.8571428571428572, |
| "grad_norm": 0.25687724351882935, |
| "learning_rate": 7.855148985484946e-06, |
| "loss": 0.9168188095092773, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.981366459627329, |
| "grad_norm": 0.2077445685863495, |
| "learning_rate": 6.474689821801295e-06, |
| "loss": 0.8668004035949707, |
| "step": 160 |
| }, |
| { |
| "epoch": 2.099378881987578, |
| "grad_norm": 0.20112568140029907, |
| "learning_rate": 5.167316349156495e-06, |
| "loss": 0.813880729675293, |
| "step": 170 |
| }, |
| { |
| "epoch": 2.2236024844720497, |
| "grad_norm": 0.18293920159339905, |
| "learning_rate": 3.960132648855226e-06, |
| "loss": 0.8557339668273926, |
| "step": 180 |
| }, |
| { |
| "epoch": 2.3478260869565215, |
| "grad_norm": 0.19826386868953705, |
| "learning_rate": 2.878165697386812e-06, |
| "loss": 0.8404932022094727, |
| "step": 190 |
| }, |
| { |
| "epoch": 2.472049689440994, |
| "grad_norm": 0.26104530692100525, |
| "learning_rate": 1.9438465145288377e-06, |
| "loss": 0.7638469696044922, |
| "step": 200 |
| }, |
| { |
| "epoch": 2.596273291925466, |
| "grad_norm": 0.1945190578699112, |
| "learning_rate": 1.1765451300597574e-06, |
| "loss": 0.8175761222839355, |
| "step": 210 |
| }, |
| { |
| "epoch": 2.720496894409938, |
| "grad_norm": 0.21797508001327515, |
| "learning_rate": 5.921690100135713e-07, |
| "loss": 0.7640025615692139, |
| "step": 220 |
| }, |
| { |
| "epoch": 2.8447204968944098, |
| "grad_norm": 0.17856864631175995, |
| "learning_rate": 2.028332677849254e-07, |
| "loss": 0.7627779960632324, |
| "step": 230 |
| }, |
| { |
| "epoch": 2.968944099378882, |
| "grad_norm": 0.19549702107906342, |
| "learning_rate": 1.6609497170834154e-08, |
| "loss": 0.7115848541259766, |
| "step": 240 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 243, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 3.6764376918235546e+17, |
| "train_batch_size": 4, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|