| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 176, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.11428571428571428, |
| "grad_norm": 0.28923285007476807, |
| "learning_rate": 0.0001931818181818182, |
| "loss": 1.4815, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.22857142857142856, |
| "grad_norm": 0.17295370995998383, |
| "learning_rate": 0.00018560606060606061, |
| "loss": 1.1227, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.34285714285714286, |
| "grad_norm": 0.161784365773201, |
| "learning_rate": 0.00017803030303030303, |
| "loss": 0.9993, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.45714285714285713, |
| "grad_norm": 0.16568884253501892, |
| "learning_rate": 0.00017045454545454547, |
| "loss": 0.9732, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.5714285714285714, |
| "grad_norm": 0.16199344396591187, |
| "learning_rate": 0.0001628787878787879, |
| "loss": 0.9635, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.6857142857142857, |
| "grad_norm": 0.171610027551651, |
| "learning_rate": 0.0001553030303030303, |
| "loss": 0.917, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.8, |
| "grad_norm": 0.1796373575925827, |
| "learning_rate": 0.00014772727272727274, |
| "loss": 0.933, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.9142857142857143, |
| "grad_norm": 0.17292830348014832, |
| "learning_rate": 0.00014015151515151518, |
| "loss": 0.9237, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.022857142857143, |
| "grad_norm": 0.17051079869270325, |
| "learning_rate": 0.00013257575757575756, |
| "loss": 0.9029, |
| "step": 90 |
| }, |
| { |
| "epoch": 1.1371428571428572, |
| "grad_norm": 0.18231824040412903, |
| "learning_rate": 0.000125, |
| "loss": 0.8958, |
| "step": 100 |
| }, |
| { |
| "epoch": 1.2514285714285713, |
| "grad_norm": 0.19094569981098175, |
| "learning_rate": 0.00011742424242424244, |
| "loss": 0.8581, |
| "step": 110 |
| }, |
| { |
| "epoch": 1.3657142857142857, |
| "grad_norm": 0.1908770650625229, |
| "learning_rate": 0.00010984848484848484, |
| "loss": 0.8432, |
| "step": 120 |
| }, |
| { |
| "epoch": 1.48, |
| "grad_norm": 0.20320802927017212, |
| "learning_rate": 0.00010227272727272727, |
| "loss": 0.8629, |
| "step": 130 |
| }, |
| { |
| "epoch": 1.5942857142857143, |
| "grad_norm": 0.18824583292007446, |
| "learning_rate": 9.469696969696971e-05, |
| "loss": 0.8612, |
| "step": 140 |
| }, |
| { |
| "epoch": 1.7085714285714286, |
| "grad_norm": 0.19998520612716675, |
| "learning_rate": 8.712121212121212e-05, |
| "loss": 0.8799, |
| "step": 150 |
| }, |
| { |
| "epoch": 1.822857142857143, |
| "grad_norm": 0.20397736132144928, |
| "learning_rate": 7.954545454545455e-05, |
| "loss": 0.8608, |
| "step": 160 |
| }, |
| { |
| "epoch": 1.9371428571428573, |
| "grad_norm": 0.20487356185913086, |
| "learning_rate": 7.196969696969698e-05, |
| "loss": 0.8497, |
| "step": 170 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 264, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.600934819495936e+17, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|