{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 3.0,
  "eval_steps": 500,
  "global_step": 24,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.12,
      "grad_norm": 2.0197200775146484,
      "learning_rate": 9.999999999999999e-06,
      "loss": 2.7549,
      "step": 1
    },
    {
      "epoch": 0.25,
      "grad_norm": 1.7956304550170898,
      "learning_rate": 1.9999999999999998e-05,
      "loss": 2.7996,
      "step": 2
    },
    {
      "epoch": 0.38,
      "grad_norm": 2.116649866104126,
      "learning_rate": 3e-05,
      "loss": 2.6543,
      "step": 3
    },
    {
      "epoch": 0.5,
      "grad_norm": 1.911434531211853,
      "learning_rate": 2.857142857142857e-05,
      "loss": 2.5195,
      "step": 4
    },
    {
      "epoch": 0.62,
      "grad_norm": 1.7268600463867188,
      "learning_rate": 2.7142857142857144e-05,
      "loss": 2.6248,
      "step": 5
    },
    {
      "epoch": 0.75,
      "grad_norm": 1.9871946573257446,
      "learning_rate": 2.5714285714285714e-05,
      "loss": 2.4677,
      "step": 6
    },
    {
      "epoch": 0.88,
      "grad_norm": 2.2535154819488525,
      "learning_rate": 2.4285714285714288e-05,
      "loss": 2.4355,
      "step": 7
    },
    {
      "epoch": 1.0,
      "grad_norm": 2.418750524520874,
      "learning_rate": 2.2857142857142858e-05,
      "loss": 2.3599,
      "step": 8
    },
    {
      "epoch": 1.12,
      "grad_norm": 2.564194917678833,
      "learning_rate": 2.1428571428571428e-05,
      "loss": 2.2561,
      "step": 9
    },
    {
      "epoch": 1.25,
      "grad_norm": 2.209094762802124,
      "learning_rate": 1.9999999999999998e-05,
      "loss": 2.1233,
      "step": 10
    },
    {
      "epoch": 1.38,
      "grad_norm": 2.137448787689209,
      "learning_rate": 1.8571428571428572e-05,
      "loss": 2.1003,
      "step": 11
    },
    {
      "epoch": 1.5,
      "grad_norm": 2.167248010635376,
      "learning_rate": 1.7142857142857142e-05,
      "loss": 2.1379,
      "step": 12
    },
    {
      "epoch": 1.62,
      "grad_norm": 2.1075375080108643,
      "learning_rate": 1.5714285714285715e-05,
      "loss": 1.9491,
      "step": 13
    },
    {
      "epoch": 1.75,
      "grad_norm": 1.9971433877944946,
      "learning_rate": 1.4285714285714285e-05,
      "loss": 1.8734,
      "step": 14
    },
    {
      "epoch": 1.88,
      "grad_norm": 2.0848031044006348,
      "learning_rate": 1.2857142857142857e-05,
      "loss": 1.9518,
      "step": 15
    },
    {
      "epoch": 2.0,
      "grad_norm": 1.5937888622283936,
      "learning_rate": 1.1428571428571429e-05,
      "loss": 1.8169,
      "step": 16
    },
    {
      "epoch": 2.12,
      "grad_norm": 1.6044315099716187,
      "learning_rate": 9.999999999999999e-06,
      "loss": 1.7104,
      "step": 17
    },
    {
      "epoch": 2.25,
      "grad_norm": 1.3243863582611084,
      "learning_rate": 8.571428571428571e-06,
      "loss": 1.8524,
      "step": 18
    },
    {
      "epoch": 2.38,
      "grad_norm": 1.3647443056106567,
      "learning_rate": 7.142857142857143e-06,
      "loss": 1.7551,
      "step": 19
    },
    {
      "epoch": 2.5,
      "grad_norm": 1.318753719329834,
      "learning_rate": 5.7142857142857145e-06,
      "loss": 1.8268,
      "step": 20
    },
    {
      "epoch": 2.62,
      "grad_norm": 1.3245106935501099,
      "learning_rate": 4.2857142857142855e-06,
      "loss": 1.6829,
      "step": 21
    },
    {
      "epoch": 2.75,
      "grad_norm": 1.1237602233886719,
      "learning_rate": 2.8571428571428573e-06,
      "loss": 1.7231,
      "step": 22
    },
    {
      "epoch": 2.88,
      "grad_norm": 1.3340160846710205,
      "learning_rate": 1.4285714285714286e-06,
      "loss": 1.6857,
      "step": 23
    },
    {
      "epoch": 3.0,
      "grad_norm": 1.1219121217727661,
      "learning_rate": 0.0,
      "loss": 1.7945,
      "step": 24
    }
  ],
  "logging_steps": 1,
  "max_steps": 24,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 500,
  "total_flos": 1960374605709312.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null
}