| { |
| "best_global_step": null, |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 1.0, |
| "eval_steps": 500, |
| "global_step": 234, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.042735042735042736, |
| "grad_norm": 25.421951293945312, |
| "learning_rate": 3.888888888888889e-05, |
| "loss": 14.74276123046875, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.08547008547008547, |
| "grad_norm": 12.622594833374023, |
| "learning_rate": 9.444444444444444e-05, |
| "loss": 9.163805389404297, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.1282051282051282, |
| "grad_norm": 8.006195068359375, |
| "learning_rate": 0.00015000000000000001, |
| "loss": 5.28853759765625, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.17094017094017094, |
| "grad_norm": 3.498350143432617, |
| "learning_rate": 0.00019999888744757143, |
| "loss": 4.076284027099609, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.21367521367521367, |
| "grad_norm": 4.330902576446533, |
| "learning_rate": 0.00019986541110764565, |
| "loss": 3.210728073120117, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2564102564102564, |
| "grad_norm": 5.267370700836182, |
| "learning_rate": 0.0001995097645450266, |
| "loss": 2.6838237762451174, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.29914529914529914, |
| "grad_norm": 3.2072625160217285, |
| "learning_rate": 0.00019893273896534936, |
| "loss": 2.4369382858276367, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.3418803418803419, |
| "grad_norm": 3.1016528606414795, |
| "learning_rate": 0.00019813561807535598, |
| "loss": 2.205874443054199, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.38461538461538464, |
| "grad_norm": 3.8450214862823486, |
| "learning_rate": 0.00019712017522703764, |
| "loss": 1.9279813766479492, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.42735042735042733, |
| "grad_norm": 2.348071575164795, |
| "learning_rate": 0.00019588866947246498, |
| "loss": 1.8235645294189453, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.4700854700854701, |
| "grad_norm": 3.2652463912963867, |
| "learning_rate": 0.00019444384053808288, |
| "loss": 1.8220790863037108, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.5128205128205128, |
| "grad_norm": 2.6423192024230957, |
| "learning_rate": 0.00019278890272965096, |
| "loss": 1.7959518432617188, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.5555555555555556, |
| "grad_norm": 2.6279354095458984, |
| "learning_rate": 0.00019092753778138886, |
| "loss": 1.7804344177246094, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.5982905982905983, |
| "grad_norm": 2.6313953399658203, |
| "learning_rate": 0.0001888638866652356, |
| "loss": 1.642679214477539, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.6410256410256411, |
| "grad_norm": 2.1009438037872314, |
| "learning_rate": 0.00018660254037844388, |
| "loss": 1.545415496826172, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.6837606837606838, |
| "grad_norm": 2.672374963760376, |
| "learning_rate": 0.00018414852973000503, |
| "loss": 1.5645628929138184, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.7264957264957265, |
| "grad_norm": 2.6783759593963623, |
| "learning_rate": 0.00018150731414862622, |
| "loss": 1.5343215942382813, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.7692307692307693, |
| "grad_norm": 2.3677117824554443, |
| "learning_rate": 0.000178684769537159, |
| "loss": 1.5453574180603027, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.811965811965812, |
| "grad_norm": 2.3082728385925293, |
| "learning_rate": 0.0001756871752004992, |
| "loss": 1.5324308395385742, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.8547008547008547, |
| "grad_norm": 1.969205617904663, |
| "learning_rate": 0.00017252119987603973, |
| "loss": 1.5409900665283203, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.8974358974358975, |
| "grad_norm": 2.5397582054138184, |
| "learning_rate": 0.00016919388689775464, |
| "loss": 1.4344990730285645, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.9401709401709402, |
| "grad_norm": 2.0636305809020996, |
| "learning_rate": 0.00016571263852691888, |
| "loss": 1.4311028480529786, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.9829059829059829, |
| "grad_norm": 2.4687087535858154, |
| "learning_rate": 0.0001620851994843244, |
| "loss": 1.461498737335205, |
| "step": 230 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 702, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.961214772268672e+16, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|