| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 3.0, |
| "eval_steps": 500, |
| "global_step": 1452, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.10330578512396695, |
| "grad_norm": 16.806032180786133, |
| "learning_rate": 7.8497829029835e-05, |
| "loss": 1.8643, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.2066115702479339, |
| "grad_norm": 14.575823783874512, |
| "learning_rate": 9.240637424968908e-05, |
| "loss": 0.9712, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.30991735537190085, |
| "grad_norm": 8.468477249145508, |
| "learning_rate": 9.977029096477796e-05, |
| "loss": 0.925, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.4132231404958678, |
| "grad_norm": 14.040321350097656, |
| "learning_rate": 9.594180704441041e-05, |
| "loss": 0.8377, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.5165289256198347, |
| "grad_norm": 11.354561805725098, |
| "learning_rate": 9.211332312404288e-05, |
| "loss": 0.7409, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.6198347107438017, |
| "grad_norm": 12.087076187133789, |
| "learning_rate": 8.828483920367535e-05, |
| "loss": 0.786, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7231404958677686, |
| "grad_norm": 12.372916221618652, |
| "learning_rate": 8.445635528330781e-05, |
| "loss": 0.7259, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8264462809917356, |
| "grad_norm": 20.278095245361328, |
| "learning_rate": 8.062787136294027e-05, |
| "loss": 0.629, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9297520661157025, |
| "grad_norm": 9.167510986328125, |
| "learning_rate": 7.679938744257274e-05, |
| "loss": 0.6104, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.0330578512396693, |
| "grad_norm": 6.960366725921631, |
| "learning_rate": 7.297090352220522e-05, |
| "loss": 0.4766, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1363636363636362, |
| "grad_norm": 5.87119722366333, |
| "learning_rate": 6.914241960183767e-05, |
| "loss": 0.3203, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2396694214876034, |
| "grad_norm": 4.695213317871094, |
| "learning_rate": 6.531393568147013e-05, |
| "loss": 0.2819, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.3429752066115703, |
| "grad_norm": 7.376983642578125, |
| "learning_rate": 6.14854517611026e-05, |
| "loss": 0.2896, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.4462809917355373, |
| "grad_norm": 12.36500072479248, |
| "learning_rate": 5.765696784073508e-05, |
| "loss": 0.3251, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.549586776859504, |
| "grad_norm": 9.57766056060791, |
| "learning_rate": 5.382848392036754e-05, |
| "loss": 0.2976, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.6528925619834711, |
| "grad_norm": 7.4625372886657715, |
| "learning_rate": 5e-05, |
| "loss": 0.3102, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.756198347107438, |
| "grad_norm": 10.929963111877441, |
| "learning_rate": 4.617151607963247e-05, |
| "loss": 0.2473, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.859504132231405, |
| "grad_norm": 5.091157913208008, |
| "learning_rate": 4.2343032159264934e-05, |
| "loss": 0.2526, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.962809917355372, |
| "grad_norm": 14.859657287597656, |
| "learning_rate": 3.85145482388974e-05, |
| "loss": 0.221, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.0661157024793386, |
| "grad_norm": 9.133606910705566, |
| "learning_rate": 3.4686064318529865e-05, |
| "loss": 0.1737, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.169421487603306, |
| "grad_norm": 3.9577817916870117, |
| "learning_rate": 3.085758039816233e-05, |
| "loss": 0.1147, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.2727272727272725, |
| "grad_norm": 6.654378890991211, |
| "learning_rate": 2.7029096477794796e-05, |
| "loss": 0.1082, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.37603305785124, |
| "grad_norm": 1.2021315097808838, |
| "learning_rate": 2.320061255742726e-05, |
| "loss": 0.1232, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.479338842975207, |
| "grad_norm": 8.881519317626953, |
| "learning_rate": 1.9372128637059724e-05, |
| "loss": 0.0948, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.5826446280991737, |
| "grad_norm": 8.94571304321289, |
| "learning_rate": 1.554364471669219e-05, |
| "loss": 0.1079, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.6859504132231407, |
| "grad_norm": 0.7910750508308411, |
| "learning_rate": 1.1715160796324656e-05, |
| "loss": 0.0848, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.7892561983471076, |
| "grad_norm": 2.845349073410034, |
| "learning_rate": 7.886676875957121e-06, |
| "loss": 0.0723, |
| "step": 1350 |
| }, |
| { |
| "epoch": 2.8925619834710745, |
| "grad_norm": 6.832589149475098, |
| "learning_rate": 4.058192955589587e-06, |
| "loss": 0.0838, |
| "step": 1400 |
| }, |
| { |
| "epoch": 2.9958677685950414, |
| "grad_norm": 4.179447174072266, |
| "learning_rate": 2.297090352220521e-07, |
| "loss": 0.0826, |
| "step": 1450 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 1452, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 3, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|