| { |
| "best_global_step": 1648, |
| "best_metric": 0.8455604792384703, |
| "best_model_checkpoint": "outputs/final-run/checkpoint-1648", |
| "epoch": 2.0, |
| "eval_steps": 500, |
| "global_step": 1648, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.06071645415907711, |
| "grad_norm": 3.4753293991088867, |
| "learning_rate": 4.9993183714305955e-05, |
| "loss": 0.5491, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.12143290831815422, |
| "grad_norm": 6.545201301574707, |
| "learning_rate": 4.997217948372208e-05, |
| "loss": 0.4501, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.18214936247723132, |
| "grad_norm": 3.1810355186462402, |
| "learning_rate": 4.993699639509482e-05, |
| "loss": 0.4563, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.24286581663630843, |
| "grad_norm": 4.675663471221924, |
| "learning_rate": 4.9887654424895166e-05, |
| "loss": 0.4388, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.3035822707953855, |
| "grad_norm": 3.568342447280884, |
| "learning_rate": 4.982418158881122e-05, |
| "loss": 0.4243, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.36429872495446264, |
| "grad_norm": 4.863748550415039, |
| "learning_rate": 4.974661392584119e-05, |
| "loss": 0.4231, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.42501517911353975, |
| "grad_norm": 2.679159641265869, |
| "learning_rate": 4.965499547783105e-05, |
| "loss": 0.4259, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.48573163327261687, |
| "grad_norm": 2.305194854736328, |
| "learning_rate": 4.954937826446812e-05, |
| "loss": 0.4091, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.546448087431694, |
| "grad_norm": 6.192315578460693, |
| "learning_rate": 4.942982225374503e-05, |
| "loss": 0.4013, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.607164541590771, |
| "grad_norm": 3.520240068435669, |
| "learning_rate": 4.9296395327910704e-05, |
| "loss": 0.4065, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.6678809957498482, |
| "grad_norm": 3.2191152572631836, |
| "learning_rate": 4.914917324492781e-05, |
| "loss": 0.3986, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.7285974499089253, |
| "grad_norm": 2.3364710807800293, |
| "learning_rate": 4.8988239595458375e-05, |
| "loss": 0.3958, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.7893139040680024, |
| "grad_norm": 3.812765121459961, |
| "learning_rate": 4.881368575540219e-05, |
| "loss": 0.3984, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.8500303582270795, |
| "grad_norm": 2.674964666366577, |
| "learning_rate": 4.8625610834014855e-05, |
| "loss": 0.4113, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.9107468123861566, |
| "grad_norm": 1.985715389251709, |
| "learning_rate": 4.8424121617634884e-05, |
| "loss": 0.3926, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.9714632665452337, |
| "grad_norm": 2.5065205097198486, |
| "learning_rate": 4.820933250905191e-05, |
| "loss": 0.4014, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.0, |
| "eval_f1": 0.837057576251835, |
| "eval_loss": 0.3876318335533142, |
| "eval_runtime": 7.9624, |
| "eval_samples_per_second": 735.081, |
| "eval_steps_per_second": 22.983, |
| "step": 824 |
| }, |
| { |
| "epoch": 1.03157255616272, |
| "grad_norm": 2.5298709869384766, |
| "learning_rate": 4.798136546255038e-05, |
| "loss": 0.3394, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.0922890103217973, |
| "grad_norm": 3.7168054580688477, |
| "learning_rate": 4.774034991466558e-05, |
| "loss": 0.3, |
| "step": 900 |
| }, |
| { |
| "epoch": 1.1530054644808743, |
| "grad_norm": 3.277371883392334, |
| "learning_rate": 4.7486422710691366e-05, |
| "loss": 0.3123, |
| "step": 950 |
| }, |
| { |
| "epoch": 1.2137219186399515, |
| "grad_norm": 2.1234121322631836, |
| "learning_rate": 4.7219728026981314e-05, |
| "loss": 0.3003, |
| "step": 1000 |
| }, |
| { |
| "epoch": 1.2744383727990285, |
| "grad_norm": 3.3195674419403076, |
| "learning_rate": 4.694041728908733e-05, |
| "loss": 0.3238, |
| "step": 1050 |
| }, |
| { |
| "epoch": 1.3351548269581057, |
| "grad_norm": 3.5936930179595947, |
| "learning_rate": 4.66486490857824e-05, |
| "loss": 0.3151, |
| "step": 1100 |
| }, |
| { |
| "epoch": 1.3958712811171827, |
| "grad_norm": 3.6757988929748535, |
| "learning_rate": 4.6344589079016e-05, |
| "loss": 0.3123, |
| "step": 1150 |
| }, |
| { |
| "epoch": 1.4565877352762597, |
| "grad_norm": 5.673912048339844, |
| "learning_rate": 4.6028409909853585e-05, |
| "loss": 0.3232, |
| "step": 1200 |
| }, |
| { |
| "epoch": 1.517304189435337, |
| "grad_norm": 2.754713296890259, |
| "learning_rate": 4.570029110045335e-05, |
| "loss": 0.3303, |
| "step": 1250 |
| }, |
| { |
| "epoch": 1.5780206435944142, |
| "grad_norm": 2.9985156059265137, |
| "learning_rate": 4.536041895213605e-05, |
| "loss": 0.3259, |
| "step": 1300 |
| }, |
| { |
| "epoch": 1.6387370977534912, |
| "grad_norm": 2.9103963375091553, |
| "learning_rate": 4.500898643960567e-05, |
| "loss": 0.3346, |
| "step": 1350 |
| }, |
| { |
| "epoch": 1.6994535519125682, |
| "grad_norm": 3.4398272037506104, |
| "learning_rate": 4.4646193101381076e-05, |
| "loss": 0.3293, |
| "step": 1400 |
| }, |
| { |
| "epoch": 1.7601700060716454, |
| "grad_norm": 4.817779541015625, |
| "learning_rate": 4.427224492650079e-05, |
| "loss": 0.332, |
| "step": 1450 |
| }, |
| { |
| "epoch": 1.8208864602307226, |
| "grad_norm": 3.6065807342529297, |
| "learning_rate": 4.3887354237565295e-05, |
| "loss": 0.3382, |
| "step": 1500 |
| }, |
| { |
| "epoch": 1.8816029143897997, |
| "grad_norm": 2.9802093505859375, |
| "learning_rate": 4.349173957018313e-05, |
| "loss": 0.3156, |
| "step": 1550 |
| }, |
| { |
| "epoch": 1.9423193685488767, |
| "grad_norm": 3.462924003601074, |
| "learning_rate": 4.308562554888948e-05, |
| "loss": 0.3222, |
| "step": 1600 |
| }, |
| { |
| "epoch": 2.0, |
| "eval_f1": 0.8455604792384703, |
| "eval_loss": 0.393000990152359, |
| "eval_runtime": 7.9543, |
| "eval_samples_per_second": 735.824, |
| "eval_steps_per_second": 23.006, |
| "step": 1648 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 6592, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 8, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 2.771769723795456e+16, |
| "train_batch_size": 32, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|