| { |
| "best_global_step": 2000, |
| "best_metric": 5.987252022653418, |
| "best_model_checkpoint": "outputs/bert-tiny-stage2-sbert/checkpoints/checkpoint-2000", |
| "epoch": 0.47303689687795647, |
| "eval_steps": 2000, |
| "global_step": 2000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.011825922421948912, |
| "grad_norm": 37.57676696777344, |
| "learning_rate": 2.3173327027666118e-07, |
| "loss": 18.0327, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.023651844843897825, |
| "grad_norm": 43.198524475097656, |
| "learning_rate": 4.6819579096713174e-07, |
| "loss": 17.7132, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.035477767265846734, |
| "grad_norm": 36.470436096191406, |
| "learning_rate": 7.046583116576024e-07, |
| "loss": 17.0003, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.04730368968779565, |
| "grad_norm": 34.2964973449707, |
| "learning_rate": 9.411208323480729e-07, |
| "loss": 16.0668, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.05912961210974456, |
| "grad_norm": 33.23365020751953, |
| "learning_rate": 1.1775833530385434e-06, |
| "loss": 14.9137, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.07095553453169347, |
| "grad_norm": 28.96517562866211, |
| "learning_rate": 1.4140458737290142e-06, |
| "loss": 13.9439, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.08278145695364239, |
| "grad_norm": 31.533185958862305, |
| "learning_rate": 1.6505083944194847e-06, |
| "loss": 12.7951, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.0946073793755913, |
| "grad_norm": 33.31780242919922, |
| "learning_rate": 1.8869709151099552e-06, |
| "loss": 11.7283, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.10643330179754021, |
| "grad_norm": 36.39932632446289, |
| "learning_rate": 2.123433435800426e-06, |
| "loss": 10.694, |
| "step": 450 |
| }, |
| { |
| "epoch": 0.11825922421948912, |
| "grad_norm": 39.11393356323242, |
| "learning_rate": 2.3598959564908965e-06, |
| "loss": 9.4759, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.13008514664143803, |
| "grad_norm": 32.978511810302734, |
| "learning_rate": 2.596358477181367e-06, |
| "loss": 8.6215, |
| "step": 550 |
| }, |
| { |
| "epoch": 0.14191106906338694, |
| "grad_norm": 33.80763244628906, |
| "learning_rate": 2.8328209978718375e-06, |
| "loss": 7.729, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.15373699148533584, |
| "grad_norm": 32.52557373046875, |
| "learning_rate": 3.069283518562308e-06, |
| "loss": 7.1881, |
| "step": 650 |
| }, |
| { |
| "epoch": 0.16556291390728478, |
| "grad_norm": 29.276039123535156, |
| "learning_rate": 3.3057460392527786e-06, |
| "loss": 6.9661, |
| "step": 700 |
| }, |
| { |
| "epoch": 0.1773888363292337, |
| "grad_norm": 29.209379196166992, |
| "learning_rate": 3.5422085599432495e-06, |
| "loss": 6.4771, |
| "step": 750 |
| }, |
| { |
| "epoch": 0.1892147587511826, |
| "grad_norm": 28.865949630737305, |
| "learning_rate": 3.77867108063372e-06, |
| "loss": 6.1275, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.2010406811731315, |
| "grad_norm": 29.751232147216797, |
| "learning_rate": 4.01513360132419e-06, |
| "loss": 6.12, |
| "step": 850 |
| }, |
| { |
| "epoch": 0.21286660359508042, |
| "grad_norm": 27.77039337158203, |
| "learning_rate": 4.2515961220146615e-06, |
| "loss": 5.9566, |
| "step": 900 |
| }, |
| { |
| "epoch": 0.22469252601702933, |
| "grad_norm": 28.1367130279541, |
| "learning_rate": 4.488058642705131e-06, |
| "loss": 5.8145, |
| "step": 950 |
| }, |
| { |
| "epoch": 0.23651844843897823, |
| "grad_norm": 26.56863021850586, |
| "learning_rate": 4.7245211633956025e-06, |
| "loss": 5.559, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.24834437086092714, |
| "grad_norm": 24.36810302734375, |
| "learning_rate": 4.960983684086072e-06, |
| "loss": 5.2348, |
| "step": 1050 |
| }, |
| { |
| "epoch": 0.26017029328287605, |
| "grad_norm": 25.959495544433594, |
| "learning_rate": 5.197446204776543e-06, |
| "loss": 5.3013, |
| "step": 1100 |
| }, |
| { |
| "epoch": 0.27199621570482496, |
| "grad_norm": 26.230960845947266, |
| "learning_rate": 5.433908725467014e-06, |
| "loss": 5.0565, |
| "step": 1150 |
| }, |
| { |
| "epoch": 0.28382213812677387, |
| "grad_norm": 26.86113166809082, |
| "learning_rate": 5.670371246157485e-06, |
| "loss": 4.8716, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.2956480605487228, |
| "grad_norm": 21.906108856201172, |
| "learning_rate": 5.906833766847954e-06, |
| "loss": 4.7829, |
| "step": 1250 |
| }, |
| { |
| "epoch": 0.3074739829706717, |
| "grad_norm": 23.101173400878906, |
| "learning_rate": 6.143296287538426e-06, |
| "loss": 4.8804, |
| "step": 1300 |
| }, |
| { |
| "epoch": 0.3192999053926206, |
| "grad_norm": 24.288232803344727, |
| "learning_rate": 6.379758808228896e-06, |
| "loss": 4.6464, |
| "step": 1350 |
| }, |
| { |
| "epoch": 0.33112582781456956, |
| "grad_norm": 22.537385940551758, |
| "learning_rate": 6.616221328919367e-06, |
| "loss": 4.7044, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.34295175023651847, |
| "grad_norm": 21.849151611328125, |
| "learning_rate": 6.852683849609837e-06, |
| "loss": 4.5196, |
| "step": 1450 |
| }, |
| { |
| "epoch": 0.3547776726584674, |
| "grad_norm": 24.385927200317383, |
| "learning_rate": 7.089146370300309e-06, |
| "loss": 4.4961, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.3666035950804163, |
| "grad_norm": 23.45542335510254, |
| "learning_rate": 7.325608890990778e-06, |
| "loss": 4.4388, |
| "step": 1550 |
| }, |
| { |
| "epoch": 0.3784295175023652, |
| "grad_norm": 24.437057495117188, |
| "learning_rate": 7.562071411681249e-06, |
| "loss": 4.1692, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.3902554399243141, |
| "grad_norm": 22.47982406616211, |
| "learning_rate": 7.79853393237172e-06, |
| "loss": 4.2153, |
| "step": 1650 |
| }, |
| { |
| "epoch": 0.402081362346263, |
| "grad_norm": 22.51207160949707, |
| "learning_rate": 8.03499645306219e-06, |
| "loss": 3.9962, |
| "step": 1700 |
| }, |
| { |
| "epoch": 0.4139072847682119, |
| "grad_norm": 23.745113372802734, |
| "learning_rate": 8.271458973752661e-06, |
| "loss": 4.1288, |
| "step": 1750 |
| }, |
| { |
| "epoch": 0.42573320719016083, |
| "grad_norm": 24.001792907714844, |
| "learning_rate": 8.507921494443131e-06, |
| "loss": 4.023, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.43755912961210974, |
| "grad_norm": 20.710079193115234, |
| "learning_rate": 8.744384015133602e-06, |
| "loss": 3.9173, |
| "step": 1850 |
| }, |
| { |
| "epoch": 0.44938505203405865, |
| "grad_norm": 19.746440887451172, |
| "learning_rate": 8.980846535824072e-06, |
| "loss": 3.7544, |
| "step": 1900 |
| }, |
| { |
| "epoch": 0.46121097445600756, |
| "grad_norm": 22.844385147094727, |
| "learning_rate": 9.217309056514543e-06, |
| "loss": 3.8234, |
| "step": 1950 |
| }, |
| { |
| "epoch": 0.47303689687795647, |
| "grad_norm": 22.3209285736084, |
| "learning_rate": 9.453771577205015e-06, |
| "loss": 3.8007, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.47303689687795647, |
| "eval_runtime": 48.8183, |
| "eval_samples_per_second": 0.0, |
| "eval_steps_per_second": 0.0, |
| "eval_validation_loss": 5.987252022653418, |
| "step": 2000 |
| } |
| ], |
| "logging_steps": 50, |
| "max_steps": 21140, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 5, |
| "save_steps": 2000, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|