{ "best_global_step": 2000, "best_metric": 5.987252022653418, "best_model_checkpoint": "outputs/bert-tiny-stage2-sbert/checkpoints/checkpoint-2000", "epoch": 0.47303689687795647, "eval_steps": 2000, "global_step": 2000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.011825922421948912, "grad_norm": 37.57676696777344, "learning_rate": 2.3173327027666118e-07, "loss": 18.0327, "step": 50 }, { "epoch": 0.023651844843897825, "grad_norm": 43.198524475097656, "learning_rate": 4.6819579096713174e-07, "loss": 17.7132, "step": 100 }, { "epoch": 0.035477767265846734, "grad_norm": 36.470436096191406, "learning_rate": 7.046583116576024e-07, "loss": 17.0003, "step": 150 }, { "epoch": 0.04730368968779565, "grad_norm": 34.2964973449707, "learning_rate": 9.411208323480729e-07, "loss": 16.0668, "step": 200 }, { "epoch": 0.05912961210974456, "grad_norm": 33.23365020751953, "learning_rate": 1.1775833530385434e-06, "loss": 14.9137, "step": 250 }, { "epoch": 0.07095553453169347, "grad_norm": 28.96517562866211, "learning_rate": 1.4140458737290142e-06, "loss": 13.9439, "step": 300 }, { "epoch": 0.08278145695364239, "grad_norm": 31.533185958862305, "learning_rate": 1.6505083944194847e-06, "loss": 12.7951, "step": 350 }, { "epoch": 0.0946073793755913, "grad_norm": 33.31780242919922, "learning_rate": 1.8869709151099552e-06, "loss": 11.7283, "step": 400 }, { "epoch": 0.10643330179754021, "grad_norm": 36.39932632446289, "learning_rate": 2.123433435800426e-06, "loss": 10.694, "step": 450 }, { "epoch": 0.11825922421948912, "grad_norm": 39.11393356323242, "learning_rate": 2.3598959564908965e-06, "loss": 9.4759, "step": 500 }, { "epoch": 0.13008514664143803, "grad_norm": 32.978511810302734, "learning_rate": 2.596358477181367e-06, "loss": 8.6215, "step": 550 }, { "epoch": 0.14191106906338694, "grad_norm": 33.80763244628906, "learning_rate": 2.8328209978718375e-06, "loss": 7.729, "step": 600 }, { "epoch": 0.15373699148533584, "grad_norm": 32.52557373046875, "learning_rate": 3.069283518562308e-06, "loss": 7.1881, "step": 650 }, { "epoch": 0.16556291390728478, "grad_norm": 29.276039123535156, "learning_rate": 3.3057460392527786e-06, "loss": 6.9661, "step": 700 }, { "epoch": 0.1773888363292337, "grad_norm": 29.209379196166992, "learning_rate": 3.5422085599432495e-06, "loss": 6.4771, "step": 750 }, { "epoch": 0.1892147587511826, "grad_norm": 28.865949630737305, "learning_rate": 3.77867108063372e-06, "loss": 6.1275, "step": 800 }, { "epoch": 0.2010406811731315, "grad_norm": 29.751232147216797, "learning_rate": 4.01513360132419e-06, "loss": 6.12, "step": 850 }, { "epoch": 0.21286660359508042, "grad_norm": 27.77039337158203, "learning_rate": 4.2515961220146615e-06, "loss": 5.9566, "step": 900 }, { "epoch": 0.22469252601702933, "grad_norm": 28.1367130279541, "learning_rate": 4.488058642705131e-06, "loss": 5.8145, "step": 950 }, { "epoch": 0.23651844843897823, "grad_norm": 26.56863021850586, "learning_rate": 4.7245211633956025e-06, "loss": 5.559, "step": 1000 }, { "epoch": 0.24834437086092714, "grad_norm": 24.36810302734375, "learning_rate": 4.960983684086072e-06, "loss": 5.2348, "step": 1050 }, { "epoch": 0.26017029328287605, "grad_norm": 25.959495544433594, "learning_rate": 5.197446204776543e-06, "loss": 5.3013, "step": 1100 }, { "epoch": 0.27199621570482496, "grad_norm": 26.230960845947266, "learning_rate": 5.433908725467014e-06, "loss": 5.0565, "step": 1150 }, { "epoch": 0.28382213812677387, "grad_norm": 26.86113166809082, "learning_rate": 5.670371246157485e-06, "loss": 4.8716, "step": 1200 }, { "epoch": 0.2956480605487228, "grad_norm": 21.906108856201172, "learning_rate": 5.906833766847954e-06, "loss": 4.7829, "step": 1250 }, { "epoch": 0.3074739829706717, "grad_norm": 23.101173400878906, "learning_rate": 6.143296287538426e-06, "loss": 4.8804, "step": 1300 }, { "epoch": 0.3192999053926206, "grad_norm": 24.288232803344727, "learning_rate": 6.379758808228896e-06, "loss": 4.6464, "step": 1350 }, { "epoch": 0.33112582781456956, "grad_norm": 22.537385940551758, "learning_rate": 6.616221328919367e-06, "loss": 4.7044, "step": 1400 }, { "epoch": 0.34295175023651847, "grad_norm": 21.849151611328125, "learning_rate": 6.852683849609837e-06, "loss": 4.5196, "step": 1450 }, { "epoch": 0.3547776726584674, "grad_norm": 24.385927200317383, "learning_rate": 7.089146370300309e-06, "loss": 4.4961, "step": 1500 }, { "epoch": 0.3666035950804163, "grad_norm": 23.45542335510254, "learning_rate": 7.325608890990778e-06, "loss": 4.4388, "step": 1550 }, { "epoch": 0.3784295175023652, "grad_norm": 24.437057495117188, "learning_rate": 7.562071411681249e-06, "loss": 4.1692, "step": 1600 }, { "epoch": 0.3902554399243141, "grad_norm": 22.47982406616211, "learning_rate": 7.79853393237172e-06, "loss": 4.2153, "step": 1650 }, { "epoch": 0.402081362346263, "grad_norm": 22.51207160949707, "learning_rate": 8.03499645306219e-06, "loss": 3.9962, "step": 1700 }, { "epoch": 0.4139072847682119, "grad_norm": 23.745113372802734, "learning_rate": 8.271458973752661e-06, "loss": 4.1288, "step": 1750 }, { "epoch": 0.42573320719016083, "grad_norm": 24.001792907714844, "learning_rate": 8.507921494443131e-06, "loss": 4.023, "step": 1800 }, { "epoch": 0.43755912961210974, "grad_norm": 20.710079193115234, "learning_rate": 8.744384015133602e-06, "loss": 3.9173, "step": 1850 }, { "epoch": 0.44938505203405865, "grad_norm": 19.746440887451172, "learning_rate": 8.980846535824072e-06, "loss": 3.7544, "step": 1900 }, { "epoch": 0.46121097445600756, "grad_norm": 22.844385147094727, "learning_rate": 9.217309056514543e-06, "loss": 3.8234, "step": 1950 }, { "epoch": 0.47303689687795647, "grad_norm": 22.3209285736084, "learning_rate": 9.453771577205015e-06, "loss": 3.8007, "step": 2000 }, { "epoch": 0.47303689687795647, "eval_runtime": 48.8183, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_validation_loss": 5.987252022653418, "step": 2000 } ], "logging_steps": 50, "max_steps": 21140, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 2000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }