{ "best_global_step": 7500, "best_metric": 0.7552966570626944, "best_model_checkpoint": "./labse-Matryoshka-AuthId/checkpoint-7500", "epoch": 4.0, "eval_steps": 500, "global_step": 8236, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.24283632831471588, "grad_norm": 8.226706504821777, "learning_rate": 1.8788246721709567e-05, "loss": 1.8844, "step": 500 }, { "epoch": 0.24283632831471588, "eval_accuracy": 0.580466682703873, "eval_f1_macro": 0.4960676688683372, "eval_loss": 1.5094239711761475, "eval_precision_macro": 0.5214977084503927, "eval_recall_macro": 0.5479464513669718, "eval_runtime": 79.4803, "eval_samples_per_second": 52.302, "eval_steps_per_second": 3.271, "step": 500 }, { "epoch": 0.48567265662943176, "grad_norm": 23.343076705932617, "learning_rate": 1.757406508013599e-05, "loss": 1.0439, "step": 1000 }, { "epoch": 0.48567265662943176, "eval_accuracy": 0.6338705797450084, "eval_f1_macro": 0.5443060410532249, "eval_loss": 1.180512547492981, "eval_precision_macro": 0.5964210887979737, "eval_recall_macro": 0.5815396599629518, "eval_runtime": 79.5018, "eval_samples_per_second": 52.288, "eval_steps_per_second": 3.27, "step": 1000 }, { "epoch": 0.7285089849441476, "grad_norm": 22.333507537841797, "learning_rate": 1.635988343856241e-05, "loss": 0.7775, "step": 1500 }, { "epoch": 0.7285089849441476, "eval_accuracy": 0.7168631224440702, "eval_f1_macro": 0.6352859818957786, "eval_loss": 1.000940203666687, "eval_precision_macro": 0.6284925893229054, "eval_recall_macro": 0.6892646842795946, "eval_runtime": 79.7404, "eval_samples_per_second": 52.132, "eval_steps_per_second": 3.261, "step": 1500 }, { "epoch": 0.9713453132588635, "grad_norm": 17.205595016479492, "learning_rate": 1.514570179698883e-05, "loss": 0.6235, "step": 2000 }, { "epoch": 0.9713453132588635, "eval_accuracy": 0.7002646139042579, "eval_f1_macro": 0.6444992633781246, "eval_loss": 1.0062403678894043, "eval_precision_macro": 0.6416324743101667, "eval_recall_macro": 0.7175728403685042, "eval_runtime": 79.8787, "eval_samples_per_second": 52.041, "eval_steps_per_second": 3.255, "step": 2000 }, { "epoch": 1.2141816415735793, "grad_norm": 6.284836769104004, "learning_rate": 1.393152015541525e-05, "loss": 0.4443, "step": 2500 }, { "epoch": 1.2141816415735793, "eval_accuracy": 0.752225162376714, "eval_f1_macro": 0.7005112117395045, "eval_loss": 0.8306922316551208, "eval_precision_macro": 0.7030961323363835, "eval_recall_macro": 0.7405526082434787, "eval_runtime": 79.5711, "eval_samples_per_second": 52.243, "eval_steps_per_second": 3.268, "step": 2500 }, { "epoch": 1.4570179698882952, "grad_norm": 14.860431671142578, "learning_rate": 1.2717338513841673e-05, "loss": 0.3917, "step": 3000 }, { "epoch": 1.4570179698882952, "eval_accuracy": 0.776040413759923, "eval_f1_macro": 0.7195032487137867, "eval_loss": 0.7922475934028625, "eval_precision_macro": 0.718432845056206, "eval_recall_macro": 0.7476174763134192, "eval_runtime": 79.7377, "eval_samples_per_second": 52.133, "eval_steps_per_second": 3.261, "step": 3000 }, { "epoch": 1.6998542982030111, "grad_norm": 15.658327102661133, "learning_rate": 1.1503156872268093e-05, "loss": 0.3384, "step": 3500 }, { "epoch": 1.6998542982030111, "eval_accuracy": 0.7914361318258359, "eval_f1_macro": 0.7267579331806328, "eval_loss": 0.7427302598953247, "eval_precision_macro": 0.7315940928020874, "eval_recall_macro": 0.7473775054197197, "eval_runtime": 79.5326, "eval_samples_per_second": 52.268, "eval_steps_per_second": 3.269, "step": 3500 }, { "epoch": 1.942690626517727, "grad_norm": 3.790079355239868, "learning_rate": 1.0288975230694513e-05, "loss": 0.3212, "step": 4000 }, { "epoch": 1.942690626517727, "eval_accuracy": 0.7666586480635074, "eval_f1_macro": 0.7040518310115823, "eval_loss": 0.8302342891693115, "eval_precision_macro": 0.7097429428181893, "eval_recall_macro": 0.7427055724786205, "eval_runtime": 79.7283, "eval_samples_per_second": 52.14, "eval_steps_per_second": 3.261, "step": 4000 }, { "epoch": 2.1855269548324427, "grad_norm": 20.00322151184082, "learning_rate": 9.074793589120935e-06, "loss": 0.2072, "step": 4500 }, { "epoch": 2.1855269548324427, "eval_accuracy": 0.7926389222997354, "eval_f1_macro": 0.7306884775555181, "eval_loss": 0.7662757039070129, "eval_precision_macro": 0.7372020807956, "eval_recall_macro": 0.7527073689440521, "eval_runtime": 79.8142, "eval_samples_per_second": 52.083, "eval_steps_per_second": 3.258, "step": 4500 }, { "epoch": 2.4283632831471587, "grad_norm": 9.991105079650879, "learning_rate": 7.860611947547354e-06, "loss": 0.1837, "step": 5000 }, { "epoch": 2.4283632831471587, "eval_accuracy": 0.7887899927832571, "eval_f1_macro": 0.7303342643678596, "eval_loss": 0.8012056946754456, "eval_precision_macro": 0.7383307287980656, "eval_recall_macro": 0.749799051904019, "eval_runtime": 79.4999, "eval_samples_per_second": 52.289, "eval_steps_per_second": 3.27, "step": 5000 }, { "epoch": 2.6711996114618746, "grad_norm": 2.5781819820404053, "learning_rate": 6.646430305973774e-06, "loss": 0.1853, "step": 5500 }, { "epoch": 2.6711996114618746, "eval_accuracy": 0.8169352898725042, "eval_f1_macro": 0.7425975963793477, "eval_loss": 0.7290380597114563, "eval_precision_macro": 0.7490380424010595, "eval_recall_macro": 0.7728571995465946, "eval_runtime": 79.7501, "eval_samples_per_second": 52.125, "eval_steps_per_second": 3.26, "step": 5500 }, { "epoch": 2.9140359397765905, "grad_norm": 13.812039375305176, "learning_rate": 5.432248664400195e-06, "loss": 0.1702, "step": 6000 }, { "epoch": 2.9140359397765905, "eval_accuracy": 0.8025018041857108, "eval_f1_macro": 0.7478693465335228, "eval_loss": 0.7762283682823181, "eval_precision_macro": 0.7494886922567287, "eval_recall_macro": 0.7661448691705909, "eval_runtime": 79.5684, "eval_samples_per_second": 52.244, "eval_steps_per_second": 3.268, "step": 6000 }, { "epoch": 3.1568722680913064, "grad_norm": 19.965606689453125, "learning_rate": 4.2180670228266156e-06, "loss": 0.115, "step": 6500 }, { "epoch": 3.1568722680913064, "eval_accuracy": 0.8085157565552081, "eval_f1_macro": 0.7550956906548623, "eval_loss": 0.8461021184921265, "eval_precision_macro": 0.7547053436507176, "eval_recall_macro": 0.7770605308609413, "eval_runtime": 79.7407, "eval_samples_per_second": 52.131, "eval_steps_per_second": 3.261, "step": 6500 }, { "epoch": 3.3997085964060223, "grad_norm": 5.473484039306641, "learning_rate": 3.0038853812530354e-06, "loss": 0.0881, "step": 7000 }, { "epoch": 3.3997085964060223, "eval_accuracy": 0.8051479432282896, "eval_f1_macro": 0.7518260599241406, "eval_loss": 0.8820834755897522, "eval_precision_macro": 0.7561690673733429, "eval_recall_macro": 0.7634548535133201, "eval_runtime": 79.7355, "eval_samples_per_second": 52.135, "eval_steps_per_second": 3.261, "step": 7000 }, { "epoch": 3.642544924720738, "grad_norm": 0.03955981135368347, "learning_rate": 1.789703739679456e-06, "loss": 0.0854, "step": 7500 }, { "epoch": 3.642544924720738, "eval_accuracy": 0.8171758479672842, "eval_f1_macro": 0.7552966570626944, "eval_loss": 0.8381013870239258, "eval_precision_macro": 0.7568149477995186, "eval_recall_macro": 0.7706908260368737, "eval_runtime": 79.5261, "eval_samples_per_second": 52.272, "eval_steps_per_second": 3.269, "step": 7500 }, { "epoch": 3.885381253035454, "grad_norm": 6.264761447906494, "learning_rate": 5.755220981058767e-07, "loss": 0.0848, "step": 8000 }, { "epoch": 3.885381253035454, "eval_accuracy": 0.8077940822708685, "eval_f1_macro": 0.7527204408320765, "eval_loss": 0.8696035742759705, "eval_precision_macro": 0.7535730472048996, "eval_recall_macro": 0.7700727790457579, "eval_runtime": 79.6143, "eval_samples_per_second": 52.214, "eval_steps_per_second": 3.266, "step": 8000 } ], "logging_steps": 500, "max_steps": 8236, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.7337239662270464e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }