{
  "best_global_step": 7500,
  "best_metric": 0.7552966570626944,
  "best_model_checkpoint": "./labse-Matryoshka-AuthId/checkpoint-7500",
  "epoch": 4.0,
  "eval_steps": 500,
  "global_step": 8236,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24283632831471588,
      "grad_norm": 8.226706504821777,
      "learning_rate": 1.8788246721709567e-05,
      "loss": 1.8844,
      "step": 500
    },
    {
      "epoch": 0.24283632831471588,
      "eval_accuracy": 0.580466682703873,
      "eval_f1_macro": 0.4960676688683372,
      "eval_loss": 1.5094239711761475,
      "eval_precision_macro": 0.5214977084503927,
      "eval_recall_macro": 0.5479464513669718,
      "eval_runtime": 79.4803,
      "eval_samples_per_second": 52.302,
      "eval_steps_per_second": 3.271,
      "step": 500
    },
    {
      "epoch": 0.48567265662943176,
      "grad_norm": 23.343076705932617,
      "learning_rate": 1.757406508013599e-05,
      "loss": 1.0439,
      "step": 1000
    },
    {
      "epoch": 0.48567265662943176,
      "eval_accuracy": 0.6338705797450084,
      "eval_f1_macro": 0.5443060410532249,
      "eval_loss": 1.180512547492981,
      "eval_precision_macro": 0.5964210887979737,
      "eval_recall_macro": 0.5815396599629518,
      "eval_runtime": 79.5018,
      "eval_samples_per_second": 52.288,
      "eval_steps_per_second": 3.27,
      "step": 1000
    },
    {
      "epoch": 0.7285089849441476,
      "grad_norm": 22.333507537841797,
      "learning_rate": 1.635988343856241e-05,
      "loss": 0.7775,
      "step": 1500
    },
    {
      "epoch": 0.7285089849441476,
      "eval_accuracy": 0.7168631224440702,
      "eval_f1_macro": 0.6352859818957786,
      "eval_loss": 1.000940203666687,
      "eval_precision_macro": 0.6284925893229054,
      "eval_recall_macro": 0.6892646842795946,
      "eval_runtime": 79.7404,
      "eval_samples_per_second": 52.132,
      "eval_steps_per_second": 3.261,
      "step": 1500
    },
    {
      "epoch": 0.9713453132588635,
      "grad_norm": 17.205595016479492,
      "learning_rate": 1.514570179698883e-05,
      "loss": 0.6235,
      "step": 2000
    },
    {
      "epoch": 0.9713453132588635,
      "eval_accuracy": 0.7002646139042579,
      "eval_f1_macro": 0.6444992633781246,
      "eval_loss": 1.0062403678894043,
      "eval_precision_macro": 0.6416324743101667,
      "eval_recall_macro": 0.7175728403685042,
      "eval_runtime": 79.8787,
      "eval_samples_per_second": 52.041,
      "eval_steps_per_second": 3.255,
      "step": 2000
    },
    {
      "epoch": 1.2141816415735793,
      "grad_norm": 6.284836769104004,
      "learning_rate": 1.393152015541525e-05,
      "loss": 0.4443,
      "step": 2500
    },
    {
      "epoch": 1.2141816415735793,
      "eval_accuracy": 0.752225162376714,
      "eval_f1_macro": 0.7005112117395045,
      "eval_loss": 0.8306922316551208,
      "eval_precision_macro": 0.7030961323363835,
      "eval_recall_macro": 0.7405526082434787,
      "eval_runtime": 79.5711,
      "eval_samples_per_second": 52.243,
      "eval_steps_per_second": 3.268,
      "step": 2500
    },
    {
      "epoch": 1.4570179698882952,
      "grad_norm": 14.860431671142578,
      "learning_rate": 1.2717338513841673e-05,
      "loss": 0.3917,
      "step": 3000
    },
    {
      "epoch": 1.4570179698882952,
      "eval_accuracy": 0.776040413759923,
      "eval_f1_macro": 0.7195032487137867,
      "eval_loss": 0.7922475934028625,
      "eval_precision_macro": 0.718432845056206,
      "eval_recall_macro": 0.7476174763134192,
      "eval_runtime": 79.7377,
      "eval_samples_per_second": 52.133,
      "eval_steps_per_second": 3.261,
      "step": 3000
    },
    {
      "epoch": 1.6998542982030111,
      "grad_norm": 15.658327102661133,
      "learning_rate": 1.1503156872268093e-05,
      "loss": 0.3384,
      "step": 3500
    },
    {
      "epoch": 1.6998542982030111,
      "eval_accuracy": 0.7914361318258359,
      "eval_f1_macro": 0.7267579331806328,
      "eval_loss": 0.7427302598953247,
      "eval_precision_macro": 0.7315940928020874,
      "eval_recall_macro": 0.7473775054197197,
      "eval_runtime": 79.5326,
      "eval_samples_per_second": 52.268,
      "eval_steps_per_second": 3.269,
      "step": 3500
    },
    {
      "epoch": 1.942690626517727,
      "grad_norm": 3.790079355239868,
      "learning_rate": 1.0288975230694513e-05,
      "loss": 0.3212,
      "step": 4000
    },
    {
      "epoch": 1.942690626517727,
      "eval_accuracy": 0.7666586480635074,
      "eval_f1_macro": 0.7040518310115823,
      "eval_loss": 0.8302342891693115,
      "eval_precision_macro": 0.7097429428181893,
      "eval_recall_macro": 0.7427055724786205,
      "eval_runtime": 79.7283,
      "eval_samples_per_second": 52.14,
      "eval_steps_per_second": 3.261,
      "step": 4000
    },
    {
      "epoch": 2.1855269548324427,
      "grad_norm": 20.00322151184082,
      "learning_rate": 9.074793589120935e-06,
      "loss": 0.2072,
      "step": 4500
    },
    {
      "epoch": 2.1855269548324427,
      "eval_accuracy": 0.7926389222997354,
      "eval_f1_macro": 0.7306884775555181,
      "eval_loss": 0.7662757039070129,
      "eval_precision_macro": 0.7372020807956,
      "eval_recall_macro": 0.7527073689440521,
      "eval_runtime": 79.8142,
      "eval_samples_per_second": 52.083,
      "eval_steps_per_second": 3.258,
      "step": 4500
    },
    {
      "epoch": 2.4283632831471587,
      "grad_norm": 9.991105079650879,
      "learning_rate": 7.860611947547354e-06,
      "loss": 0.1837,
      "step": 5000
    },
    {
      "epoch": 2.4283632831471587,
      "eval_accuracy": 0.7887899927832571,
      "eval_f1_macro": 0.7303342643678596,
      "eval_loss": 0.8012056946754456,
      "eval_precision_macro": 0.7383307287980656,
      "eval_recall_macro": 0.749799051904019,
      "eval_runtime": 79.4999,
      "eval_samples_per_second": 52.289,
      "eval_steps_per_second": 3.27,
      "step": 5000
    },
    {
      "epoch": 2.6711996114618746,
      "grad_norm": 2.5781819820404053,
      "learning_rate": 6.646430305973774e-06,
      "loss": 0.1853,
      "step": 5500
    },
    {
      "epoch": 2.6711996114618746,
      "eval_accuracy": 0.8169352898725042,
      "eval_f1_macro": 0.7425975963793477,
      "eval_loss": 0.7290380597114563,
      "eval_precision_macro": 0.7490380424010595,
      "eval_recall_macro": 0.7728571995465946,
      "eval_runtime": 79.7501,
      "eval_samples_per_second": 52.125,
      "eval_steps_per_second": 3.26,
      "step": 5500
    },
    {
      "epoch": 2.9140359397765905,
      "grad_norm": 13.812039375305176,
      "learning_rate": 5.432248664400195e-06,
      "loss": 0.1702,
      "step": 6000
    },
    {
      "epoch": 2.9140359397765905,
      "eval_accuracy": 0.8025018041857108,
      "eval_f1_macro": 0.7478693465335228,
      "eval_loss": 0.7762283682823181,
      "eval_precision_macro": 0.7494886922567287,
      "eval_recall_macro": 0.7661448691705909,
      "eval_runtime": 79.5684,
      "eval_samples_per_second": 52.244,
      "eval_steps_per_second": 3.268,
      "step": 6000
    },
    {
      "epoch": 3.1568722680913064,
      "grad_norm": 19.965606689453125,
      "learning_rate": 4.2180670228266156e-06,
      "loss": 0.115,
      "step": 6500
    },
    {
      "epoch": 3.1568722680913064,
      "eval_accuracy": 0.8085157565552081,
      "eval_f1_macro": 0.7550956906548623,
      "eval_loss": 0.8461021184921265,
      "eval_precision_macro": 0.7547053436507176,
      "eval_recall_macro": 0.7770605308609413,
      "eval_runtime": 79.7407,
      "eval_samples_per_second": 52.131,
      "eval_steps_per_second": 3.261,
      "step": 6500
    },
    {
      "epoch": 3.3997085964060223,
      "grad_norm": 5.473484039306641,
      "learning_rate": 3.0038853812530354e-06,
      "loss": 0.0881,
      "step": 7000
    },
    {
      "epoch": 3.3997085964060223,
      "eval_accuracy": 0.8051479432282896,
      "eval_f1_macro": 0.7518260599241406,
      "eval_loss": 0.8820834755897522,
      "eval_precision_macro": 0.7561690673733429,
      "eval_recall_macro": 0.7634548535133201,
      "eval_runtime": 79.7355,
      "eval_samples_per_second": 52.135,
      "eval_steps_per_second": 3.261,
      "step": 7000
    },
    {
      "epoch": 3.642544924720738,
      "grad_norm": 0.03955981135368347,
      "learning_rate": 1.789703739679456e-06,
      "loss": 0.0854,
      "step": 7500
    },
    {
      "epoch": 3.642544924720738,
      "eval_accuracy": 0.8171758479672842,
      "eval_f1_macro": 0.7552966570626944,
      "eval_loss": 0.8381013870239258,
      "eval_precision_macro": 0.7568149477995186,
      "eval_recall_macro": 0.7706908260368737,
      "eval_runtime": 79.5261,
      "eval_samples_per_second": 52.272,
      "eval_steps_per_second": 3.269,
      "step": 7500
    },
    {
      "epoch": 3.885381253035454,
      "grad_norm": 6.264761447906494,
      "learning_rate": 5.755220981058767e-07,
      "loss": 0.0848,
      "step": 8000
    },
    {
      "epoch": 3.885381253035454,
      "eval_accuracy": 0.8077940822708685,
      "eval_f1_macro": 0.7527204408320765,
      "eval_loss": 0.8696035742759705,
      "eval_precision_macro": 0.7535730472048996,
      "eval_recall_macro": 0.7700727790457579,
      "eval_runtime": 79.6143,
      "eval_samples_per_second": 52.214,
      "eval_steps_per_second": 3.266,
      "step": 8000
    }
  ],
  "logging_steps": 500,
  "max_steps": 8236,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 4,
  "save_steps": 500,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 1.7337239662270464e+16,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null
}