MuhammadHelmy's picture
Upload folder using huggingface_hub
546a5d6 verified
{
"best_global_step": 7500,
"best_metric": 0.7552966570626944,
"best_model_checkpoint": "./labse-Matryoshka-AuthId/checkpoint-7500",
"epoch": 4.0,
"eval_steps": 500,
"global_step": 8236,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.24283632831471588,
"grad_norm": 8.226706504821777,
"learning_rate": 1.8788246721709567e-05,
"loss": 1.8844,
"step": 500
},
{
"epoch": 0.24283632831471588,
"eval_accuracy": 0.580466682703873,
"eval_f1_macro": 0.4960676688683372,
"eval_loss": 1.5094239711761475,
"eval_precision_macro": 0.5214977084503927,
"eval_recall_macro": 0.5479464513669718,
"eval_runtime": 79.4803,
"eval_samples_per_second": 52.302,
"eval_steps_per_second": 3.271,
"step": 500
},
{
"epoch": 0.48567265662943176,
"grad_norm": 23.343076705932617,
"learning_rate": 1.757406508013599e-05,
"loss": 1.0439,
"step": 1000
},
{
"epoch": 0.48567265662943176,
"eval_accuracy": 0.6338705797450084,
"eval_f1_macro": 0.5443060410532249,
"eval_loss": 1.180512547492981,
"eval_precision_macro": 0.5964210887979737,
"eval_recall_macro": 0.5815396599629518,
"eval_runtime": 79.5018,
"eval_samples_per_second": 52.288,
"eval_steps_per_second": 3.27,
"step": 1000
},
{
"epoch": 0.7285089849441476,
"grad_norm": 22.333507537841797,
"learning_rate": 1.635988343856241e-05,
"loss": 0.7775,
"step": 1500
},
{
"epoch": 0.7285089849441476,
"eval_accuracy": 0.7168631224440702,
"eval_f1_macro": 0.6352859818957786,
"eval_loss": 1.000940203666687,
"eval_precision_macro": 0.6284925893229054,
"eval_recall_macro": 0.6892646842795946,
"eval_runtime": 79.7404,
"eval_samples_per_second": 52.132,
"eval_steps_per_second": 3.261,
"step": 1500
},
{
"epoch": 0.9713453132588635,
"grad_norm": 17.205595016479492,
"learning_rate": 1.514570179698883e-05,
"loss": 0.6235,
"step": 2000
},
{
"epoch": 0.9713453132588635,
"eval_accuracy": 0.7002646139042579,
"eval_f1_macro": 0.6444992633781246,
"eval_loss": 1.0062403678894043,
"eval_precision_macro": 0.6416324743101667,
"eval_recall_macro": 0.7175728403685042,
"eval_runtime": 79.8787,
"eval_samples_per_second": 52.041,
"eval_steps_per_second": 3.255,
"step": 2000
},
{
"epoch": 1.2141816415735793,
"grad_norm": 6.284836769104004,
"learning_rate": 1.393152015541525e-05,
"loss": 0.4443,
"step": 2500
},
{
"epoch": 1.2141816415735793,
"eval_accuracy": 0.752225162376714,
"eval_f1_macro": 0.7005112117395045,
"eval_loss": 0.8306922316551208,
"eval_precision_macro": 0.7030961323363835,
"eval_recall_macro": 0.7405526082434787,
"eval_runtime": 79.5711,
"eval_samples_per_second": 52.243,
"eval_steps_per_second": 3.268,
"step": 2500
},
{
"epoch": 1.4570179698882952,
"grad_norm": 14.860431671142578,
"learning_rate": 1.2717338513841673e-05,
"loss": 0.3917,
"step": 3000
},
{
"epoch": 1.4570179698882952,
"eval_accuracy": 0.776040413759923,
"eval_f1_macro": 0.7195032487137867,
"eval_loss": 0.7922475934028625,
"eval_precision_macro": 0.718432845056206,
"eval_recall_macro": 0.7476174763134192,
"eval_runtime": 79.7377,
"eval_samples_per_second": 52.133,
"eval_steps_per_second": 3.261,
"step": 3000
},
{
"epoch": 1.6998542982030111,
"grad_norm": 15.658327102661133,
"learning_rate": 1.1503156872268093e-05,
"loss": 0.3384,
"step": 3500
},
{
"epoch": 1.6998542982030111,
"eval_accuracy": 0.7914361318258359,
"eval_f1_macro": 0.7267579331806328,
"eval_loss": 0.7427302598953247,
"eval_precision_macro": 0.7315940928020874,
"eval_recall_macro": 0.7473775054197197,
"eval_runtime": 79.5326,
"eval_samples_per_second": 52.268,
"eval_steps_per_second": 3.269,
"step": 3500
},
{
"epoch": 1.942690626517727,
"grad_norm": 3.790079355239868,
"learning_rate": 1.0288975230694513e-05,
"loss": 0.3212,
"step": 4000
},
{
"epoch": 1.942690626517727,
"eval_accuracy": 0.7666586480635074,
"eval_f1_macro": 0.7040518310115823,
"eval_loss": 0.8302342891693115,
"eval_precision_macro": 0.7097429428181893,
"eval_recall_macro": 0.7427055724786205,
"eval_runtime": 79.7283,
"eval_samples_per_second": 52.14,
"eval_steps_per_second": 3.261,
"step": 4000
},
{
"epoch": 2.1855269548324427,
"grad_norm": 20.00322151184082,
"learning_rate": 9.074793589120935e-06,
"loss": 0.2072,
"step": 4500
},
{
"epoch": 2.1855269548324427,
"eval_accuracy": 0.7926389222997354,
"eval_f1_macro": 0.7306884775555181,
"eval_loss": 0.7662757039070129,
"eval_precision_macro": 0.7372020807956,
"eval_recall_macro": 0.7527073689440521,
"eval_runtime": 79.8142,
"eval_samples_per_second": 52.083,
"eval_steps_per_second": 3.258,
"step": 4500
},
{
"epoch": 2.4283632831471587,
"grad_norm": 9.991105079650879,
"learning_rate": 7.860611947547354e-06,
"loss": 0.1837,
"step": 5000
},
{
"epoch": 2.4283632831471587,
"eval_accuracy": 0.7887899927832571,
"eval_f1_macro": 0.7303342643678596,
"eval_loss": 0.8012056946754456,
"eval_precision_macro": 0.7383307287980656,
"eval_recall_macro": 0.749799051904019,
"eval_runtime": 79.4999,
"eval_samples_per_second": 52.289,
"eval_steps_per_second": 3.27,
"step": 5000
},
{
"epoch": 2.6711996114618746,
"grad_norm": 2.5781819820404053,
"learning_rate": 6.646430305973774e-06,
"loss": 0.1853,
"step": 5500
},
{
"epoch": 2.6711996114618746,
"eval_accuracy": 0.8169352898725042,
"eval_f1_macro": 0.7425975963793477,
"eval_loss": 0.7290380597114563,
"eval_precision_macro": 0.7490380424010595,
"eval_recall_macro": 0.7728571995465946,
"eval_runtime": 79.7501,
"eval_samples_per_second": 52.125,
"eval_steps_per_second": 3.26,
"step": 5500
},
{
"epoch": 2.9140359397765905,
"grad_norm": 13.812039375305176,
"learning_rate": 5.432248664400195e-06,
"loss": 0.1702,
"step": 6000
},
{
"epoch": 2.9140359397765905,
"eval_accuracy": 0.8025018041857108,
"eval_f1_macro": 0.7478693465335228,
"eval_loss": 0.7762283682823181,
"eval_precision_macro": 0.7494886922567287,
"eval_recall_macro": 0.7661448691705909,
"eval_runtime": 79.5684,
"eval_samples_per_second": 52.244,
"eval_steps_per_second": 3.268,
"step": 6000
},
{
"epoch": 3.1568722680913064,
"grad_norm": 19.965606689453125,
"learning_rate": 4.2180670228266156e-06,
"loss": 0.115,
"step": 6500
},
{
"epoch": 3.1568722680913064,
"eval_accuracy": 0.8085157565552081,
"eval_f1_macro": 0.7550956906548623,
"eval_loss": 0.8461021184921265,
"eval_precision_macro": 0.7547053436507176,
"eval_recall_macro": 0.7770605308609413,
"eval_runtime": 79.7407,
"eval_samples_per_second": 52.131,
"eval_steps_per_second": 3.261,
"step": 6500
},
{
"epoch": 3.3997085964060223,
"grad_norm": 5.473484039306641,
"learning_rate": 3.0038853812530354e-06,
"loss": 0.0881,
"step": 7000
},
{
"epoch": 3.3997085964060223,
"eval_accuracy": 0.8051479432282896,
"eval_f1_macro": 0.7518260599241406,
"eval_loss": 0.8820834755897522,
"eval_precision_macro": 0.7561690673733429,
"eval_recall_macro": 0.7634548535133201,
"eval_runtime": 79.7355,
"eval_samples_per_second": 52.135,
"eval_steps_per_second": 3.261,
"step": 7000
},
{
"epoch": 3.642544924720738,
"grad_norm": 0.03955981135368347,
"learning_rate": 1.789703739679456e-06,
"loss": 0.0854,
"step": 7500
},
{
"epoch": 3.642544924720738,
"eval_accuracy": 0.8171758479672842,
"eval_f1_macro": 0.7552966570626944,
"eval_loss": 0.8381013870239258,
"eval_precision_macro": 0.7568149477995186,
"eval_recall_macro": 0.7706908260368737,
"eval_runtime": 79.5261,
"eval_samples_per_second": 52.272,
"eval_steps_per_second": 3.269,
"step": 7500
},
{
"epoch": 3.885381253035454,
"grad_norm": 6.264761447906494,
"learning_rate": 5.755220981058767e-07,
"loss": 0.0848,
"step": 8000
},
{
"epoch": 3.885381253035454,
"eval_accuracy": 0.8077940822708685,
"eval_f1_macro": 0.7527204408320765,
"eval_loss": 0.8696035742759705,
"eval_precision_macro": 0.7535730472048996,
"eval_recall_macro": 0.7700727790457579,
"eval_runtime": 79.6143,
"eval_samples_per_second": 52.214,
"eval_steps_per_second": 3.266,
"step": 8000
}
],
"logging_steps": 500,
"max_steps": 8236,
"num_input_tokens_seen": 0,
"num_train_epochs": 4,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": true
},
"attributes": {}
}
},
"total_flos": 1.7337239662270464e+16,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}