e5-base-no-mrl / trainer_state.json
sobamchan's picture
Upload folder using huggingface_hub
472bf90 verified
{
"best_global_step": null,
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 0.5449434979636322,
"eval_steps": 500,
"global_step": 9500,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.02868123673492801,
"grad_norm": 5.003673553466797,
"learning_rate": 9.54110898661568e-07,
"loss": 1.0608,
"step": 500
},
{
"epoch": 0.02868123673492801,
"eval_loss": 0.3829629123210907,
"eval_runtime": 12.7122,
"eval_samples_per_second": 517.929,
"eval_steps_per_second": 16.205,
"eval_sts-dev_pearson_cosine": 0.8611719010801527,
"eval_sts-dev_spearman_cosine": 0.8766654545382051,
"step": 500
},
{
"epoch": 0.05736247346985602,
"grad_norm": 5.119338512420654,
"learning_rate": 1.9101338432122374e-06,
"loss": 0.5244,
"step": 1000
},
{
"epoch": 0.05736247346985602,
"eval_loss": 0.2714338004589081,
"eval_runtime": 12.7434,
"eval_samples_per_second": 516.659,
"eval_steps_per_second": 16.165,
"eval_sts-dev_pearson_cosine": 0.8570032340400104,
"eval_sts-dev_spearman_cosine": 0.8696568405511665,
"step": 1000
},
{
"epoch": 0.08604371020478403,
"grad_norm": 1.5423139333724976,
"learning_rate": 2.8661567877629063e-06,
"loss": 0.4477,
"step": 1500
},
{
"epoch": 0.08604371020478403,
"eval_loss": 0.23649391531944275,
"eval_runtime": 12.7309,
"eval_samples_per_second": 517.168,
"eval_steps_per_second": 16.181,
"eval_sts-dev_pearson_cosine": 0.8587832954043909,
"eval_sts-dev_spearman_cosine": 0.8684451966216453,
"step": 1500
},
{
"epoch": 0.11472494693971204,
"grad_norm": 6.727112770080566,
"learning_rate": 3.822179732313576e-06,
"loss": 0.4196,
"step": 2000
},
{
"epoch": 0.11472494693971204,
"eval_loss": 0.2210703343153,
"eval_runtime": 12.7293,
"eval_samples_per_second": 517.233,
"eval_steps_per_second": 16.183,
"eval_sts-dev_pearson_cosine": 0.8620536482725116,
"eval_sts-dev_spearman_cosine": 0.8689494483130697,
"step": 2000
},
{
"epoch": 0.14340618367464006,
"grad_norm": 5.841507911682129,
"learning_rate": 4.778202676864245e-06,
"loss": 0.3861,
"step": 2500
},
{
"epoch": 0.14340618367464006,
"eval_loss": 0.2159556895494461,
"eval_runtime": 12.6972,
"eval_samples_per_second": 518.541,
"eval_steps_per_second": 16.224,
"eval_sts-dev_pearson_cosine": 0.8629719099031811,
"eval_sts-dev_spearman_cosine": 0.8678508213750861,
"step": 2500
},
{
"epoch": 0.17208742040956806,
"grad_norm": 5.232604026794434,
"learning_rate": 5.734225621414914e-06,
"loss": 0.383,
"step": 3000
},
{
"epoch": 0.17208742040956806,
"eval_loss": 0.21165093779563904,
"eval_runtime": 12.7074,
"eval_samples_per_second": 518.125,
"eval_steps_per_second": 16.211,
"eval_sts-dev_pearson_cosine": 0.8659937938410078,
"eval_sts-dev_spearman_cosine": 0.8714441691741636,
"step": 3000
},
{
"epoch": 0.20076865714449607,
"grad_norm": 4.678284168243408,
"learning_rate": 6.6902485659655835e-06,
"loss": 0.3692,
"step": 3500
},
{
"epoch": 0.20076865714449607,
"eval_loss": 0.21005575358867645,
"eval_runtime": 12.689,
"eval_samples_per_second": 518.876,
"eval_steps_per_second": 16.235,
"eval_sts-dev_pearson_cosine": 0.8597359500105216,
"eval_sts-dev_spearman_cosine": 0.8650711959150784,
"step": 3500
},
{
"epoch": 0.22944989387942408,
"grad_norm": 6.501852512359619,
"learning_rate": 7.646271510516251e-06,
"loss": 0.3543,
"step": 4000
},
{
"epoch": 0.22944989387942408,
"eval_loss": 0.21329358220100403,
"eval_runtime": 12.6888,
"eval_samples_per_second": 518.882,
"eval_steps_per_second": 16.235,
"eval_sts-dev_pearson_cosine": 0.855877914902483,
"eval_sts-dev_spearman_cosine": 0.8592050633377774,
"step": 4000
},
{
"epoch": 0.2581311306143521,
"grad_norm": 5.672712326049805,
"learning_rate": 8.602294455066922e-06,
"loss": 0.3568,
"step": 4500
},
{
"epoch": 0.2581311306143521,
"eval_loss": 0.20921653509140015,
"eval_runtime": 12.6834,
"eval_samples_per_second": 519.103,
"eval_steps_per_second": 16.242,
"eval_sts-dev_pearson_cosine": 0.8626966982304745,
"eval_sts-dev_spearman_cosine": 0.8661138721912987,
"step": 4500
},
{
"epoch": 0.2868123673492801,
"grad_norm": 6.034841537475586,
"learning_rate": 9.558317399617591e-06,
"loss": 0.3404,
"step": 5000
},
{
"epoch": 0.2868123673492801,
"eval_loss": 0.21060334146022797,
"eval_runtime": 12.681,
"eval_samples_per_second": 519.2,
"eval_steps_per_second": 16.245,
"eval_sts-dev_pearson_cosine": 0.8596459726107795,
"eval_sts-dev_spearman_cosine": 0.8617335699545178,
"step": 5000
},
{
"epoch": 0.3154936040842081,
"grad_norm": 4.462884426116943,
"learning_rate": 1.0514340344168261e-05,
"loss": 0.3307,
"step": 5500
},
{
"epoch": 0.3154936040842081,
"eval_loss": 0.20899620652198792,
"eval_runtime": 12.6837,
"eval_samples_per_second": 519.091,
"eval_steps_per_second": 16.241,
"eval_sts-dev_pearson_cosine": 0.8624256036447535,
"eval_sts-dev_spearman_cosine": 0.8646255603888939,
"step": 5500
},
{
"epoch": 0.34417484081913613,
"grad_norm": 7.7960896492004395,
"learning_rate": 1.147036328871893e-05,
"loss": 0.3359,
"step": 6000
},
{
"epoch": 0.34417484081913613,
"eval_loss": 0.2030467540025711,
"eval_runtime": 12.7091,
"eval_samples_per_second": 518.054,
"eval_steps_per_second": 16.209,
"eval_sts-dev_pearson_cosine": 0.8702937660837488,
"eval_sts-dev_spearman_cosine": 0.8730166667579369,
"step": 6000
},
{
"epoch": 0.37285607755406414,
"grad_norm": 6.986783981323242,
"learning_rate": 1.24263862332696e-05,
"loss": 0.3304,
"step": 6500
},
{
"epoch": 0.37285607755406414,
"eval_loss": 0.20733679831027985,
"eval_runtime": 12.7035,
"eval_samples_per_second": 518.284,
"eval_steps_per_second": 16.216,
"eval_sts-dev_pearson_cosine": 0.8611582102576957,
"eval_sts-dev_spearman_cosine": 0.8632160410827856,
"step": 6500
},
{
"epoch": 0.40153731428899214,
"grad_norm": 4.2199883460998535,
"learning_rate": 1.3382409177820268e-05,
"loss": 0.3319,
"step": 7000
},
{
"epoch": 0.40153731428899214,
"eval_loss": 0.19952718913555145,
"eval_runtime": 12.6937,
"eval_samples_per_second": 518.682,
"eval_steps_per_second": 16.229,
"eval_sts-dev_pearson_cosine": 0.8664693326646002,
"eval_sts-dev_spearman_cosine": 0.8692297087437101,
"step": 7000
},
{
"epoch": 0.43021855102392015,
"grad_norm": 5.597110271453857,
"learning_rate": 1.4338432122370937e-05,
"loss": 0.3087,
"step": 7500
},
{
"epoch": 0.43021855102392015,
"eval_loss": 0.20627431571483612,
"eval_runtime": 12.715,
"eval_samples_per_second": 517.814,
"eval_steps_per_second": 16.201,
"eval_sts-dev_pearson_cosine": 0.8590261077617714,
"eval_sts-dev_spearman_cosine": 0.8614239668671501,
"step": 7500
},
{
"epoch": 0.45889978775884815,
"grad_norm": 1.4251642227172852,
"learning_rate": 1.5294455066921608e-05,
"loss": 0.3058,
"step": 8000
},
{
"epoch": 0.45889978775884815,
"eval_loss": 0.19824104011058807,
"eval_runtime": 12.7073,
"eval_samples_per_second": 518.128,
"eval_steps_per_second": 16.211,
"eval_sts-dev_pearson_cosine": 0.8642348489707395,
"eval_sts-dev_spearman_cosine": 0.8667949296820955,
"step": 8000
},
{
"epoch": 0.48758102449377616,
"grad_norm": 6.344972610473633,
"learning_rate": 1.6250478011472275e-05,
"loss": 0.3207,
"step": 8500
},
{
"epoch": 0.48758102449377616,
"eval_loss": 0.20592595636844635,
"eval_runtime": 12.6964,
"eval_samples_per_second": 518.573,
"eval_steps_per_second": 16.225,
"eval_sts-dev_pearson_cosine": 0.8632819341519357,
"eval_sts-dev_spearman_cosine": 0.8663394328606723,
"step": 8500
},
{
"epoch": 0.5162622612287042,
"grad_norm": 5.220601558685303,
"learning_rate": 1.7206500956022945e-05,
"loss": 0.3184,
"step": 9000
},
{
"epoch": 0.5162622612287042,
"eval_loss": 0.20682041347026825,
"eval_runtime": 12.6949,
"eval_samples_per_second": 518.635,
"eval_steps_per_second": 16.227,
"eval_sts-dev_pearson_cosine": 0.8574421776666108,
"eval_sts-dev_spearman_cosine": 0.8603376674379024,
"step": 9000
},
{
"epoch": 0.5449434979636322,
"grad_norm": 4.097256183624268,
"learning_rate": 1.8162523900573612e-05,
"loss": 0.3085,
"step": 9500
},
{
"epoch": 0.5449434979636322,
"eval_loss": 0.19450272619724274,
"eval_runtime": 12.6996,
"eval_samples_per_second": 518.44,
"eval_steps_per_second": 16.221,
"eval_sts-dev_pearson_cosine": 0.8675839523712885,
"eval_sts-dev_spearman_cosine": 0.8694666129275455,
"step": 9500
}
],
"logging_steps": 500,
"max_steps": 261495,
"num_input_tokens_seen": 0,
"num_train_epochs": 15,
"save_steps": 500,
"stateful_callbacks": {
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 0.0,
"train_batch_size": 32,
"trial_name": null,
"trial_params": null
}