esimcse-bert-large-uncased / trainer_state.json
root
init
06f1d4b
{
"best_metric": 0.866013676946666,
"best_model_checkpoint": "result/esimcse-bert-large-uncased-0.1-bpe-0.32-160-bs64",
"epoch": 1.0,
"global_step": 15626,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"eval_avg_sts": 0.5655787069891403,
"eval_sickr_spearman": 0.5300969801413821,
"eval_stsb_spearman": 0.6010604338368984,
"step": 125
},
{
"epoch": 0.02,
"eval_avg_sts": 0.6326186877770161,
"eval_sickr_spearman": 0.6116633005413822,
"eval_stsb_spearman": 0.6535740750126502,
"step": 250
},
{
"epoch": 0.02,
"eval_avg_sts": 0.6609133620656575,
"eval_sickr_spearman": 0.6391030845441643,
"eval_stsb_spearman": 0.6827236395871509,
"step": 375
},
{
"epoch": 0.03,
"learning_rate": 9.680020478689364e-06,
"loss": 0.0123,
"step": 500
},
{
"epoch": 0.03,
"eval_avg_sts": 0.7221979056484411,
"eval_sickr_spearman": 0.6936728999765546,
"eval_stsb_spearman": 0.7507229113203276,
"step": 500
},
{
"epoch": 0.04,
"eval_avg_sts": 0.7554045800232995,
"eval_sickr_spearman": 0.7318677121511561,
"eval_stsb_spearman": 0.7789414478954428,
"step": 625
},
{
"epoch": 0.05,
"eval_avg_sts": 0.7930988815026507,
"eval_sickr_spearman": 0.761447137629702,
"eval_stsb_spearman": 0.8247506253755994,
"step": 750
},
{
"epoch": 0.06,
"eval_avg_sts": 0.799489871458167,
"eval_sickr_spearman": 0.7621246163154866,
"eval_stsb_spearman": 0.8368551266008474,
"step": 875
},
{
"epoch": 0.06,
"learning_rate": 9.36004095737873e-06,
"loss": 0.001,
"step": 1000
},
{
"epoch": 0.06,
"eval_avg_sts": 0.8058576433295244,
"eval_sickr_spearman": 0.7662108142392325,
"eval_stsb_spearman": 0.8455044724198162,
"step": 1000
},
{
"epoch": 0.07,
"eval_avg_sts": 0.8085897051320876,
"eval_sickr_spearman": 0.7775263653648347,
"eval_stsb_spearman": 0.8396530448993406,
"step": 1125
},
{
"epoch": 0.08,
"eval_avg_sts": 0.812202173171146,
"eval_sickr_spearman": 0.7778328037920097,
"eval_stsb_spearman": 0.8465715425502822,
"step": 1250
},
{
"epoch": 0.09,
"eval_avg_sts": 0.8130000077739681,
"eval_sickr_spearman": 0.7843073002036646,
"eval_stsb_spearman": 0.8416927153442717,
"step": 1375
},
{
"epoch": 0.1,
"learning_rate": 9.040061436068092e-06,
"loss": 0.0007,
"step": 1500
},
{
"epoch": 0.1,
"eval_avg_sts": 0.8069831792350133,
"eval_sickr_spearman": 0.784164936019002,
"eval_stsb_spearman": 0.8298014224510247,
"step": 1500
},
{
"epoch": 0.1,
"eval_avg_sts": 0.8066347132600319,
"eval_sickr_spearman": 0.7831622867764814,
"eval_stsb_spearman": 0.8301071397435824,
"step": 1625
},
{
"epoch": 0.11,
"eval_avg_sts": 0.8020330411815588,
"eval_sickr_spearman": 0.7803842639315037,
"eval_stsb_spearman": 0.8236818184316139,
"step": 1750
},
{
"epoch": 0.12,
"eval_avg_sts": 0.8021778407421094,
"eval_sickr_spearman": 0.774247762380669,
"eval_stsb_spearman": 0.83010791910355,
"step": 1875
},
{
"epoch": 0.13,
"learning_rate": 8.720081914757458e-06,
"loss": 0.0005,
"step": 2000
},
{
"epoch": 0.13,
"eval_avg_sts": 0.8065341521992876,
"eval_sickr_spearman": 0.7772098404063574,
"eval_stsb_spearman": 0.8358584639922177,
"step": 2000
},
{
"epoch": 0.14,
"eval_avg_sts": 0.794240687869741,
"eval_sickr_spearman": 0.7671064021566476,
"eval_stsb_spearman": 0.8213749735828343,
"step": 2125
},
{
"epoch": 0.14,
"eval_avg_sts": 0.8051559486926301,
"eval_sickr_spearman": 0.7794549101497777,
"eval_stsb_spearman": 0.8308569872354824,
"step": 2250
},
{
"epoch": 0.15,
"eval_avg_sts": 0.8073887679947801,
"eval_sickr_spearman": 0.7799384392479553,
"eval_stsb_spearman": 0.834839096741605,
"step": 2375
},
{
"epoch": 0.16,
"learning_rate": 8.400102393446819e-06,
"loss": 0.0005,
"step": 2500
},
{
"epoch": 0.16,
"eval_avg_sts": 0.8160314140071933,
"eval_sickr_spearman": 0.7852161447050764,
"eval_stsb_spearman": 0.8468466833093101,
"step": 2500
},
{
"epoch": 0.17,
"eval_avg_sts": 0.8233471889455257,
"eval_sickr_spearman": 0.78413381186527,
"eval_stsb_spearman": 0.8625605660257815,
"step": 2625
},
{
"epoch": 0.18,
"eval_avg_sts": 0.8227729219450164,
"eval_sickr_spearman": 0.782287832543699,
"eval_stsb_spearman": 0.8632580113463336,
"step": 2750
},
{
"epoch": 0.18,
"eval_avg_sts": 0.8256728603497563,
"eval_sickr_spearman": 0.7853320437528465,
"eval_stsb_spearman": 0.866013676946666,
"step": 2875
},
{
"epoch": 0.19,
"learning_rate": 8.080122872136184e-06,
"loss": 0.0004,
"step": 3000
},
{
"epoch": 0.19,
"eval_avg_sts": 0.8234860002979157,
"eval_sickr_spearman": 0.7870213456215266,
"eval_stsb_spearman": 0.8599506549743049,
"step": 3000
},
{
"epoch": 0.2,
"eval_avg_sts": 0.8214365447461363,
"eval_sickr_spearman": 0.7847184464319747,
"eval_stsb_spearman": 0.8581546430602978,
"step": 3125
},
{
"epoch": 0.21,
"eval_avg_sts": 0.8174673028951771,
"eval_sickr_spearman": 0.7841726690263334,
"eval_stsb_spearman": 0.8507619367640208,
"step": 3250
},
{
"epoch": 0.22,
"eval_avg_sts": 0.8164292052372073,
"eval_sickr_spearman": 0.7851938582740091,
"eval_stsb_spearman": 0.8476645522004054,
"step": 3375
},
{
"epoch": 0.22,
"learning_rate": 7.760143350825547e-06,
"loss": 0.0005,
"step": 3500
},
{
"epoch": 0.22,
"eval_avg_sts": 0.8128229716545061,
"eval_sickr_spearman": 0.7825479689890878,
"eval_stsb_spearman": 0.8430979743199243,
"step": 3500
},
{
"epoch": 0.23,
"eval_avg_sts": 0.7999160883647138,
"eval_sickr_spearman": 0.7685948379591132,
"eval_stsb_spearman": 0.8312373387703145,
"step": 3625
},
{
"epoch": 0.24,
"eval_avg_sts": 0.79444504891346,
"eval_sickr_spearman": 0.7639066662410434,
"eval_stsb_spearman": 0.8249834315858765,
"step": 3750
},
{
"epoch": 0.25,
"eval_avg_sts": 0.8041035838235049,
"eval_sickr_spearman": 0.7758875922148677,
"eval_stsb_spearman": 0.8323195754321422,
"step": 3875
},
{
"epoch": 0.26,
"learning_rate": 7.440163829514912e-06,
"loss": 0.0006,
"step": 4000
},
{
"epoch": 0.26,
"eval_avg_sts": 0.8147672333502036,
"eval_sickr_spearman": 0.7849670073819169,
"eval_stsb_spearman": 0.8445674593184904,
"step": 4000
},
{
"epoch": 0.26,
"eval_avg_sts": 0.8193365795354237,
"eval_sickr_spearman": 0.786982584522666,
"eval_stsb_spearman": 0.8516905745481813,
"step": 4125
},
{
"epoch": 0.27,
"eval_avg_sts": 0.8129220138792039,
"eval_sickr_spearman": 0.7803344076482109,
"eval_stsb_spearman": 0.8455096201101969,
"step": 4250
},
{
"epoch": 0.28,
"eval_avg_sts": 0.814173563113507,
"eval_sickr_spearman": 0.7786497647963706,
"eval_stsb_spearman": 0.8496973614306432,
"step": 4375
},
{
"epoch": 0.29,
"learning_rate": 7.120184308204276e-06,
"loss": 0.0007,
"step": 4500
},
{
"epoch": 0.29,
"eval_avg_sts": 0.8204432531176182,
"eval_sickr_spearman": 0.7869734105822913,
"eval_stsb_spearman": 0.853913095652945,
"step": 4500
},
{
"epoch": 0.3,
"eval_avg_sts": 0.8175076926619402,
"eval_sickr_spearman": 0.7864348858729672,
"eval_stsb_spearman": 0.8485804994509132,
"step": 4625
},
{
"epoch": 0.3,
"eval_avg_sts": 0.8128858173589497,
"eval_sickr_spearman": 0.7823268818291682,
"eval_stsb_spearman": 0.8434447528887313,
"step": 4750
},
{
"epoch": 0.31,
"eval_avg_sts": 0.8146504899933802,
"eval_sickr_spearman": 0.7820177056292111,
"eval_stsb_spearman": 0.8472832743575491,
"step": 4875
},
{
"epoch": 0.32,
"learning_rate": 6.800204786893639e-06,
"loss": 0.0003,
"step": 5000
},
{
"epoch": 0.32,
"eval_avg_sts": 0.818213782839917,
"eval_sickr_spearman": 0.7825289006418169,
"eval_stsb_spearman": 0.853898665038017,
"step": 5000
},
{
"epoch": 0.33,
"eval_avg_sts": 0.8159978701613437,
"eval_sickr_spearman": 0.7774106584414702,
"eval_stsb_spearman": 0.8545850818812172,
"step": 5125
},
{
"epoch": 0.34,
"eval_avg_sts": 0.8016384023594931,
"eval_sickr_spearman": 0.7594139330747252,
"eval_stsb_spearman": 0.8438628716442612,
"step": 5250
},
{
"epoch": 0.34,
"eval_avg_sts": 0.801479733175446,
"eval_sickr_spearman": 0.7607142310528577,
"eval_stsb_spearman": 0.8422452352980342,
"step": 5375
},
{
"epoch": 0.35,
"learning_rate": 6.480225265583003e-06,
"loss": 0.0004,
"step": 5500
},
{
"epoch": 0.35,
"eval_avg_sts": 0.8042683855550379,
"eval_sickr_spearman": 0.7645871228551171,
"eval_stsb_spearman": 0.8439496482549587,
"step": 5500
},
{
"epoch": 0.36,
"eval_avg_sts": 0.8012942192582326,
"eval_sickr_spearman": 0.7631563243743772,
"eval_stsb_spearman": 0.839432114142088,
"step": 5625
},
{
"epoch": 0.37,
"eval_avg_sts": 0.8032207475650094,
"eval_sickr_spearman": 0.767318363407294,
"eval_stsb_spearman": 0.8391231317227248,
"step": 5750
},
{
"epoch": 0.38,
"eval_avg_sts": 0.8026675474584128,
"eval_sickr_spearman": 0.769569437038394,
"eval_stsb_spearman": 0.8357656578784318,
"step": 5875
},
{
"epoch": 0.38,
"learning_rate": 6.1602457442723675e-06,
"loss": 0.0004,
"step": 6000
},
{
"epoch": 0.38,
"eval_avg_sts": 0.8027527543269901,
"eval_sickr_spearman": 0.7721073524072823,
"eval_stsb_spearman": 0.833398156246698,
"step": 6000
},
{
"epoch": 0.39,
"eval_avg_sts": 0.8080890156965373,
"eval_sickr_spearman": 0.7777212755744705,
"eval_stsb_spearman": 0.8384567558186042,
"step": 6125
},
{
"epoch": 0.4,
"eval_avg_sts": 0.8099187475300627,
"eval_sickr_spearman": 0.7792697982848352,
"eval_stsb_spearman": 0.8405676967752902,
"step": 6250
},
{
"epoch": 0.41,
"eval_avg_sts": 0.8109755257224806,
"eval_sickr_spearman": 0.779351547219483,
"eval_stsb_spearman": 0.8425995042254781,
"step": 6375
},
{
"epoch": 0.42,
"learning_rate": 5.840266222961732e-06,
"loss": 0.0004,
"step": 6500
},
{
"epoch": 0.42,
"eval_avg_sts": 0.807811991532841,
"eval_sickr_spearman": 0.7731304628990157,
"eval_stsb_spearman": 0.8424935201666665,
"step": 6500
},
{
"epoch": 0.42,
"eval_avg_sts": 0.8015312813914837,
"eval_sickr_spearman": 0.7648701701358919,
"eval_stsb_spearman": 0.8381923926470756,
"step": 6625
},
{
"epoch": 0.43,
"eval_avg_sts": 0.8023491493628093,
"eval_sickr_spearman": 0.7662689799030741,
"eval_stsb_spearman": 0.8384293188225446,
"step": 6750
},
{
"epoch": 0.44,
"eval_avg_sts": 0.8032060815338051,
"eval_sickr_spearman": 0.7652516331435133,
"eval_stsb_spearman": 0.8411605299240971,
"step": 6875
},
{
"epoch": 0.45,
"learning_rate": 5.520286701651095e-06,
"loss": 0.0003,
"step": 7000
},
{
"epoch": 0.45,
"eval_avg_sts": 0.8060138329601017,
"eval_sickr_spearman": 0.7675326781819108,
"eval_stsb_spearman": 0.8444949877382923,
"step": 7000
},
{
"epoch": 0.46,
"eval_avg_sts": 0.8073011071530847,
"eval_sickr_spearman": 0.7807883495879026,
"eval_stsb_spearman": 0.8338138647182667,
"step": 7125
},
{
"epoch": 0.46,
"eval_avg_sts": 0.8026818710000232,
"eval_sickr_spearman": 0.774955644753664,
"eval_stsb_spearman": 0.8304080972463824,
"step": 7250
},
{
"epoch": 0.47,
"eval_avg_sts": 0.8035394838977947,
"eval_sickr_spearman": 0.7752477218815091,
"eval_stsb_spearman": 0.8318312459140802,
"step": 7375
},
{
"epoch": 0.48,
"learning_rate": 5.200307180340458e-06,
"loss": 0.0005,
"step": 7500
},
{
"epoch": 0.48,
"eval_avg_sts": 0.8041748456008513,
"eval_sickr_spearman": 0.7710755002550874,
"eval_stsb_spearman": 0.8372741909466153,
"step": 7500
},
{
"epoch": 0.49,
"eval_avg_sts": 0.8031490475132864,
"eval_sickr_spearman": 0.7735819072214325,
"eval_stsb_spearman": 0.8327161878051403,
"step": 7625
},
{
"epoch": 0.5,
"eval_avg_sts": 0.796365553979756,
"eval_sickr_spearman": 0.7677243703077503,
"eval_stsb_spearman": 0.8250067376517616,
"step": 7750
},
{
"epoch": 0.5,
"eval_avg_sts": 0.7985580022948444,
"eval_sickr_spearman": 0.7685888821025348,
"eval_stsb_spearman": 0.8285271224871539,
"step": 7875
},
{
"epoch": 0.51,
"learning_rate": 4.8803276590298225e-06,
"loss": 0.0004,
"step": 8000
},
{
"epoch": 0.51,
"eval_avg_sts": 0.8005935975506946,
"eval_sickr_spearman": 0.771644332589419,
"eval_stsb_spearman": 0.8295428625119703,
"step": 8000
},
{
"epoch": 0.52,
"eval_avg_sts": 0.7989269976081819,
"eval_sickr_spearman": 0.7673743676715707,
"eval_stsb_spearman": 0.830479627544793,
"step": 8125
},
{
"epoch": 0.53,
"eval_avg_sts": 0.8016572067178073,
"eval_sickr_spearman": 0.7698353852470569,
"eval_stsb_spearman": 0.8334790281885577,
"step": 8250
},
{
"epoch": 0.54,
"eval_avg_sts": 0.8076839071350832,
"eval_sickr_spearman": 0.7736355099306375,
"eval_stsb_spearman": 0.8417323043395288,
"step": 8375
},
{
"epoch": 0.54,
"learning_rate": 4.560348137719187e-06,
"loss": 0.0003,
"step": 8500
},
{
"epoch": 0.54,
"eval_avg_sts": 0.808135722801363,
"eval_sickr_spearman": 0.7724129262746318,
"eval_stsb_spearman": 0.8438585193280942,
"step": 8500
},
{
"epoch": 0.55,
"eval_avg_sts": 0.8053092989350472,
"eval_sickr_spearman": 0.770703355281145,
"eval_stsb_spearman": 0.8399152425889496,
"step": 8625
},
{
"epoch": 0.56,
"eval_avg_sts": 0.8084270733769378,
"eval_sickr_spearman": 0.7738568852771656,
"eval_stsb_spearman": 0.84299726147671,
"step": 8750
},
{
"epoch": 0.57,
"eval_avg_sts": 0.8026266771856059,
"eval_sickr_spearman": 0.7651564355004631,
"eval_stsb_spearman": 0.8400969188707486,
"step": 8875
},
{
"epoch": 0.58,
"learning_rate": 4.24036861640855e-06,
"loss": 0.0004,
"step": 9000
},
{
"epoch": 0.58,
"eval_avg_sts": 0.8051260109076794,
"eval_sickr_spearman": 0.7693196272798144,
"eval_stsb_spearman": 0.8409323945355446,
"step": 9000
},
{
"epoch": 0.58,
"eval_avg_sts": 0.8086459894042075,
"eval_sickr_spearman": 0.7755324502508343,
"eval_stsb_spearman": 0.8417595285575807,
"step": 9125
},
{
"epoch": 0.59,
"eval_avg_sts": 0.8042063963931729,
"eval_sickr_spearman": 0.7749119844824567,
"eval_stsb_spearman": 0.8335008083038891,
"step": 9250
},
{
"epoch": 0.6,
"eval_avg_sts": 0.7931093622523872,
"eval_sickr_spearman": 0.7642648821955689,
"eval_stsb_spearman": 0.8219538423092054,
"step": 9375
},
{
"epoch": 0.61,
"learning_rate": 3.920389095097914e-06,
"loss": 0.0005,
"step": 9500
},
{
"epoch": 0.61,
"eval_avg_sts": 0.790343225626717,
"eval_sickr_spearman": 0.7611612084828409,
"eval_stsb_spearman": 0.8195252427705931,
"step": 9500
},
{
"epoch": 0.62,
"eval_avg_sts": 0.7899767423661693,
"eval_sickr_spearman": 0.7618361895513507,
"eval_stsb_spearman": 0.818117295180988,
"step": 9625
},
{
"epoch": 0.62,
"eval_avg_sts": 0.7900723613141147,
"eval_sickr_spearman": 0.7621474310886697,
"eval_stsb_spearman": 0.8179972915395599,
"step": 9750
},
{
"epoch": 0.63,
"eval_avg_sts": 0.7944895699959831,
"eval_sickr_spearman": 0.7671481411837973,
"eval_stsb_spearman": 0.8218309988081689,
"step": 9875
},
{
"epoch": 0.64,
"learning_rate": 3.600409573787278e-06,
"loss": 0.0003,
"step": 10000
},
{
"epoch": 0.64,
"eval_avg_sts": 0.7947303624992417,
"eval_sickr_spearman": 0.7661302660821208,
"eval_stsb_spearman": 0.8233304589163626,
"step": 10000
},
{
"epoch": 0.65,
"eval_avg_sts": 0.7953635000035459,
"eval_sickr_spearman": 0.7651970217811783,
"eval_stsb_spearman": 0.8255299782259135,
"step": 10125
},
{
"epoch": 0.66,
"eval_avg_sts": 0.7981958931316387,
"eval_sickr_spearman": 0.7667117305961306,
"eval_stsb_spearman": 0.8296800556671469,
"step": 10250
},
{
"epoch": 0.66,
"eval_avg_sts": 0.7955967572281076,
"eval_sickr_spearman": 0.7647861637394766,
"eval_stsb_spearman": 0.8264073507167387,
"step": 10375
},
{
"epoch": 0.67,
"learning_rate": 3.280430052476642e-06,
"loss": 0.0005,
"step": 10500
},
{
"epoch": 0.67,
"eval_avg_sts": 0.7904469016234321,
"eval_sickr_spearman": 0.7560659251489815,
"eval_stsb_spearman": 0.8248278780978826,
"step": 10500
},
{
"epoch": 0.68,
"eval_avg_sts": 0.7955565138560285,
"eval_sickr_spearman": 0.7594871324733168,
"eval_stsb_spearman": 0.8316258952387402,
"step": 10625
},
{
"epoch": 0.69,
"eval_avg_sts": 0.794608835583937,
"eval_sickr_spearman": 0.7577217973710605,
"eval_stsb_spearman": 0.8314958737968136,
"step": 10750
},
{
"epoch": 0.7,
"eval_avg_sts": 0.7984620732119101,
"eval_sickr_spearman": 0.7611833988517053,
"eval_stsb_spearman": 0.8357407475721149,
"step": 10875
},
{
"epoch": 0.7,
"learning_rate": 2.960450531166006e-06,
"loss": 0.0004,
"step": 11000
},
{
"epoch": 0.7,
"eval_avg_sts": 0.8001810968684413,
"eval_sickr_spearman": 0.7626987801020777,
"eval_stsb_spearman": 0.8376634136348049,
"step": 11000
},
{
"epoch": 0.71,
"eval_avg_sts": 0.8009359528786534,
"eval_sickr_spearman": 0.7635318795565218,
"eval_stsb_spearman": 0.838340026200785,
"step": 11125
},
{
"epoch": 0.72,
"eval_avg_sts": 0.8018165626143542,
"eval_sickr_spearman": 0.761889167856237,
"eval_stsb_spearman": 0.8417439573724714,
"step": 11250
},
{
"epoch": 0.73,
"eval_avg_sts": 0.8019628942005317,
"eval_sickr_spearman": 0.7621596790195363,
"eval_stsb_spearman": 0.8417661093815271,
"step": 11375
},
{
"epoch": 0.74,
"learning_rate": 2.640471009855369e-06,
"loss": 0.0004,
"step": 11500
},
{
"epoch": 0.74,
"eval_avg_sts": 0.8023474777679851,
"eval_sickr_spearman": 0.7618140952446891,
"eval_stsb_spearman": 0.8428808602912811,
"step": 11500
},
{
"epoch": 0.74,
"eval_avg_sts": 0.8007664692491112,
"eval_sickr_spearman": 0.7609206206957373,
"eval_stsb_spearman": 0.8406123178024852,
"step": 11625
},
{
"epoch": 0.75,
"eval_avg_sts": 0.8013536551112785,
"eval_sickr_spearman": 0.763201473609729,
"eval_stsb_spearman": 0.8395058366128281,
"step": 11750
},
{
"epoch": 0.76,
"eval_avg_sts": 0.8016071248722578,
"eval_sickr_spearman": 0.7642604633342366,
"eval_stsb_spearman": 0.838953786410279,
"step": 11875
},
{
"epoch": 0.77,
"learning_rate": 2.3204914885447333e-06,
"loss": 0.0006,
"step": 12000
},
{
"epoch": 0.77,
"eval_avg_sts": 0.8045261324324948,
"eval_sickr_spearman": 0.7673434836733461,
"eval_stsb_spearman": 0.8417087811916436,
"step": 12000
},
{
"epoch": 0.78,
"eval_avg_sts": 0.8040455198533101,
"eval_sickr_spearman": 0.7673006399308633,
"eval_stsb_spearman": 0.840790399775757,
"step": 12125
},
{
"epoch": 0.78,
"eval_avg_sts": 0.8055135790258787,
"eval_sickr_spearman": 0.7688476256459821,
"eval_stsb_spearman": 0.8421795324057754,
"step": 12250
},
{
"epoch": 0.79,
"eval_avg_sts": 0.8056301407246984,
"eval_sickr_spearman": 0.7684332613338755,
"eval_stsb_spearman": 0.8428270201155214,
"step": 12375
},
{
"epoch": 0.8,
"learning_rate": 2.000511967234097e-06,
"loss": 0.0005,
"step": 12500
},
{
"epoch": 0.8,
"eval_avg_sts": 0.8073712082258814,
"eval_sickr_spearman": 0.7699142042845167,
"eval_stsb_spearman": 0.844828212167246,
"step": 12500
},
{
"epoch": 0.81,
"eval_avg_sts": 0.8056455420795055,
"eval_sickr_spearman": 0.769138694120697,
"eval_stsb_spearman": 0.8421523900383138,
"step": 12625
},
{
"epoch": 0.82,
"eval_avg_sts": 0.8035991703759889,
"eval_sickr_spearman": 0.7667966976145747,
"eval_stsb_spearman": 0.8404016431374032,
"step": 12750
},
{
"epoch": 0.82,
"eval_avg_sts": 0.8002085597346069,
"eval_sickr_spearman": 0.7639235731887497,
"eval_stsb_spearman": 0.8364935462804641,
"step": 12875
},
{
"epoch": 0.83,
"learning_rate": 1.6805324459234608e-06,
"loss": 0.0005,
"step": 13000
},
{
"epoch": 0.83,
"eval_avg_sts": 0.8015160073860743,
"eval_sickr_spearman": 0.7661993348059888,
"eval_stsb_spearman": 0.8368326799661597,
"step": 13000
},
{
"epoch": 0.84,
"eval_avg_sts": 0.7993655095996954,
"eval_sickr_spearman": 0.7635688154735277,
"eval_stsb_spearman": 0.8351622037258633,
"step": 13125
},
{
"epoch": 0.85,
"eval_avg_sts": 0.8001347328783066,
"eval_sickr_spearman": 0.7649028312848698,
"eval_stsb_spearman": 0.8353666344717434,
"step": 13250
},
{
"epoch": 0.86,
"eval_avg_sts": 0.8014690463126566,
"eval_sickr_spearman": 0.7669867566829652,
"eval_stsb_spearman": 0.835951335942348,
"step": 13375
},
{
"epoch": 0.86,
"learning_rate": 1.3605529246128248e-06,
"loss": 0.0006,
"step": 13500
},
{
"epoch": 0.86,
"eval_avg_sts": 0.801102067772123,
"eval_sickr_spearman": 0.7658747406224701,
"eval_stsb_spearman": 0.8363293949217758,
"step": 13500
},
{
"epoch": 0.87,
"eval_avg_sts": 0.8007868057264769,
"eval_sickr_spearman": 0.7664748892349392,
"eval_stsb_spearman": 0.8350987222180144,
"step": 13625
},
{
"epoch": 0.88,
"eval_avg_sts": 0.8005208288353287,
"eval_sickr_spearman": 0.7666100487543861,
"eval_stsb_spearman": 0.8344316089162714,
"step": 13750
},
{
"epoch": 0.89,
"eval_avg_sts": 0.8032898804251193,
"eval_sickr_spearman": 0.7685208220317972,
"eval_stsb_spearman": 0.8380589388184414,
"step": 13875
},
{
"epoch": 0.9,
"learning_rate": 1.0405734033021888e-06,
"loss": 0.0005,
"step": 14000
},
{
"epoch": 0.9,
"eval_avg_sts": 0.8039134264299834,
"eval_sickr_spearman": 0.7689818725745016,
"eval_stsb_spearman": 0.8388449802854653,
"step": 14000
},
{
"epoch": 0.9,
"eval_avg_sts": 0.8045052668715511,
"eval_sickr_spearman": 0.7691046881008788,
"eval_stsb_spearman": 0.8399058456422233,
"step": 14125
},
{
"epoch": 0.91,
"eval_avg_sts": 0.806178393426559,
"eval_sickr_spearman": 0.7702171844723884,
"eval_stsb_spearman": 0.8421396023807296,
"step": 14250
},
{
"epoch": 0.92,
"eval_avg_sts": 0.8065048225721209,
"eval_sickr_spearman": 0.7704268882612672,
"eval_stsb_spearman": 0.8425827568829746,
"step": 14375
},
{
"epoch": 0.93,
"learning_rate": 7.205938819915525e-07,
"loss": 0.0003,
"step": 14500
},
{
"epoch": 0.93,
"eval_avg_sts": 0.8069821583081658,
"eval_sickr_spearman": 0.7708896198925219,
"eval_stsb_spearman": 0.8430746967238096,
"step": 14500
},
{
"epoch": 0.94,
"eval_avg_sts": 0.8077968468485142,
"eval_sickr_spearman": 0.7718276192725069,
"eval_stsb_spearman": 0.8437660744245216,
"step": 14625
},
{
"epoch": 0.94,
"eval_avg_sts": 0.8079862371583421,
"eval_sickr_spearman": 0.772007111498581,
"eval_stsb_spearman": 0.8439653628181031,
"step": 14750
},
{
"epoch": 0.95,
"eval_avg_sts": 0.8081309498929534,
"eval_sickr_spearman": 0.7723302647490567,
"eval_stsb_spearman": 0.8439316350368501,
"step": 14875
},
{
"epoch": 0.96,
"learning_rate": 4.0061436068091647e-07,
"loss": 0.0006,
"step": 15000
},
{
"epoch": 0.96,
"eval_avg_sts": 0.8084974176098827,
"eval_sickr_spearman": 0.7726533699684309,
"eval_stsb_spearman": 0.8443414652513344,
"step": 15000
},
{
"epoch": 0.97,
"eval_avg_sts": 0.8085496480083677,
"eval_sickr_spearman": 0.7727022175985934,
"eval_stsb_spearman": 0.844397078418142,
"step": 15125
},
{
"epoch": 0.98,
"eval_avg_sts": 0.8085722899568727,
"eval_sickr_spearman": 0.7729186457416736,
"eval_stsb_spearman": 0.8442259341720718,
"step": 15250
},
{
"epoch": 0.98,
"eval_avg_sts": 0.8085289743109576,
"eval_sickr_spearman": 0.77284338100572,
"eval_stsb_spearman": 0.8442145676161951,
"step": 15375
},
{
"epoch": 0.99,
"learning_rate": 8.06348393702803e-08,
"loss": 0.0005,
"step": 15500
},
{
"epoch": 0.99,
"eval_avg_sts": 0.8085319187853082,
"eval_sickr_spearman": 0.7728611044821507,
"eval_stsb_spearman": 0.8442027330884655,
"step": 15500
},
{
"epoch": 1.0,
"eval_avg_sts": 0.8084971429010983,
"eval_sickr_spearman": 0.7727954939975863,
"eval_stsb_spearman": 0.8441987918046104,
"step": 15625
},
{
"epoch": 1.0,
"step": 15626,
"train_runtime": 11240.3589,
"train_samples_per_second": 1.39
}
],
"max_steps": 15626,
"num_train_epochs": 1,
"total_flos": 329269573742764032,
"trial_name": null,
"trial_params": null
}