gsInfoNCE-bert-base-uncased / trainer_state.json
root
init
6f1955c
{
"best_metric": 0.8220295594432419,
"best_model_checkpoint": "result/my-unsup-simcse-bert-base-uncased-d0.2-rs192-std0.5-t0.05",
"epoch": 1.0,
"global_step": 15626,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"eval_avg_sts": 0.7039632579138562,
"eval_sickr_spearman": 0.6894170562025291,
"eval_stsb_spearman": 0.7185094596251831,
"step": 125
},
{
"epoch": 0.02,
"eval_avg_sts": 0.7320830312686424,
"eval_sickr_spearman": 0.7157956410501377,
"eval_stsb_spearman": 0.7483704214871472,
"step": 250
},
{
"epoch": 0.02,
"eval_avg_sts": 0.7306267531474842,
"eval_sickr_spearman": 0.7151454920610715,
"eval_stsb_spearman": 0.7461080142338968,
"step": 375
},
{
"epoch": 0.03,
"learning_rate": 2.9040061436068092e-05,
"loss": 0.0316,
"step": 500
},
{
"epoch": 0.03,
"eval_avg_sts": 0.7490673075570297,
"eval_sickr_spearman": 0.7306142925280271,
"eval_stsb_spearman": 0.7675203225860322,
"step": 500
},
{
"epoch": 0.04,
"eval_avg_sts": 0.751267358789496,
"eval_sickr_spearman": 0.7340745011067302,
"eval_stsb_spearman": 0.7684602164722617,
"step": 625
},
{
"epoch": 0.05,
"eval_avg_sts": 0.7786649440391495,
"eval_sickr_spearman": 0.7455865474683157,
"eval_stsb_spearman": 0.8117433406099835,
"step": 750
},
{
"epoch": 0.06,
"eval_avg_sts": 0.7696928806594809,
"eval_sickr_spearman": 0.7452508100692632,
"eval_stsb_spearman": 0.7941349512496988,
"step": 875
},
{
"epoch": 0.06,
"learning_rate": 2.8080122872136184e-05,
"loss": 0.0008,
"step": 1000
},
{
"epoch": 0.06,
"eval_avg_sts": 0.7651441102319789,
"eval_sickr_spearman": 0.7429110710249084,
"eval_stsb_spearman": 0.7873771494390496,
"step": 1000
},
{
"epoch": 0.07,
"eval_avg_sts": 0.7657607879714892,
"eval_sickr_spearman": 0.7408295471818849,
"eval_stsb_spearman": 0.7906920287610936,
"step": 1125
},
{
"epoch": 0.08,
"eval_avg_sts": 0.7673800101461663,
"eval_sickr_spearman": 0.7415319540093158,
"eval_stsb_spearman": 0.7932280662830168,
"step": 1250
},
{
"epoch": 0.09,
"eval_avg_sts": 0.7657849020870391,
"eval_sickr_spearman": 0.7396460128113483,
"eval_stsb_spearman": 0.7919237913627298,
"step": 1375
},
{
"epoch": 0.1,
"learning_rate": 2.7120184308204276e-05,
"loss": 0.0006,
"step": 1500
},
{
"epoch": 0.1,
"eval_avg_sts": 0.7564555230487979,
"eval_sickr_spearman": 0.7337248346882608,
"eval_stsb_spearman": 0.7791862114093351,
"step": 1500
},
{
"epoch": 0.1,
"eval_avg_sts": 0.7663342569223988,
"eval_sickr_spearman": 0.740796117535284,
"eval_stsb_spearman": 0.7918723963095137,
"step": 1625
},
{
"epoch": 0.11,
"eval_avg_sts": 0.7650001912920963,
"eval_sickr_spearman": 0.7428139521378004,
"eval_stsb_spearman": 0.787186430446392,
"step": 1750
},
{
"epoch": 0.12,
"eval_avg_sts": 0.7697843702026972,
"eval_sickr_spearman": 0.7472399220731197,
"eval_stsb_spearman": 0.7923288183322746,
"step": 1875
},
{
"epoch": 0.13,
"learning_rate": 2.6160245744272367e-05,
"loss": 0.0004,
"step": 2000
},
{
"epoch": 0.13,
"eval_avg_sts": 0.772637186091681,
"eval_sickr_spearman": 0.7453534525330364,
"eval_stsb_spearman": 0.7999209196503256,
"step": 2000
},
{
"epoch": 0.14,
"eval_avg_sts": 0.7844981403056908,
"eval_sickr_spearman": 0.7469667211681398,
"eval_stsb_spearman": 0.8220295594432419,
"step": 2125
},
{
"epoch": 0.14,
"eval_avg_sts": 0.7825301157194102,
"eval_sickr_spearman": 0.745924590360237,
"eval_stsb_spearman": 0.8191356410785836,
"step": 2250
},
{
"epoch": 0.15,
"eval_avg_sts": 0.7786888226299735,
"eval_sickr_spearman": 0.7462584545463333,
"eval_stsb_spearman": 0.8111191907136137,
"step": 2375
},
{
"epoch": 0.16,
"learning_rate": 2.5200307180340456e-05,
"loss": 0.0004,
"step": 2500
},
{
"epoch": 0.16,
"eval_avg_sts": 0.7798782002704093,
"eval_sickr_spearman": 0.7466363632524484,
"eval_stsb_spearman": 0.8131200372883702,
"step": 2500
},
{
"epoch": 0.17,
"eval_avg_sts": 0.7751115325157576,
"eval_sickr_spearman": 0.7426031436335886,
"eval_stsb_spearman": 0.8076199213979265,
"step": 2625
},
{
"epoch": 0.18,
"eval_avg_sts": 0.7666810908791618,
"eval_sickr_spearman": 0.7362270149176792,
"eval_stsb_spearman": 0.7971351668406444,
"step": 2750
},
{
"epoch": 0.18,
"eval_avg_sts": 0.7732616540318422,
"eval_sickr_spearman": 0.7442702551334042,
"eval_stsb_spearman": 0.8022530529302804,
"step": 2875
},
{
"epoch": 0.19,
"learning_rate": 2.424036861640855e-05,
"loss": 0.0004,
"step": 3000
},
{
"epoch": 0.19,
"eval_avg_sts": 0.7733068432231915,
"eval_sickr_spearman": 0.7420109201528567,
"eval_stsb_spearman": 0.8046027662935263,
"step": 3000
},
{
"epoch": 0.2,
"eval_avg_sts": 0.7672720656943643,
"eval_sickr_spearman": 0.7361518462439285,
"eval_stsb_spearman": 0.7983922851448001,
"step": 3125
},
{
"epoch": 0.21,
"eval_avg_sts": 0.7652170530985714,
"eval_sickr_spearman": 0.7340480840009392,
"eval_stsb_spearman": 0.7963860221962036,
"step": 3250
},
{
"epoch": 0.22,
"eval_avg_sts": 0.7769025861727988,
"eval_sickr_spearman": 0.7457697079566438,
"eval_stsb_spearman": 0.8080354643889539,
"step": 3375
},
{
"epoch": 0.22,
"learning_rate": 2.3280430052476642e-05,
"loss": 0.0003,
"step": 3500
},
{
"epoch": 0.22,
"eval_avg_sts": 0.7745062864644923,
"eval_sickr_spearman": 0.743565975093017,
"eval_stsb_spearman": 0.8054465978359675,
"step": 3500
},
{
"epoch": 0.23,
"eval_avg_sts": 0.7713562889079433,
"eval_sickr_spearman": 0.7430872010738819,
"eval_stsb_spearman": 0.7996253767420045,
"step": 3625
},
{
"epoch": 0.24,
"eval_avg_sts": 0.7689988388424054,
"eval_sickr_spearman": 0.7403553361173865,
"eval_stsb_spearman": 0.7976423415674243,
"step": 3750
},
{
"epoch": 0.25,
"eval_avg_sts": 0.7691667949901446,
"eval_sickr_spearman": 0.7390877953504347,
"eval_stsb_spearman": 0.7992457946298546,
"step": 3875
},
{
"epoch": 0.26,
"learning_rate": 2.2320491488544734e-05,
"loss": 0.0004,
"step": 4000
},
{
"epoch": 0.26,
"eval_avg_sts": 0.7640678967425198,
"eval_sickr_spearman": 0.7361696177514606,
"eval_stsb_spearman": 0.7919661757335791,
"step": 4000
},
{
"epoch": 0.26,
"eval_avg_sts": 0.7720104233010221,
"eval_sickr_spearman": 0.7409091347169678,
"eval_stsb_spearman": 0.8031117118850762,
"step": 4125
},
{
"epoch": 0.27,
"eval_avg_sts": 0.7716821019144677,
"eval_sickr_spearman": 0.7378609849575023,
"eval_stsb_spearman": 0.8055032188714333,
"step": 4250
},
{
"epoch": 0.28,
"eval_avg_sts": 0.7749099391602485,
"eval_sickr_spearman": 0.7425721155420596,
"eval_stsb_spearman": 0.8072477627784373,
"step": 4375
},
{
"epoch": 0.29,
"learning_rate": 2.1360552924612826e-05,
"loss": 0.0004,
"step": 4500
},
{
"epoch": 0.29,
"eval_avg_sts": 0.7668234253805365,
"eval_sickr_spearman": 0.7378467197203752,
"eval_stsb_spearman": 0.7958001310406978,
"step": 4500
},
{
"epoch": 0.3,
"eval_avg_sts": 0.7705218895648999,
"eval_sickr_spearman": 0.7391226659300787,
"eval_stsb_spearman": 0.801921113199721,
"step": 4625
},
{
"epoch": 0.3,
"eval_avg_sts": 0.7678707529208644,
"eval_sickr_spearman": 0.7376356230295548,
"eval_stsb_spearman": 0.798105882812174,
"step": 4750
},
{
"epoch": 0.31,
"eval_avg_sts": 0.768346434351147,
"eval_sickr_spearman": 0.7373303853799155,
"eval_stsb_spearman": 0.7993624833223786,
"step": 4875
},
{
"epoch": 0.32,
"learning_rate": 2.0400614360680917e-05,
"loss": 0.0002,
"step": 5000
},
{
"epoch": 0.32,
"eval_avg_sts": 0.7714937923042149,
"eval_sickr_spearman": 0.7393215146900324,
"eval_stsb_spearman": 0.8036660699183974,
"step": 5000
},
{
"epoch": 0.33,
"eval_avg_sts": 0.7705501211555204,
"eval_sickr_spearman": 0.7388855363822787,
"eval_stsb_spearman": 0.802214705928762,
"step": 5125
},
{
"epoch": 0.34,
"eval_avg_sts": 0.769454128932314,
"eval_sickr_spearman": 0.7382745327408845,
"eval_stsb_spearman": 0.8006337251237436,
"step": 5250
},
{
"epoch": 0.34,
"eval_avg_sts": 0.7705108541596231,
"eval_sickr_spearman": 0.7397600866772639,
"eval_stsb_spearman": 0.8012616216419822,
"step": 5375
},
{
"epoch": 0.35,
"learning_rate": 1.944067579674901e-05,
"loss": 0.0004,
"step": 5500
},
{
"epoch": 0.35,
"eval_avg_sts": 0.767759877044913,
"eval_sickr_spearman": 0.7378074783105003,
"eval_stsb_spearman": 0.7977122757793257,
"step": 5500
},
{
"epoch": 0.36,
"eval_avg_sts": 0.7698443583732223,
"eval_sickr_spearman": 0.7391045582048366,
"eval_stsb_spearman": 0.800584158541608,
"step": 5625
},
{
"epoch": 0.37,
"eval_avg_sts": 0.7712564343816768,
"eval_sickr_spearman": 0.741173257743776,
"eval_stsb_spearman": 0.8013396110195776,
"step": 5750
},
{
"epoch": 0.38,
"eval_avg_sts": 0.7664448809595665,
"eval_sickr_spearman": 0.739177853665631,
"eval_stsb_spearman": 0.7937119082535019,
"step": 5875
},
{
"epoch": 0.38,
"learning_rate": 1.84807372328171e-05,
"loss": 0.0002,
"step": 6000
},
{
"epoch": 0.38,
"eval_avg_sts": 0.7638769604550799,
"eval_sickr_spearman": 0.7379414370524111,
"eval_stsb_spearman": 0.7898124838577486,
"step": 6000
},
{
"epoch": 0.39,
"eval_avg_sts": 0.766509076179771,
"eval_sickr_spearman": 0.7385067150852364,
"eval_stsb_spearman": 0.7945114372743056,
"step": 6125
},
{
"epoch": 0.4,
"eval_avg_sts": 0.7666375164161757,
"eval_sickr_spearman": 0.7395296334525638,
"eval_stsb_spearman": 0.7937453993797876,
"step": 6250
},
{
"epoch": 0.41,
"eval_avg_sts": 0.7675116415394574,
"eval_sickr_spearman": 0.7385751113736841,
"eval_stsb_spearman": 0.7964481717052306,
"step": 6375
},
{
"epoch": 0.42,
"learning_rate": 1.7520798668885192e-05,
"loss": 0.0003,
"step": 6500
},
{
"epoch": 0.42,
"eval_avg_sts": 0.7676886943660834,
"eval_sickr_spearman": 0.737567130678904,
"eval_stsb_spearman": 0.797810258053263,
"step": 6500
},
{
"epoch": 0.42,
"eval_avg_sts": 0.7661323845941221,
"eval_sickr_spearman": 0.7377277947132145,
"eval_stsb_spearman": 0.7945369744750297,
"step": 6625
},
{
"epoch": 0.43,
"eval_avg_sts": 0.7657108387605938,
"eval_sickr_spearman": 0.7375463332119813,
"eval_stsb_spearman": 0.7938753443092063,
"step": 6750
},
{
"epoch": 0.44,
"eval_avg_sts": 0.7634754316352123,
"eval_sickr_spearman": 0.7353273443566419,
"eval_stsb_spearman": 0.7916235189137826,
"step": 6875
},
{
"epoch": 0.45,
"learning_rate": 1.6560860104953284e-05,
"loss": 0.0002,
"step": 7000
},
{
"epoch": 0.45,
"eval_avg_sts": 0.7659995201775873,
"eval_sickr_spearman": 0.7410373297267062,
"eval_stsb_spearman": 0.7909617106284685,
"step": 7000
},
{
"epoch": 0.46,
"eval_avg_sts": 0.7603361178127348,
"eval_sickr_spearman": 0.7367544444425711,
"eval_stsb_spearman": 0.7839177911828984,
"step": 7125
},
{
"epoch": 0.46,
"eval_avg_sts": 0.7697008342068297,
"eval_sickr_spearman": 0.7388405312402312,
"eval_stsb_spearman": 0.800561137173428,
"step": 7250
},
{
"epoch": 0.47,
"eval_avg_sts": 0.7683036722669765,
"eval_sickr_spearman": 0.7386270810254403,
"eval_stsb_spearman": 0.7979802635085127,
"step": 7375
},
{
"epoch": 0.48,
"learning_rate": 1.5600921541021372e-05,
"loss": 0.0002,
"step": 7500
},
{
"epoch": 0.48,
"eval_avg_sts": 0.7732594696328385,
"eval_sickr_spearman": 0.7422230254968075,
"eval_stsb_spearman": 0.8042959137688697,
"step": 7500
},
{
"epoch": 0.49,
"eval_avg_sts": 0.7737749390548014,
"eval_sickr_spearman": 0.742280086445316,
"eval_stsb_spearman": 0.8052697916642868,
"step": 7625
},
{
"epoch": 0.5,
"eval_avg_sts": 0.772584123544011,
"eval_sickr_spearman": 0.741925424792297,
"eval_stsb_spearman": 0.8032428222957251,
"step": 7750
},
{
"epoch": 0.5,
"eval_avg_sts": 0.772843322519777,
"eval_sickr_spearman": 0.7422232656523147,
"eval_stsb_spearman": 0.8034633793872396,
"step": 7875
},
{
"epoch": 0.51,
"learning_rate": 1.4640982977089467e-05,
"loss": 0.0002,
"step": 8000
},
{
"epoch": 0.51,
"eval_avg_sts": 0.7715935954543726,
"eval_sickr_spearman": 0.7422030925897106,
"eval_stsb_spearman": 0.8009840983190345,
"step": 8000
},
{
"epoch": 0.52,
"eval_avg_sts": 0.7690169428580813,
"eval_sickr_spearman": 0.7407306511440238,
"eval_stsb_spearman": 0.7973032345721387,
"step": 8125
},
{
"epoch": 0.53,
"eval_avg_sts": 0.7675907004689784,
"eval_sickr_spearman": 0.7394478364868148,
"eval_stsb_spearman": 0.795733564451142,
"step": 8250
},
{
"epoch": 0.54,
"eval_avg_sts": 0.7677635816632358,
"eval_sickr_spearman": 0.7398567252533574,
"eval_stsb_spearman": 0.7956704380731142,
"step": 8375
},
{
"epoch": 0.54,
"learning_rate": 1.368104441315756e-05,
"loss": 0.0001,
"step": 8500
},
{
"epoch": 0.54,
"eval_avg_sts": 0.7665064992996365,
"eval_sickr_spearman": 0.7390165171959004,
"eval_stsb_spearman": 0.7939964814033726,
"step": 8500
},
{
"epoch": 0.55,
"eval_avg_sts": 0.768501738751435,
"eval_sickr_spearman": 0.7403488519186923,
"eval_stsb_spearman": 0.7966546255841779,
"step": 8625
},
{
"epoch": 0.56,
"eval_avg_sts": 0.7705756275356871,
"eval_sickr_spearman": 0.7442900919782981,
"eval_stsb_spearman": 0.7968611630930761,
"step": 8750
},
{
"epoch": 0.57,
"eval_avg_sts": 0.7708996463902738,
"eval_sickr_spearman": 0.7443713125708299,
"eval_stsb_spearman": 0.7974279802097175,
"step": 8875
},
{
"epoch": 0.58,
"learning_rate": 1.2721105849225649e-05,
"loss": 0.0002,
"step": 9000
},
{
"epoch": 0.58,
"eval_avg_sts": 0.7690762421498543,
"eval_sickr_spearman": 0.7437613656136672,
"eval_stsb_spearman": 0.7943911186860414,
"step": 9000
},
{
"epoch": 0.58,
"eval_avg_sts": 0.7632497822206319,
"eval_sickr_spearman": 0.7418991037487088,
"eval_stsb_spearman": 0.7846004606925551,
"step": 9125
},
{
"epoch": 0.59,
"eval_avg_sts": 0.7661106731884346,
"eval_sickr_spearman": 0.7425510779196297,
"eval_stsb_spearman": 0.7896702684572396,
"step": 9250
},
{
"epoch": 0.6,
"eval_avg_sts": 0.7582809505084913,
"eval_sickr_spearman": 0.734091696241045,
"eval_stsb_spearman": 0.7824702047759376,
"step": 9375
},
{
"epoch": 0.61,
"learning_rate": 1.176116728529374e-05,
"loss": 0.0003,
"step": 9500
},
{
"epoch": 0.61,
"eval_avg_sts": 0.760148082899698,
"eval_sickr_spearman": 0.735014469761874,
"eval_stsb_spearman": 0.785281696037522,
"step": 9500
},
{
"epoch": 0.62,
"eval_avg_sts": 0.7629335466481396,
"eval_sickr_spearman": 0.7371143414856469,
"eval_stsb_spearman": 0.7887527518106323,
"step": 9625
},
{
"epoch": 0.62,
"eval_avg_sts": 0.7642452574201212,
"eval_sickr_spearman": 0.738628137709672,
"eval_stsb_spearman": 0.7898623771305704,
"step": 9750
},
{
"epoch": 0.63,
"eval_avg_sts": 0.7661101842825881,
"eval_sickr_spearman": 0.7410933820220844,
"eval_stsb_spearman": 0.791126986543092,
"step": 9875
},
{
"epoch": 0.64,
"learning_rate": 1.0801228721361832e-05,
"loss": 0.0002,
"step": 10000
},
{
"epoch": 0.64,
"eval_avg_sts": 0.7672844353835271,
"eval_sickr_spearman": 0.741854819073183,
"eval_stsb_spearman": 0.7927140516938713,
"step": 10000
},
{
"epoch": 0.65,
"eval_avg_sts": 0.7695572795608598,
"eval_sickr_spearman": 0.7413840662479879,
"eval_stsb_spearman": 0.7977304928737319,
"step": 10125
},
{
"epoch": 0.66,
"eval_avg_sts": 0.7687590638385537,
"eval_sickr_spearman": 0.739025354918565,
"eval_stsb_spearman": 0.7984927727585424,
"step": 10250
},
{
"epoch": 0.66,
"eval_avg_sts": 0.7669714587155453,
"eval_sickr_spearman": 0.7381937444282656,
"eval_stsb_spearman": 0.795749173002825,
"step": 10375
},
{
"epoch": 0.67,
"learning_rate": 9.841290157429926e-06,
"loss": 0.0002,
"step": 10500
},
{
"epoch": 0.67,
"eval_avg_sts": 0.7675942026136746,
"eval_sickr_spearman": 0.7389821269272708,
"eval_stsb_spearman": 0.7962062783000783,
"step": 10500
},
{
"epoch": 0.68,
"eval_avg_sts": 0.7678054650263499,
"eval_sickr_spearman": 0.738494515185471,
"eval_stsb_spearman": 0.7971164148672288,
"step": 10625
},
{
"epoch": 0.69,
"eval_avg_sts": 0.7682414535062771,
"eval_sickr_spearman": 0.7377959508461551,
"eval_stsb_spearman": 0.7986869561663991,
"step": 10750
},
{
"epoch": 0.7,
"eval_avg_sts": 0.7680341988448937,
"eval_sickr_spearman": 0.7380161254151475,
"eval_stsb_spearman": 0.7980522722746398,
"step": 10875
},
{
"epoch": 0.7,
"learning_rate": 8.881351593498018e-06,
"loss": 0.0001,
"step": 11000
},
{
"epoch": 0.7,
"eval_avg_sts": 0.7603629068459727,
"eval_sickr_spearman": 0.7332741588634669,
"eval_stsb_spearman": 0.7874516548284786,
"step": 11000
},
{
"epoch": 0.71,
"eval_avg_sts": 0.7621832287379494,
"eval_sickr_spearman": 0.7349048083795262,
"eval_stsb_spearman": 0.7894616490963726,
"step": 11125
},
{
"epoch": 0.72,
"eval_avg_sts": 0.7621016943200358,
"eval_sickr_spearman": 0.7353910816282503,
"eval_stsb_spearman": 0.7888123070118214,
"step": 11250
},
{
"epoch": 0.73,
"eval_avg_sts": 0.7610119893777948,
"eval_sickr_spearman": 0.7316196795433297,
"eval_stsb_spearman": 0.7904042992122599,
"step": 11375
},
{
"epoch": 0.74,
"learning_rate": 7.921413029566108e-06,
"loss": 0.0002,
"step": 11500
},
{
"epoch": 0.74,
"eval_avg_sts": 0.7617757929098798,
"eval_sickr_spearman": 0.7318357714686997,
"eval_stsb_spearman": 0.7917158143510601,
"step": 11500
},
{
"epoch": 0.74,
"eval_avg_sts": 0.7611330064890793,
"eval_sickr_spearman": 0.7338293983960914,
"eval_stsb_spearman": 0.7884366145820674,
"step": 11625
},
{
"epoch": 0.75,
"eval_avg_sts": 0.7621633110041289,
"eval_sickr_spearman": 0.7341784404102422,
"eval_stsb_spearman": 0.7901481815980157,
"step": 11750
},
{
"epoch": 0.76,
"eval_avg_sts": 0.7595196438845424,
"eval_sickr_spearman": 0.7331927461465293,
"eval_stsb_spearman": 0.7858465416225555,
"step": 11875
},
{
"epoch": 0.77,
"learning_rate": 6.961474465634199e-06,
"loss": 0.0003,
"step": 12000
},
{
"epoch": 0.77,
"eval_avg_sts": 0.7635656813674752,
"eval_sickr_spearman": 0.7330679133138917,
"eval_stsb_spearman": 0.7940634494210588,
"step": 12000
},
{
"epoch": 0.78,
"eval_avg_sts": 0.7621975138758124,
"eval_sickr_spearman": 0.7340756058220633,
"eval_stsb_spearman": 0.7903194219295613,
"step": 12125
},
{
"epoch": 0.78,
"eval_avg_sts": 0.7631065024648397,
"eval_sickr_spearman": 0.7321919220858634,
"eval_stsb_spearman": 0.794021082843816,
"step": 12250
},
{
"epoch": 0.79,
"eval_avg_sts": 0.7628859990174557,
"eval_sickr_spearman": 0.7333722383726035,
"eval_stsb_spearman": 0.7923997596623078,
"step": 12375
},
{
"epoch": 0.8,
"learning_rate": 6.001535901702292e-06,
"loss": 0.0003,
"step": 12500
},
{
"epoch": 0.8,
"eval_avg_sts": 0.7629723942710446,
"eval_sickr_spearman": 0.7341199385286906,
"eval_stsb_spearman": 0.7918248500133985,
"step": 12500
},
{
"epoch": 0.81,
"eval_avg_sts": 0.7576056121896545,
"eval_sickr_spearman": 0.7317101701384391,
"eval_stsb_spearman": 0.7835010542408699,
"step": 12625
},
{
"epoch": 0.82,
"eval_avg_sts": 0.7578230668275763,
"eval_sickr_spearman": 0.7317774136804525,
"eval_stsb_spearman": 0.7838687199747002,
"step": 12750
},
{
"epoch": 0.82,
"eval_avg_sts": 0.7592080133938004,
"eval_sickr_spearman": 0.7330049445399063,
"eval_stsb_spearman": 0.7854110822476944,
"step": 12875
},
{
"epoch": 0.83,
"learning_rate": 5.0415973377703825e-06,
"loss": 0.0002,
"step": 13000
},
{
"epoch": 0.83,
"eval_avg_sts": 0.7649310794949764,
"eval_sickr_spearman": 0.7366114078224884,
"eval_stsb_spearman": 0.7932507511674645,
"step": 13000
},
{
"epoch": 0.84,
"eval_avg_sts": 0.7663911796944379,
"eval_sickr_spearman": 0.7376808683271094,
"eval_stsb_spearman": 0.7951014910617664,
"step": 13125
},
{
"epoch": 0.85,
"eval_avg_sts": 0.7666777328866676,
"eval_sickr_spearman": 0.7386608949208526,
"eval_stsb_spearman": 0.7946945708524826,
"step": 13250
},
{
"epoch": 0.86,
"eval_avg_sts": 0.767106492489369,
"eval_sickr_spearman": 0.7391275651024254,
"eval_stsb_spearman": 0.7950854198763126,
"step": 13375
},
{
"epoch": 0.86,
"learning_rate": 4.081658773838474e-06,
"loss": 0.0002,
"step": 13500
},
{
"epoch": 0.86,
"eval_avg_sts": 0.7663641777603831,
"eval_sickr_spearman": 0.7384899041997329,
"eval_stsb_spearman": 0.7942384513210333,
"step": 13500
},
{
"epoch": 0.87,
"eval_avg_sts": 0.7662444760718037,
"eval_sickr_spearman": 0.7383694902284276,
"eval_stsb_spearman": 0.7941194619151797,
"step": 13625
},
{
"epoch": 0.88,
"eval_avg_sts": 0.766969739589136,
"eval_sickr_spearman": 0.7386586374590851,
"eval_stsb_spearman": 0.7952808417191867,
"step": 13750
},
{
"epoch": 0.89,
"eval_avg_sts": 0.766963601043843,
"eval_sickr_spearman": 0.7385023442550055,
"eval_stsb_spearman": 0.7954248578326806,
"step": 13875
},
{
"epoch": 0.9,
"learning_rate": 3.121720209906566e-06,
"loss": 0.0001,
"step": 14000
},
{
"epoch": 0.9,
"eval_avg_sts": 0.7667105573155901,
"eval_sickr_spearman": 0.7384424975026136,
"eval_stsb_spearman": 0.7949786171285665,
"step": 14000
},
{
"epoch": 0.9,
"eval_avg_sts": 0.7681149645930665,
"eval_sickr_spearman": 0.7393782874519323,
"eval_stsb_spearman": 0.7968516417342008,
"step": 14125
},
{
"epoch": 0.91,
"eval_avg_sts": 0.7680143099599532,
"eval_sickr_spearman": 0.7393469231426932,
"eval_stsb_spearman": 0.7966816967772132,
"step": 14250
},
{
"epoch": 0.92,
"eval_avg_sts": 0.7676174458337446,
"eval_sickr_spearman": 0.7389574389411316,
"eval_stsb_spearman": 0.7962774527263575,
"step": 14375
},
{
"epoch": 0.93,
"learning_rate": 2.1617816459746575e-06,
"loss": 0.0001,
"step": 14500
},
{
"epoch": 0.93,
"eval_avg_sts": 0.7678090950201439,
"eval_sickr_spearman": 0.7391150770160515,
"eval_stsb_spearman": 0.7965031130242363,
"step": 14500
},
{
"epoch": 0.94,
"eval_avg_sts": 0.7674776747079344,
"eval_sickr_spearman": 0.7389988417505713,
"eval_stsb_spearman": 0.7959565076652975,
"step": 14625
},
{
"epoch": 0.94,
"eval_avg_sts": 0.7655137295819681,
"eval_sickr_spearman": 0.7381747721431975,
"eval_stsb_spearman": 0.7928526870207386,
"step": 14750
},
{
"epoch": 0.95,
"eval_avg_sts": 0.7657447555359481,
"eval_sickr_spearman": 0.7382629572454379,
"eval_stsb_spearman": 0.7932265538264583,
"step": 14875
},
{
"epoch": 0.96,
"learning_rate": 1.2018430820427492e-06,
"loss": 0.0002,
"step": 15000
},
{
"epoch": 0.96,
"eval_avg_sts": 0.7661720244295546,
"eval_sickr_spearman": 0.7381314480897004,
"eval_stsb_spearman": 0.7942126007694087,
"step": 15000
},
{
"epoch": 0.97,
"eval_avg_sts": 0.7660620762670376,
"eval_sickr_spearman": 0.7380774611316839,
"eval_stsb_spearman": 0.7940466914023911,
"step": 15125
},
{
"epoch": 0.98,
"eval_avg_sts": 0.766592222198617,
"eval_sickr_spearman": 0.738653161913521,
"eval_stsb_spearman": 0.794531282483713,
"step": 15250
},
{
"epoch": 0.98,
"eval_avg_sts": 0.766138712195428,
"eval_sickr_spearman": 0.7384393754810201,
"eval_stsb_spearman": 0.7938380489098358,
"step": 15375
},
{
"epoch": 0.99,
"learning_rate": 2.4190451811084087e-07,
"loss": 0.0002,
"step": 15500
},
{
"epoch": 0.99,
"eval_avg_sts": 0.7661330517073972,
"eval_sickr_spearman": 0.7384201630404448,
"eval_stsb_spearman": 0.7938459403743495,
"step": 15500
},
{
"epoch": 1.0,
"eval_avg_sts": 0.7660975303030011,
"eval_sickr_spearman": 0.7383983569203919,
"eval_stsb_spearman": 0.7937967036856103,
"step": 15625
},
{
"epoch": 1.0,
"step": 15626,
"train_runtime": 3834.5201,
"train_samples_per_second": 4.075
}
],
"max_steps": 15626,
"num_train_epochs": 1,
"total_flos": 42267980696739840,
"trial_name": null,
"trial_params": null
}