esimcse-roberta-base / trainer_state.json
root
init
7fd4951
{
"best_metric": 0.8509895356755377,
"best_model_checkpoint": "result/esimcse-roberta-base-0.1-bpe-0.3-160-bs64",
"epoch": 1.0,
"global_step": 15626,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.01,
"eval_avg_sts": 0.7581765037912359,
"eval_sickr_spearman": 0.7151382873958556,
"eval_stsb_spearman": 0.8012147201866162,
"step": 125
},
{
"epoch": 0.02,
"eval_avg_sts": 0.7681462302822064,
"eval_sickr_spearman": 0.7191643023494997,
"eval_stsb_spearman": 0.817128158214913,
"step": 250
},
{
"epoch": 0.02,
"eval_avg_sts": 0.7689880945057068,
"eval_sickr_spearman": 0.7167425261838887,
"eval_stsb_spearman": 0.821233662827525,
"step": 375
},
{
"epoch": 0.03,
"learning_rate": 9.680020478689364e-06,
"loss": 0.2173,
"step": 500
},
{
"epoch": 0.03,
"eval_avg_sts": 0.77171181346581,
"eval_sickr_spearman": 0.7162504475496553,
"eval_stsb_spearman": 0.8271731793819647,
"step": 500
},
{
"epoch": 0.04,
"eval_avg_sts": 0.7748967925777871,
"eval_sickr_spearman": 0.7145534607047452,
"eval_stsb_spearman": 0.835240124450829,
"step": 625
},
{
"epoch": 0.05,
"eval_avg_sts": 0.7740832357556943,
"eval_sickr_spearman": 0.7138060487352668,
"eval_stsb_spearman": 0.8343604227761218,
"step": 750
},
{
"epoch": 0.06,
"eval_avg_sts": 0.7762465978643387,
"eval_sickr_spearman": 0.7208324225024454,
"eval_stsb_spearman": 0.8316607732262321,
"step": 875
},
{
"epoch": 0.06,
"learning_rate": 9.36004095737873e-06,
"loss": 0.001,
"step": 1000
},
{
"epoch": 0.06,
"eval_avg_sts": 0.7733397471522039,
"eval_sickr_spearman": 0.7168732668420033,
"eval_stsb_spearman": 0.8298062274624045,
"step": 1000
},
{
"epoch": 0.07,
"eval_avg_sts": 0.7653409481290172,
"eval_sickr_spearman": 0.7163877684686668,
"eval_stsb_spearman": 0.8142941277893675,
"step": 1125
},
{
"epoch": 0.08,
"eval_avg_sts": 0.7736208665177124,
"eval_sickr_spearman": 0.7180561287771196,
"eval_stsb_spearman": 0.8291856042583052,
"step": 1250
},
{
"epoch": 0.09,
"eval_avg_sts": 0.7769947892835245,
"eval_sickr_spearman": 0.719469443936936,
"eval_stsb_spearman": 0.834520134630113,
"step": 1375
},
{
"epoch": 0.1,
"learning_rate": 9.040061436068092e-06,
"loss": 0.0009,
"step": 1500
},
{
"epoch": 0.1,
"eval_avg_sts": 0.7724765591275501,
"eval_sickr_spearman": 0.7175171717878825,
"eval_stsb_spearman": 0.8274359464672176,
"step": 1500
},
{
"epoch": 0.1,
"eval_avg_sts": 0.7805406775721991,
"eval_sickr_spearman": 0.7221231622913917,
"eval_stsb_spearman": 0.8389581928530065,
"step": 1625
},
{
"epoch": 0.11,
"eval_avg_sts": 0.7788261300626871,
"eval_sickr_spearman": 0.7156971292610881,
"eval_stsb_spearman": 0.841955130864286,
"step": 1750
},
{
"epoch": 0.12,
"eval_avg_sts": 0.7840241297108503,
"eval_sickr_spearman": 0.7276132612790723,
"eval_stsb_spearman": 0.8404349981426283,
"step": 1875
},
{
"epoch": 0.13,
"learning_rate": 8.720081914757458e-06,
"loss": 0.0007,
"step": 2000
},
{
"epoch": 0.13,
"eval_avg_sts": 0.7859614716903541,
"eval_sickr_spearman": 0.73084517803264,
"eval_stsb_spearman": 0.8410777653480682,
"step": 2000
},
{
"epoch": 0.14,
"eval_avg_sts": 0.7879631319134857,
"eval_sickr_spearman": 0.7249367281514335,
"eval_stsb_spearman": 0.8509895356755377,
"step": 2125
},
{
"epoch": 0.14,
"eval_avg_sts": 0.785546295104504,
"eval_sickr_spearman": 0.7227898820104544,
"eval_stsb_spearman": 0.8483027081985536,
"step": 2250
},
{
"epoch": 0.15,
"eval_avg_sts": 0.7834786244087628,
"eval_sickr_spearman": 0.723430760996943,
"eval_stsb_spearman": 0.8435264878205825,
"step": 2375
},
{
"epoch": 0.16,
"learning_rate": 8.400102393446819e-06,
"loss": 0.0009,
"step": 2500
},
{
"epoch": 0.16,
"eval_avg_sts": 0.7820490328984251,
"eval_sickr_spearman": 0.7255294799742813,
"eval_stsb_spearman": 0.8385685858225688,
"step": 2500
},
{
"epoch": 0.17,
"eval_avg_sts": 0.7808832119394027,
"eval_sickr_spearman": 0.7211363152812443,
"eval_stsb_spearman": 0.8406301085975609,
"step": 2625
},
{
"epoch": 0.18,
"eval_avg_sts": 0.7801816610620529,
"eval_sickr_spearman": 0.7208986573913285,
"eval_stsb_spearman": 0.8394646647327774,
"step": 2750
},
{
"epoch": 0.18,
"eval_avg_sts": 0.7804005291249532,
"eval_sickr_spearman": 0.7246992623859235,
"eval_stsb_spearman": 0.8361017958639829,
"step": 2875
},
{
"epoch": 0.19,
"learning_rate": 8.080122872136184e-06,
"loss": 0.0007,
"step": 3000
},
{
"epoch": 0.19,
"eval_avg_sts": 0.7782109243211895,
"eval_sickr_spearman": 0.722025947342081,
"eval_stsb_spearman": 0.8343959013002981,
"step": 3000
},
{
"epoch": 0.2,
"eval_avg_sts": 0.7856724922536673,
"eval_sickr_spearman": 0.7338031734147062,
"eval_stsb_spearman": 0.8375418110926284,
"step": 3125
},
{
"epoch": 0.21,
"eval_avg_sts": 0.7856388427423309,
"eval_sickr_spearman": 0.732672281131346,
"eval_stsb_spearman": 0.8386054043533157,
"step": 3250
},
{
"epoch": 0.22,
"eval_avg_sts": 0.7828902827361719,
"eval_sickr_spearman": 0.7300378712796683,
"eval_stsb_spearman": 0.8357426941926754,
"step": 3375
},
{
"epoch": 0.22,
"learning_rate": 7.760143350825547e-06,
"loss": 0.0007,
"step": 3500
},
{
"epoch": 0.22,
"eval_avg_sts": 0.7802120728333768,
"eval_sickr_spearman": 0.7278627828510433,
"eval_stsb_spearman": 0.8325613628157101,
"step": 3500
},
{
"epoch": 0.23,
"eval_avg_sts": 0.7814736917323457,
"eval_sickr_spearman": 0.7240533881648852,
"eval_stsb_spearman": 0.8388939952998063,
"step": 3625
},
{
"epoch": 0.24,
"eval_avg_sts": 0.7752603635110975,
"eval_sickr_spearman": 0.7130665618975255,
"eval_stsb_spearman": 0.8374541651246696,
"step": 3750
},
{
"epoch": 0.25,
"eval_avg_sts": 0.7738531430333261,
"eval_sickr_spearman": 0.7116984440041625,
"eval_stsb_spearman": 0.8360078420624897,
"step": 3875
},
{
"epoch": 0.26,
"learning_rate": 7.440163829514912e-06,
"loss": 0.0008,
"step": 4000
},
{
"epoch": 0.26,
"eval_avg_sts": 0.7716461848796647,
"eval_sickr_spearman": 0.7067132959859007,
"eval_stsb_spearman": 0.8365790737734288,
"step": 4000
},
{
"epoch": 0.26,
"eval_avg_sts": 0.7726640328663634,
"eval_sickr_spearman": 0.7067841898916234,
"eval_stsb_spearman": 0.8385438758411035,
"step": 4125
},
{
"epoch": 0.27,
"eval_avg_sts": 0.7710462569066159,
"eval_sickr_spearman": 0.7075652236321085,
"eval_stsb_spearman": 0.8345272901811234,
"step": 4250
},
{
"epoch": 0.28,
"eval_avg_sts": 0.7746792058093399,
"eval_sickr_spearman": 0.7143302601763624,
"eval_stsb_spearman": 0.8350281514423173,
"step": 4375
},
{
"epoch": 0.29,
"learning_rate": 7.120184308204276e-06,
"loss": 0.0007,
"step": 4500
},
{
"epoch": 0.29,
"eval_avg_sts": 0.7565698440366436,
"eval_sickr_spearman": 0.7096718197100826,
"eval_stsb_spearman": 0.8034678683632046,
"step": 4500
},
{
"epoch": 0.3,
"eval_avg_sts": 0.7675338058741563,
"eval_sickr_spearman": 0.7151141757829338,
"eval_stsb_spearman": 0.819953435965379,
"step": 4625
},
{
"epoch": 0.3,
"eval_avg_sts": 0.7690480296047658,
"eval_sickr_spearman": 0.7127470109796578,
"eval_stsb_spearman": 0.8253490482298739,
"step": 4750
},
{
"epoch": 0.31,
"eval_avg_sts": 0.7684443815823148,
"eval_sickr_spearman": 0.713693800051206,
"eval_stsb_spearman": 0.8231949631134236,
"step": 4875
},
{
"epoch": 0.32,
"learning_rate": 6.800204786893639e-06,
"loss": 0.0004,
"step": 5000
},
{
"epoch": 0.32,
"eval_avg_sts": 0.7691689362019429,
"eval_sickr_spearman": 0.7142018250111167,
"eval_stsb_spearman": 0.824136047392769,
"step": 5000
},
{
"epoch": 0.33,
"eval_avg_sts": 0.7695187713024993,
"eval_sickr_spearman": 0.7115182313115667,
"eval_stsb_spearman": 0.8275193112934319,
"step": 5125
},
{
"epoch": 0.34,
"eval_avg_sts": 0.7653115806051607,
"eval_sickr_spearman": 0.7045340768825521,
"eval_stsb_spearman": 0.8260890843277693,
"step": 5250
},
{
"epoch": 0.34,
"eval_avg_sts": 0.7668890157040491,
"eval_sickr_spearman": 0.7114907094904426,
"eval_stsb_spearman": 0.8222873219176555,
"step": 5375
},
{
"epoch": 0.35,
"learning_rate": 6.480225265583003e-06,
"loss": 0.0006,
"step": 5500
},
{
"epoch": 0.35,
"eval_avg_sts": 0.7699847309597411,
"eval_sickr_spearman": 0.7107595800643516,
"eval_stsb_spearman": 0.8292098818551306,
"step": 5500
},
{
"epoch": 0.36,
"eval_avg_sts": 0.7567106410293966,
"eval_sickr_spearman": 0.7000862527960785,
"eval_stsb_spearman": 0.8133350292627146,
"step": 5625
},
{
"epoch": 0.37,
"eval_avg_sts": 0.7573616140937487,
"eval_sickr_spearman": 0.699952438147472,
"eval_stsb_spearman": 0.8147707900400254,
"step": 5750
},
{
"epoch": 0.38,
"eval_avg_sts": 0.7537503092076071,
"eval_sickr_spearman": 0.6944211284747575,
"eval_stsb_spearman": 0.8130794899404568,
"step": 5875
},
{
"epoch": 0.38,
"learning_rate": 6.1602457442723675e-06,
"loss": 0.0006,
"step": 6000
},
{
"epoch": 0.38,
"eval_avg_sts": 0.7520214801965759,
"eval_sickr_spearman": 0.6948064820015954,
"eval_stsb_spearman": 0.8092364783915562,
"step": 6000
},
{
"epoch": 0.39,
"eval_avg_sts": 0.7575813676798819,
"eval_sickr_spearman": 0.7104122191387511,
"eval_stsb_spearman": 0.8047505162210129,
"step": 6125
},
{
"epoch": 0.4,
"eval_avg_sts": 0.7601457988270166,
"eval_sickr_spearman": 0.7141828046949473,
"eval_stsb_spearman": 0.8061087929590859,
"step": 6250
},
{
"epoch": 0.41,
"eval_avg_sts": 0.7635874777468191,
"eval_sickr_spearman": 0.71502354109452,
"eval_stsb_spearman": 0.8121514143991182,
"step": 6375
},
{
"epoch": 0.42,
"learning_rate": 5.840266222961732e-06,
"loss": 0.0006,
"step": 6500
},
{
"epoch": 0.42,
"eval_avg_sts": 0.7661782010291336,
"eval_sickr_spearman": 0.7131075804581537,
"eval_stsb_spearman": 0.8192488216001136,
"step": 6500
},
{
"epoch": 0.42,
"eval_avg_sts": 0.7668687543410466,
"eval_sickr_spearman": 0.715227337057922,
"eval_stsb_spearman": 0.8185101716241713,
"step": 6625
},
{
"epoch": 0.43,
"eval_avg_sts": 0.7653179212909831,
"eval_sickr_spearman": 0.706994422022618,
"eval_stsb_spearman": 0.8236414205593481,
"step": 6750
},
{
"epoch": 0.44,
"eval_avg_sts": 0.7660646063264975,
"eval_sickr_spearman": 0.708211338008654,
"eval_stsb_spearman": 0.823917874644341,
"step": 6875
},
{
"epoch": 0.45,
"learning_rate": 5.520286701651095e-06,
"loss": 0.0005,
"step": 7000
},
{
"epoch": 0.45,
"eval_avg_sts": 0.7679532903699651,
"eval_sickr_spearman": 0.7082187828293769,
"eval_stsb_spearman": 0.8276877979105532,
"step": 7000
},
{
"epoch": 0.46,
"eval_avg_sts": 0.7681556050474033,
"eval_sickr_spearman": 0.7095044313215708,
"eval_stsb_spearman": 0.8268067787732358,
"step": 7125
},
{
"epoch": 0.46,
"eval_avg_sts": 0.7631361714276783,
"eval_sickr_spearman": 0.7091408358836844,
"eval_stsb_spearman": 0.8171315069716724,
"step": 7250
},
{
"epoch": 0.47,
"eval_avg_sts": 0.7696379675303331,
"eval_sickr_spearman": 0.7148051917073824,
"eval_stsb_spearman": 0.8244707433532837,
"step": 7375
},
{
"epoch": 0.48,
"learning_rate": 5.200307180340458e-06,
"loss": 0.0007,
"step": 7500
},
{
"epoch": 0.48,
"eval_avg_sts": 0.7739437746585223,
"eval_sickr_spearman": 0.7161355091239138,
"eval_stsb_spearman": 0.8317520401931309,
"step": 7500
},
{
"epoch": 0.49,
"eval_avg_sts": 0.7628158501141427,
"eval_sickr_spearman": 0.7044322509475033,
"eval_stsb_spearman": 0.8211994492807821,
"step": 7625
},
{
"epoch": 0.5,
"eval_avg_sts": 0.7566107770472272,
"eval_sickr_spearman": 0.7021283431048213,
"eval_stsb_spearman": 0.8110932109896332,
"step": 7750
},
{
"epoch": 0.5,
"eval_avg_sts": 0.7576093285240451,
"eval_sickr_spearman": 0.7070185336355399,
"eval_stsb_spearman": 0.8082001234125501,
"step": 7875
},
{
"epoch": 0.51,
"learning_rate": 4.8803276590298225e-06,
"loss": 0.0004,
"step": 8000
},
{
"epoch": 0.51,
"eval_avg_sts": 0.760233390447007,
"eval_sickr_spearman": 0.7091670608650695,
"eval_stsb_spearman": 0.8112997200289445,
"step": 8000
},
{
"epoch": 0.52,
"eval_avg_sts": 0.7602377460168234,
"eval_sickr_spearman": 0.7104034774782894,
"eval_stsb_spearman": 0.8100720145553574,
"step": 8125
},
{
"epoch": 0.53,
"eval_avg_sts": 0.7616580314424666,
"eval_sickr_spearman": 0.7110805719152625,
"eval_stsb_spearman": 0.8122354909696707,
"step": 8250
},
{
"epoch": 0.54,
"eval_avg_sts": 0.7651474794871342,
"eval_sickr_spearman": 0.7107735571148701,
"eval_stsb_spearman": 0.8195214018593985,
"step": 8375
},
{
"epoch": 0.54,
"learning_rate": 4.560348137719187e-06,
"loss": 0.0004,
"step": 8500
},
{
"epoch": 0.54,
"eval_avg_sts": 0.7653451370230322,
"eval_sickr_spearman": 0.7108329235562477,
"eval_stsb_spearman": 0.8198573504898168,
"step": 8500
},
{
"epoch": 0.55,
"eval_avg_sts": 0.7693105360724941,
"eval_sickr_spearman": 0.7174611194925042,
"eval_stsb_spearman": 0.821159952652484,
"step": 8625
},
{
"epoch": 0.56,
"eval_avg_sts": 0.7688644948269532,
"eval_sickr_spearman": 0.7159121645022266,
"eval_stsb_spearman": 0.8218168251516798,
"step": 8750
},
{
"epoch": 0.57,
"eval_avg_sts": 0.7658999611274551,
"eval_sickr_spearman": 0.7111667877423439,
"eval_stsb_spearman": 0.8206331345125664,
"step": 8875
},
{
"epoch": 0.58,
"learning_rate": 4.24036861640855e-06,
"loss": 0.0005,
"step": 9000
},
{
"epoch": 0.58,
"eval_avg_sts": 0.7680124691016457,
"eval_sickr_spearman": 0.7111179881432828,
"eval_stsb_spearman": 0.8249069500600086,
"step": 9000
},
{
"epoch": 0.58,
"eval_avg_sts": 0.7700973304120509,
"eval_sickr_spearman": 0.7201923120135797,
"eval_stsb_spearman": 0.8200023488105221,
"step": 9125
},
{
"epoch": 0.59,
"eval_avg_sts": 0.7709943925570436,
"eval_sickr_spearman": 0.7219245056558439,
"eval_stsb_spearman": 0.8200642794582433,
"step": 9250
},
{
"epoch": 0.6,
"eval_avg_sts": 0.7704940850500881,
"eval_sickr_spearman": 0.7215150405160838,
"eval_stsb_spearman": 0.8194731295840925,
"step": 9375
},
{
"epoch": 0.61,
"learning_rate": 3.920389095097914e-06,
"loss": 0.0007,
"step": 9500
},
{
"epoch": 0.61,
"eval_avg_sts": 0.772013094506881,
"eval_sickr_spearman": 0.7239060767767745,
"eval_stsb_spearman": 0.8201201122369877,
"step": 9500
},
{
"epoch": 0.62,
"eval_avg_sts": 0.7722529224488867,
"eval_sickr_spearman": 0.7238736077522024,
"eval_stsb_spearman": 0.8206322371455709,
"step": 9625
},
{
"epoch": 0.62,
"eval_avg_sts": 0.769809433747652,
"eval_sickr_spearman": 0.7224149992637298,
"eval_stsb_spearman": 0.8172038682315741,
"step": 9750
},
{
"epoch": 0.63,
"eval_avg_sts": 0.7692010357381744,
"eval_sickr_spearman": 0.7215842533332562,
"eval_stsb_spearman": 0.8168178181430927,
"step": 9875
},
{
"epoch": 0.64,
"learning_rate": 3.600409573787278e-06,
"loss": 0.0005,
"step": 10000
},
{
"epoch": 0.64,
"eval_avg_sts": 0.7667248884742295,
"eval_sickr_spearman": 0.719994904186669,
"eval_stsb_spearman": 0.8134548727617901,
"step": 10000
},
{
"epoch": 0.65,
"eval_avg_sts": 0.7631501690436606,
"eval_sickr_spearman": 0.716718222446561,
"eval_stsb_spearman": 0.8095821156407603,
"step": 10125
},
{
"epoch": 0.66,
"eval_avg_sts": 0.7629832948957905,
"eval_sickr_spearman": 0.7177377786367878,
"eval_stsb_spearman": 0.8082288111547933,
"step": 10250
},
{
"epoch": 0.66,
"eval_avg_sts": 0.7669126925997836,
"eval_sickr_spearman": 0.7264237230208561,
"eval_stsb_spearman": 0.8074016621787109,
"step": 10375
},
{
"epoch": 0.67,
"learning_rate": 3.280430052476642e-06,
"loss": 0.0006,
"step": 10500
},
{
"epoch": 0.67,
"eval_avg_sts": 0.7718219901997241,
"eval_sickr_spearman": 0.7337559108108911,
"eval_stsb_spearman": 0.8098880695885572,
"step": 10500
},
{
"epoch": 0.68,
"eval_avg_sts": 0.7714922632909542,
"eval_sickr_spearman": 0.7311764005081574,
"eval_stsb_spearman": 0.8118081260737511,
"step": 10625
},
{
"epoch": 0.69,
"eval_avg_sts": 0.7727876566370058,
"eval_sickr_spearman": 0.7285461693623048,
"eval_stsb_spearman": 0.8170291439117067,
"step": 10750
},
{
"epoch": 0.7,
"eval_avg_sts": 0.772842738222782,
"eval_sickr_spearman": 0.7248763530569258,
"eval_stsb_spearman": 0.8208091233886382,
"step": 10875
},
{
"epoch": 0.7,
"learning_rate": 2.960450531166006e-06,
"loss": 0.0005,
"step": 11000
},
{
"epoch": 0.7,
"eval_avg_sts": 0.7737373423612539,
"eval_sickr_spearman": 0.7237420025342619,
"eval_stsb_spearman": 0.8237326821882459,
"step": 11000
},
{
"epoch": 0.71,
"eval_avg_sts": 0.7736646449640281,
"eval_sickr_spearman": 0.7248283219554877,
"eval_stsb_spearman": 0.8225009679725683,
"step": 11125
},
{
"epoch": 0.72,
"eval_avg_sts": 0.7733368787551957,
"eval_sickr_spearman": 0.7176964238584496,
"eval_stsb_spearman": 0.8289773336519419,
"step": 11250
},
{
"epoch": 0.73,
"eval_avg_sts": 0.772262796771174,
"eval_sickr_spearman": 0.7156863222632646,
"eval_stsb_spearman": 0.8288392712790835,
"step": 11375
},
{
"epoch": 0.74,
"learning_rate": 2.640471009855369e-06,
"loss": 0.0006,
"step": 11500
},
{
"epoch": 0.74,
"eval_avg_sts": 0.7728128954421813,
"eval_sickr_spearman": 0.717364721071918,
"eval_stsb_spearman": 0.8282610698124449,
"step": 11500
},
{
"epoch": 0.74,
"eval_avg_sts": 0.7681791597143148,
"eval_sickr_spearman": 0.7144295404630349,
"eval_stsb_spearman": 0.8219287789655949,
"step": 11625
},
{
"epoch": 0.75,
"eval_avg_sts": 0.7668750327968881,
"eval_sickr_spearman": 0.7139568663937824,
"eval_stsb_spearman": 0.8197931991999937,
"step": 11750
},
{
"epoch": 0.76,
"eval_avg_sts": 0.7640936046803386,
"eval_sickr_spearman": 0.7124148278821119,
"eval_stsb_spearman": 0.8157723814785652,
"step": 11875
},
{
"epoch": 0.77,
"learning_rate": 2.3204914885447333e-06,
"loss": 0.0007,
"step": 12000
},
{
"epoch": 0.77,
"eval_avg_sts": 0.7638027953353596,
"eval_sickr_spearman": 0.7146511079339689,
"eval_stsb_spearman": 0.8129544827367502,
"step": 12000
},
{
"epoch": 0.78,
"eval_avg_sts": 0.7619683130589899,
"eval_sickr_spearman": 0.7137378445712247,
"eval_stsb_spearman": 0.8101987815467552,
"step": 12125
},
{
"epoch": 0.78,
"eval_avg_sts": 0.7636607985650499,
"eval_sickr_spearman": 0.7154829585797756,
"eval_stsb_spearman": 0.811838638550324,
"step": 12250
},
{
"epoch": 0.79,
"eval_avg_sts": 0.7656556423146959,
"eval_sickr_spearman": 0.7171441142230127,
"eval_stsb_spearman": 0.8141671704063791,
"step": 12375
},
{
"epoch": 0.8,
"learning_rate": 2.000511967234097e-06,
"loss": 0.0007,
"step": 12500
},
{
"epoch": 0.8,
"eval_avg_sts": 0.7654090921860195,
"eval_sickr_spearman": 0.7177945513986876,
"eval_stsb_spearman": 0.8130236329733512,
"step": 12500
},
{
"epoch": 0.81,
"eval_avg_sts": 0.7631961636347036,
"eval_sickr_spearman": 0.7180557925594095,
"eval_stsb_spearman": 0.8083365347099974,
"step": 12625
},
{
"epoch": 0.82,
"eval_avg_sts": 0.7639726864144019,
"eval_sickr_spearman": 0.7181145345964683,
"eval_stsb_spearman": 0.8098308382323353,
"step": 12750
},
{
"epoch": 0.82,
"eval_avg_sts": 0.7630039855353228,
"eval_sickr_spearman": 0.7154700382134888,
"eval_stsb_spearman": 0.8105379328571569,
"step": 12875
},
{
"epoch": 0.83,
"learning_rate": 1.6805324459234608e-06,
"loss": 0.0007,
"step": 13000
},
{
"epoch": 0.83,
"eval_avg_sts": 0.7623670598777739,
"eval_sickr_spearman": 0.7141997116426535,
"eval_stsb_spearman": 0.8105344081128942,
"step": 13000
},
{
"epoch": 0.84,
"eval_avg_sts": 0.762649541131944,
"eval_sickr_spearman": 0.714223871286677,
"eval_stsb_spearman": 0.8110752109772111,
"step": 13125
},
{
"epoch": 0.85,
"eval_avg_sts": 0.762339972693242,
"eval_sickr_spearman": 0.7139050408353308,
"eval_stsb_spearman": 0.8107749045511534,
"step": 13250
},
{
"epoch": 0.86,
"eval_avg_sts": 0.7621756663732473,
"eval_sickr_spearman": 0.7133488406806774,
"eval_stsb_spearman": 0.8110024920658171,
"step": 13375
},
{
"epoch": 0.86,
"learning_rate": 1.3605529246128248e-06,
"loss": 0.0006,
"step": 13500
},
{
"epoch": 0.86,
"eval_avg_sts": 0.7614721810339764,
"eval_sickr_spearman": 0.7150819949449702,
"eval_stsb_spearman": 0.8078623671229826,
"step": 13500
},
{
"epoch": 0.87,
"eval_avg_sts": 0.7613416141094851,
"eval_sickr_spearman": 0.7152224378855754,
"eval_stsb_spearman": 0.8074607903333948,
"step": 13625
},
{
"epoch": 0.88,
"eval_avg_sts": 0.7619056768073365,
"eval_sickr_spearman": 0.7157069276057815,
"eval_stsb_spearman": 0.8081044260088914,
"step": 13750
},
{
"epoch": 0.89,
"eval_avg_sts": 0.7625807690636458,
"eval_sickr_spearman": 0.7158452091468218,
"eval_stsb_spearman": 0.8093163289804698,
"step": 13875
},
{
"epoch": 0.9,
"learning_rate": 1.0405734033021888e-06,
"loss": 0.0005,
"step": 14000
},
{
"epoch": 0.9,
"eval_avg_sts": 0.7626740159905894,
"eval_sickr_spearman": 0.7153790192762635,
"eval_stsb_spearman": 0.8099690127049152,
"step": 14000
},
{
"epoch": 0.9,
"eval_avg_sts": 0.7623073492171049,
"eval_sickr_spearman": 0.7137424075258613,
"eval_stsb_spearman": 0.8108722909083483,
"step": 14125
},
{
"epoch": 0.91,
"eval_avg_sts": 0.7627676837162675,
"eval_sickr_spearman": 0.7135836167045069,
"eval_stsb_spearman": 0.811951750728028,
"step": 14250
},
{
"epoch": 0.92,
"eval_avg_sts": 0.7622897188554142,
"eval_sickr_spearman": 0.7125857225410286,
"eval_stsb_spearman": 0.8119937151697998,
"step": 14375
},
{
"epoch": 0.93,
"learning_rate": 7.205938819915525e-07,
"loss": 0.0003,
"step": 14500
},
{
"epoch": 0.93,
"eval_avg_sts": 0.762755661091796,
"eval_sickr_spearman": 0.7135279486579401,
"eval_stsb_spearman": 0.8119833735256521,
"step": 14500
},
{
"epoch": 0.94,
"eval_avg_sts": 0.7635781922303508,
"eval_sickr_spearman": 0.7126439362359717,
"eval_stsb_spearman": 0.8145124482247301,
"step": 14625
},
{
"epoch": 0.94,
"eval_avg_sts": 0.7627630570209332,
"eval_sickr_spearman": 0.7108910892200891,
"eval_stsb_spearman": 0.8146350248217772,
"step": 14750
},
{
"epoch": 0.95,
"eval_avg_sts": 0.7620568929343465,
"eval_sickr_spearman": 0.7104201923015899,
"eval_stsb_spearman": 0.8136935935671031,
"step": 14875
},
{
"epoch": 0.96,
"learning_rate": 4.0061436068091647e-07,
"loss": 0.0007,
"step": 15000
},
{
"epoch": 0.96,
"eval_avg_sts": 0.7614322691019428,
"eval_sickr_spearman": 0.7104694241805639,
"eval_stsb_spearman": 0.8123951140233218,
"step": 15000
},
{
"epoch": 0.97,
"eval_avg_sts": 0.7614246018155921,
"eval_sickr_spearman": 0.7104402212708896,
"eval_stsb_spearman": 0.8124089823602947,
"step": 15125
},
{
"epoch": 0.98,
"eval_avg_sts": 0.7615696008749822,
"eval_sickr_spearman": 0.7107793208470427,
"eval_stsb_spearman": 0.8123598809029217,
"step": 15250
},
{
"epoch": 0.98,
"eval_avg_sts": 0.7613961550857908,
"eval_sickr_spearman": 0.7105805201181903,
"eval_stsb_spearman": 0.8122117900533915,
"step": 15375
},
{
"epoch": 0.99,
"learning_rate": 8.06348393702803e-08,
"loss": 0.0005,
"step": 15500
},
{
"epoch": 0.99,
"eval_avg_sts": 0.7614175786889548,
"eval_sickr_spearman": 0.7106010293985044,
"eval_stsb_spearman": 0.812234127979405,
"step": 15500
},
{
"epoch": 1.0,
"eval_avg_sts": 0.7613506554623436,
"eval_sickr_spearman": 0.7104816721114307,
"eval_stsb_spearman": 0.8122196388132565,
"step": 15625
},
{
"epoch": 1.0,
"step": 15626,
"train_runtime": 4633.8657,
"train_samples_per_second": 3.372
}
],
"max_steps": 15626,
"num_train_epochs": 1,
"total_flos": 119690591356191744,
"trial_name": null,
"trial_params": null
}