{
  "best_metric": 0.8509895356755377,
  "best_model_checkpoint": "result/esimcse-roberta-base-0.1-bpe-0.3-160-bs64",
  "epoch": 1.0,
  "global_step": 15626,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "eval_avg_sts": 0.7581765037912359,
      "eval_sickr_spearman": 0.7151382873958556,
      "eval_stsb_spearman": 0.8012147201866162,
      "step": 125
    },
    {
      "epoch": 0.02,
      "eval_avg_sts": 0.7681462302822064,
      "eval_sickr_spearman": 0.7191643023494997,
      "eval_stsb_spearman": 0.817128158214913,
      "step": 250
    },
    {
      "epoch": 0.02,
      "eval_avg_sts": 0.7689880945057068,
      "eval_sickr_spearman": 0.7167425261838887,
      "eval_stsb_spearman": 0.821233662827525,
      "step": 375
    },
    {
      "epoch": 0.03,
      "learning_rate": 9.680020478689364e-06,
      "loss": 0.2173,
      "step": 500
    },
    {
      "epoch": 0.03,
      "eval_avg_sts": 0.77171181346581,
      "eval_sickr_spearman": 0.7162504475496553,
      "eval_stsb_spearman": 0.8271731793819647,
      "step": 500
    },
    {
      "epoch": 0.04,
      "eval_avg_sts": 0.7748967925777871,
      "eval_sickr_spearman": 0.7145534607047452,
      "eval_stsb_spearman": 0.835240124450829,
      "step": 625
    },
    {
      "epoch": 0.05,
      "eval_avg_sts": 0.7740832357556943,
      "eval_sickr_spearman": 0.7138060487352668,
      "eval_stsb_spearman": 0.8343604227761218,
      "step": 750
    },
    {
      "epoch": 0.06,
      "eval_avg_sts": 0.7762465978643387,
      "eval_sickr_spearman": 0.7208324225024454,
      "eval_stsb_spearman": 0.8316607732262321,
      "step": 875
    },
    {
      "epoch": 0.06,
      "learning_rate": 9.36004095737873e-06,
      "loss": 0.001,
      "step": 1000
    },
    {
      "epoch": 0.06,
      "eval_avg_sts": 0.7733397471522039,
      "eval_sickr_spearman": 0.7168732668420033,
      "eval_stsb_spearman": 0.8298062274624045,
      "step": 1000
    },
    {
      "epoch": 0.07,
      "eval_avg_sts": 0.7653409481290172,
      "eval_sickr_spearman": 0.7163877684686668,
      "eval_stsb_spearman": 0.8142941277893675,
      "step": 1125
    },
    {
      "epoch": 0.08,
      "eval_avg_sts": 0.7736208665177124,
      "eval_sickr_spearman": 0.7180561287771196,
      "eval_stsb_spearman": 0.8291856042583052,
      "step": 1250
    },
    {
      "epoch": 0.09,
      "eval_avg_sts": 0.7769947892835245,
      "eval_sickr_spearman": 0.719469443936936,
      "eval_stsb_spearman": 0.834520134630113,
      "step": 1375
    },
    {
      "epoch": 0.1,
      "learning_rate": 9.040061436068092e-06,
      "loss": 0.0009,
      "step": 1500
    },
    {
      "epoch": 0.1,
      "eval_avg_sts": 0.7724765591275501,
      "eval_sickr_spearman": 0.7175171717878825,
      "eval_stsb_spearman": 0.8274359464672176,
      "step": 1500
    },
    {
      "epoch": 0.1,
      "eval_avg_sts": 0.7805406775721991,
      "eval_sickr_spearman": 0.7221231622913917,
      "eval_stsb_spearman": 0.8389581928530065,
      "step": 1625
    },
    {
      "epoch": 0.11,
      "eval_avg_sts": 0.7788261300626871,
      "eval_sickr_spearman": 0.7156971292610881,
      "eval_stsb_spearman": 0.841955130864286,
      "step": 1750
    },
    {
      "epoch": 0.12,
      "eval_avg_sts": 0.7840241297108503,
      "eval_sickr_spearman": 0.7276132612790723,
      "eval_stsb_spearman": 0.8404349981426283,
      "step": 1875
    },
    {
      "epoch": 0.13,
      "learning_rate": 8.720081914757458e-06,
      "loss": 0.0007,
      "step": 2000
    },
    {
      "epoch": 0.13,
      "eval_avg_sts": 0.7859614716903541,
      "eval_sickr_spearman": 0.73084517803264,
      "eval_stsb_spearman": 0.8410777653480682,
      "step": 2000
    },
    {
      "epoch": 0.14,
      "eval_avg_sts": 0.7879631319134857,
      "eval_sickr_spearman": 0.7249367281514335,
      "eval_stsb_spearman": 0.8509895356755377,
      "step": 2125
    },
    {
      "epoch": 0.14,
      "eval_avg_sts": 0.785546295104504,
      "eval_sickr_spearman": 0.7227898820104544,
      "eval_stsb_spearman": 0.8483027081985536,
      "step": 2250
    },
    {
      "epoch": 0.15,
      "eval_avg_sts": 0.7834786244087628,
      "eval_sickr_spearman": 0.723430760996943,
      "eval_stsb_spearman": 0.8435264878205825,
      "step": 2375
    },
    {
      "epoch": 0.16,
      "learning_rate": 8.400102393446819e-06,
      "loss": 0.0009,
      "step": 2500
    },
    {
      "epoch": 0.16,
      "eval_avg_sts": 0.7820490328984251,
      "eval_sickr_spearman": 0.7255294799742813,
      "eval_stsb_spearman": 0.8385685858225688,
      "step": 2500
    },
    {
      "epoch": 0.17,
      "eval_avg_sts": 0.7808832119394027,
      "eval_sickr_spearman": 0.7211363152812443,
      "eval_stsb_spearman": 0.8406301085975609,
      "step": 2625
    },
    {
      "epoch": 0.18,
      "eval_avg_sts": 0.7801816610620529,
      "eval_sickr_spearman": 0.7208986573913285,
      "eval_stsb_spearman": 0.8394646647327774,
      "step": 2750
    },
    {
      "epoch": 0.18,
      "eval_avg_sts": 0.7804005291249532,
      "eval_sickr_spearman": 0.7246992623859235,
      "eval_stsb_spearman": 0.8361017958639829,
      "step": 2875
    },
    {
      "epoch": 0.19,
      "learning_rate": 8.080122872136184e-06,
      "loss": 0.0007,
      "step": 3000
    },
    {
      "epoch": 0.19,
      "eval_avg_sts": 0.7782109243211895,
      "eval_sickr_spearman": 0.722025947342081,
      "eval_stsb_spearman": 0.8343959013002981,
      "step": 3000
    },
    {
      "epoch": 0.2,
      "eval_avg_sts": 0.7856724922536673,
      "eval_sickr_spearman": 0.7338031734147062,
      "eval_stsb_spearman": 0.8375418110926284,
      "step": 3125
    },
    {
      "epoch": 0.21,
      "eval_avg_sts": 0.7856388427423309,
      "eval_sickr_spearman": 0.732672281131346,
      "eval_stsb_spearman": 0.8386054043533157,
      "step": 3250
    },
    {
      "epoch": 0.22,
      "eval_avg_sts": 0.7828902827361719,
      "eval_sickr_spearman": 0.7300378712796683,
      "eval_stsb_spearman": 0.8357426941926754,
      "step": 3375
    },
    {
      "epoch": 0.22,
      "learning_rate": 7.760143350825547e-06,
      "loss": 0.0007,
      "step": 3500
    },
    {
      "epoch": 0.22,
      "eval_avg_sts": 0.7802120728333768,
      "eval_sickr_spearman": 0.7278627828510433,
      "eval_stsb_spearman": 0.8325613628157101,
      "step": 3500
    },
    {
      "epoch": 0.23,
      "eval_avg_sts": 0.7814736917323457,
      "eval_sickr_spearman": 0.7240533881648852,
      "eval_stsb_spearman": 0.8388939952998063,
      "step": 3625
    },
    {
      "epoch": 0.24,
      "eval_avg_sts": 0.7752603635110975,
      "eval_sickr_spearman": 0.7130665618975255,
      "eval_stsb_spearman": 0.8374541651246696,
      "step": 3750
    },
    {
      "epoch": 0.25,
      "eval_avg_sts": 0.7738531430333261,
      "eval_sickr_spearman": 0.7116984440041625,
      "eval_stsb_spearman": 0.8360078420624897,
      "step": 3875
    },
    {
      "epoch": 0.26,
      "learning_rate": 7.440163829514912e-06,
      "loss": 0.0008,
      "step": 4000
    },
    {
      "epoch": 0.26,
      "eval_avg_sts": 0.7716461848796647,
      "eval_sickr_spearman": 0.7067132959859007,
      "eval_stsb_spearman": 0.8365790737734288,
      "step": 4000
    },
    {
      "epoch": 0.26,
      "eval_avg_sts": 0.7726640328663634,
      "eval_sickr_spearman": 0.7067841898916234,
      "eval_stsb_spearman": 0.8385438758411035,
      "step": 4125
    },
    {
      "epoch": 0.27,
      "eval_avg_sts": 0.7710462569066159,
      "eval_sickr_spearman": 0.7075652236321085,
      "eval_stsb_spearman": 0.8345272901811234,
      "step": 4250
    },
    {
      "epoch": 0.28,
      "eval_avg_sts": 0.7746792058093399,
      "eval_sickr_spearman": 0.7143302601763624,
      "eval_stsb_spearman": 0.8350281514423173,
      "step": 4375
    },
    {
      "epoch": 0.29,
      "learning_rate": 7.120184308204276e-06,
      "loss": 0.0007,
      "step": 4500
    },
    {
      "epoch": 0.29,
      "eval_avg_sts": 0.7565698440366436,
      "eval_sickr_spearman": 0.7096718197100826,
      "eval_stsb_spearman": 0.8034678683632046,
      "step": 4500
    },
    {
      "epoch": 0.3,
      "eval_avg_sts": 0.7675338058741563,
      "eval_sickr_spearman": 0.7151141757829338,
      "eval_stsb_spearman": 0.819953435965379,
      "step": 4625
    },
    {
      "epoch": 0.3,
      "eval_avg_sts": 0.7690480296047658,
      "eval_sickr_spearman": 0.7127470109796578,
      "eval_stsb_spearman": 0.8253490482298739,
      "step": 4750
    },
    {
      "epoch": 0.31,
      "eval_avg_sts": 0.7684443815823148,
      "eval_sickr_spearman": 0.713693800051206,
      "eval_stsb_spearman": 0.8231949631134236,
      "step": 4875
    },
    {
      "epoch": 0.32,
      "learning_rate": 6.800204786893639e-06,
      "loss": 0.0004,
      "step": 5000
    },
    {
      "epoch": 0.32,
      "eval_avg_sts": 0.7691689362019429,
      "eval_sickr_spearman": 0.7142018250111167,
      "eval_stsb_spearman": 0.824136047392769,
      "step": 5000
    },
    {
      "epoch": 0.33,
      "eval_avg_sts": 0.7695187713024993,
      "eval_sickr_spearman": 0.7115182313115667,
      "eval_stsb_spearman": 0.8275193112934319,
      "step": 5125
    },
    {
      "epoch": 0.34,
      "eval_avg_sts": 0.7653115806051607,
      "eval_sickr_spearman": 0.7045340768825521,
      "eval_stsb_spearman": 0.8260890843277693,
      "step": 5250
    },
    {
      "epoch": 0.34,
      "eval_avg_sts": 0.7668890157040491,
      "eval_sickr_spearman": 0.7114907094904426,
      "eval_stsb_spearman": 0.8222873219176555,
      "step": 5375
    },
    {
      "epoch": 0.35,
      "learning_rate": 6.480225265583003e-06,
      "loss": 0.0006,
      "step": 5500
    },
    {
      "epoch": 0.35,
      "eval_avg_sts": 0.7699847309597411,
      "eval_sickr_spearman": 0.7107595800643516,
      "eval_stsb_spearman": 0.8292098818551306,
      "step": 5500
    },
    {
      "epoch": 0.36,
      "eval_avg_sts": 0.7567106410293966,
      "eval_sickr_spearman": 0.7000862527960785,
      "eval_stsb_spearman": 0.8133350292627146,
      "step": 5625
    },
    {
      "epoch": 0.37,
      "eval_avg_sts": 0.7573616140937487,
      "eval_sickr_spearman": 0.699952438147472,
      "eval_stsb_spearman": 0.8147707900400254,
      "step": 5750
    },
    {
      "epoch": 0.38,
      "eval_avg_sts": 0.7537503092076071,
      "eval_sickr_spearman": 0.6944211284747575,
      "eval_stsb_spearman": 0.8130794899404568,
      "step": 5875
    },
    {
      "epoch": 0.38,
      "learning_rate": 6.1602457442723675e-06,
      "loss": 0.0006,
      "step": 6000
    },
    {
      "epoch": 0.38,
      "eval_avg_sts": 0.7520214801965759,
      "eval_sickr_spearman": 0.6948064820015954,
      "eval_stsb_spearman": 0.8092364783915562,
      "step": 6000
    },
    {
      "epoch": 0.39,
      "eval_avg_sts": 0.7575813676798819,
      "eval_sickr_spearman": 0.7104122191387511,
      "eval_stsb_spearman": 0.8047505162210129,
      "step": 6125
    },
    {
      "epoch": 0.4,
      "eval_avg_sts": 0.7601457988270166,
      "eval_sickr_spearman": 0.7141828046949473,
      "eval_stsb_spearman": 0.8061087929590859,
      "step": 6250
    },
    {
      "epoch": 0.41,
      "eval_avg_sts": 0.7635874777468191,
      "eval_sickr_spearman": 0.71502354109452,
      "eval_stsb_spearman": 0.8121514143991182,
      "step": 6375
    },
    {
      "epoch": 0.42,
      "learning_rate": 5.840266222961732e-06,
      "loss": 0.0006,
      "step": 6500
    },
    {
      "epoch": 0.42,
      "eval_avg_sts": 0.7661782010291336,
      "eval_sickr_spearman": 0.7131075804581537,
      "eval_stsb_spearman": 0.8192488216001136,
      "step": 6500
    },
    {
      "epoch": 0.42,
      "eval_avg_sts": 0.7668687543410466,
      "eval_sickr_spearman": 0.715227337057922,
      "eval_stsb_spearman": 0.8185101716241713,
      "step": 6625
    },
    {
      "epoch": 0.43,
      "eval_avg_sts": 0.7653179212909831,
      "eval_sickr_spearman": 0.706994422022618,
      "eval_stsb_spearman": 0.8236414205593481,
      "step": 6750
    },
    {
      "epoch": 0.44,
      "eval_avg_sts": 0.7660646063264975,
      "eval_sickr_spearman": 0.708211338008654,
      "eval_stsb_spearman": 0.823917874644341,
      "step": 6875
    },
    {
      "epoch": 0.45,
      "learning_rate": 5.520286701651095e-06,
      "loss": 0.0005,
      "step": 7000
    },
    {
      "epoch": 0.45,
      "eval_avg_sts": 0.7679532903699651,
      "eval_sickr_spearman": 0.7082187828293769,
      "eval_stsb_spearman": 0.8276877979105532,
      "step": 7000
    },
    {
      "epoch": 0.46,
      "eval_avg_sts": 0.7681556050474033,
      "eval_sickr_spearman": 0.7095044313215708,
      "eval_stsb_spearman": 0.8268067787732358,
      "step": 7125
    },
    {
      "epoch": 0.46,
      "eval_avg_sts": 0.7631361714276783,
      "eval_sickr_spearman": 0.7091408358836844,
      "eval_stsb_spearman": 0.8171315069716724,
      "step": 7250
    },
    {
      "epoch": 0.47,
      "eval_avg_sts": 0.7696379675303331,
      "eval_sickr_spearman": 0.7148051917073824,
      "eval_stsb_spearman": 0.8244707433532837,
      "step": 7375
    },
    {
      "epoch": 0.48,
      "learning_rate": 5.200307180340458e-06,
      "loss": 0.0007,
      "step": 7500
    },
    {
      "epoch": 0.48,
      "eval_avg_sts": 0.7739437746585223,
      "eval_sickr_spearman": 0.7161355091239138,
      "eval_stsb_spearman": 0.8317520401931309,
      "step": 7500
    },
    {
      "epoch": 0.49,
      "eval_avg_sts": 0.7628158501141427,
      "eval_sickr_spearman": 0.7044322509475033,
      "eval_stsb_spearman": 0.8211994492807821,
      "step": 7625
    },
    {
      "epoch": 0.5,
      "eval_avg_sts": 0.7566107770472272,
      "eval_sickr_spearman": 0.7021283431048213,
      "eval_stsb_spearman": 0.8110932109896332,
      "step": 7750
    },
    {
      "epoch": 0.5,
      "eval_avg_sts": 0.7576093285240451,
      "eval_sickr_spearman": 0.7070185336355399,
      "eval_stsb_spearman": 0.8082001234125501,
      "step": 7875
    },
    {
      "epoch": 0.51,
      "learning_rate": 4.8803276590298225e-06,
      "loss": 0.0004,
      "step": 8000
    },
    {
      "epoch": 0.51,
      "eval_avg_sts": 0.760233390447007,
      "eval_sickr_spearman": 0.7091670608650695,
      "eval_stsb_spearman": 0.8112997200289445,
      "step": 8000
    },
    {
      "epoch": 0.52,
      "eval_avg_sts": 0.7602377460168234,
      "eval_sickr_spearman": 0.7104034774782894,
      "eval_stsb_spearman": 0.8100720145553574,
      "step": 8125
    },
    {
      "epoch": 0.53,
      "eval_avg_sts": 0.7616580314424666,
      "eval_sickr_spearman": 0.7110805719152625,
      "eval_stsb_spearman": 0.8122354909696707,
      "step": 8250
    },
    {
      "epoch": 0.54,
      "eval_avg_sts": 0.7651474794871342,
      "eval_sickr_spearman": 0.7107735571148701,
      "eval_stsb_spearman": 0.8195214018593985,
      "step": 8375
    },
    {
      "epoch": 0.54,
      "learning_rate": 4.560348137719187e-06,
      "loss": 0.0004,
      "step": 8500
    },
    {
      "epoch": 0.54,
      "eval_avg_sts": 0.7653451370230322,
      "eval_sickr_spearman": 0.7108329235562477,
      "eval_stsb_spearman": 0.8198573504898168,
      "step": 8500
    },
    {
      "epoch": 0.55,
      "eval_avg_sts": 0.7693105360724941,
      "eval_sickr_spearman": 0.7174611194925042,
      "eval_stsb_spearman": 0.821159952652484,
      "step": 8625
    },
    {
      "epoch": 0.56,
      "eval_avg_sts": 0.7688644948269532,
      "eval_sickr_spearman": 0.7159121645022266,
      "eval_stsb_spearman": 0.8218168251516798,
      "step": 8750
    },
    {
      "epoch": 0.57,
      "eval_avg_sts": 0.7658999611274551,
      "eval_sickr_spearman": 0.7111667877423439,
      "eval_stsb_spearman": 0.8206331345125664,
      "step": 8875
    },
    {
      "epoch": 0.58,
      "learning_rate": 4.24036861640855e-06,
      "loss": 0.0005,
      "step": 9000
    },
    {
      "epoch": 0.58,
      "eval_avg_sts": 0.7680124691016457,
      "eval_sickr_spearman": 0.7111179881432828,
      "eval_stsb_spearman": 0.8249069500600086,
      "step": 9000
    },
    {
      "epoch": 0.58,
      "eval_avg_sts": 0.7700973304120509,
      "eval_sickr_spearman": 0.7201923120135797,
      "eval_stsb_spearman": 0.8200023488105221,
      "step": 9125
    },
    {
      "epoch": 0.59,
      "eval_avg_sts": 0.7709943925570436,
      "eval_sickr_spearman": 0.7219245056558439,
      "eval_stsb_spearman": 0.8200642794582433,
      "step": 9250
    },
    {
      "epoch": 0.6,
      "eval_avg_sts": 0.7704940850500881,
      "eval_sickr_spearman": 0.7215150405160838,
      "eval_stsb_spearman": 0.8194731295840925,
      "step": 9375
    },
    {
      "epoch": 0.61,
      "learning_rate": 3.920389095097914e-06,
      "loss": 0.0007,
      "step": 9500
    },
    {
      "epoch": 0.61,
      "eval_avg_sts": 0.772013094506881,
      "eval_sickr_spearman": 0.7239060767767745,
      "eval_stsb_spearman": 0.8201201122369877,
      "step": 9500
    },
    {
      "epoch": 0.62,
      "eval_avg_sts": 0.7722529224488867,
      "eval_sickr_spearman": 0.7238736077522024,
      "eval_stsb_spearman": 0.8206322371455709,
      "step": 9625
    },
    {
      "epoch": 0.62,
      "eval_avg_sts": 0.769809433747652,
      "eval_sickr_spearman": 0.7224149992637298,
      "eval_stsb_spearman": 0.8172038682315741,
      "step": 9750
    },
    {
      "epoch": 0.63,
      "eval_avg_sts": 0.7692010357381744,
      "eval_sickr_spearman": 0.7215842533332562,
      "eval_stsb_spearman": 0.8168178181430927,
      "step": 9875
    },
    {
      "epoch": 0.64,
      "learning_rate": 3.600409573787278e-06,
      "loss": 0.0005,
      "step": 10000
    },
    {
      "epoch": 0.64,
      "eval_avg_sts": 0.7667248884742295,
      "eval_sickr_spearman": 0.719994904186669,
      "eval_stsb_spearman": 0.8134548727617901,
      "step": 10000
    },
    {
      "epoch": 0.65,
      "eval_avg_sts": 0.7631501690436606,
      "eval_sickr_spearman": 0.716718222446561,
      "eval_stsb_spearman": 0.8095821156407603,
      "step": 10125
    },
    {
      "epoch": 0.66,
      "eval_avg_sts": 0.7629832948957905,
      "eval_sickr_spearman": 0.7177377786367878,
      "eval_stsb_spearman": 0.8082288111547933,
      "step": 10250
    },
    {
      "epoch": 0.66,
      "eval_avg_sts": 0.7669126925997836,
      "eval_sickr_spearman": 0.7264237230208561,
      "eval_stsb_spearman": 0.8074016621787109,
      "step": 10375
    },
    {
      "epoch": 0.67,
      "learning_rate": 3.280430052476642e-06,
      "loss": 0.0006,
      "step": 10500
    },
    {
      "epoch": 0.67,
      "eval_avg_sts": 0.7718219901997241,
      "eval_sickr_spearman": 0.7337559108108911,
      "eval_stsb_spearman": 0.8098880695885572,
      "step": 10500
    },
    {
      "epoch": 0.68,
      "eval_avg_sts": 0.7714922632909542,
      "eval_sickr_spearman": 0.7311764005081574,
      "eval_stsb_spearman": 0.8118081260737511,
      "step": 10625
    },
    {
      "epoch": 0.69,
      "eval_avg_sts": 0.7727876566370058,
      "eval_sickr_spearman": 0.7285461693623048,
      "eval_stsb_spearman": 0.8170291439117067,
      "step": 10750
    },
    {
      "epoch": 0.7,
      "eval_avg_sts": 0.772842738222782,
      "eval_sickr_spearman": 0.7248763530569258,
      "eval_stsb_spearman": 0.8208091233886382,
      "step": 10875
    },
    {
      "epoch": 0.7,
      "learning_rate": 2.960450531166006e-06,
      "loss": 0.0005,
      "step": 11000
    },
    {
      "epoch": 0.7,
      "eval_avg_sts": 0.7737373423612539,
      "eval_sickr_spearman": 0.7237420025342619,
      "eval_stsb_spearman": 0.8237326821882459,
      "step": 11000
    },
    {
      "epoch": 0.71,
      "eval_avg_sts": 0.7736646449640281,
      "eval_sickr_spearman": 0.7248283219554877,
      "eval_stsb_spearman": 0.8225009679725683,
      "step": 11125
    },
    {
      "epoch": 0.72,
      "eval_avg_sts": 0.7733368787551957,
      "eval_sickr_spearman": 0.7176964238584496,
      "eval_stsb_spearman": 0.8289773336519419,
      "step": 11250
    },
    {
      "epoch": 0.73,
      "eval_avg_sts": 0.772262796771174,
      "eval_sickr_spearman": 0.7156863222632646,
      "eval_stsb_spearman": 0.8288392712790835,
      "step": 11375
    },
    {
      "epoch": 0.74,
      "learning_rate": 2.640471009855369e-06,
      "loss": 0.0006,
      "step": 11500
    },
    {
      "epoch": 0.74,
      "eval_avg_sts": 0.7728128954421813,
      "eval_sickr_spearman": 0.717364721071918,
      "eval_stsb_spearman": 0.8282610698124449,
      "step": 11500
    },
    {
      "epoch": 0.74,
      "eval_avg_sts": 0.7681791597143148,
      "eval_sickr_spearman": 0.7144295404630349,
      "eval_stsb_spearman": 0.8219287789655949,
      "step": 11625
    },
    {
      "epoch": 0.75,
      "eval_avg_sts": 0.7668750327968881,
      "eval_sickr_spearman": 0.7139568663937824,
      "eval_stsb_spearman": 0.8197931991999937,
      "step": 11750
    },
    {
      "epoch": 0.76,
      "eval_avg_sts": 0.7640936046803386,
      "eval_sickr_spearman": 0.7124148278821119,
      "eval_stsb_spearman": 0.8157723814785652,
      "step": 11875
    },
    {
      "epoch": 0.77,
      "learning_rate": 2.3204914885447333e-06,
      "loss": 0.0007,
      "step": 12000
    },
    {
      "epoch": 0.77,
      "eval_avg_sts": 0.7638027953353596,
      "eval_sickr_spearman": 0.7146511079339689,
      "eval_stsb_spearman": 0.8129544827367502,
      "step": 12000
    },
    {
      "epoch": 0.78,
      "eval_avg_sts": 0.7619683130589899,
      "eval_sickr_spearman": 0.7137378445712247,
      "eval_stsb_spearman": 0.8101987815467552,
      "step": 12125
    },
    {
      "epoch": 0.78,
      "eval_avg_sts": 0.7636607985650499,
      "eval_sickr_spearman": 0.7154829585797756,
      "eval_stsb_spearman": 0.811838638550324,
      "step": 12250
    },
    {
      "epoch": 0.79,
      "eval_avg_sts": 0.7656556423146959,
      "eval_sickr_spearman": 0.7171441142230127,
      "eval_stsb_spearman": 0.8141671704063791,
      "step": 12375
    },
    {
      "epoch": 0.8,
      "learning_rate": 2.000511967234097e-06,
      "loss": 0.0007,
      "step": 12500
    },
    {
      "epoch": 0.8,
      "eval_avg_sts": 0.7654090921860195,
      "eval_sickr_spearman": 0.7177945513986876,
      "eval_stsb_spearman": 0.8130236329733512,
      "step": 12500
    },
    {
      "epoch": 0.81,
      "eval_avg_sts": 0.7631961636347036,
      "eval_sickr_spearman": 0.7180557925594095,
      "eval_stsb_spearman": 0.8083365347099974,
      "step": 12625
    },
    {
      "epoch": 0.82,
      "eval_avg_sts": 0.7639726864144019,
      "eval_sickr_spearman": 0.7181145345964683,
      "eval_stsb_spearman": 0.8098308382323353,
      "step": 12750
    },
    {
      "epoch": 0.82,
      "eval_avg_sts": 0.7630039855353228,
      "eval_sickr_spearman": 0.7154700382134888,
      "eval_stsb_spearman": 0.8105379328571569,
      "step": 12875
    },
    {
      "epoch": 0.83,
      "learning_rate": 1.6805324459234608e-06,
      "loss": 0.0007,
      "step": 13000
    },
    {
      "epoch": 0.83,
      "eval_avg_sts": 0.7623670598777739,
      "eval_sickr_spearman": 0.7141997116426535,
      "eval_stsb_spearman": 0.8105344081128942,
      "step": 13000
    },
    {
      "epoch": 0.84,
      "eval_avg_sts": 0.762649541131944,
      "eval_sickr_spearman": 0.714223871286677,
      "eval_stsb_spearman": 0.8110752109772111,
      "step": 13125
    },
    {
      "epoch": 0.85,
      "eval_avg_sts": 0.762339972693242,
      "eval_sickr_spearman": 0.7139050408353308,
      "eval_stsb_spearman": 0.8107749045511534,
      "step": 13250
    },
    {
      "epoch": 0.86,
      "eval_avg_sts": 0.7621756663732473,
      "eval_sickr_spearman": 0.7133488406806774,
      "eval_stsb_spearman": 0.8110024920658171,
      "step": 13375
    },
    {
      "epoch": 0.86,
      "learning_rate": 1.3605529246128248e-06,
      "loss": 0.0006,
      "step": 13500
    },
    {
      "epoch": 0.86,
      "eval_avg_sts": 0.7614721810339764,
      "eval_sickr_spearman": 0.7150819949449702,
      "eval_stsb_spearman": 0.8078623671229826,
      "step": 13500
    },
    {
      "epoch": 0.87,
      "eval_avg_sts": 0.7613416141094851,
      "eval_sickr_spearman": 0.7152224378855754,
      "eval_stsb_spearman": 0.8074607903333948,
      "step": 13625
    },
    {
      "epoch": 0.88,
      "eval_avg_sts": 0.7619056768073365,
      "eval_sickr_spearman": 0.7157069276057815,
      "eval_stsb_spearman": 0.8081044260088914,
      "step": 13750
    },
    {
      "epoch": 0.89,
      "eval_avg_sts": 0.7625807690636458,
      "eval_sickr_spearman": 0.7158452091468218,
      "eval_stsb_spearman": 0.8093163289804698,
      "step": 13875
    },
    {
      "epoch": 0.9,
      "learning_rate": 1.0405734033021888e-06,
      "loss": 0.0005,
      "step": 14000
    },
    {
      "epoch": 0.9,
      "eval_avg_sts": 0.7626740159905894,
      "eval_sickr_spearman": 0.7153790192762635,
      "eval_stsb_spearman": 0.8099690127049152,
      "step": 14000
    },
    {
      "epoch": 0.9,
      "eval_avg_sts": 0.7623073492171049,
      "eval_sickr_spearman": 0.7137424075258613,
      "eval_stsb_spearman": 0.8108722909083483,
      "step": 14125
    },
    {
      "epoch": 0.91,
      "eval_avg_sts": 0.7627676837162675,
      "eval_sickr_spearman": 0.7135836167045069,
      "eval_stsb_spearman": 0.811951750728028,
      "step": 14250
    },
    {
      "epoch": 0.92,
      "eval_avg_sts": 0.7622897188554142,
      "eval_sickr_spearman": 0.7125857225410286,
      "eval_stsb_spearman": 0.8119937151697998,
      "step": 14375
    },
    {
      "epoch": 0.93,
      "learning_rate": 7.205938819915525e-07,
      "loss": 0.0003,
      "step": 14500
    },
    {
      "epoch": 0.93,
      "eval_avg_sts": 0.762755661091796,
      "eval_sickr_spearman": 0.7135279486579401,
      "eval_stsb_spearman": 0.8119833735256521,
      "step": 14500
    },
    {
      "epoch": 0.94,
      "eval_avg_sts": 0.7635781922303508,
      "eval_sickr_spearman": 0.7126439362359717,
      "eval_stsb_spearman": 0.8145124482247301,
      "step": 14625
    },
    {
      "epoch": 0.94,
      "eval_avg_sts": 0.7627630570209332,
      "eval_sickr_spearman": 0.7108910892200891,
      "eval_stsb_spearman": 0.8146350248217772,
      "step": 14750
    },
    {
      "epoch": 0.95,
      "eval_avg_sts": 0.7620568929343465,
      "eval_sickr_spearman": 0.7104201923015899,
      "eval_stsb_spearman": 0.8136935935671031,
      "step": 14875
    },
    {
      "epoch": 0.96,
      "learning_rate": 4.0061436068091647e-07,
      "loss": 0.0007,
      "step": 15000
    },
    {
      "epoch": 0.96,
      "eval_avg_sts": 0.7614322691019428,
      "eval_sickr_spearman": 0.7104694241805639,
      "eval_stsb_spearman": 0.8123951140233218,
      "step": 15000
    },
    {
      "epoch": 0.97,
      "eval_avg_sts": 0.7614246018155921,
      "eval_sickr_spearman": 0.7104402212708896,
      "eval_stsb_spearman": 0.8124089823602947,
      "step": 15125
    },
    {
      "epoch": 0.98,
      "eval_avg_sts": 0.7615696008749822,
      "eval_sickr_spearman": 0.7107793208470427,
      "eval_stsb_spearman": 0.8123598809029217,
      "step": 15250
    },
    {
      "epoch": 0.98,
      "eval_avg_sts": 0.7613961550857908,
      "eval_sickr_spearman": 0.7105805201181903,
      "eval_stsb_spearman": 0.8122117900533915,
      "step": 15375
    },
    {
      "epoch": 0.99,
      "learning_rate": 8.06348393702803e-08,
      "loss": 0.0005,
      "step": 15500
    },
    {
      "epoch": 0.99,
      "eval_avg_sts": 0.7614175786889548,
      "eval_sickr_spearman": 0.7106010293985044,
      "eval_stsb_spearman": 0.812234127979405,
      "step": 15500
    },
    {
      "epoch": 1.0,
      "eval_avg_sts": 0.7613506554623436,
      "eval_sickr_spearman": 0.7104816721114307,
      "eval_stsb_spearman": 0.8122196388132565,
      "step": 15625
    },
    {
      "epoch": 1.0,
      "step": 15626,
      "train_runtime": 4633.8657,
      "train_samples_per_second": 3.372
    }
  ],
  "max_steps": 15626,
  "num_train_epochs": 1,
  "total_flos": 119690591356191744,
  "trial_name": null,
  "trial_params": null
}