| { | |
| "best_metric": 0.8485214656023966, | |
| "best_model_checkpoint": "result/esimcse-bert-base-uncased-0.1-bpe-0.32-160-bs64", | |
| "epoch": 1.0, | |
| "global_step": 15626, | |
| "is_hyper_param_search": false, | |
| "is_local_process_zero": true, | |
| "is_world_process_zero": true, | |
| "log_history": [ | |
| { | |
| "epoch": 0.01, | |
| "eval_avg_sts": 0.6100880461913585, | |
| "eval_sickr_spearman": 0.5832078028364882, | |
| "eval_stsb_spearman": 0.6369682895462287, | |
| "step": 125 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_avg_sts": 0.6397205453652646, | |
| "eval_sickr_spearman": 0.616963004242962, | |
| "eval_stsb_spearman": 0.6624780864875672, | |
| "step": 250 | |
| }, | |
| { | |
| "epoch": 0.02, | |
| "eval_avg_sts": 0.6789809731427074, | |
| "eval_sickr_spearman": 0.6587511191783517, | |
| "eval_stsb_spearman": 0.6992108271070633, | |
| "step": 375 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "learning_rate": 2.9040061436068092e-05, | |
| "loss": 0.0108, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.03, | |
| "eval_avg_sts": 0.7335141964825347, | |
| "eval_sickr_spearman": 0.7049946471142421, | |
| "eval_stsb_spearman": 0.7620337458508273, | |
| "step": 500 | |
| }, | |
| { | |
| "epoch": 0.04, | |
| "eval_avg_sts": 0.7519583861186683, | |
| "eval_sickr_spearman": 0.7213448663236887, | |
| "eval_stsb_spearman": 0.782571905913648, | |
| "step": 625 | |
| }, | |
| { | |
| "epoch": 0.05, | |
| "eval_avg_sts": 0.7678435373865832, | |
| "eval_sickr_spearman": 0.7283978012899625, | |
| "eval_stsb_spearman": 0.8072892734832039, | |
| "step": 750 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_avg_sts": 0.7606834842782872, | |
| "eval_sickr_spearman": 0.719232746669049, | |
| "eval_stsb_spearman": 0.8021342218875254, | |
| "step": 875 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "learning_rate": 2.8080122872136184e-05, | |
| "loss": 0.0007, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.06, | |
| "eval_avg_sts": 0.7725764209069759, | |
| "eval_sickr_spearman": 0.7352987658512863, | |
| "eval_stsb_spearman": 0.8098540759626653, | |
| "step": 1000 | |
| }, | |
| { | |
| "epoch": 0.07, | |
| "eval_avg_sts": 0.749297683848744, | |
| "eval_sickr_spearman": 0.7265425039347125, | |
| "eval_stsb_spearman": 0.7720528637627754, | |
| "step": 1125 | |
| }, | |
| { | |
| "epoch": 0.08, | |
| "eval_avg_sts": 0.7274946000391853, | |
| "eval_sickr_spearman": 0.7107656319831328, | |
| "eval_stsb_spearman": 0.7442235680952377, | |
| "step": 1250 | |
| }, | |
| { | |
| "epoch": 0.09, | |
| "eval_avg_sts": 0.7476475148023185, | |
| "eval_sickr_spearman": 0.7236732700281041, | |
| "eval_stsb_spearman": 0.7716217595765328, | |
| "step": 1375 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "learning_rate": 2.7120184308204276e-05, | |
| "loss": 0.0006, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_avg_sts": 0.7566770100403266, | |
| "eval_sickr_spearman": 0.7318386053036846, | |
| "eval_stsb_spearman": 0.7815154147769687, | |
| "step": 1500 | |
| }, | |
| { | |
| "epoch": 0.1, | |
| "eval_avg_sts": 0.7600955301789452, | |
| "eval_sickr_spearman": 0.7316815916330834, | |
| "eval_stsb_spearman": 0.7885094687248068, | |
| "step": 1625 | |
| }, | |
| { | |
| "epoch": 0.11, | |
| "eval_avg_sts": 0.7270610894602345, | |
| "eval_sickr_spearman": 0.6906233092840458, | |
| "eval_stsb_spearman": 0.7634988696364233, | |
| "step": 1750 | |
| }, | |
| { | |
| "epoch": 0.12, | |
| "eval_avg_sts": 0.7308078942978946, | |
| "eval_sickr_spearman": 0.6979846039215525, | |
| "eval_stsb_spearman": 0.7636311846742367, | |
| "step": 1875 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "learning_rate": 2.6160245744272367e-05, | |
| "loss": 0.0004, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.13, | |
| "eval_avg_sts": 0.7009937398682455, | |
| "eval_sickr_spearman": 0.676284680633929, | |
| "eval_stsb_spearman": 0.7257027991025622, | |
| "step": 2000 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_avg_sts": 0.7145112612407412, | |
| "eval_sickr_spearman": 0.6811648326644469, | |
| "eval_stsb_spearman": 0.7478576898170354, | |
| "step": 2125 | |
| }, | |
| { | |
| "epoch": 0.14, | |
| "eval_avg_sts": 0.7269807482138698, | |
| "eval_sickr_spearman": 0.6930580538470452, | |
| "eval_stsb_spearman": 0.7609034425806944, | |
| "step": 2250 | |
| }, | |
| { | |
| "epoch": 0.15, | |
| "eval_avg_sts": 0.7876865693494822, | |
| "eval_sickr_spearman": 0.7498401337805735, | |
| "eval_stsb_spearman": 0.825533004918391, | |
| "step": 2375 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "learning_rate": 2.5200307180340456e-05, | |
| "loss": 0.0006, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.16, | |
| "eval_avg_sts": 0.7800273423845525, | |
| "eval_sickr_spearman": 0.7402713297209711, | |
| "eval_stsb_spearman": 0.8197833550481338, | |
| "step": 2500 | |
| }, | |
| { | |
| "epoch": 0.17, | |
| "eval_avg_sts": 0.7774661815337005, | |
| "eval_sickr_spearman": 0.7407253196917641, | |
| "eval_stsb_spearman": 0.8142070433756369, | |
| "step": 2625 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_avg_sts": 0.795139063492616, | |
| "eval_sickr_spearman": 0.7583184877442262, | |
| "eval_stsb_spearman": 0.8319596392410058, | |
| "step": 2750 | |
| }, | |
| { | |
| "epoch": 0.18, | |
| "eval_avg_sts": 0.8007774495654907, | |
| "eval_sickr_spearman": 0.7530334335285849, | |
| "eval_stsb_spearman": 0.8485214656023966, | |
| "step": 2875 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "learning_rate": 2.424036861640855e-05, | |
| "loss": 0.0005, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.19, | |
| "eval_avg_sts": 0.7910581147340385, | |
| "eval_sickr_spearman": 0.7508507561859327, | |
| "eval_stsb_spearman": 0.8312654732821445, | |
| "step": 3000 | |
| }, | |
| { | |
| "epoch": 0.2, | |
| "eval_avg_sts": 0.7798469609404326, | |
| "eval_sickr_spearman": 0.7498414786514137, | |
| "eval_stsb_spearman": 0.8098524432294515, | |
| "step": 3125 | |
| }, | |
| { | |
| "epoch": 0.21, | |
| "eval_avg_sts": 0.7100086424191657, | |
| "eval_sickr_spearman": 0.6737912900960736, | |
| "eval_stsb_spearman": 0.7462259947422578, | |
| "step": 3250 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_avg_sts": 0.7098691907547254, | |
| "eval_sickr_spearman": 0.6749191564200434, | |
| "eval_stsb_spearman": 0.7448192250894073, | |
| "step": 3375 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "learning_rate": 2.3280430052476642e-05, | |
| "loss": 0.0005, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.22, | |
| "eval_avg_sts": 0.7558791570716834, | |
| "eval_sickr_spearman": 0.7183665057546127, | |
| "eval_stsb_spearman": 0.793391808388754, | |
| "step": 3500 | |
| }, | |
| { | |
| "epoch": 0.23, | |
| "eval_avg_sts": 0.7724894260205434, | |
| "eval_sickr_spearman": 0.7317967221832306, | |
| "eval_stsb_spearman": 0.8131821298578562, | |
| "step": 3625 | |
| }, | |
| { | |
| "epoch": 0.24, | |
| "eval_avg_sts": 0.7735778094480449, | |
| "eval_sickr_spearman": 0.7319867332205197, | |
| "eval_stsb_spearman": 0.8151688856755701, | |
| "step": 3750 | |
| }, | |
| { | |
| "epoch": 0.25, | |
| "eval_avg_sts": 0.7753994732757892, | |
| "eval_sickr_spearman": 0.7344932362490677, | |
| "eval_stsb_spearman": 0.8163057103025108, | |
| "step": 3875 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "learning_rate": 2.2320491488544734e-05, | |
| "loss": 0.0005, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_avg_sts": 0.7645563589773194, | |
| "eval_sickr_spearman": 0.7288089955493742, | |
| "eval_stsb_spearman": 0.8003037224052648, | |
| "step": 4000 | |
| }, | |
| { | |
| "epoch": 0.26, | |
| "eval_avg_sts": 0.77217921247597, | |
| "eval_sickr_spearman": 0.7348872353741646, | |
| "eval_stsb_spearman": 0.8094711895777754, | |
| "step": 4125 | |
| }, | |
| { | |
| "epoch": 0.27, | |
| "eval_avg_sts": 0.7459204678567914, | |
| "eval_sickr_spearman": 0.7152080285551439, | |
| "eval_stsb_spearman": 0.7766329071584389, | |
| "step": 4250 | |
| }, | |
| { | |
| "epoch": 0.28, | |
| "eval_avg_sts": 0.7630540681359231, | |
| "eval_sickr_spearman": 0.7283370419466434, | |
| "eval_stsb_spearman": 0.7977710943252029, | |
| "step": 4375 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "learning_rate": 2.1360552924612826e-05, | |
| "loss": 0.0005, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.29, | |
| "eval_avg_sts": 0.7261107657045172, | |
| "eval_sickr_spearman": 0.6958098517106376, | |
| "eval_stsb_spearman": 0.7564116796983967, | |
| "step": 4500 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_avg_sts": 0.7402966337518628, | |
| "eval_sickr_spearman": 0.7071302059463835, | |
| "eval_stsb_spearman": 0.7734630615573422, | |
| "step": 4625 | |
| }, | |
| { | |
| "epoch": 0.3, | |
| "eval_avg_sts": 0.7563164963384006, | |
| "eval_sickr_spearman": 0.7236546819918475, | |
| "eval_stsb_spearman": 0.7889783106849536, | |
| "step": 4750 | |
| }, | |
| { | |
| "epoch": 0.31, | |
| "eval_avg_sts": 0.7653071609134422, | |
| "eval_sickr_spearman": 0.7303088147228807, | |
| "eval_stsb_spearman": 0.8003055071040037, | |
| "step": 4875 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "learning_rate": 2.0400614360680917e-05, | |
| "loss": 0.0003, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.32, | |
| "eval_avg_sts": 0.7611963687067248, | |
| "eval_sickr_spearman": 0.7255880779180358, | |
| "eval_stsb_spearman": 0.7968046594954138, | |
| "step": 5000 | |
| }, | |
| { | |
| "epoch": 0.33, | |
| "eval_avg_sts": 0.7744538434517783, | |
| "eval_sickr_spearman": 0.7411195109412667, | |
| "eval_stsb_spearman": 0.8077881759622897, | |
| "step": 5125 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_avg_sts": 0.7765902002597658, | |
| "eval_sickr_spearman": 0.7444951367503373, | |
| "eval_stsb_spearman": 0.8086852637691941, | |
| "step": 5250 | |
| }, | |
| { | |
| "epoch": 0.34, | |
| "eval_avg_sts": 0.7699546022654327, | |
| "eval_sickr_spearman": 0.7376793313318634, | |
| "eval_stsb_spearman": 0.8022298731990021, | |
| "step": 5375 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "learning_rate": 1.944067579674901e-05, | |
| "loss": 0.0005, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.35, | |
| "eval_avg_sts": 0.7734752595558289, | |
| "eval_sickr_spearman": 0.7410133141759871, | |
| "eval_stsb_spearman": 0.8059372049356706, | |
| "step": 5500 | |
| }, | |
| { | |
| "epoch": 0.36, | |
| "eval_avg_sts": 0.7626449726319405, | |
| "eval_sickr_spearman": 0.7365559319003274, | |
| "eval_stsb_spearman": 0.7887340133635536, | |
| "step": 5625 | |
| }, | |
| { | |
| "epoch": 0.37, | |
| "eval_avg_sts": 0.7687278256378415, | |
| "eval_sickr_spearman": 0.7398524504853293, | |
| "eval_stsb_spearman": 0.7976032007903536, | |
| "step": 5750 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_avg_sts": 0.7693941323907878, | |
| "eval_sickr_spearman": 0.7396455325003339, | |
| "eval_stsb_spearman": 0.7991427322812418, | |
| "step": 5875 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "learning_rate": 1.84807372328171e-05, | |
| "loss": 0.0005, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.38, | |
| "eval_avg_sts": 0.7574432337471098, | |
| "eval_sickr_spearman": 0.7347453034694149, | |
| "eval_stsb_spearman": 0.7801411640248046, | |
| "step": 6000 | |
| }, | |
| { | |
| "epoch": 0.39, | |
| "eval_avg_sts": 0.7708101615966496, | |
| "eval_sickr_spearman": 0.7320090676826885, | |
| "eval_stsb_spearman": 0.8096112555106109, | |
| "step": 6125 | |
| }, | |
| { | |
| "epoch": 0.4, | |
| "eval_avg_sts": 0.7635841238166383, | |
| "eval_sickr_spearman": 0.7288792650507782, | |
| "eval_stsb_spearman": 0.7982889825824986, | |
| "step": 6250 | |
| }, | |
| { | |
| "epoch": 0.41, | |
| "eval_avg_sts": 0.7627376362894998, | |
| "eval_sickr_spearman": 0.7300567955336349, | |
| "eval_stsb_spearman": 0.7954184770453646, | |
| "step": 6375 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "learning_rate": 1.7520798668885192e-05, | |
| "loss": 0.0005, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_avg_sts": 0.7591781252494068, | |
| "eval_sickr_spearman": 0.7227859434601362, | |
| "eval_stsb_spearman": 0.7955703070386773, | |
| "step": 6500 | |
| }, | |
| { | |
| "epoch": 0.42, | |
| "eval_avg_sts": 0.7527229076746123, | |
| "eval_sickr_spearman": 0.7119316830327459, | |
| "eval_stsb_spearman": 0.7935141323164787, | |
| "step": 6625 | |
| }, | |
| { | |
| "epoch": 0.43, | |
| "eval_avg_sts": 0.7584939992660324, | |
| "eval_sickr_spearman": 0.7191479717750108, | |
| "eval_stsb_spearman": 0.7978400267570542, | |
| "step": 6750 | |
| }, | |
| { | |
| "epoch": 0.44, | |
| "eval_avg_sts": 0.7630465793985837, | |
| "eval_sickr_spearman": 0.7249168913065396, | |
| "eval_stsb_spearman": 0.8011762674906276, | |
| "step": 6875 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "learning_rate": 1.6560860104953284e-05, | |
| "loss": 0.0004, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.45, | |
| "eval_avg_sts": 0.7655527346776364, | |
| "eval_sickr_spearman": 0.7304577591684402, | |
| "eval_stsb_spearman": 0.8006477101868326, | |
| "step": 7000 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_avg_sts": 0.7395454013504682, | |
| "eval_sickr_spearman": 0.7105281181865213, | |
| "eval_stsb_spearman": 0.7685626845144151, | |
| "step": 7125 | |
| }, | |
| { | |
| "epoch": 0.46, | |
| "eval_avg_sts": 0.7229578267812158, | |
| "eval_sickr_spearman": 0.6942447582702768, | |
| "eval_stsb_spearman": 0.7516708952921549, | |
| "step": 7250 | |
| }, | |
| { | |
| "epoch": 0.47, | |
| "eval_avg_sts": 0.7452003408688526, | |
| "eval_sickr_spearman": 0.7146070634139502, | |
| "eval_stsb_spearman": 0.775793618323755, | |
| "step": 7375 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "learning_rate": 1.5600921541021372e-05, | |
| "loss": 0.0005, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.48, | |
| "eval_avg_sts": 0.7600508627663232, | |
| "eval_sickr_spearman": 0.7285831533104121, | |
| "eval_stsb_spearman": 0.7915185722222341, | |
| "step": 7500 | |
| }, | |
| { | |
| "epoch": 0.49, | |
| "eval_avg_sts": 0.7674997774484139, | |
| "eval_sickr_spearman": 0.7341920332119493, | |
| "eval_stsb_spearman": 0.8008075216848786, | |
| "step": 7625 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_avg_sts": 0.7668269678433662, | |
| "eval_sickr_spearman": 0.7276831399805795, | |
| "eval_stsb_spearman": 0.8059707957061529, | |
| "step": 7750 | |
| }, | |
| { | |
| "epoch": 0.5, | |
| "eval_avg_sts": 0.7759267074313813, | |
| "eval_sickr_spearman": 0.7360665910388758, | |
| "eval_stsb_spearman": 0.8157868238238867, | |
| "step": 7875 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "learning_rate": 1.4640982977089467e-05, | |
| "loss": 0.0004, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.51, | |
| "eval_avg_sts": 0.7694891376849935, | |
| "eval_sickr_spearman": 0.7355848390914517, | |
| "eval_stsb_spearman": 0.8033934362785352, | |
| "step": 8000 | |
| }, | |
| { | |
| "epoch": 0.52, | |
| "eval_avg_sts": 0.7612899584011228, | |
| "eval_sickr_spearman": 0.7261187735578255, | |
| "eval_stsb_spearman": 0.79646114324442, | |
| "step": 8125 | |
| }, | |
| { | |
| "epoch": 0.53, | |
| "eval_avg_sts": 0.7590520798954239, | |
| "eval_sickr_spearman": 0.7263883721301976, | |
| "eval_stsb_spearman": 0.7917157876606503, | |
| "step": 8250 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_avg_sts": 0.7681689830272878, | |
| "eval_sickr_spearman": 0.7343635698999812, | |
| "eval_stsb_spearman": 0.8019743961545942, | |
| "step": 8375 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "learning_rate": 1.368104441315756e-05, | |
| "loss": 0.0003, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.54, | |
| "eval_avg_sts": 0.7645627255636145, | |
| "eval_sickr_spearman": 0.7345889622342338, | |
| "eval_stsb_spearman": 0.7945364888929952, | |
| "step": 8500 | |
| }, | |
| { | |
| "epoch": 0.55, | |
| "eval_avg_sts": 0.7519813686443279, | |
| "eval_sickr_spearman": 0.7268361660889052, | |
| "eval_stsb_spearman": 0.7771265711997505, | |
| "step": 8625 | |
| }, | |
| { | |
| "epoch": 0.56, | |
| "eval_avg_sts": 0.7570879059121025, | |
| "eval_sickr_spearman": 0.7295248030541064, | |
| "eval_stsb_spearman": 0.7846510087700986, | |
| "step": 8750 | |
| }, | |
| { | |
| "epoch": 0.57, | |
| "eval_avg_sts": 0.7651801271677768, | |
| "eval_sickr_spearman": 0.7324815976586366, | |
| "eval_stsb_spearman": 0.797878656676917, | |
| "step": 8875 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "learning_rate": 1.2721105849225649e-05, | |
| "loss": 0.0004, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_avg_sts": 0.7712013823080692, | |
| "eval_sickr_spearman": 0.7386502800474348, | |
| "eval_stsb_spearman": 0.8037524845687036, | |
| "step": 9000 | |
| }, | |
| { | |
| "epoch": 0.58, | |
| "eval_avg_sts": 0.7620043628899366, | |
| "eval_sickr_spearman": 0.7352328191490116, | |
| "eval_stsb_spearman": 0.7887759066308615, | |
| "step": 9125 | |
| }, | |
| { | |
| "epoch": 0.59, | |
| "eval_avg_sts": 0.7406795525094927, | |
| "eval_sickr_spearman": 0.7231166856246392, | |
| "eval_stsb_spearman": 0.7582424193943462, | |
| "step": 9250 | |
| }, | |
| { | |
| "epoch": 0.6, | |
| "eval_avg_sts": 0.7653971603316696, | |
| "eval_sickr_spearman": 0.7354289301361836, | |
| "eval_stsb_spearman": 0.7953653905271556, | |
| "step": 9375 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "learning_rate": 1.176116728529374e-05, | |
| "loss": 0.0005, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.61, | |
| "eval_avg_sts": 0.7646069433356613, | |
| "eval_sickr_spearman": 0.7343778655434133, | |
| "eval_stsb_spearman": 0.7948360211279091, | |
| "step": 9500 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_avg_sts": 0.7593680988773044, | |
| "eval_sickr_spearman": 0.732813492569574, | |
| "eval_stsb_spearman": 0.7859227051850348, | |
| "step": 9625 | |
| }, | |
| { | |
| "epoch": 0.62, | |
| "eval_avg_sts": 0.7587276133400723, | |
| "eval_sickr_spearman": 0.7302601592171238, | |
| "eval_stsb_spearman": 0.7871950674630208, | |
| "step": 9750 | |
| }, | |
| { | |
| "epoch": 0.63, | |
| "eval_avg_sts": 0.7470220132260748, | |
| "eval_sickr_spearman": 0.716697953321754, | |
| "eval_stsb_spearman": 0.7773460731303955, | |
| "step": 9875 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "learning_rate": 1.0801228721361832e-05, | |
| "loss": 0.0003, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.64, | |
| "eval_avg_sts": 0.7602204587910995, | |
| "eval_sickr_spearman": 0.7292309487755081, | |
| "eval_stsb_spearman": 0.7912099688066908, | |
| "step": 10000 | |
| }, | |
| { | |
| "epoch": 0.65, | |
| "eval_avg_sts": 0.7763161485207313, | |
| "eval_sickr_spearman": 0.7364229337804452, | |
| "eval_stsb_spearman": 0.8162093632610175, | |
| "step": 10125 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_avg_sts": 0.7776963975561555, | |
| "eval_sickr_spearman": 0.7362918569046206, | |
| "eval_stsb_spearman": 0.8191009382076904, | |
| "step": 10250 | |
| }, | |
| { | |
| "epoch": 0.66, | |
| "eval_avg_sts": 0.7769543328958908, | |
| "eval_sickr_spearman": 0.7370006998996443, | |
| "eval_stsb_spearman": 0.8169079658921373, | |
| "step": 10375 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "learning_rate": 9.841290157429926e-06, | |
| "loss": 0.0005, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.67, | |
| "eval_avg_sts": 0.7786140293258825, | |
| "eval_sickr_spearman": 0.7414595711394486, | |
| "eval_stsb_spearman": 0.8157684875123163, | |
| "step": 10500 | |
| }, | |
| { | |
| "epoch": 0.68, | |
| "eval_avg_sts": 0.77959121226111, | |
| "eval_sickr_spearman": 0.7405204670441307, | |
| "eval_stsb_spearman": 0.8186619574780892, | |
| "step": 10625 | |
| }, | |
| { | |
| "epoch": 0.69, | |
| "eval_avg_sts": 0.7779319076421938, | |
| "eval_sickr_spearman": 0.7420048682340755, | |
| "eval_stsb_spearman": 0.813858947050312, | |
| "step": 10750 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_avg_sts": 0.7775449329956121, | |
| "eval_sickr_spearman": 0.7400306458716649, | |
| "eval_stsb_spearman": 0.8150592201195593, | |
| "step": 10875 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "learning_rate": 8.881351593498018e-06, | |
| "loss": 0.0004, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.7, | |
| "eval_avg_sts": 0.778286005470163, | |
| "eval_sickr_spearman": 0.7363517997192154, | |
| "eval_stsb_spearman": 0.8202202112211106, | |
| "step": 11000 | |
| }, | |
| { | |
| "epoch": 0.71, | |
| "eval_avg_sts": 0.777618581685105, | |
| "eval_sickr_spearman": 0.7384216520045894, | |
| "eval_stsb_spearman": 0.8168155113656206, | |
| "step": 11125 | |
| }, | |
| { | |
| "epoch": 0.72, | |
| "eval_avg_sts": 0.7762628934419535, | |
| "eval_sickr_spearman": 0.7332475976643716, | |
| "eval_stsb_spearman": 0.8192781892195354, | |
| "step": 11250 | |
| }, | |
| { | |
| "epoch": 0.73, | |
| "eval_avg_sts": 0.7770120788854875, | |
| "eval_sickr_spearman": 0.733883289291905, | |
| "eval_stsb_spearman": 0.8201408684790699, | |
| "step": 11375 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "learning_rate": 7.921413029566108e-06, | |
| "loss": 0.0004, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_avg_sts": 0.7750375125081059, | |
| "eval_sickr_spearman": 0.7312441243611851, | |
| "eval_stsb_spearman": 0.8188309006550266, | |
| "step": 11500 | |
| }, | |
| { | |
| "epoch": 0.74, | |
| "eval_avg_sts": 0.7713758693823137, | |
| "eval_sickr_spearman": 0.7301794189356065, | |
| "eval_stsb_spearman": 0.812572319829021, | |
| "step": 11625 | |
| }, | |
| { | |
| "epoch": 0.75, | |
| "eval_avg_sts": 0.7729201610115823, | |
| "eval_sickr_spearman": 0.7338767570621094, | |
| "eval_stsb_spearman": 0.8119635649610553, | |
| "step": 11750 | |
| }, | |
| { | |
| "epoch": 0.76, | |
| "eval_avg_sts": 0.7721840581068564, | |
| "eval_sickr_spearman": 0.7340904474324076, | |
| "eval_stsb_spearman": 0.8102776687813052, | |
| "step": 11875 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "learning_rate": 6.961474465634199e-06, | |
| "loss": 0.0006, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.77, | |
| "eval_avg_sts": 0.762864859686734, | |
| "eval_sickr_spearman": 0.7334133049643331, | |
| "eval_stsb_spearman": 0.7923164144091348, | |
| "step": 12000 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_avg_sts": 0.7653804206448536, | |
| "eval_sickr_spearman": 0.7362588595379327, | |
| "eval_stsb_spearman": 0.7945019817517746, | |
| "step": 12125 | |
| }, | |
| { | |
| "epoch": 0.78, | |
| "eval_avg_sts": 0.770245273378219, | |
| "eval_sickr_spearman": 0.7383542643692718, | |
| "eval_stsb_spearman": 0.8021362823871663, | |
| "step": 12250 | |
| }, | |
| { | |
| "epoch": 0.79, | |
| "eval_avg_sts": 0.769823742960263, | |
| "eval_sickr_spearman": 0.7377583424937291, | |
| "eval_stsb_spearman": 0.8018891434267971, | |
| "step": 12375 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "learning_rate": 6.001535901702292e-06, | |
| "loss": 0.0006, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.8, | |
| "eval_avg_sts": 0.7643310786346977, | |
| "eval_sickr_spearman": 0.7368435421357389, | |
| "eval_stsb_spearman": 0.7918186151336565, | |
| "step": 12500 | |
| }, | |
| { | |
| "epoch": 0.81, | |
| "eval_avg_sts": 0.7478178784310427, | |
| "eval_sickr_spearman": 0.7295915662851054, | |
| "eval_stsb_spearman": 0.76604419057698, | |
| "step": 12625 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_avg_sts": 0.7521749358657968, | |
| "eval_sickr_spearman": 0.7287060648989924, | |
| "eval_stsb_spearman": 0.7756438068326013, | |
| "step": 12750 | |
| }, | |
| { | |
| "epoch": 0.82, | |
| "eval_avg_sts": 0.7552169285539614, | |
| "eval_sickr_spearman": 0.7275865559866728, | |
| "eval_stsb_spearman": 0.7828473011212499, | |
| "step": 12875 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "learning_rate": 5.0415973377703825e-06, | |
| "loss": 0.0006, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.83, | |
| "eval_avg_sts": 0.7586663192427485, | |
| "eval_sickr_spearman": 0.7291622643004515, | |
| "eval_stsb_spearman": 0.7881703741850457, | |
| "step": 13000 | |
| }, | |
| { | |
| "epoch": 0.84, | |
| "eval_avg_sts": 0.7590819682264265, | |
| "eval_sickr_spearman": 0.7291833019228814, | |
| "eval_stsb_spearman": 0.7889806345299715, | |
| "step": 13125 | |
| }, | |
| { | |
| "epoch": 0.85, | |
| "eval_avg_sts": 0.7599496756491293, | |
| "eval_sickr_spearman": 0.7258233342528796, | |
| "eval_stsb_spearman": 0.794076017045379, | |
| "step": 13250 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_avg_sts": 0.7620834946535875, | |
| "eval_sickr_spearman": 0.7272929418635816, | |
| "eval_stsb_spearman": 0.7968740474435934, | |
| "step": 13375 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "learning_rate": 4.081658773838474e-06, | |
| "loss": 0.0005, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.86, | |
| "eval_avg_sts": 0.7638553003387081, | |
| "eval_sickr_spearman": 0.7278715725426066, | |
| "eval_stsb_spearman": 0.7998390281348094, | |
| "step": 13500 | |
| }, | |
| { | |
| "epoch": 0.87, | |
| "eval_avg_sts": 0.764322584988894, | |
| "eval_sickr_spearman": 0.7266260300201134, | |
| "eval_stsb_spearman": 0.8020191399576747, | |
| "step": 13625 | |
| }, | |
| { | |
| "epoch": 0.88, | |
| "eval_avg_sts": 0.7648945618293426, | |
| "eval_sickr_spearman": 0.7271069174077117, | |
| "eval_stsb_spearman": 0.8026822062509734, | |
| "step": 13750 | |
| }, | |
| { | |
| "epoch": 0.89, | |
| "eval_avg_sts": 0.7669716790989545, | |
| "eval_sickr_spearman": 0.7275461137992619, | |
| "eval_stsb_spearman": 0.8063972443986471, | |
| "step": 13875 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "learning_rate": 3.121720209906566e-06, | |
| "loss": 0.0004, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_avg_sts": 0.7674021960316728, | |
| "eval_sickr_spearman": 0.727724597372206, | |
| "eval_stsb_spearman": 0.8070797946911397, | |
| "step": 14000 | |
| }, | |
| { | |
| "epoch": 0.9, | |
| "eval_avg_sts": 0.7683497505006873, | |
| "eval_sickr_spearman": 0.7281690291538128, | |
| "eval_stsb_spearman": 0.8085304718475618, | |
| "step": 14125 | |
| }, | |
| { | |
| "epoch": 0.91, | |
| "eval_avg_sts": 0.7680069912021876, | |
| "eval_sickr_spearman": 0.7273477933814239, | |
| "eval_stsb_spearman": 0.8086661890229514, | |
| "step": 14250 | |
| }, | |
| { | |
| "epoch": 0.92, | |
| "eval_avg_sts": 0.76821821049172, | |
| "eval_sickr_spearman": 0.7266917845979821, | |
| "eval_stsb_spearman": 0.8097446363854579, | |
| "step": 14375 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "learning_rate": 2.1617816459746575e-06, | |
| "loss": 0.0003, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.93, | |
| "eval_avg_sts": 0.7677497628237885, | |
| "eval_sickr_spearman": 0.7264186797552051, | |
| "eval_stsb_spearman": 0.8090808458923718, | |
| "step": 14500 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_avg_sts": 0.7680442088137338, | |
| "eval_sickr_spearman": 0.7256543608380205, | |
| "eval_stsb_spearman": 0.8104340567894471, | |
| "step": 14625 | |
| }, | |
| { | |
| "epoch": 0.94, | |
| "eval_avg_sts": 0.7698268910330168, | |
| "eval_sickr_spearman": 0.7279500553623565, | |
| "eval_stsb_spearman": 0.8117037267036772, | |
| "step": 14750 | |
| }, | |
| { | |
| "epoch": 0.95, | |
| "eval_avg_sts": 0.7708269865099182, | |
| "eval_sickr_spearman": 0.7296867639281557, | |
| "eval_stsb_spearman": 0.8119672090916807, | |
| "step": 14875 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "learning_rate": 1.2018430820427492e-06, | |
| "loss": 0.0007, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.96, | |
| "eval_avg_sts": 0.7723124686000136, | |
| "eval_sickr_spearman": 0.7316718893505929, | |
| "eval_stsb_spearman": 0.8129530478494341, | |
| "step": 15000 | |
| }, | |
| { | |
| "epoch": 0.97, | |
| "eval_avg_sts": 0.7726351825661049, | |
| "eval_sickr_spearman": 0.7322496074386905, | |
| "eval_stsb_spearman": 0.8130207576935193, | |
| "step": 15125 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_avg_sts": 0.7724775113219677, | |
| "eval_sickr_spearman": 0.7322382240676497, | |
| "eval_stsb_spearman": 0.8127167985762856, | |
| "step": 15250 | |
| }, | |
| { | |
| "epoch": 0.98, | |
| "eval_avg_sts": 0.7726148952184836, | |
| "eval_sickr_spearman": 0.732653981281698, | |
| "eval_stsb_spearman": 0.8125758091552692, | |
| "step": 15375 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "learning_rate": 2.4190451811084087e-07, | |
| "loss": 0.0004, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 0.99, | |
| "eval_avg_sts": 0.7726350099777579, | |
| "eval_sickr_spearman": 0.7326541734061037, | |
| "eval_stsb_spearman": 0.812615846549412, | |
| "step": 15500 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "eval_avg_sts": 0.7724457544885928, | |
| "eval_sickr_spearman": 0.7325294846667704, | |
| "eval_stsb_spearman": 0.8123620243104152, | |
| "step": 15625 | |
| }, | |
| { | |
| "epoch": 1.0, | |
| "step": 15626, | |
| "train_runtime": 4555.3265, | |
| "train_samples_per_second": 3.43 | |
| } | |
| ], | |
| "max_steps": 15626, | |
| "num_train_epochs": 1, | |
| "total_flos": 107685402744674304, | |
| "trial_name": null, | |
| "trial_params": null | |
| } | |