| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.200557103064067, |
| "eval_steps": 5, |
| "global_step": 405, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.11142061281337047, |
| "eval_loss": 0.037675488740205765, |
| "eval_runtime": 6.7342, |
| "eval_samples_per_second": 222.743, |
| "eval_steps_per_second": 27.917, |
| "eval_sts_dev_pearson_cosine": 0.7494115429773479, |
| "eval_sts_dev_pearson_dot": 0.6583752142885668, |
| "eval_sts_dev_pearson_euclidean": 0.6941454281465765, |
| "eval_sts_dev_pearson_manhattan": 0.6964259759684527, |
| "eval_sts_dev_pearson_max": 0.7494115429773479, |
| "eval_sts_dev_spearman_cosine": 0.7470700524367354, |
| "eval_sts_dev_spearman_dot": 0.6497928276890669, |
| "eval_sts_dev_spearman_euclidean": 0.684590776689316, |
| "eval_sts_dev_spearman_manhattan": 0.6873610947323412, |
| "eval_sts_dev_spearman_max": 0.7470700524367354, |
| "step": 5 |
| }, |
| { |
| "epoch": 0.22284122562674094, |
| "grad_norm": 6.7429633140563965, |
| "learning_rate": 6.957731779439903e-08, |
| "loss": 0.6923, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.22284122562674094, |
| "eval_loss": 0.0376589335501194, |
| "eval_runtime": 6.8403, |
| "eval_samples_per_second": 219.29, |
| "eval_steps_per_second": 27.484, |
| "eval_sts_dev_pearson_cosine": 0.7494940477075391, |
| "eval_sts_dev_pearson_dot": 0.6584328702717946, |
| "eval_sts_dev_pearson_euclidean": 0.6942213054869852, |
| "eval_sts_dev_pearson_manhattan": 0.6965001647458872, |
| "eval_sts_dev_pearson_max": 0.7494940477075391, |
| "eval_sts_dev_spearman_cosine": 0.7471377072884906, |
| "eval_sts_dev_spearman_dot": 0.6498755431337675, |
| "eval_sts_dev_spearman_euclidean": 0.6846545112671376, |
| "eval_sts_dev_spearman_manhattan": 0.687454500948251, |
| "eval_sts_dev_spearman_max": 0.7471377072884906, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.3342618384401114, |
| "eval_loss": 0.03763080760836601, |
| "eval_runtime": 6.9686, |
| "eval_samples_per_second": 215.253, |
| "eval_steps_per_second": 26.978, |
| "eval_sts_dev_pearson_cosine": 0.7496395035968593, |
| "eval_sts_dev_pearson_dot": 0.6585292611324672, |
| "eval_sts_dev_pearson_euclidean": 0.6943597344549325, |
| "eval_sts_dev_pearson_manhattan": 0.6966356509027943, |
| "eval_sts_dev_pearson_max": 0.7496395035968593, |
| "eval_sts_dev_spearman_cosine": 0.747293071934341, |
| "eval_sts_dev_spearman_dot": 0.6499672916131112, |
| "eval_sts_dev_spearman_euclidean": 0.6848464778088699, |
| "eval_sts_dev_spearman_manhattan": 0.6875927784863133, |
| "eval_sts_dev_spearman_max": 0.747293071934341, |
| "step": 15 |
| }, |
| { |
| "epoch": 0.4456824512534819, |
| "grad_norm": 7.523725986480713, |
| "learning_rate": 1.3915463558879807e-07, |
| "loss": 0.6832, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.4456824512534819, |
| "eval_loss": 0.03759082034230232, |
| "eval_runtime": 6.9891, |
| "eval_samples_per_second": 214.62, |
| "eval_steps_per_second": 26.899, |
| "eval_sts_dev_pearson_cosine": 0.7498515127163549, |
| "eval_sts_dev_pearson_dot": 0.6586892126695529, |
| "eval_sts_dev_pearson_euclidean": 0.6945632277600391, |
| "eval_sts_dev_pearson_manhattan": 0.6968351270246123, |
| "eval_sts_dev_pearson_max": 0.7498515127163549, |
| "eval_sts_dev_spearman_cosine": 0.7475384213284385, |
| "eval_sts_dev_spearman_dot": 0.6500677799755323, |
| "eval_sts_dev_spearman_euclidean": 0.6850767084625934, |
| "eval_sts_dev_spearman_manhattan": 0.6877654793239389, |
| "eval_sts_dev_spearman_max": 0.7475384213284385, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.5571030640668524, |
| "eval_loss": 0.03754143416881561, |
| "eval_runtime": 6.8247, |
| "eval_samples_per_second": 219.79, |
| "eval_steps_per_second": 27.547, |
| "eval_sts_dev_pearson_cosine": 0.7501122656435163, |
| "eval_sts_dev_pearson_dot": 0.6588212748683685, |
| "eval_sts_dev_pearson_euclidean": 0.6948708332777139, |
| "eval_sts_dev_pearson_manhattan": 0.6971351224061912, |
| "eval_sts_dev_pearson_max": 0.7501122656435163, |
| "eval_sts_dev_spearman_cosine": 0.7478755024321192, |
| "eval_sts_dev_spearman_dot": 0.6502167543650381, |
| "eval_sts_dev_spearman_euclidean": 0.6854436169483377, |
| "eval_sts_dev_spearman_manhattan": 0.6880846722054696, |
| "eval_sts_dev_spearman_max": 0.7478755024321192, |
| "step": 25 |
| }, |
| { |
| "epoch": 0.6685236768802229, |
| "grad_norm": 7.176445960998535, |
| "learning_rate": 2.0873195338319708e-07, |
| "loss": 0.6787, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.6685236768802229, |
| "eval_loss": 0.037479061633348465, |
| "eval_runtime": 6.9898, |
| "eval_samples_per_second": 214.598, |
| "eval_steps_per_second": 26.896, |
| "eval_sts_dev_pearson_cosine": 0.7504502235424245, |
| "eval_sts_dev_pearson_dot": 0.6589729935526047, |
| "eval_sts_dev_pearson_euclidean": 0.6952782546669927, |
| "eval_sts_dev_pearson_manhattan": 0.6975315748701472, |
| "eval_sts_dev_pearson_max": 0.7504502235424245, |
| "eval_sts_dev_spearman_cosine": 0.7483727549578874, |
| "eval_sts_dev_spearman_dot": 0.6502927839552382, |
| "eval_sts_dev_spearman_euclidean": 0.6858779938781956, |
| "eval_sts_dev_spearman_manhattan": 0.6885426870287449, |
| "eval_sts_dev_spearman_max": 0.7483727549578874, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.7799442896935933, |
| "eval_loss": 0.03741108253598213, |
| "eval_runtime": 7.0272, |
| "eval_samples_per_second": 213.456, |
| "eval_steps_per_second": 26.753, |
| "eval_sts_dev_pearson_cosine": 0.7508317539918448, |
| "eval_sts_dev_pearson_dot": 0.6592089487188968, |
| "eval_sts_dev_pearson_euclidean": 0.6957145823768739, |
| "eval_sts_dev_pearson_manhattan": 0.6979566424519045, |
| "eval_sts_dev_pearson_max": 0.7508317539918448, |
| "eval_sts_dev_spearman_cosine": 0.7488095875667629, |
| "eval_sts_dev_spearman_dot": 0.6505123414164061, |
| "eval_sts_dev_spearman_euclidean": 0.6863890021142346, |
| "eval_sts_dev_spearman_manhattan": 0.6889574531430644, |
| "eval_sts_dev_spearman_max": 0.7488095875667629, |
| "step": 35 |
| }, |
| { |
| "epoch": 0.8913649025069638, |
| "grad_norm": 5.811614036560059, |
| "learning_rate": 2.7830927117759614e-07, |
| "loss": 0.6154, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.8913649025069638, |
| "eval_loss": 0.03732568398118019, |
| "eval_runtime": 6.9675, |
| "eval_samples_per_second": 215.286, |
| "eval_steps_per_second": 26.983, |
| "eval_sts_dev_pearson_cosine": 0.7512943163494744, |
| "eval_sts_dev_pearson_dot": 0.6595252251920851, |
| "eval_sts_dev_pearson_euclidean": 0.6961931337237875, |
| "eval_sts_dev_pearson_manhattan": 0.6984244275683631, |
| "eval_sts_dev_pearson_max": 0.7512943163494744, |
| "eval_sts_dev_spearman_cosine": 0.7494136836534844, |
| "eval_sts_dev_spearman_dot": 0.6507477353375185, |
| "eval_sts_dev_spearman_euclidean": 0.6869101418254764, |
| "eval_sts_dev_spearman_manhattan": 0.6894841894318411, |
| "eval_sts_dev_spearman_max": 0.7494136836534844, |
| "step": 40 |
| }, |
| { |
| "epoch": 1.0222841225626742, |
| "eval_loss": 0.03723177686333656, |
| "eval_runtime": 6.807, |
| "eval_samples_per_second": 220.362, |
| "eval_steps_per_second": 27.619, |
| "eval_sts_dev_pearson_cosine": 0.7518064092372247, |
| "eval_sts_dev_pearson_dot": 0.659916579804052, |
| "eval_sts_dev_pearson_euclidean": 0.6966963956623822, |
| "eval_sts_dev_pearson_manhattan": 0.6989173818306955, |
| "eval_sts_dev_pearson_max": 0.7518064092372247, |
| "eval_sts_dev_spearman_cosine": 0.7499673977394428, |
| "eval_sts_dev_spearman_dot": 0.6509971484372724, |
| "eval_sts_dev_spearman_euclidean": 0.6874727739859278, |
| "eval_sts_dev_spearman_manhattan": 0.6900562750157024, |
| "eval_sts_dev_spearman_max": 0.7499673977394428, |
| "step": 45 |
| }, |
| { |
| "epoch": 1.1337047353760445, |
| "grad_norm": 5.788002014160156, |
| "learning_rate": 3.4788658897199517e-07, |
| "loss": 0.6231, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.1337047353760445, |
| "eval_loss": 0.03712593764066696, |
| "eval_runtime": 6.8692, |
| "eval_samples_per_second": 218.366, |
| "eval_steps_per_second": 27.369, |
| "eval_sts_dev_pearson_cosine": 0.7523963675159875, |
| "eval_sts_dev_pearson_dot": 0.6603951554863274, |
| "eval_sts_dev_pearson_euclidean": 0.6972537699536556, |
| "eval_sts_dev_pearson_manhattan": 0.6994643420859175, |
| "eval_sts_dev_pearson_max": 0.7523963675159875, |
| "eval_sts_dev_spearman_cosine": 0.750612531997651, |
| "eval_sts_dev_spearman_dot": 0.6513909659491809, |
| "eval_sts_dev_spearman_euclidean": 0.6881037653193015, |
| "eval_sts_dev_spearman_manhattan": 0.690698809264565, |
| "eval_sts_dev_spearman_max": 0.750612531997651, |
| "step": 50 |
| }, |
| { |
| "epoch": 1.2451253481894151, |
| "eval_loss": 0.03701437637209892, |
| "eval_runtime": 6.9079, |
| "eval_samples_per_second": 217.142, |
| "eval_steps_per_second": 27.215, |
| "eval_sts_dev_pearson_cosine": 0.7530298660085821, |
| "eval_sts_dev_pearson_dot": 0.6608066497022934, |
| "eval_sts_dev_pearson_euclidean": 0.6979265726405308, |
| "eval_sts_dev_pearson_manhattan": 0.7001220235641434, |
| "eval_sts_dev_pearson_max": 0.7530298660085821, |
| "eval_sts_dev_spearman_cosine": 0.7512373553393066, |
| "eval_sts_dev_spearman_dot": 0.6517398579494034, |
| "eval_sts_dev_spearman_euclidean": 0.6888011095183327, |
| "eval_sts_dev_spearman_manhattan": 0.691414492932023, |
| "eval_sts_dev_spearman_max": 0.7512373553393066, |
| "step": 55 |
| }, |
| { |
| "epoch": 1.3565459610027855, |
| "grad_norm": 6.15402889251709, |
| "learning_rate": 4.1746390676639416e-07, |
| "loss": 0.6562, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.3565459610027855, |
| "eval_loss": 0.03689862787723541, |
| "eval_runtime": 7.0618, |
| "eval_samples_per_second": 212.409, |
| "eval_steps_per_second": 26.622, |
| "eval_sts_dev_pearson_cosine": 0.7536755590598476, |
| "eval_sts_dev_pearson_dot": 0.6612397236569308, |
| "eval_sts_dev_pearson_euclidean": 0.6986261571902858, |
| "eval_sts_dev_pearson_manhattan": 0.7008037618197723, |
| "eval_sts_dev_pearson_max": 0.7536755590598476, |
| "eval_sts_dev_spearman_cosine": 0.7518746736763288, |
| "eval_sts_dev_spearman_dot": 0.6520463167363649, |
| "eval_sts_dev_spearman_euclidean": 0.6896250409475332, |
| "eval_sts_dev_spearman_manhattan": 0.6921595229559657, |
| "eval_sts_dev_spearman_max": 0.7518746736763288, |
| "step": 60 |
| }, |
| { |
| "epoch": 1.467966573816156, |
| "eval_loss": 0.03677487000823021, |
| "eval_runtime": 7.0563, |
| "eval_samples_per_second": 212.575, |
| "eval_steps_per_second": 26.643, |
| "eval_sts_dev_pearson_cosine": 0.7543139332813571, |
| "eval_sts_dev_pearson_dot": 0.6616585186532069, |
| "eval_sts_dev_pearson_euclidean": 0.6993248351610868, |
| "eval_sts_dev_pearson_manhattan": 0.701480628825091, |
| "eval_sts_dev_pearson_max": 0.7543139332813571, |
| "eval_sts_dev_spearman_cosine": 0.7525649831393398, |
| "eval_sts_dev_spearman_dot": 0.6522844686788962, |
| "eval_sts_dev_spearman_euclidean": 0.6904248656764869, |
| "eval_sts_dev_spearman_manhattan": 0.6929891697203803, |
| "eval_sts_dev_spearman_max": 0.7525649831393398, |
| "step": 65 |
| }, |
| { |
| "epoch": 1.5793871866295266, |
| "grad_norm": 7.177963733673096, |
| "learning_rate": 4.870412245607932e-07, |
| "loss": 0.6578, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.5793871866295266, |
| "eval_loss": 0.036648884415626526, |
| "eval_runtime": 6.959, |
| "eval_samples_per_second": 215.549, |
| "eval_steps_per_second": 27.015, |
| "eval_sts_dev_pearson_cosine": 0.7550016826683398, |
| "eval_sts_dev_pearson_dot": 0.6621754750211006, |
| "eval_sts_dev_pearson_euclidean": 0.7000072525876023, |
| "eval_sts_dev_pearson_manhattan": 0.7021439545430929, |
| "eval_sts_dev_pearson_max": 0.7550016826683398, |
| "eval_sts_dev_spearman_cosine": 0.7533627904462605, |
| "eval_sts_dev_spearman_dot": 0.6528254486243784, |
| "eval_sts_dev_spearman_euclidean": 0.6912255844955055, |
| "eval_sts_dev_spearman_manhattan": 0.693756112728956, |
| "eval_sts_dev_spearman_max": 0.7533627904462605, |
| "step": 70 |
| }, |
| { |
| "epoch": 1.690807799442897, |
| "eval_loss": 0.03651271015405655, |
| "eval_runtime": 7.007, |
| "eval_samples_per_second": 214.073, |
| "eval_steps_per_second": 26.83, |
| "eval_sts_dev_pearson_cosine": 0.7558035417500417, |
| "eval_sts_dev_pearson_dot": 0.6628193044191966, |
| "eval_sts_dev_pearson_euclidean": 0.7007736916543533, |
| "eval_sts_dev_pearson_manhattan": 0.7028932537624881, |
| "eval_sts_dev_pearson_max": 0.7558035417500417, |
| "eval_sts_dev_spearman_cosine": 0.7541058046949494, |
| "eval_sts_dev_spearman_dot": 0.6534460767465545, |
| "eval_sts_dev_spearman_euclidean": 0.6920637091980477, |
| "eval_sts_dev_spearman_manhattan": 0.6945583716986528, |
| "eval_sts_dev_spearman_max": 0.7541058046949494, |
| "step": 75 |
| }, |
| { |
| "epoch": 1.8022284122562673, |
| "grad_norm": 5.0526251792907715, |
| "learning_rate": 5.566185423551923e-07, |
| "loss": 0.6669, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.8022284122562673, |
| "eval_loss": 0.03638559579849243, |
| "eval_runtime": 6.7319, |
| "eval_samples_per_second": 222.821, |
| "eval_steps_per_second": 27.927, |
| "eval_sts_dev_pearson_cosine": 0.7566016073951273, |
| "eval_sts_dev_pearson_dot": 0.6635483139977033, |
| "eval_sts_dev_pearson_euclidean": 0.7014893647689773, |
| "eval_sts_dev_pearson_manhattan": 0.703594324322853, |
| "eval_sts_dev_pearson_max": 0.7566016073951273, |
| "eval_sts_dev_spearman_cosine": 0.7549242270132541, |
| "eval_sts_dev_spearman_dot": 0.6540274190985176, |
| "eval_sts_dev_spearman_euclidean": 0.6927527403904686, |
| "eval_sts_dev_spearman_manhattan": 0.695333682691011, |
| "eval_sts_dev_spearman_max": 0.7549242270132541, |
| "step": 80 |
| }, |
| { |
| "epoch": 1.9136490250696379, |
| "eval_loss": 0.036259058862924576, |
| "eval_runtime": 7.0619, |
| "eval_samples_per_second": 212.407, |
| "eval_steps_per_second": 26.622, |
| "eval_sts_dev_pearson_cosine": 0.7574029038481553, |
| "eval_sts_dev_pearson_dot": 0.6643528168117957, |
| "eval_sts_dev_pearson_euclidean": 0.7021549030740968, |
| "eval_sts_dev_pearson_manhattan": 0.7042456310839478, |
| "eval_sts_dev_pearson_max": 0.7574029038481553, |
| "eval_sts_dev_spearman_cosine": 0.7559011874733633, |
| "eval_sts_dev_spearman_dot": 0.654608486564625, |
| "eval_sts_dev_spearman_euclidean": 0.6934728497203049, |
| "eval_sts_dev_spearman_manhattan": 0.696009977505159, |
| "eval_sts_dev_spearman_max": 0.7559011874733633, |
| "step": 85 |
| }, |
| { |
| "epoch": 2.0445682451253484, |
| "grad_norm": 7.539985656738281, |
| "learning_rate": 6.261958601495913e-07, |
| "loss": 0.6428, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.0445682451253484, |
| "eval_loss": 0.036127302795648575, |
| "eval_runtime": 7.0107, |
| "eval_samples_per_second": 213.958, |
| "eval_steps_per_second": 26.816, |
| "eval_sts_dev_pearson_cosine": 0.7582160683192293, |
| "eval_sts_dev_pearson_dot": 0.6651865048982631, |
| "eval_sts_dev_pearson_euclidean": 0.7028452108161203, |
| "eval_sts_dev_pearson_manhattan": 0.7049193272018017, |
| "eval_sts_dev_pearson_max": 0.7582160683192293, |
| "eval_sts_dev_spearman_cosine": 0.756839547083474, |
| "eval_sts_dev_spearman_dot": 0.6554766310741506, |
| "eval_sts_dev_spearman_euclidean": 0.6942368514501571, |
| "eval_sts_dev_spearman_manhattan": 0.6967968978437559, |
| "eval_sts_dev_spearman_max": 0.756839547083474, |
| "step": 90 |
| }, |
| { |
| "epoch": 2.1559888579387185, |
| "eval_loss": 0.03598429635167122, |
| "eval_runtime": 6.8629, |
| "eval_samples_per_second": 218.568, |
| "eval_steps_per_second": 27.394, |
| "eval_sts_dev_pearson_cosine": 0.7590157349155543, |
| "eval_sts_dev_pearson_dot": 0.6664047922354215, |
| "eval_sts_dev_pearson_euclidean": 0.7032597502450331, |
| "eval_sts_dev_pearson_manhattan": 0.7053311453976816, |
| "eval_sts_dev_pearson_max": 0.7590157349155543, |
| "eval_sts_dev_spearman_cosine": 0.7577360756559688, |
| "eval_sts_dev_spearman_dot": 0.6567130424552957, |
| "eval_sts_dev_spearman_euclidean": 0.694683268380771, |
| "eval_sts_dev_spearman_manhattan": 0.6973000099834088, |
| "eval_sts_dev_spearman_max": 0.7577360756559688, |
| "step": 95 |
| }, |
| { |
| "epoch": 2.267409470752089, |
| "grad_norm": 5.178345680236816, |
| "learning_rate": 6.957731779439903e-07, |
| "loss": 0.5854, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.267409470752089, |
| "eval_loss": 0.03583008423447609, |
| "eval_runtime": 7.173, |
| "eval_samples_per_second": 209.119, |
| "eval_steps_per_second": 26.21, |
| "eval_sts_dev_pearson_cosine": 0.7597921519073876, |
| "eval_sts_dev_pearson_dot": 0.6678122125215467, |
| "eval_sts_dev_pearson_euclidean": 0.7035339087302831, |
| "eval_sts_dev_pearson_manhattan": 0.7056098859433702, |
| "eval_sts_dev_pearson_max": 0.7597921519073876, |
| "eval_sts_dev_spearman_cosine": 0.758602852970159, |
| "eval_sts_dev_spearman_dot": 0.6582467955758544, |
| "eval_sts_dev_spearman_euclidean": 0.6948605697617651, |
| "eval_sts_dev_spearman_manhattan": 0.6975703877172783, |
| "eval_sts_dev_spearman_max": 0.758602852970159, |
| "step": 100 |
| }, |
| { |
| "epoch": 2.3788300835654597, |
| "eval_loss": 0.035686325281858444, |
| "eval_runtime": 7.0872, |
| "eval_samples_per_second": 211.649, |
| "eval_steps_per_second": 26.527, |
| "eval_sts_dev_pearson_cosine": 0.7606568693270315, |
| "eval_sts_dev_pearson_dot": 0.6687514434055418, |
| "eval_sts_dev_pearson_euclidean": 0.704286798579542, |
| "eval_sts_dev_pearson_manhattan": 0.7063472791256069, |
| "eval_sts_dev_pearson_max": 0.7606568693270315, |
| "eval_sts_dev_spearman_cosine": 0.7597087921768803, |
| "eval_sts_dev_spearman_dot": 0.658946428183679, |
| "eval_sts_dev_spearman_euclidean": 0.695592274693547, |
| "eval_sts_dev_spearman_manhattan": 0.6983308228030709, |
| "eval_sts_dev_spearman_max": 0.7597087921768803, |
| "step": 105 |
| }, |
| { |
| "epoch": 2.4902506963788302, |
| "grad_norm": 5.807418346405029, |
| "learning_rate": 7.653504957383893e-07, |
| "loss": 0.6027, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.4902506963788302, |
| "eval_loss": 0.035556692630052567, |
| "eval_runtime": 7.0493, |
| "eval_samples_per_second": 212.788, |
| "eval_steps_per_second": 26.669, |
| "eval_sts_dev_pearson_cosine": 0.761527279815679, |
| "eval_sts_dev_pearson_dot": 0.6695555734987789, |
| "eval_sts_dev_pearson_euclidean": 0.705168673869323, |
| "eval_sts_dev_pearson_manhattan": 0.7072038979059934, |
| "eval_sts_dev_pearson_max": 0.761527279815679, |
| "eval_sts_dev_spearman_cosine": 0.760744250643423, |
| "eval_sts_dev_spearman_dot": 0.6597526569449198, |
| "eval_sts_dev_spearman_euclidean": 0.6967183194293859, |
| "eval_sts_dev_spearman_manhattan": 0.6992467241695522, |
| "eval_sts_dev_spearman_max": 0.760744250643423, |
| "step": 110 |
| }, |
| { |
| "epoch": 2.6016713091922004, |
| "eval_loss": 0.03542407229542732, |
| "eval_runtime": 6.8156, |
| "eval_samples_per_second": 220.083, |
| "eval_steps_per_second": 27.584, |
| "eval_sts_dev_pearson_cosine": 0.7623648733142145, |
| "eval_sts_dev_pearson_dot": 0.6704772598451654, |
| "eval_sts_dev_pearson_euclidean": 0.7059197567148983, |
| "eval_sts_dev_pearson_manhattan": 0.7079355090955533, |
| "eval_sts_dev_pearson_max": 0.7623648733142145, |
| "eval_sts_dev_spearman_cosine": 0.7618131283610858, |
| "eval_sts_dev_spearman_dot": 0.6605908503497494, |
| "eval_sts_dev_spearman_euclidean": 0.6976245585578177, |
| "eval_sts_dev_spearman_manhattan": 0.7002055764519721, |
| "eval_sts_dev_spearman_max": 0.7618131283610858, |
| "step": 115 |
| }, |
| { |
| "epoch": 2.713091922005571, |
| "grad_norm": 4.760545253753662, |
| "learning_rate": 8.349278135327883e-07, |
| "loss": 0.6375, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.713091922005571, |
| "eval_loss": 0.03528669476509094, |
| "eval_runtime": 6.9936, |
| "eval_samples_per_second": 214.481, |
| "eval_steps_per_second": 26.882, |
| "eval_sts_dev_pearson_cosine": 0.7631052098822656, |
| "eval_sts_dev_pearson_dot": 0.6714460378701741, |
| "eval_sts_dev_pearson_euclidean": 0.7064722681555804, |
| "eval_sts_dev_pearson_manhattan": 0.7084736073971417, |
| "eval_sts_dev_pearson_max": 0.7631052098822656, |
| "eval_sts_dev_spearman_cosine": 0.7627318359213398, |
| "eval_sts_dev_spearman_dot": 0.6614807337490313, |
| "eval_sts_dev_spearman_euclidean": 0.6982972981814837, |
| "eval_sts_dev_spearman_manhattan": 0.7008247751818659, |
| "eval_sts_dev_spearman_max": 0.7627318359213398, |
| "step": 120 |
| }, |
| { |
| "epoch": 2.8245125348189415, |
| "eval_loss": 0.03514046594500542, |
| "eval_runtime": 7.0888, |
| "eval_samples_per_second": 211.601, |
| "eval_steps_per_second": 26.521, |
| "eval_sts_dev_pearson_cosine": 0.7638287349941795, |
| "eval_sts_dev_pearson_dot": 0.6724854308235324, |
| "eval_sts_dev_pearson_euclidean": 0.7068315364008582, |
| "eval_sts_dev_pearson_manhattan": 0.7088273928548983, |
| "eval_sts_dev_pearson_max": 0.7638287349941795, |
| "eval_sts_dev_spearman_cosine": 0.7635020295116245, |
| "eval_sts_dev_spearman_dot": 0.6624349213377722, |
| "eval_sts_dev_spearman_euclidean": 0.6987404256446157, |
| "eval_sts_dev_spearman_manhattan": 0.7011827796563965, |
| "eval_sts_dev_spearman_max": 0.7635020295116245, |
| "step": 125 |
| }, |
| { |
| "epoch": 2.935933147632312, |
| "grad_norm": 4.589956760406494, |
| "learning_rate": 9.045051313271874e-07, |
| "loss": 0.6204, |
| "step": 130 |
| }, |
| { |
| "epoch": 2.935933147632312, |
| "eval_loss": 0.03499244153499603, |
| "eval_runtime": 7.03, |
| "eval_samples_per_second": 213.37, |
| "eval_steps_per_second": 26.742, |
| "eval_sts_dev_pearson_cosine": 0.7646248483324349, |
| "eval_sts_dev_pearson_dot": 0.6736502017218999, |
| "eval_sts_dev_pearson_euclidean": 0.707216661995043, |
| "eval_sts_dev_pearson_manhattan": 0.709212008478957, |
| "eval_sts_dev_pearson_max": 0.7646248483324349, |
| "eval_sts_dev_spearman_cosine": 0.7643307027826172, |
| "eval_sts_dev_spearman_dot": 0.6636213615361183, |
| "eval_sts_dev_spearman_euclidean": 0.6991046333767655, |
| "eval_sts_dev_spearman_manhattan": 0.7016301334896569, |
| "eval_sts_dev_spearman_max": 0.7643307027826172, |
| "step": 130 |
| }, |
| { |
| "epoch": 3.066852367688022, |
| "eval_loss": 0.0348396897315979, |
| "eval_runtime": 6.8191, |
| "eval_samples_per_second": 219.972, |
| "eval_steps_per_second": 27.57, |
| "eval_sts_dev_pearson_cosine": 0.7654784319525549, |
| "eval_sts_dev_pearson_dot": 0.6748874130308962, |
| "eval_sts_dev_pearson_euclidean": 0.707641103763532, |
| "eval_sts_dev_pearson_manhattan": 0.7096402166194079, |
| "eval_sts_dev_pearson_max": 0.7654784319525549, |
| "eval_sts_dev_spearman_cosine": 0.7653040232955037, |
| "eval_sts_dev_spearman_dot": 0.6648308844991435, |
| "eval_sts_dev_spearman_euclidean": 0.6995286562724882, |
| "eval_sts_dev_spearman_manhattan": 0.7020556361876031, |
| "eval_sts_dev_spearman_max": 0.7653040232955037, |
| "step": 135 |
| }, |
| { |
| "epoch": 3.1782729805013927, |
| "grad_norm": 5.2525177001953125, |
| "learning_rate": 9.740824491215864e-07, |
| "loss": 0.6077, |
| "step": 140 |
| }, |
| { |
| "epoch": 3.1782729805013927, |
| "eval_loss": 0.034706421196460724, |
| "eval_runtime": 6.9212, |
| "eval_samples_per_second": 216.726, |
| "eval_steps_per_second": 27.163, |
| "eval_sts_dev_pearson_cosine": 0.766263287304504, |
| "eval_sts_dev_pearson_dot": 0.675948414551205, |
| "eval_sts_dev_pearson_euclidean": 0.7081178470450136, |
| "eval_sts_dev_pearson_manhattan": 0.7101145234880011, |
| "eval_sts_dev_pearson_max": 0.766263287304504, |
| "eval_sts_dev_spearman_cosine": 0.7662688094783671, |
| "eval_sts_dev_spearman_dot": 0.666010980931314, |
| "eval_sts_dev_spearman_euclidean": 0.7000434625148538, |
| "eval_sts_dev_spearman_manhattan": 0.7026795019088747, |
| "eval_sts_dev_spearman_max": 0.7662688094783671, |
| "step": 140 |
| }, |
| { |
| "epoch": 3.2896935933147633, |
| "eval_loss": 0.03455406054854393, |
| "eval_runtime": 7.043, |
| "eval_samples_per_second": 212.977, |
| "eval_steps_per_second": 26.693, |
| "eval_sts_dev_pearson_cosine": 0.7669976344527232, |
| "eval_sts_dev_pearson_dot": 0.6772130506339817, |
| "eval_sts_dev_pearson_euclidean": 0.708340308738127, |
| "eval_sts_dev_pearson_manhattan": 0.7103439855119656, |
| "eval_sts_dev_pearson_max": 0.7669976344527232, |
| "eval_sts_dev_spearman_cosine": 0.7671727295922609, |
| "eval_sts_dev_spearman_dot": 0.6673064034335351, |
| "eval_sts_dev_spearman_euclidean": 0.7002877031846776, |
| "eval_sts_dev_spearman_manhattan": 0.702790337375281, |
| "eval_sts_dev_spearman_max": 0.7671727295922609, |
| "step": 145 |
| }, |
| { |
| "epoch": 3.401114206128134, |
| "grad_norm": 4.290932655334473, |
| "learning_rate": 1.0436597669159855e-06, |
| "loss": 0.5772, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.401114206128134, |
| "eval_loss": 0.03440996631979942, |
| "eval_runtime": 6.9991, |
| "eval_samples_per_second": 214.314, |
| "eval_steps_per_second": 26.861, |
| "eval_sts_dev_pearson_cosine": 0.7678013334085165, |
| "eval_sts_dev_pearson_dot": 0.6782264568419771, |
| "eval_sts_dev_pearson_euclidean": 0.7088142668828982, |
| "eval_sts_dev_pearson_manhattan": 0.7108177316956372, |
| "eval_sts_dev_pearson_max": 0.7678013334085165, |
| "eval_sts_dev_spearman_cosine": 0.7681151208619762, |
| "eval_sts_dev_spearman_dot": 0.6684064347787971, |
| "eval_sts_dev_spearman_euclidean": 0.7007326265687747, |
| "eval_sts_dev_spearman_manhattan": 0.7034130912956567, |
| "eval_sts_dev_spearman_max": 0.7681151208619762, |
| "step": 150 |
| }, |
| { |
| "epoch": 3.5125348189415044, |
| "eval_loss": 0.03426254168152809, |
| "eval_runtime": 6.9148, |
| "eval_samples_per_second": 216.927, |
| "eval_steps_per_second": 27.188, |
| "eval_sts_dev_pearson_cosine": 0.7686124311956235, |
| "eval_sts_dev_pearson_dot": 0.6794161416279998, |
| "eval_sts_dev_pearson_euclidean": 0.709205498775362, |
| "eval_sts_dev_pearson_manhattan": 0.7112097548871816, |
| "eval_sts_dev_pearson_max": 0.7686124311956235, |
| "eval_sts_dev_spearman_cosine": 0.7690103394236019, |
| "eval_sts_dev_spearman_dot": 0.6696872196092013, |
| "eval_sts_dev_spearman_euclidean": 0.7011801292985436, |
| "eval_sts_dev_spearman_manhattan": 0.7038459216523878, |
| "eval_sts_dev_spearman_max": 0.7690103394236019, |
| "step": 155 |
| }, |
| { |
| "epoch": 3.6239554317548746, |
| "grad_norm": 4.819970607757568, |
| "learning_rate": 1.1132370847103846e-06, |
| "loss": 0.5793, |
| "step": 160 |
| }, |
| { |
| "epoch": 3.6239554317548746, |
| "eval_loss": 0.034103069454431534, |
| "eval_runtime": 6.9114, |
| "eval_samples_per_second": 217.034, |
| "eval_steps_per_second": 27.202, |
| "eval_sts_dev_pearson_cosine": 0.7693080146114311, |
| "eval_sts_dev_pearson_dot": 0.6808270827318879, |
| "eval_sts_dev_pearson_euclidean": 0.7092760536788121, |
| "eval_sts_dev_pearson_manhattan": 0.7112999822871733, |
| "eval_sts_dev_pearson_max": 0.7693080146114311, |
| "eval_sts_dev_spearman_cosine": 0.7697572707961111, |
| "eval_sts_dev_spearman_dot": 0.6712211078819724, |
| "eval_sts_dev_spearman_euclidean": 0.7012923239631217, |
| "eval_sts_dev_spearman_manhattan": 0.7039518721666204, |
| "eval_sts_dev_spearman_max": 0.7697572707961111, |
| "step": 160 |
| }, |
| { |
| "epoch": 3.735376044568245, |
| "eval_loss": 0.0339648611843586, |
| "eval_runtime": 7.0549, |
| "eval_samples_per_second": 212.619, |
| "eval_steps_per_second": 26.648, |
| "eval_sts_dev_pearson_cosine": 0.7699086602747196, |
| "eval_sts_dev_pearson_dot": 0.6821532615290817, |
| "eval_sts_dev_pearson_euclidean": 0.7093076470422841, |
| "eval_sts_dev_pearson_manhattan": 0.7113457981900373, |
| "eval_sts_dev_pearson_max": 0.7699086602747196, |
| "eval_sts_dev_spearman_cosine": 0.7705074022984313, |
| "eval_sts_dev_spearman_dot": 0.672703308451007, |
| "eval_sts_dev_spearman_euclidean": 0.7012209819190688, |
| "eval_sts_dev_spearman_manhattan": 0.7040382275601695, |
| "eval_sts_dev_spearman_max": 0.7705074022984313, |
| "step": 165 |
| }, |
| { |
| "epoch": 3.8467966573816157, |
| "grad_norm": 4.6819539070129395, |
| "learning_rate": 1.1828144025047836e-06, |
| "loss": 0.5807, |
| "step": 170 |
| }, |
| { |
| "epoch": 3.8467966573816157, |
| "eval_loss": 0.033846523612737656, |
| "eval_runtime": 7.007, |
| "eval_samples_per_second": 214.071, |
| "eval_steps_per_second": 26.83, |
| "eval_sts_dev_pearson_cosine": 0.7705963307037736, |
| "eval_sts_dev_pearson_dot": 0.6833451512490409, |
| "eval_sts_dev_pearson_euclidean": 0.7096001250212141, |
| "eval_sts_dev_pearson_manhattan": 0.711632091113557, |
| "eval_sts_dev_pearson_max": 0.7705963307037736, |
| "eval_sts_dev_spearman_cosine": 0.7712362988663569, |
| "eval_sts_dev_spearman_dot": 0.6739107651886832, |
| "eval_sts_dev_spearman_euclidean": 0.7015732848026783, |
| "eval_sts_dev_spearman_manhattan": 0.7044555432408592, |
| "eval_sts_dev_spearman_max": 0.7712362988663569, |
| "step": 170 |
| }, |
| { |
| "epoch": 3.958217270194986, |
| "eval_loss": 0.03373364359140396, |
| "eval_runtime": 7.0142, |
| "eval_samples_per_second": 213.851, |
| "eval_steps_per_second": 26.803, |
| "eval_sts_dev_pearson_cosine": 0.7712791156976446, |
| "eval_sts_dev_pearson_dot": 0.684640185733316, |
| "eval_sts_dev_pearson_euclidean": 0.7099359677821528, |
| "eval_sts_dev_pearson_manhattan": 0.7119586396389017, |
| "eval_sts_dev_pearson_max": 0.7712791156976446, |
| "eval_sts_dev_spearman_cosine": 0.7720608877912845, |
| "eval_sts_dev_spearman_dot": 0.6752035487866894, |
| "eval_sts_dev_spearman_euclidean": 0.702006901214985, |
| "eval_sts_dev_spearman_manhattan": 0.7046928888569776, |
| "eval_sts_dev_spearman_max": 0.7720608877912845, |
| "step": 175 |
| }, |
| { |
| "epoch": 4.089136490250697, |
| "grad_norm": 5.301053524017334, |
| "learning_rate": 1.2523917202991825e-06, |
| "loss": 0.5576, |
| "step": 180 |
| }, |
| { |
| "epoch": 4.089136490250697, |
| "eval_loss": 0.03359239175915718, |
| "eval_runtime": 6.9089, |
| "eval_samples_per_second": 217.11, |
| "eval_steps_per_second": 27.211, |
| "eval_sts_dev_pearson_cosine": 0.7720094084547121, |
| "eval_sts_dev_pearson_dot": 0.6861711141593462, |
| "eval_sts_dev_pearson_euclidean": 0.7101810374908146, |
| "eval_sts_dev_pearson_manhattan": 0.7122008904979185, |
| "eval_sts_dev_pearson_max": 0.7720094084547121, |
| "eval_sts_dev_spearman_cosine": 0.7729262262575222, |
| "eval_sts_dev_spearman_dot": 0.6768691715243821, |
| "eval_sts_dev_spearman_euclidean": 0.7021927194762703, |
| "eval_sts_dev_spearman_manhattan": 0.7048656110976538, |
| "eval_sts_dev_spearman_max": 0.7729262262575222, |
| "step": 180 |
| }, |
| { |
| "epoch": 4.2005571030640665, |
| "eval_loss": 0.03344343975186348, |
| "eval_runtime": 6.9828, |
| "eval_samples_per_second": 214.815, |
| "eval_steps_per_second": 26.923, |
| "eval_sts_dev_pearson_cosine": 0.7726659560898915, |
| "eval_sts_dev_pearson_dot": 0.687537592193805, |
| "eval_sts_dev_pearson_euclidean": 0.7102002040734198, |
| "eval_sts_dev_pearson_manhattan": 0.7122330129546837, |
| "eval_sts_dev_pearson_max": 0.7726659560898915, |
| "eval_sts_dev_spearman_cosine": 0.773425201401485, |
| "eval_sts_dev_spearman_dot": 0.6783853594387605, |
| "eval_sts_dev_spearman_euclidean": 0.7021304440842328, |
| "eval_sts_dev_spearman_manhattan": 0.7048113806985111, |
| "eval_sts_dev_spearman_max": 0.773425201401485, |
| "step": 185 |
| }, |
| { |
| "epoch": 4.311977715877437, |
| "grad_norm": 6.004096984863281, |
| "learning_rate": 1.3219690380935816e-06, |
| "loss": 0.5244, |
| "step": 190 |
| }, |
| { |
| "epoch": 4.311977715877437, |
| "eval_loss": 0.033325061202049255, |
| "eval_runtime": 7.0443, |
| "eval_samples_per_second": 212.938, |
| "eval_steps_per_second": 26.688, |
| "eval_sts_dev_pearson_cosine": 0.7733338124006317, |
| "eval_sts_dev_pearson_dot": 0.6884427467691409, |
| "eval_sts_dev_pearson_euclidean": 0.7104572948924557, |
| "eval_sts_dev_pearson_manhattan": 0.7124982272648852, |
| "eval_sts_dev_pearson_max": 0.7733338124006317, |
| "eval_sts_dev_spearman_cosine": 0.7740160113372371, |
| "eval_sts_dev_spearman_dot": 0.679314640853313, |
| "eval_sts_dev_spearman_euclidean": 0.7022600917103325, |
| "eval_sts_dev_spearman_manhattan": 0.7049775652371025, |
| "eval_sts_dev_spearman_max": 0.7740160113372371, |
| "step": 190 |
| }, |
| { |
| "epoch": 4.423398328690808, |
| "eval_loss": 0.03322310745716095, |
| "eval_runtime": 7.0234, |
| "eval_samples_per_second": 213.573, |
| "eval_steps_per_second": 26.768, |
| "eval_sts_dev_pearson_cosine": 0.7739695721631923, |
| "eval_sts_dev_pearson_dot": 0.6893517389464994, |
| "eval_sts_dev_pearson_euclidean": 0.7107902241882483, |
| "eval_sts_dev_pearson_manhattan": 0.7128377305936389, |
| "eval_sts_dev_pearson_max": 0.7739695721631923, |
| "eval_sts_dev_spearman_cosine": 0.7747804094168401, |
| "eval_sts_dev_spearman_dot": 0.68031602482782, |
| "eval_sts_dev_spearman_euclidean": 0.702670109171386, |
| "eval_sts_dev_spearman_manhattan": 0.7052134150159887, |
| "eval_sts_dev_spearman_max": 0.7747804094168401, |
| "step": 195 |
| }, |
| { |
| "epoch": 4.534818941504178, |
| "grad_norm": 4.593257427215576, |
| "learning_rate": 1.3915463558879807e-06, |
| "loss": 0.539, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.534818941504178, |
| "eval_loss": 0.033111851662397385, |
| "eval_runtime": 6.8979, |
| "eval_samples_per_second": 217.459, |
| "eval_steps_per_second": 27.255, |
| "eval_sts_dev_pearson_cosine": 0.7745730498768191, |
| "eval_sts_dev_pearson_dot": 0.6906339428851104, |
| "eval_sts_dev_pearson_euclidean": 0.7109426833835167, |
| "eval_sts_dev_pearson_manhattan": 0.7130040632760261, |
| "eval_sts_dev_pearson_max": 0.7745730498768191, |
| "eval_sts_dev_spearman_cosine": 0.775379074216674, |
| "eval_sts_dev_spearman_dot": 0.6818359626434134, |
| "eval_sts_dev_spearman_euclidean": 0.7027815708069985, |
| "eval_sts_dev_spearman_manhattan": 0.7054772962806527, |
| "eval_sts_dev_spearman_max": 0.775379074216674, |
| "step": 200 |
| }, |
| { |
| "epoch": 4.646239554317549, |
| "eval_loss": 0.03302275016903877, |
| "eval_runtime": 6.926, |
| "eval_samples_per_second": 216.576, |
| "eval_steps_per_second": 27.144, |
| "eval_sts_dev_pearson_cosine": 0.7750720264452357, |
| "eval_sts_dev_pearson_dot": 0.6916453477028499, |
| "eval_sts_dev_pearson_euclidean": 0.7112190933233568, |
| "eval_sts_dev_pearson_manhattan": 0.7132769890476369, |
| "eval_sts_dev_pearson_max": 0.7750720264452357, |
| "eval_sts_dev_spearman_cosine": 0.7760014955136583, |
| "eval_sts_dev_spearman_dot": 0.6829098752509514, |
| "eval_sts_dev_spearman_euclidean": 0.7030344024642863, |
| "eval_sts_dev_spearman_manhattan": 0.7059066267642276, |
| "eval_sts_dev_spearman_max": 0.7760014955136583, |
| "step": 205 |
| }, |
| { |
| "epoch": 4.757660167130919, |
| "grad_norm": 4.035131931304932, |
| "learning_rate": 1.4611236736823798e-06, |
| "loss": 0.5517, |
| "step": 210 |
| }, |
| { |
| "epoch": 4.757660167130919, |
| "eval_loss": 0.032943133264780045, |
| "eval_runtime": 7.07, |
| "eval_samples_per_second": 212.164, |
| "eval_steps_per_second": 26.591, |
| "eval_sts_dev_pearson_cosine": 0.775436879881936, |
| "eval_sts_dev_pearson_dot": 0.6925918600460864, |
| "eval_sts_dev_pearson_euclidean": 0.7113376814593186, |
| "eval_sts_dev_pearson_manhattan": 0.7133931376814393, |
| "eval_sts_dev_pearson_max": 0.775436879881936, |
| "eval_sts_dev_spearman_cosine": 0.7764792385291549, |
| "eval_sts_dev_spearman_dot": 0.6839147456943953, |
| "eval_sts_dev_spearman_euclidean": 0.703141329969615, |
| "eval_sts_dev_spearman_manhattan": 0.7059362250994191, |
| "eval_sts_dev_spearman_max": 0.7764792385291549, |
| "step": 210 |
| }, |
| { |
| "epoch": 4.86908077994429, |
| "eval_loss": 0.03284618631005287, |
| "eval_runtime": 6.8565, |
| "eval_samples_per_second": 218.77, |
| "eval_steps_per_second": 27.419, |
| "eval_sts_dev_pearson_cosine": 0.775910266044483, |
| "eval_sts_dev_pearson_dot": 0.6934614331040406, |
| "eval_sts_dev_pearson_euclidean": 0.711548659602332, |
| "eval_sts_dev_pearson_manhattan": 0.7136048948232416, |
| "eval_sts_dev_pearson_max": 0.775910266044483, |
| "eval_sts_dev_spearman_cosine": 0.7769479258303382, |
| "eval_sts_dev_spearman_dot": 0.6848713805423069, |
| "eval_sts_dev_spearman_euclidean": 0.7033494094042918, |
| "eval_sts_dev_spearman_manhattan": 0.7060255698767176, |
| "eval_sts_dev_spearman_max": 0.7769479258303382, |
| "step": 215 |
| }, |
| { |
| "epoch": 4.9805013927576605, |
| "grad_norm": 4.164207458496094, |
| "learning_rate": 1.5307009914767787e-06, |
| "loss": 0.5265, |
| "step": 220 |
| }, |
| { |
| "epoch": 4.9805013927576605, |
| "eval_loss": 0.0327322892844677, |
| "eval_runtime": 6.9904, |
| "eval_samples_per_second": 214.58, |
| "eval_steps_per_second": 26.894, |
| "eval_sts_dev_pearson_cosine": 0.7764785690089298, |
| "eval_sts_dev_pearson_dot": 0.6942342520710683, |
| "eval_sts_dev_pearson_euclidean": 0.7119234281148877, |
| "eval_sts_dev_pearson_manhattan": 0.7139725405773478, |
| "eval_sts_dev_pearson_max": 0.7764785690089298, |
| "eval_sts_dev_spearman_cosine": 0.7776377175908147, |
| "eval_sts_dev_spearman_dot": 0.6856461394544989, |
| "eval_sts_dev_spearman_euclidean": 0.7037087745638393, |
| "eval_sts_dev_spearman_manhattan": 0.7064702298285305, |
| "eval_sts_dev_spearman_max": 0.7776377175908147, |
| "step": 220 |
| }, |
| { |
| "epoch": 5.111420612813371, |
| "eval_loss": 0.032635681331157684, |
| "eval_runtime": 6.8033, |
| "eval_samples_per_second": 220.482, |
| "eval_steps_per_second": 27.634, |
| "eval_sts_dev_pearson_cosine": 0.7768906949758223, |
| "eval_sts_dev_pearson_dot": 0.695219441450241, |
| "eval_sts_dev_pearson_euclidean": 0.7119427716298626, |
| "eval_sts_dev_pearson_manhattan": 0.7139906781614199, |
| "eval_sts_dev_pearson_max": 0.7768906949758223, |
| "eval_sts_dev_spearman_cosine": 0.7779652464100915, |
| "eval_sts_dev_spearman_dot": 0.6869571731826094, |
| "eval_sts_dev_spearman_euclidean": 0.7036077013230951, |
| "eval_sts_dev_spearman_manhattan": 0.7064509076431469, |
| "eval_sts_dev_spearman_max": 0.7779652464100915, |
| "step": 225 |
| }, |
| { |
| "epoch": 5.222841225626741, |
| "grad_norm": 4.008439064025879, |
| "learning_rate": 1.6002783092711777e-06, |
| "loss": 0.5285, |
| "step": 230 |
| }, |
| { |
| "epoch": 5.222841225626741, |
| "eval_loss": 0.03253428637981415, |
| "eval_runtime": 6.8983, |
| "eval_samples_per_second": 217.445, |
| "eval_steps_per_second": 27.253, |
| "eval_sts_dev_pearson_cosine": 0.7772382339972829, |
| "eval_sts_dev_pearson_dot": 0.6962971989781661, |
| "eval_sts_dev_pearson_euclidean": 0.7116605569376889, |
| "eval_sts_dev_pearson_manhattan": 0.7137176755568332, |
| "eval_sts_dev_pearson_max": 0.7772382339972829, |
| "eval_sts_dev_spearman_cosine": 0.7783426175116597, |
| "eval_sts_dev_spearman_dot": 0.6882750477744878, |
| "eval_sts_dev_spearman_euclidean": 0.7031754685029606, |
| "eval_sts_dev_spearman_manhattan": 0.7062052563630147, |
| "eval_sts_dev_spearman_max": 0.7783426175116597, |
| "step": 230 |
| }, |
| { |
| "epoch": 5.334261838440112, |
| "eval_loss": 0.032446879893541336, |
| "eval_runtime": 7.1024, |
| "eval_samples_per_second": 211.195, |
| "eval_steps_per_second": 26.47, |
| "eval_sts_dev_pearson_cosine": 0.7776669424440168, |
| "eval_sts_dev_pearson_dot": 0.6970405122472402, |
| "eval_sts_dev_pearson_euclidean": 0.7117722670287954, |
| "eval_sts_dev_pearson_manhattan": 0.7138312835497453, |
| "eval_sts_dev_pearson_max": 0.7776669424440168, |
| "eval_sts_dev_spearman_cosine": 0.7789160171177805, |
| "eval_sts_dev_spearman_dot": 0.6891076670812013, |
| "eval_sts_dev_spearman_euclidean": 0.7033258975002282, |
| "eval_sts_dev_spearman_manhattan": 0.7062752235073074, |
| "eval_sts_dev_spearman_max": 0.7789160171177805, |
| "step": 235 |
| }, |
| { |
| "epoch": 5.445682451253482, |
| "grad_norm": 3.6369762420654297, |
| "learning_rate": 1.6698556270655766e-06, |
| "loss": 0.4697, |
| "step": 240 |
| }, |
| { |
| "epoch": 5.445682451253482, |
| "eval_loss": 0.03234243392944336, |
| "eval_runtime": 6.782, |
| "eval_samples_per_second": 221.172, |
| "eval_steps_per_second": 27.72, |
| "eval_sts_dev_pearson_cosine": 0.7781440012528016, |
| "eval_sts_dev_pearson_dot": 0.6975764419235699, |
| "eval_sts_dev_pearson_euclidean": 0.712024820219635, |
| "eval_sts_dev_pearson_manhattan": 0.7140934326314853, |
| "eval_sts_dev_pearson_max": 0.7781440012528016, |
| "eval_sts_dev_spearman_cosine": 0.779282426254369, |
| "eval_sts_dev_spearman_dot": 0.6897740636983543, |
| "eval_sts_dev_spearman_euclidean": 0.7035466980830317, |
| "eval_sts_dev_spearman_manhattan": 0.706402706407244, |
| "eval_sts_dev_spearman_max": 0.779282426254369, |
| "step": 240 |
| }, |
| { |
| "epoch": 5.557103064066853, |
| "eval_loss": 0.0322665236890316, |
| "eval_runtime": 7.0287, |
| "eval_samples_per_second": 213.412, |
| "eval_steps_per_second": 26.748, |
| "eval_sts_dev_pearson_cosine": 0.7787273229273541, |
| "eval_sts_dev_pearson_dot": 0.6977971151317023, |
| "eval_sts_dev_pearson_euclidean": 0.7128704818639644, |
| "eval_sts_dev_pearson_manhattan": 0.7149352374625544, |
| "eval_sts_dev_pearson_max": 0.7787273229273541, |
| "eval_sts_dev_spearman_cosine": 0.77981903098488, |
| "eval_sts_dev_spearman_dot": 0.6899867909899472, |
| "eval_sts_dev_spearman_euclidean": 0.7044750738813548, |
| "eval_sts_dev_spearman_manhattan": 0.707203879577786, |
| "eval_sts_dev_spearman_max": 0.77981903098488, |
| "step": 245 |
| }, |
| { |
| "epoch": 5.6685236768802225, |
| "grad_norm": 3.939344882965088, |
| "learning_rate": 1.739432944859976e-06, |
| "loss": 0.4913, |
| "step": 250 |
| }, |
| { |
| "epoch": 5.6685236768802225, |
| "eval_loss": 0.03220539167523384, |
| "eval_runtime": 6.7653, |
| "eval_samples_per_second": 221.72, |
| "eval_steps_per_second": 27.789, |
| "eval_sts_dev_pearson_cosine": 0.7791771917276973, |
| "eval_sts_dev_pearson_dot": 0.6981160056071188, |
| "eval_sts_dev_pearson_euclidean": 0.713488315174772, |
| "eval_sts_dev_pearson_manhattan": 0.7155411689371374, |
| "eval_sts_dev_pearson_max": 0.7791771917276973, |
| "eval_sts_dev_spearman_cosine": 0.7803556746575578, |
| "eval_sts_dev_spearman_dot": 0.6902449156806119, |
| "eval_sts_dev_spearman_euclidean": 0.7052006351141208, |
| "eval_sts_dev_spearman_manhattan": 0.7079806405930662, |
| "eval_sts_dev_spearman_max": 0.7803556746575578, |
| "step": 250 |
| }, |
| { |
| "epoch": 5.779944289693593, |
| "eval_loss": 0.0321136973798275, |
| "eval_runtime": 6.8852, |
| "eval_samples_per_second": 217.857, |
| "eval_steps_per_second": 27.305, |
| "eval_sts_dev_pearson_cosine": 0.7795408783298017, |
| "eval_sts_dev_pearson_dot": 0.698621796206566, |
| "eval_sts_dev_pearson_euclidean": 0.713845705178594, |
| "eval_sts_dev_pearson_manhattan": 0.7158847989781144, |
| "eval_sts_dev_pearson_max": 0.7795408783298017, |
| "eval_sts_dev_spearman_cosine": 0.7808851254829866, |
| "eval_sts_dev_spearman_dot": 0.6910441279803855, |
| "eval_sts_dev_spearman_euclidean": 0.7057147472696849, |
| "eval_sts_dev_spearman_manhattan": 0.7084308417857139, |
| "eval_sts_dev_spearman_max": 0.7808851254829866, |
| "step": 255 |
| }, |
| { |
| "epoch": 5.891364902506964, |
| "grad_norm": 4.813522815704346, |
| "learning_rate": 1.8090102626543748e-06, |
| "loss": 0.5253, |
| "step": 260 |
| }, |
| { |
| "epoch": 5.891364902506964, |
| "eval_loss": 0.03203197568655014, |
| "eval_runtime": 7.0476, |
| "eval_samples_per_second": 212.839, |
| "eval_steps_per_second": 26.676, |
| "eval_sts_dev_pearson_cosine": 0.7799732728461426, |
| "eval_sts_dev_pearson_dot": 0.6992354089058229, |
| "eval_sts_dev_pearson_euclidean": 0.7142404896335972, |
| "eval_sts_dev_pearson_manhattan": 0.716270082443381, |
| "eval_sts_dev_pearson_max": 0.7799732728461426, |
| "eval_sts_dev_spearman_cosine": 0.7812777358255738, |
| "eval_sts_dev_spearman_dot": 0.6917093769490908, |
| "eval_sts_dev_spearman_euclidean": 0.7062223056881557, |
| "eval_sts_dev_spearman_manhattan": 0.7089598550457142, |
| "eval_sts_dev_spearman_max": 0.7812777358255738, |
| "step": 260 |
| }, |
| { |
| "epoch": 6.022284122562674, |
| "eval_loss": 0.03195018321275711, |
| "eval_runtime": 7.099, |
| "eval_samples_per_second": 211.299, |
| "eval_steps_per_second": 26.483, |
| "eval_sts_dev_pearson_cosine": 0.7803233438802165, |
| "eval_sts_dev_pearson_dot": 0.6999738035020234, |
| "eval_sts_dev_pearson_euclidean": 0.7143605362249807, |
| "eval_sts_dev_pearson_manhattan": 0.7163833317778756, |
| "eval_sts_dev_pearson_max": 0.7803233438802165, |
| "eval_sts_dev_spearman_cosine": 0.7817289518382318, |
| "eval_sts_dev_spearman_dot": 0.692658440982393, |
| "eval_sts_dev_spearman_euclidean": 0.7062913822145624, |
| "eval_sts_dev_spearman_manhattan": 0.7091007508962174, |
| "eval_sts_dev_spearman_max": 0.7817289518382318, |
| "step": 265 |
| }, |
| { |
| "epoch": 6.133704735376044, |
| "grad_norm": 3.873243570327759, |
| "learning_rate": 1.8785875804487739e-06, |
| "loss": 0.4924, |
| "step": 270 |
| }, |
| { |
| "epoch": 6.133704735376044, |
| "eval_loss": 0.031853143125772476, |
| "eval_runtime": 7.045, |
| "eval_samples_per_second": 212.918, |
| "eval_steps_per_second": 26.686, |
| "eval_sts_dev_pearson_cosine": 0.7805555688659683, |
| "eval_sts_dev_pearson_dot": 0.7005444051022546, |
| "eval_sts_dev_pearson_euclidean": 0.7142124903197049, |
| "eval_sts_dev_pearson_manhattan": 0.716248059084913, |
| "eval_sts_dev_pearson_max": 0.7805555688659683, |
| "eval_sts_dev_spearman_cosine": 0.7818561644430513, |
| "eval_sts_dev_spearman_dot": 0.6936098988133554, |
| "eval_sts_dev_spearman_euclidean": 0.7060309965817769, |
| "eval_sts_dev_spearman_manhattan": 0.7089509487437853, |
| "eval_sts_dev_spearman_max": 0.7818561644430513, |
| "step": 270 |
| }, |
| { |
| "epoch": 6.245125348189415, |
| "eval_loss": 0.031787075102329254, |
| "eval_runtime": 7.0799, |
| "eval_samples_per_second": 211.867, |
| "eval_steps_per_second": 26.554, |
| "eval_sts_dev_pearson_cosine": 0.7807075804252084, |
| "eval_sts_dev_pearson_dot": 0.7015197969666243, |
| "eval_sts_dev_pearson_euclidean": 0.713830705347577, |
| "eval_sts_dev_pearson_manhattan": 0.7158793994538133, |
| "eval_sts_dev_pearson_max": 0.7807075804252084, |
| "eval_sts_dev_spearman_cosine": 0.7819875621854264, |
| "eval_sts_dev_spearman_dot": 0.694826261852757, |
| "eval_sts_dev_spearman_euclidean": 0.7053731328646764, |
| "eval_sts_dev_spearman_manhattan": 0.7083527948173437, |
| "eval_sts_dev_spearman_max": 0.7819875621854264, |
| "step": 275 |
| }, |
| { |
| "epoch": 6.3565459610027855, |
| "grad_norm": 4.469658374786377, |
| "learning_rate": 1.9481648982431728e-06, |
| "loss": 0.4844, |
| "step": 280 |
| }, |
| { |
| "epoch": 6.3565459610027855, |
| "eval_loss": 0.031746331602334976, |
| "eval_runtime": 6.7748, |
| "eval_samples_per_second": 221.41, |
| "eval_steps_per_second": 27.75, |
| "eval_sts_dev_pearson_cosine": 0.7808289673024869, |
| "eval_sts_dev_pearson_dot": 0.702423126121021, |
| "eval_sts_dev_pearson_euclidean": 0.7134962000576563, |
| "eval_sts_dev_pearson_manhattan": 0.7155503733116253, |
| "eval_sts_dev_pearson_max": 0.7808289673024869, |
| "eval_sts_dev_spearman_cosine": 0.7822111314547963, |
| "eval_sts_dev_spearman_dot": 0.6958278382473629, |
| "eval_sts_dev_spearman_euclidean": 0.7049726585244658, |
| "eval_sts_dev_spearman_manhattan": 0.7078651037745494, |
| "eval_sts_dev_spearman_max": 0.7822111314547963, |
| "step": 280 |
| }, |
| { |
| "epoch": 6.467966573816156, |
| "eval_loss": 0.0316670723259449, |
| "eval_runtime": 7.0619, |
| "eval_samples_per_second": 212.406, |
| "eval_steps_per_second": 26.622, |
| "eval_sts_dev_pearson_cosine": 0.781180936397055, |
| "eval_sts_dev_pearson_dot": 0.7027629453006121, |
| "eval_sts_dev_pearson_euclidean": 0.7136902176147873, |
| "eval_sts_dev_pearson_manhattan": 0.715757628364657, |
| "eval_sts_dev_pearson_max": 0.781180936397055, |
| "eval_sts_dev_spearman_cosine": 0.78250079334828, |
| "eval_sts_dev_spearman_dot": 0.6962981450393402, |
| "eval_sts_dev_spearman_euclidean": 0.7051141445683561, |
| "eval_sts_dev_spearman_manhattan": 0.70821885209965, |
| "eval_sts_dev_spearman_max": 0.78250079334828, |
| "step": 285 |
| }, |
| { |
| "epoch": 6.579387186629527, |
| "grad_norm": 4.325808048248291, |
| "learning_rate": 2.017742216037572e-06, |
| "loss": 0.442, |
| "step": 290 |
| }, |
| { |
| "epoch": 6.579387186629527, |
| "eval_loss": 0.03155314922332764, |
| "eval_runtime": 6.933, |
| "eval_samples_per_second": 216.356, |
| "eval_steps_per_second": 27.117, |
| "eval_sts_dev_pearson_cosine": 0.781592834547759, |
| "eval_sts_dev_pearson_dot": 0.7030321075873802, |
| "eval_sts_dev_pearson_euclidean": 0.7138293804278546, |
| "eval_sts_dev_pearson_manhattan": 0.7159175761814789, |
| "eval_sts_dev_pearson_max": 0.781592834547759, |
| "eval_sts_dev_spearman_cosine": 0.7827403875693918, |
| "eval_sts_dev_spearman_dot": 0.6966818933630766, |
| "eval_sts_dev_spearman_euclidean": 0.705222522900883, |
| "eval_sts_dev_spearman_manhattan": 0.7082679375517423, |
| "eval_sts_dev_spearman_max": 0.7827403875693918, |
| "step": 290 |
| }, |
| { |
| "epoch": 6.690807799442897, |
| "eval_loss": 0.0314662829041481, |
| "eval_runtime": 7.0474, |
| "eval_samples_per_second": 212.844, |
| "eval_steps_per_second": 26.676, |
| "eval_sts_dev_pearson_cosine": 0.7820122068864954, |
| "eval_sts_dev_pearson_dot": 0.703421139648371, |
| "eval_sts_dev_pearson_euclidean": 0.7141068771656474, |
| "eval_sts_dev_pearson_manhattan": 0.7162068261112142, |
| "eval_sts_dev_pearson_max": 0.7820122068864954, |
| "eval_sts_dev_spearman_cosine": 0.7829970553896861, |
| "eval_sts_dev_spearman_dot": 0.6970113959506001, |
| "eval_sts_dev_spearman_euclidean": 0.7054796488454884, |
| "eval_sts_dev_spearman_manhattan": 0.7085587324330124, |
| "eval_sts_dev_spearman_max": 0.7829970553896861, |
| "step": 295 |
| }, |
| { |
| "epoch": 6.802228412256268, |
| "grad_norm": 3.6315908432006836, |
| "learning_rate": 2.087319533831971e-06, |
| "loss": 0.4665, |
| "step": 300 |
| }, |
| { |
| "epoch": 6.802228412256268, |
| "eval_loss": 0.03142312169075012, |
| "eval_runtime": 6.9811, |
| "eval_samples_per_second": 214.864, |
| "eval_steps_per_second": 26.93, |
| "eval_sts_dev_pearson_cosine": 0.7823768397963167, |
| "eval_sts_dev_pearson_dot": 0.7038756871911903, |
| "eval_sts_dev_pearson_euclidean": 0.7145009916374723, |
| "eval_sts_dev_pearson_manhattan": 0.7165993081434159, |
| "eval_sts_dev_pearson_max": 0.7823768397963167, |
| "eval_sts_dev_spearman_cosine": 0.7834457731278, |
| "eval_sts_dev_spearman_dot": 0.6973417239926998, |
| "eval_sts_dev_spearman_euclidean": 0.7059158400220358, |
| "eval_sts_dev_spearman_manhattan": 0.7090603670611569, |
| "eval_sts_dev_spearman_max": 0.7834457731278, |
| "step": 300 |
| }, |
| { |
| "epoch": 6.913649025069638, |
| "eval_loss": 0.03140180557966232, |
| "eval_runtime": 7.0935, |
| "eval_samples_per_second": 211.462, |
| "eval_steps_per_second": 26.503, |
| "eval_sts_dev_pearson_cosine": 0.7827189612475338, |
| "eval_sts_dev_pearson_dot": 0.7043799909167585, |
| "eval_sts_dev_pearson_euclidean": 0.715034388904346, |
| "eval_sts_dev_pearson_manhattan": 0.7171022025564596, |
| "eval_sts_dev_pearson_max": 0.7827189612475338, |
| "eval_sts_dev_spearman_cosine": 0.7839004976206189, |
| "eval_sts_dev_spearman_dot": 0.6975156259478882, |
| "eval_sts_dev_spearman_euclidean": 0.7065303588201288, |
| "eval_sts_dev_spearman_manhattan": 0.7095736568498506, |
| "eval_sts_dev_spearman_max": 0.7839004976206189, |
| "step": 305 |
| }, |
| { |
| "epoch": 7.044568245125348, |
| "grad_norm": 4.26026725769043, |
| "learning_rate": 2.15689685162637e-06, |
| "loss": 0.4672, |
| "step": 310 |
| }, |
| { |
| "epoch": 7.044568245125348, |
| "eval_loss": 0.03136160969734192, |
| "eval_runtime": 6.6776, |
| "eval_samples_per_second": 224.63, |
| "eval_steps_per_second": 28.154, |
| "eval_sts_dev_pearson_cosine": 0.7831698418188764, |
| "eval_sts_dev_pearson_dot": 0.7044122663834302, |
| "eval_sts_dev_pearson_euclidean": 0.7156598421085834, |
| "eval_sts_dev_pearson_manhattan": 0.7176890258722983, |
| "eval_sts_dev_pearson_max": 0.7831698418188764, |
| "eval_sts_dev_spearman_cosine": 0.7843284949390994, |
| "eval_sts_dev_spearman_dot": 0.697639093220699, |
| "eval_sts_dev_spearman_euclidean": 0.7073241375609828, |
| "eval_sts_dev_spearman_manhattan": 0.710185012169815, |
| "eval_sts_dev_spearman_max": 0.7843284949390994, |
| "step": 310 |
| }, |
| { |
| "epoch": 7.155988857938719, |
| "eval_loss": 0.031366512179374695, |
| "eval_runtime": 6.9924, |
| "eval_samples_per_second": 214.518, |
| "eval_steps_per_second": 26.886, |
| "eval_sts_dev_pearson_cosine": 0.7835832006721541, |
| "eval_sts_dev_pearson_dot": 0.7043934252027199, |
| "eval_sts_dev_pearson_euclidean": 0.7164264689263184, |
| "eval_sts_dev_pearson_manhattan": 0.7184030248845167, |
| "eval_sts_dev_pearson_max": 0.7835832006721541, |
| "eval_sts_dev_spearman_cosine": 0.7850548943796795, |
| "eval_sts_dev_spearman_dot": 0.6977756771302583, |
| "eval_sts_dev_spearman_euclidean": 0.708343725874613, |
| "eval_sts_dev_spearman_manhattan": 0.7111504960736558, |
| "eval_sts_dev_spearman_max": 0.7850548943796795, |
| "step": 315 |
| }, |
| { |
| "epoch": 7.2674094707520895, |
| "grad_norm": 3.808695077896118, |
| "learning_rate": 2.226474169420769e-06, |
| "loss": 0.4131, |
| "step": 320 |
| }, |
| { |
| "epoch": 7.2674094707520895, |
| "eval_loss": 0.03135285899043083, |
| "eval_runtime": 6.9057, |
| "eval_samples_per_second": 217.213, |
| "eval_steps_per_second": 27.224, |
| "eval_sts_dev_pearson_cosine": 0.7836045257427042, |
| "eval_sts_dev_pearson_dot": 0.7048735903915628, |
| "eval_sts_dev_pearson_euclidean": 0.7161062363729224, |
| "eval_sts_dev_pearson_manhattan": 0.7180798998241316, |
| "eval_sts_dev_pearson_max": 0.7836045257427042, |
| "eval_sts_dev_spearman_cosine": 0.7849975337135177, |
| "eval_sts_dev_spearman_dot": 0.6982899839848741, |
| "eval_sts_dev_spearman_euclidean": 0.7079431278357644, |
| "eval_sts_dev_spearman_manhattan": 0.710852480857077, |
| "eval_sts_dev_spearman_max": 0.7849975337135177, |
| "step": 320 |
| }, |
| { |
| "epoch": 7.378830083565459, |
| "eval_loss": 0.03127756714820862, |
| "eval_runtime": 6.9241, |
| "eval_samples_per_second": 216.634, |
| "eval_steps_per_second": 27.151, |
| "eval_sts_dev_pearson_cosine": 0.7836610063557831, |
| "eval_sts_dev_pearson_dot": 0.705409260823171, |
| "eval_sts_dev_pearson_euclidean": 0.7154023331837831, |
| "eval_sts_dev_pearson_manhattan": 0.717401985035912, |
| "eval_sts_dev_pearson_max": 0.7836610063557831, |
| "eval_sts_dev_spearman_cosine": 0.7848718916416149, |
| "eval_sts_dev_spearman_dot": 0.6991510364393221, |
| "eval_sts_dev_spearman_euclidean": 0.7071171759954781, |
| "eval_sts_dev_spearman_manhattan": 0.709827734664151, |
| "eval_sts_dev_spearman_max": 0.7848718916416149, |
| "step": 325 |
| }, |
| { |
| "epoch": 7.49025069637883, |
| "grad_norm": 3.8009250164031982, |
| "learning_rate": 2.2960514872151678e-06, |
| "loss": 0.4221, |
| "step": 330 |
| }, |
| { |
| "epoch": 7.49025069637883, |
| "eval_loss": 0.031188100576400757, |
| "eval_runtime": 7.0999, |
| "eval_samples_per_second": 211.272, |
| "eval_steps_per_second": 26.479, |
| "eval_sts_dev_pearson_cosine": 0.7838825238345812, |
| "eval_sts_dev_pearson_dot": 0.7057496676467132, |
| "eval_sts_dev_pearson_euclidean": 0.7150892410708943, |
| "eval_sts_dev_pearson_manhattan": 0.7171064711121474, |
| "eval_sts_dev_pearson_max": 0.7838825238345812, |
| "eval_sts_dev_spearman_cosine": 0.784820320759411, |
| "eval_sts_dev_spearman_dot": 0.6997042671311072, |
| "eval_sts_dev_spearman_euclidean": 0.7065608619879493, |
| "eval_sts_dev_spearman_manhattan": 0.7094620852598932, |
| "eval_sts_dev_spearman_max": 0.784820320759411, |
| "step": 330 |
| }, |
| { |
| "epoch": 7.6016713091922, |
| "eval_loss": 0.0310923233628273, |
| "eval_runtime": 6.9662, |
| "eval_samples_per_second": 215.326, |
| "eval_steps_per_second": 26.987, |
| "eval_sts_dev_pearson_cosine": 0.7843923769897447, |
| "eval_sts_dev_pearson_dot": 0.7058530968248947, |
| "eval_sts_dev_pearson_euclidean": 0.7155332189451762, |
| "eval_sts_dev_pearson_manhattan": 0.7175425736786123, |
| "eval_sts_dev_pearson_max": 0.7843923769897447, |
| "eval_sts_dev_spearman_cosine": 0.7853756910328091, |
| "eval_sts_dev_spearman_dot": 0.6999248217974418, |
| "eval_sts_dev_spearman_euclidean": 0.7071685659073802, |
| "eval_sts_dev_spearman_manhattan": 0.7099135119853421, |
| "eval_sts_dev_spearman_max": 0.7853756910328091, |
| "step": 335 |
| }, |
| { |
| "epoch": 7.713091922005571, |
| "grad_norm": 4.329479694366455, |
| "learning_rate": 2.3656288050095673e-06, |
| "loss": 0.4268, |
| "step": 340 |
| }, |
| { |
| "epoch": 7.713091922005571, |
| "eval_loss": 0.031015686690807343, |
| "eval_runtime": 6.8718, |
| "eval_samples_per_second": 218.283, |
| "eval_steps_per_second": 27.358, |
| "eval_sts_dev_pearson_cosine": 0.7848078944075182, |
| "eval_sts_dev_pearson_dot": 0.7062611613987171, |
| "eval_sts_dev_pearson_euclidean": 0.7156669541008578, |
| "eval_sts_dev_pearson_manhattan": 0.7176849379592309, |
| "eval_sts_dev_pearson_max": 0.7848078944075182, |
| "eval_sts_dev_spearman_cosine": 0.7857175803487115, |
| "eval_sts_dev_spearman_dot": 0.7006071388870717, |
| "eval_sts_dev_spearman_euclidean": 0.7074396606352066, |
| "eval_sts_dev_spearman_manhattan": 0.7101303213368534, |
| "eval_sts_dev_spearman_max": 0.7857175803487115, |
| "step": 340 |
| }, |
| { |
| "epoch": 7.8245125348189415, |
| "eval_loss": 0.030945729464292526, |
| "eval_runtime": 6.9722, |
| "eval_samples_per_second": 215.14, |
| "eval_steps_per_second": 26.964, |
| "eval_sts_dev_pearson_cosine": 0.7852280992749574, |
| "eval_sts_dev_pearson_dot": 0.7063015365766652, |
| "eval_sts_dev_pearson_euclidean": 0.71618048050416, |
| "eval_sts_dev_pearson_manhattan": 0.7181959951306995, |
| "eval_sts_dev_pearson_max": 0.7852280992749574, |
| "eval_sts_dev_spearman_cosine": 0.7861447827888495, |
| "eval_sts_dev_spearman_dot": 0.7007253260607372, |
| "eval_sts_dev_spearman_euclidean": 0.7080307843557273, |
| "eval_sts_dev_spearman_manhattan": 0.710707788624518, |
| "eval_sts_dev_spearman_max": 0.7861447827888495, |
| "step": 345 |
| }, |
| { |
| "epoch": 7.935933147632312, |
| "grad_norm": 4.521576881408691, |
| "learning_rate": 2.435206122803966e-06, |
| "loss": 0.4316, |
| "step": 350 |
| }, |
| { |
| "epoch": 7.935933147632312, |
| "eval_loss": 0.030903467908501625, |
| "eval_runtime": 6.8754, |
| "eval_samples_per_second": 218.169, |
| "eval_steps_per_second": 27.344, |
| "eval_sts_dev_pearson_cosine": 0.7857408106817081, |
| "eval_sts_dev_pearson_dot": 0.7063227803586387, |
| "eval_sts_dev_pearson_euclidean": 0.7171064497768416, |
| "eval_sts_dev_pearson_manhattan": 0.7190977579026478, |
| "eval_sts_dev_pearson_max": 0.7857408106817081, |
| "eval_sts_dev_spearman_cosine": 0.786647063435545, |
| "eval_sts_dev_spearman_dot": 0.7004210617791904, |
| "eval_sts_dev_spearman_euclidean": 0.7090060931384192, |
| "eval_sts_dev_spearman_manhattan": 0.7117304388117395, |
| "eval_sts_dev_spearman_max": 0.786647063435545, |
| "step": 350 |
| }, |
| { |
| "epoch": 8.066852367688023, |
| "eval_loss": 0.03090326115489006, |
| "eval_runtime": 6.7967, |
| "eval_samples_per_second": 220.696, |
| "eval_steps_per_second": 27.661, |
| "eval_sts_dev_pearson_cosine": 0.7860914327083659, |
| "eval_sts_dev_pearson_dot": 0.7067109311815922, |
| "eval_sts_dev_pearson_euclidean": 0.7179978723314155, |
| "eval_sts_dev_pearson_manhattan": 0.7199506434198831, |
| "eval_sts_dev_pearson_max": 0.7860914327083659, |
| "eval_sts_dev_spearman_cosine": 0.7871799411716375, |
| "eval_sts_dev_spearman_dot": 0.7005966817709771, |
| "eval_sts_dev_spearman_euclidean": 0.7099849983444726, |
| "eval_sts_dev_spearman_manhattan": 0.7126081974741519, |
| "eval_sts_dev_spearman_max": 0.7871799411716375, |
| "step": 355 |
| }, |
| { |
| "epoch": 8.178272980501394, |
| "grad_norm": 3.464381217956543, |
| "learning_rate": 2.504783440598365e-06, |
| "loss": 0.4277, |
| "step": 360 |
| }, |
| { |
| "epoch": 8.178272980501394, |
| "eval_loss": 0.030861668288707733, |
| "eval_runtime": 6.8952, |
| "eval_samples_per_second": 217.544, |
| "eval_steps_per_second": 27.265, |
| "eval_sts_dev_pearson_cosine": 0.7862113365203784, |
| "eval_sts_dev_pearson_dot": 0.7070142268847368, |
| "eval_sts_dev_pearson_euclidean": 0.7181137478219999, |
| "eval_sts_dev_pearson_manhattan": 0.7200573508948256, |
| "eval_sts_dev_pearson_max": 0.7862113365203784, |
| "eval_sts_dev_spearman_cosine": 0.7873051906331155, |
| "eval_sts_dev_spearman_dot": 0.700851803333668, |
| "eval_sts_dev_spearman_euclidean": 0.7101326235059475, |
| "eval_sts_dev_spearman_manhattan": 0.7126791959108771, |
| "eval_sts_dev_spearman_max": 0.7873051906331155, |
| "step": 360 |
| }, |
| { |
| "epoch": 8.289693593314762, |
| "eval_loss": 0.03079277276992798, |
| "eval_runtime": 7.0041, |
| "eval_samples_per_second": 214.159, |
| "eval_steps_per_second": 26.841, |
| "eval_sts_dev_pearson_cosine": 0.7861051555153227, |
| "eval_sts_dev_pearson_dot": 0.7077462081618229, |
| "eval_sts_dev_pearson_euclidean": 0.7175047036545574, |
| "eval_sts_dev_pearson_manhattan": 0.7194616943503004, |
| "eval_sts_dev_pearson_max": 0.7861051555153227, |
| "eval_sts_dev_spearman_cosine": 0.7869754283660466, |
| "eval_sts_dev_spearman_dot": 0.7018953525077267, |
| "eval_sts_dev_spearman_euclidean": 0.7093618435488815, |
| "eval_sts_dev_spearman_manhattan": 0.7120432245619701, |
| "eval_sts_dev_spearman_max": 0.7869754283660466, |
| "step": 365 |
| }, |
| { |
| "epoch": 8.401114206128133, |
| "grad_norm": 3.629032850265503, |
| "learning_rate": 2.5743607583927645e-06, |
| "loss": 0.3925, |
| "step": 370 |
| }, |
| { |
| "epoch": 8.401114206128133, |
| "eval_loss": 0.03077574074268341, |
| "eval_runtime": 6.9569, |
| "eval_samples_per_second": 215.613, |
| "eval_steps_per_second": 27.024, |
| "eval_sts_dev_pearson_cosine": 0.7860927703016911, |
| "eval_sts_dev_pearson_dot": 0.7084805810982604, |
| "eval_sts_dev_pearson_euclidean": 0.7171292733763057, |
| "eval_sts_dev_pearson_manhattan": 0.7191008391698412, |
| "eval_sts_dev_pearson_max": 0.7860927703016911, |
| "eval_sts_dev_spearman_cosine": 0.7868465023058949, |
| "eval_sts_dev_spearman_dot": 0.7026257860756843, |
| "eval_sts_dev_spearman_euclidean": 0.7087433915922463, |
| "eval_sts_dev_spearman_manhattan": 0.7115662090675204, |
| "eval_sts_dev_spearman_max": 0.7868465023058949, |
| "step": 370 |
| }, |
| { |
| "epoch": 8.512534818941504, |
| "eval_loss": 0.03077036887407303, |
| "eval_runtime": 6.8481, |
| "eval_samples_per_second": 219.038, |
| "eval_steps_per_second": 27.453, |
| "eval_sts_dev_pearson_cosine": 0.7860543259557101, |
| "eval_sts_dev_pearson_dot": 0.7090029747286515, |
| "eval_sts_dev_pearson_euclidean": 0.7168001987123229, |
| "eval_sts_dev_pearson_manhattan": 0.7187912798445806, |
| "eval_sts_dev_pearson_max": 0.7860543259557101, |
| "eval_sts_dev_spearman_cosine": 0.786577121013552, |
| "eval_sts_dev_spearman_dot": 0.7032207123703509, |
| "eval_sts_dev_spearman_euclidean": 0.7083026579268292, |
| "eval_sts_dev_spearman_manhattan": 0.7111138102646555, |
| "eval_sts_dev_spearman_max": 0.786577121013552, |
| "step": 375 |
| }, |
| { |
| "epoch": 8.623955431754874, |
| "grad_norm": 4.5424346923828125, |
| "learning_rate": 2.643938076187163e-06, |
| "loss": 0.4049, |
| "step": 380 |
| }, |
| { |
| "epoch": 8.623955431754874, |
| "eval_loss": 0.030785972252488136, |
| "eval_runtime": 6.9052, |
| "eval_samples_per_second": 217.228, |
| "eval_steps_per_second": 27.226, |
| "eval_sts_dev_pearson_cosine": 0.786338341456081, |
| "eval_sts_dev_pearson_dot": 0.7090251722360976, |
| "eval_sts_dev_pearson_euclidean": 0.7176375494602096, |
| "eval_sts_dev_pearson_manhattan": 0.7195903686388057, |
| "eval_sts_dev_pearson_max": 0.786338341456081, |
| "eval_sts_dev_spearman_cosine": 0.7869461186588641, |
| "eval_sts_dev_spearman_dot": 0.7030353980707192, |
| "eval_sts_dev_spearman_euclidean": 0.7093240329985625, |
| "eval_sts_dev_spearman_manhattan": 0.7120013731894795, |
| "eval_sts_dev_spearman_max": 0.7869461186588641, |
| "step": 380 |
| }, |
| { |
| "epoch": 8.735376044568245, |
| "eval_loss": 0.03077947534620762, |
| "eval_runtime": 6.94, |
| "eval_samples_per_second": 216.137, |
| "eval_steps_per_second": 27.089, |
| "eval_sts_dev_pearson_cosine": 0.7867836664964302, |
| "eval_sts_dev_pearson_dot": 0.7089649699768177, |
| "eval_sts_dev_pearson_euclidean": 0.7185998785212442, |
| "eval_sts_dev_pearson_manhattan": 0.7205256023581162, |
| "eval_sts_dev_pearson_max": 0.7867836664964302, |
| "eval_sts_dev_spearman_cosine": 0.7875195626790124, |
| "eval_sts_dev_spearman_dot": 0.7028351666319841, |
| "eval_sts_dev_spearman_euclidean": 0.7105482738364566, |
| "eval_sts_dev_spearman_manhattan": 0.7132642042369475, |
| "eval_sts_dev_spearman_max": 0.7875195626790124, |
| "step": 385 |
| }, |
| { |
| "epoch": 8.846796657381615, |
| "grad_norm": 3.7269480228424072, |
| "learning_rate": 2.7135153939815623e-06, |
| "loss": 0.3742, |
| "step": 390 |
| }, |
| { |
| "epoch": 8.846796657381615, |
| "eval_loss": 0.030757909640669823, |
| "eval_runtime": 6.912, |
| "eval_samples_per_second": 217.015, |
| "eval_steps_per_second": 27.199, |
| "eval_sts_dev_pearson_cosine": 0.7873307957198338, |
| "eval_sts_dev_pearson_dot": 0.7087450117938812, |
| "eval_sts_dev_pearson_euclidean": 0.7199394166229915, |
| "eval_sts_dev_pearson_manhattan": 0.7218118008402783, |
| "eval_sts_dev_pearson_max": 0.7873307957198338, |
| "eval_sts_dev_spearman_cosine": 0.7883481466120934, |
| "eval_sts_dev_spearman_dot": 0.702431533404311, |
| "eval_sts_dev_spearman_euclidean": 0.7122286167501692, |
| "eval_sts_dev_spearman_manhattan": 0.7149544811678771, |
| "eval_sts_dev_spearman_max": 0.7883481466120934, |
| "step": 390 |
| }, |
| { |
| "epoch": 8.958217270194986, |
| "eval_loss": 0.03074067085981369, |
| "eval_runtime": 7.0786, |
| "eval_samples_per_second": 211.905, |
| "eval_steps_per_second": 26.559, |
| "eval_sts_dev_pearson_cosine": 0.7875281932009626, |
| "eval_sts_dev_pearson_dot": 0.7091183187974348, |
| "eval_sts_dev_pearson_euclidean": 0.720306579358833, |
| "eval_sts_dev_pearson_manhattan": 0.7221545912209083, |
| "eval_sts_dev_pearson_max": 0.7875281932009626, |
| "eval_sts_dev_spearman_cosine": 0.7884911216315376, |
| "eval_sts_dev_spearman_dot": 0.7026504547905195, |
| "eval_sts_dev_spearman_euclidean": 0.7125846397557779, |
| "eval_sts_dev_spearman_manhattan": 0.7153917764693033, |
| "eval_sts_dev_spearman_max": 0.7884911216315376, |
| "step": 395 |
| }, |
| { |
| "epoch": 9.089136490250697, |
| "grad_norm": 3.8048255443573, |
| "learning_rate": 2.7830927117759614e-06, |
| "loss": 0.3498, |
| "step": 400 |
| }, |
| { |
| "epoch": 9.089136490250697, |
| "eval_loss": 0.03073756769299507, |
| "eval_runtime": 7.1819, |
| "eval_samples_per_second": 208.858, |
| "eval_steps_per_second": 26.177, |
| "eval_sts_dev_pearson_cosine": 0.7875285006609543, |
| "eval_sts_dev_pearson_dot": 0.709718276464936, |
| "eval_sts_dev_pearson_euclidean": 0.7202436438310591, |
| "eval_sts_dev_pearson_manhattan": 0.7220766094080024, |
| "eval_sts_dev_pearson_max": 0.7875285006609543, |
| "eval_sts_dev_spearman_cosine": 0.7885939335328866, |
| "eval_sts_dev_spearman_dot": 0.7032536436958657, |
| "eval_sts_dev_spearman_euclidean": 0.7124855846354039, |
| "eval_sts_dev_spearman_manhattan": 0.7153797502128406, |
| "eval_sts_dev_spearman_max": 0.7885939335328866, |
| "step": 400 |
| }, |
| { |
| "epoch": 9.200557103064067, |
| "eval_loss": 0.03071259893476963, |
| "eval_runtime": 6.8201, |
| "eval_samples_per_second": 219.938, |
| "eval_steps_per_second": 27.566, |
| "eval_sts_dev_pearson_cosine": 0.787184477170156, |
| "eval_sts_dev_pearson_dot": 0.7102603851217889, |
| "eval_sts_dev_pearson_euclidean": 0.7195444208609296, |
| "eval_sts_dev_pearson_manhattan": 0.7213936268781151, |
| "eval_sts_dev_pearson_max": 0.787184477170156, |
| "eval_sts_dev_spearman_cosine": 0.78809909542145, |
| "eval_sts_dev_spearman_dot": 0.7036724949513745, |
| "eval_sts_dev_spearman_euclidean": 0.7115938480269084, |
| "eval_sts_dev_spearman_manhattan": 0.7143300985487689, |
| "eval_sts_dev_spearman_max": 0.78809909542145, |
| "step": 405 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 440, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 5, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 1, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|