| { |
| "best_metric": null, |
| "best_model_checkpoint": null, |
| "epoch": 9.97880210556267, |
| "eval_steps": 250, |
| "global_step": 4390, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.022762839664248115, |
| "grad_norm": 7.620693683624268, |
| "learning_rate": 7.112628471851774e-09, |
| "loss": 0.3524, |
| "step": 10 |
| }, |
| { |
| "epoch": 0.04552567932849623, |
| "grad_norm": 6.552708625793457, |
| "learning_rate": 1.4225256943703548e-08, |
| "loss": 0.3496, |
| "step": 20 |
| }, |
| { |
| "epoch": 0.06828851899274435, |
| "grad_norm": 7.1639084815979, |
| "learning_rate": 2.1337885415555322e-08, |
| "loss": 0.3515, |
| "step": 30 |
| }, |
| { |
| "epoch": 0.09105135865699246, |
| "grad_norm": 6.914605617523193, |
| "learning_rate": 2.8450513887407095e-08, |
| "loss": 0.348, |
| "step": 40 |
| }, |
| { |
| "epoch": 0.11381419832124058, |
| "grad_norm": 6.548000812530518, |
| "learning_rate": 3.556314235925887e-08, |
| "loss": 0.3409, |
| "step": 50 |
| }, |
| { |
| "epoch": 0.1365770379854887, |
| "grad_norm": 7.1885857582092285, |
| "learning_rate": 4.2675770831110644e-08, |
| "loss": 0.347, |
| "step": 60 |
| }, |
| { |
| "epoch": 0.1593398776497368, |
| "grad_norm": 7.09645938873291, |
| "learning_rate": 4.978839930296241e-08, |
| "loss": 0.3377, |
| "step": 70 |
| }, |
| { |
| "epoch": 0.18210271731398492, |
| "grad_norm": 6.552192687988281, |
| "learning_rate": 5.690102777481419e-08, |
| "loss": 0.3317, |
| "step": 80 |
| }, |
| { |
| "epoch": 0.20486555697823303, |
| "grad_norm": 6.317521095275879, |
| "learning_rate": 6.401365624666596e-08, |
| "loss": 0.3279, |
| "step": 90 |
| }, |
| { |
| "epoch": 0.22762839664248116, |
| "grad_norm": 6.849682807922363, |
| "learning_rate": 7.112628471851774e-08, |
| "loss": 0.3264, |
| "step": 100 |
| }, |
| { |
| "epoch": 0.25039123630672927, |
| "grad_norm": 6.271164894104004, |
| "learning_rate": 7.823891319036951e-08, |
| "loss": 0.3116, |
| "step": 110 |
| }, |
| { |
| "epoch": 0.2731540759709774, |
| "grad_norm": 6.1660895347595215, |
| "learning_rate": 8.535154166222129e-08, |
| "loss": 0.3055, |
| "step": 120 |
| }, |
| { |
| "epoch": 0.2959169156352255, |
| "grad_norm": 5.520643711090088, |
| "learning_rate": 9.246417013407305e-08, |
| "loss": 0.3042, |
| "step": 130 |
| }, |
| { |
| "epoch": 0.3186797552994736, |
| "grad_norm": 5.591308116912842, |
| "learning_rate": 9.957679860592482e-08, |
| "loss": 0.2928, |
| "step": 140 |
| }, |
| { |
| "epoch": 0.3414425949637217, |
| "grad_norm": 5.716772079467773, |
| "learning_rate": 1.0668942707777661e-07, |
| "loss": 0.2835, |
| "step": 150 |
| }, |
| { |
| "epoch": 0.36420543462796984, |
| "grad_norm": 5.372351169586182, |
| "learning_rate": 1.1380205554962838e-07, |
| "loss": 0.2665, |
| "step": 160 |
| }, |
| { |
| "epoch": 0.386968274292218, |
| "grad_norm": 5.111922264099121, |
| "learning_rate": 1.2091468402148016e-07, |
| "loss": 0.2665, |
| "step": 170 |
| }, |
| { |
| "epoch": 0.40973111395646605, |
| "grad_norm": 4.756952285766602, |
| "learning_rate": 1.2802731249333193e-07, |
| "loss": 0.2486, |
| "step": 180 |
| }, |
| { |
| "epoch": 0.4324939536207142, |
| "grad_norm": 4.510580539703369, |
| "learning_rate": 1.351399409651837e-07, |
| "loss": 0.2387, |
| "step": 190 |
| }, |
| { |
| "epoch": 0.4552567932849623, |
| "grad_norm": 4.627274990081787, |
| "learning_rate": 1.4225256943703549e-07, |
| "loss": 0.2283, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.4780196329492104, |
| "grad_norm": 4.426296234130859, |
| "learning_rate": 1.4936519790888725e-07, |
| "loss": 0.2237, |
| "step": 210 |
| }, |
| { |
| "epoch": 0.5007824726134585, |
| "grad_norm": 4.098926544189453, |
| "learning_rate": 1.5647782638073902e-07, |
| "loss": 0.2204, |
| "step": 220 |
| }, |
| { |
| "epoch": 0.5235453122777066, |
| "grad_norm": 3.592745065689087, |
| "learning_rate": 1.635904548525908e-07, |
| "loss": 0.205, |
| "step": 230 |
| }, |
| { |
| "epoch": 0.5463081519419548, |
| "grad_norm": 3.4670684337615967, |
| "learning_rate": 1.7070308332444258e-07, |
| "loss": 0.2002, |
| "step": 240 |
| }, |
| { |
| "epoch": 0.5690709916062029, |
| "grad_norm": 3.4649081230163574, |
| "learning_rate": 1.7781571179629434e-07, |
| "loss": 0.1904, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.5690709916062029, |
| "eval_loss": 0.03304145112633705, |
| "eval_runtime": 3.1631, |
| "eval_samples_per_second": 474.224, |
| "eval_steps_per_second": 7.588, |
| "eval_sts_dev_pearson_cosine": 0.7871582161337227, |
| "eval_sts_dev_pearson_dot": 0.712522215037403, |
| "eval_sts_dev_pearson_euclidean": 0.7266842290027538, |
| "eval_sts_dev_pearson_manhattan": 0.7281230919411741, |
| "eval_sts_dev_pearson_max": 0.7871582161337227, |
| "eval_sts_dev_spearman_cosine": 0.7920626405951849, |
| "eval_sts_dev_spearman_dot": 0.7029699912720668, |
| "eval_sts_dev_spearman_euclidean": 0.7209681248102595, |
| "eval_sts_dev_spearman_manhattan": 0.7231977500622078, |
| "eval_sts_dev_spearman_max": 0.7920626405951849, |
| "step": 250 |
| }, |
| { |
| "epoch": 0.591833831270451, |
| "grad_norm": 2.9149794578552246, |
| "learning_rate": 1.849283402681461e-07, |
| "loss": 0.1834, |
| "step": 260 |
| }, |
| { |
| "epoch": 0.614596670934699, |
| "grad_norm": 2.7359046936035156, |
| "learning_rate": 1.9204096873999788e-07, |
| "loss": 0.1776, |
| "step": 270 |
| }, |
| { |
| "epoch": 0.6373595105989472, |
| "grad_norm": 2.24949312210083, |
| "learning_rate": 1.9915359721184964e-07, |
| "loss": 0.1665, |
| "step": 280 |
| }, |
| { |
| "epoch": 0.6601223502631953, |
| "grad_norm": 2.2236201763153076, |
| "learning_rate": 2.062662256837014e-07, |
| "loss": 0.1625, |
| "step": 290 |
| }, |
| { |
| "epoch": 0.6828851899274434, |
| "grad_norm": 1.932987928390503, |
| "learning_rate": 2.1337885415555323e-07, |
| "loss": 0.1585, |
| "step": 300 |
| }, |
| { |
| "epoch": 0.7056480295916916, |
| "grad_norm": 1.819048285484314, |
| "learning_rate": 2.20491482627405e-07, |
| "loss": 0.1522, |
| "step": 310 |
| }, |
| { |
| "epoch": 0.7284108692559397, |
| "grad_norm": 1.5515508651733398, |
| "learning_rate": 2.2760411109925676e-07, |
| "loss": 0.1552, |
| "step": 320 |
| }, |
| { |
| "epoch": 0.7511737089201878, |
| "grad_norm": 1.398522973060608, |
| "learning_rate": 2.3471673957110853e-07, |
| "loss": 0.1448, |
| "step": 330 |
| }, |
| { |
| "epoch": 0.773936548584436, |
| "grad_norm": 1.3401200771331787, |
| "learning_rate": 2.418293680429603e-07, |
| "loss": 0.1428, |
| "step": 340 |
| }, |
| { |
| "epoch": 0.796699388248684, |
| "grad_norm": 0.7522925734519958, |
| "learning_rate": 2.4894199651481206e-07, |
| "loss": 0.1401, |
| "step": 350 |
| }, |
| { |
| "epoch": 0.8194622279129321, |
| "grad_norm": 0.8474672436714172, |
| "learning_rate": 2.5605462498666385e-07, |
| "loss": 0.1399, |
| "step": 360 |
| }, |
| { |
| "epoch": 0.8422250675771803, |
| "grad_norm": 0.7029187083244324, |
| "learning_rate": 2.631672534585156e-07, |
| "loss": 0.1389, |
| "step": 370 |
| }, |
| { |
| "epoch": 0.8649879072414284, |
| "grad_norm": 0.7115994095802307, |
| "learning_rate": 2.702798819303674e-07, |
| "loss": 0.1372, |
| "step": 380 |
| }, |
| { |
| "epoch": 0.8877507469056765, |
| "grad_norm": 0.8128587007522583, |
| "learning_rate": 2.773925104022192e-07, |
| "loss": 0.1338, |
| "step": 390 |
| }, |
| { |
| "epoch": 0.9105135865699246, |
| "grad_norm": 0.694238543510437, |
| "learning_rate": 2.8450513887407097e-07, |
| "loss": 0.1361, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.9332764262341727, |
| "grad_norm": 0.6458141207695007, |
| "learning_rate": 2.916177673459227e-07, |
| "loss": 0.1389, |
| "step": 410 |
| }, |
| { |
| "epoch": 0.9560392658984208, |
| "grad_norm": 0.653184711933136, |
| "learning_rate": 2.987303958177745e-07, |
| "loss": 0.1328, |
| "step": 420 |
| }, |
| { |
| "epoch": 0.9788021055626689, |
| "grad_norm": 0.7768956422805786, |
| "learning_rate": 3.0584302428962624e-07, |
| "loss": 0.1375, |
| "step": 430 |
| }, |
| { |
| "epoch": 1.0, |
| "grad_norm": 0.32018622756004333, |
| "learning_rate": 3.1295565276147804e-07, |
| "loss": 0.1266, |
| "step": 440 |
| }, |
| { |
| "epoch": 1.022762839664248, |
| "grad_norm": 0.6019369959831238, |
| "learning_rate": 3.200682812333298e-07, |
| "loss": 0.1269, |
| "step": 450 |
| }, |
| { |
| "epoch": 1.0455256793284962, |
| "grad_norm": 0.5142800211906433, |
| "learning_rate": 3.271809097051816e-07, |
| "loss": 0.1262, |
| "step": 460 |
| }, |
| { |
| "epoch": 1.0682885189927442, |
| "grad_norm": 0.5277538299560547, |
| "learning_rate": 3.342935381770333e-07, |
| "loss": 0.127, |
| "step": 470 |
| }, |
| { |
| "epoch": 1.0910513586569925, |
| "grad_norm": 0.6493993401527405, |
| "learning_rate": 3.4140616664888515e-07, |
| "loss": 0.1306, |
| "step": 480 |
| }, |
| { |
| "epoch": 1.1138141983212406, |
| "grad_norm": 0.49625104665756226, |
| "learning_rate": 3.485187951207369e-07, |
| "loss": 0.1266, |
| "step": 490 |
| }, |
| { |
| "epoch": 1.1365770379854887, |
| "grad_norm": 0.4953573942184448, |
| "learning_rate": 3.556314235925887e-07, |
| "loss": 0.1247, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1365770379854887, |
| "eval_loss": 0.04051072895526886, |
| "eval_runtime": 3.1508, |
| "eval_samples_per_second": 476.068, |
| "eval_steps_per_second": 7.617, |
| "eval_sts_dev_pearson_cosine": 0.7884134511298457, |
| "eval_sts_dev_pearson_dot": 0.7101637689598334, |
| "eval_sts_dev_pearson_euclidean": 0.7398882194893972, |
| "eval_sts_dev_pearson_manhattan": 0.7407996939857429, |
| "eval_sts_dev_pearson_max": 0.7884134511298457, |
| "eval_sts_dev_spearman_cosine": 0.7995145632068007, |
| "eval_sts_dev_spearman_dot": 0.6959219164369063, |
| "eval_sts_dev_spearman_euclidean": 0.7373979245023166, |
| "eval_sts_dev_spearman_manhattan": 0.7393614960639477, |
| "eval_sts_dev_spearman_max": 0.7995145632068007, |
| "step": 500 |
| }, |
| { |
| "epoch": 1.1593398776497368, |
| "grad_norm": 0.6302499771118164, |
| "learning_rate": 3.627440520644404e-07, |
| "loss": 0.1258, |
| "step": 510 |
| }, |
| { |
| "epoch": 1.1821027173139849, |
| "grad_norm": 0.559424638748169, |
| "learning_rate": 3.698566805362922e-07, |
| "loss": 0.1277, |
| "step": 520 |
| }, |
| { |
| "epoch": 1.204865556978233, |
| "grad_norm": 0.5976749658584595, |
| "learning_rate": 3.76969309008144e-07, |
| "loss": 0.13, |
| "step": 530 |
| }, |
| { |
| "epoch": 1.2276283966424812, |
| "grad_norm": 0.6781278252601624, |
| "learning_rate": 3.8408193747999575e-07, |
| "loss": 0.1291, |
| "step": 540 |
| }, |
| { |
| "epoch": 1.2503912363067293, |
| "grad_norm": 0.5501216053962708, |
| "learning_rate": 3.9119456595184754e-07, |
| "loss": 0.1287, |
| "step": 550 |
| }, |
| { |
| "epoch": 1.2731540759709774, |
| "grad_norm": 0.49228161573410034, |
| "learning_rate": 3.983071944236993e-07, |
| "loss": 0.1233, |
| "step": 560 |
| }, |
| { |
| "epoch": 1.2959169156352255, |
| "grad_norm": 0.5892780423164368, |
| "learning_rate": 4.054198228955511e-07, |
| "loss": 0.1242, |
| "step": 570 |
| }, |
| { |
| "epoch": 1.3186797552994736, |
| "grad_norm": 0.5604830980300903, |
| "learning_rate": 4.125324513674028e-07, |
| "loss": 0.1242, |
| "step": 580 |
| }, |
| { |
| "epoch": 1.3414425949637216, |
| "grad_norm": 0.46688252687454224, |
| "learning_rate": 4.196450798392546e-07, |
| "loss": 0.1227, |
| "step": 590 |
| }, |
| { |
| "epoch": 1.3642054346279697, |
| "grad_norm": 0.6248797178268433, |
| "learning_rate": 4.2675770831110646e-07, |
| "loss": 0.1201, |
| "step": 600 |
| }, |
| { |
| "epoch": 1.386968274292218, |
| "grad_norm": 0.5143482685089111, |
| "learning_rate": 4.338703367829582e-07, |
| "loss": 0.1247, |
| "step": 610 |
| }, |
| { |
| "epoch": 1.409731113956466, |
| "grad_norm": 0.50174880027771, |
| "learning_rate": 4.4098296525481e-07, |
| "loss": 0.1249, |
| "step": 620 |
| }, |
| { |
| "epoch": 1.4324939536207142, |
| "grad_norm": 0.4486837685108185, |
| "learning_rate": 4.4809559372666173e-07, |
| "loss": 0.1213, |
| "step": 630 |
| }, |
| { |
| "epoch": 1.4552567932849623, |
| "grad_norm": 0.5088754892349243, |
| "learning_rate": 4.552082221985135e-07, |
| "loss": 0.1217, |
| "step": 640 |
| }, |
| { |
| "epoch": 1.4780196329492103, |
| "grad_norm": 0.5201794505119324, |
| "learning_rate": 4.6232085067036526e-07, |
| "loss": 0.1204, |
| "step": 650 |
| }, |
| { |
| "epoch": 1.5007824726134587, |
| "grad_norm": 0.4499863088130951, |
| "learning_rate": 4.6943347914221705e-07, |
| "loss": 0.1191, |
| "step": 660 |
| }, |
| { |
| "epoch": 1.5235453122777067, |
| "grad_norm": 0.5528525114059448, |
| "learning_rate": 4.765461076140688e-07, |
| "loss": 0.1163, |
| "step": 670 |
| }, |
| { |
| "epoch": 1.5463081519419548, |
| "grad_norm": 0.475242018699646, |
| "learning_rate": 4.836587360859206e-07, |
| "loss": 0.1171, |
| "step": 680 |
| }, |
| { |
| "epoch": 1.569070991606203, |
| "grad_norm": 0.5379391312599182, |
| "learning_rate": 4.907713645577724e-07, |
| "loss": 0.1208, |
| "step": 690 |
| }, |
| { |
| "epoch": 1.591833831270451, |
| "grad_norm": 0.4778967797756195, |
| "learning_rate": 4.978839930296241e-07, |
| "loss": 0.1194, |
| "step": 700 |
| }, |
| { |
| "epoch": 1.614596670934699, |
| "grad_norm": 0.5140128135681152, |
| "learning_rate": 5.04996621501476e-07, |
| "loss": 0.1173, |
| "step": 710 |
| }, |
| { |
| "epoch": 1.6373595105989471, |
| "grad_norm": 0.448091059923172, |
| "learning_rate": 5.121092499733277e-07, |
| "loss": 0.1177, |
| "step": 720 |
| }, |
| { |
| "epoch": 1.6601223502631952, |
| "grad_norm": 0.5216450691223145, |
| "learning_rate": 5.192218784451794e-07, |
| "loss": 0.1148, |
| "step": 730 |
| }, |
| { |
| "epoch": 1.6828851899274433, |
| "grad_norm": 0.4396895170211792, |
| "learning_rate": 5.263345069170312e-07, |
| "loss": 0.1134, |
| "step": 740 |
| }, |
| { |
| "epoch": 1.7056480295916916, |
| "grad_norm": 0.4478048086166382, |
| "learning_rate": 5.33447135388883e-07, |
| "loss": 0.1167, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.7056480295916916, |
| "eval_loss": 0.0421689935028553, |
| "eval_runtime": 3.1447, |
| "eval_samples_per_second": 476.988, |
| "eval_steps_per_second": 7.632, |
| "eval_sts_dev_pearson_cosine": 0.7979696362389916, |
| "eval_sts_dev_pearson_dot": 0.7067263755448246, |
| "eval_sts_dev_pearson_euclidean": 0.7541572372535862, |
| "eval_sts_dev_pearson_manhattan": 0.7549658870605886, |
| "eval_sts_dev_pearson_max": 0.7979696362389916, |
| "eval_sts_dev_spearman_cosine": 0.8091625743286466, |
| "eval_sts_dev_spearman_dot": 0.6900878688098099, |
| "eval_sts_dev_spearman_euclidean": 0.7547432017784844, |
| "eval_sts_dev_spearman_manhattan": 0.7564738854563926, |
| "eval_sts_dev_spearman_max": 0.8091625743286466, |
| "step": 750 |
| }, |
| { |
| "epoch": 1.7284108692559397, |
| "grad_norm": 0.5125904083251953, |
| "learning_rate": 5.405597638607348e-07, |
| "loss": 0.1145, |
| "step": 760 |
| }, |
| { |
| "epoch": 1.7511737089201878, |
| "grad_norm": 0.5108799934387207, |
| "learning_rate": 5.476723923325865e-07, |
| "loss": 0.114, |
| "step": 770 |
| }, |
| { |
| "epoch": 1.773936548584436, |
| "grad_norm": 0.6547980308532715, |
| "learning_rate": 5.547850208044384e-07, |
| "loss": 0.1136, |
| "step": 780 |
| }, |
| { |
| "epoch": 1.7966993882486841, |
| "grad_norm": 0.5986394882202148, |
| "learning_rate": 5.618976492762901e-07, |
| "loss": 0.1123, |
| "step": 790 |
| }, |
| { |
| "epoch": 1.8194622279129322, |
| "grad_norm": 0.4110976457595825, |
| "learning_rate": 5.690102777481419e-07, |
| "loss": 0.1115, |
| "step": 800 |
| }, |
| { |
| "epoch": 1.8422250675771803, |
| "grad_norm": 0.4697369933128357, |
| "learning_rate": 5.761229062199937e-07, |
| "loss": 0.1127, |
| "step": 810 |
| }, |
| { |
| "epoch": 1.8649879072414284, |
| "grad_norm": 0.44591763615608215, |
| "learning_rate": 5.832355346918454e-07, |
| "loss": 0.1137, |
| "step": 820 |
| }, |
| { |
| "epoch": 1.8877507469056765, |
| "grad_norm": 0.4944719076156616, |
| "learning_rate": 5.903481631636972e-07, |
| "loss": 0.1137, |
| "step": 830 |
| }, |
| { |
| "epoch": 1.9105135865699245, |
| "grad_norm": 0.4511684775352478, |
| "learning_rate": 5.97460791635549e-07, |
| "loss": 0.1123, |
| "step": 840 |
| }, |
| { |
| "epoch": 1.9332764262341726, |
| "grad_norm": 0.41804537177085876, |
| "learning_rate": 6.045734201074007e-07, |
| "loss": 0.1115, |
| "step": 850 |
| }, |
| { |
| "epoch": 1.9560392658984207, |
| "grad_norm": 0.4134292006492615, |
| "learning_rate": 6.116860485792525e-07, |
| "loss": 0.1105, |
| "step": 860 |
| }, |
| { |
| "epoch": 1.9788021055626688, |
| "grad_norm": 0.4234001934528351, |
| "learning_rate": 6.187986770511043e-07, |
| "loss": 0.1133, |
| "step": 870 |
| }, |
| { |
| "epoch": 2.0, |
| "grad_norm": 0.2553180754184723, |
| "learning_rate": 6.259113055229561e-07, |
| "loss": 0.1049, |
| "step": 880 |
| }, |
| { |
| "epoch": 2.022762839664248, |
| "grad_norm": 0.40301546454429626, |
| "learning_rate": 6.330239339948078e-07, |
| "loss": 0.1091, |
| "step": 890 |
| }, |
| { |
| "epoch": 2.045525679328496, |
| "grad_norm": 0.4319583773612976, |
| "learning_rate": 6.401365624666596e-07, |
| "loss": 0.111, |
| "step": 900 |
| }, |
| { |
| "epoch": 2.0682885189927442, |
| "grad_norm": 0.6076052188873291, |
| "learning_rate": 6.472491909385113e-07, |
| "loss": 0.1101, |
| "step": 910 |
| }, |
| { |
| "epoch": 2.0910513586569923, |
| "grad_norm": 0.4324122965335846, |
| "learning_rate": 6.543618194103632e-07, |
| "loss": 0.1078, |
| "step": 920 |
| }, |
| { |
| "epoch": 2.1138141983212404, |
| "grad_norm": 0.4718656837940216, |
| "learning_rate": 6.61474447882215e-07, |
| "loss": 0.1097, |
| "step": 930 |
| }, |
| { |
| "epoch": 2.1365770379854885, |
| "grad_norm": 0.42693474888801575, |
| "learning_rate": 6.685870763540666e-07, |
| "loss": 0.108, |
| "step": 940 |
| }, |
| { |
| "epoch": 2.159339877649737, |
| "grad_norm": 0.4170973300933838, |
| "learning_rate": 6.756997048259186e-07, |
| "loss": 0.1077, |
| "step": 950 |
| }, |
| { |
| "epoch": 2.182102717313985, |
| "grad_norm": 0.44675740599632263, |
| "learning_rate": 6.828123332977703e-07, |
| "loss": 0.1087, |
| "step": 960 |
| }, |
| { |
| "epoch": 2.204865556978233, |
| "grad_norm": 0.3757316470146179, |
| "learning_rate": 6.89924961769622e-07, |
| "loss": 0.1058, |
| "step": 970 |
| }, |
| { |
| "epoch": 2.2276283966424812, |
| "grad_norm": 0.3848642408847809, |
| "learning_rate": 6.970375902414738e-07, |
| "loss": 0.1071, |
| "step": 980 |
| }, |
| { |
| "epoch": 2.2503912363067293, |
| "grad_norm": 0.4413306713104248, |
| "learning_rate": 7.041502187133256e-07, |
| "loss": 0.1058, |
| "step": 990 |
| }, |
| { |
| "epoch": 2.2731540759709774, |
| "grad_norm": 0.38140761852264404, |
| "learning_rate": 7.112628471851774e-07, |
| "loss": 0.1104, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.2731540759709774, |
| "eval_loss": 0.04340244457125664, |
| "eval_runtime": 3.1758, |
| "eval_samples_per_second": 472.317, |
| "eval_steps_per_second": 7.557, |
| "eval_sts_dev_pearson_cosine": 0.8046339175494668, |
| "eval_sts_dev_pearson_dot": 0.7044830518743517, |
| "eval_sts_dev_pearson_euclidean": 0.7649588273720311, |
| "eval_sts_dev_pearson_manhattan": 0.7658573793768735, |
| "eval_sts_dev_pearson_max": 0.8046339175494668, |
| "eval_sts_dev_spearman_cosine": 0.8156078510955985, |
| "eval_sts_dev_spearman_dot": 0.686432354234728, |
| "eval_sts_dev_spearman_euclidean": 0.7675914883765887, |
| "eval_sts_dev_spearman_manhattan": 0.769330201012383, |
| "eval_sts_dev_spearman_max": 0.8156078510955985, |
| "step": 1000 |
| }, |
| { |
| "epoch": 2.2959169156352255, |
| "grad_norm": 0.39764779806137085, |
| "learning_rate": 7.183754756570291e-07, |
| "loss": 0.1036, |
| "step": 1010 |
| }, |
| { |
| "epoch": 2.3186797552994736, |
| "grad_norm": 0.45731303095817566, |
| "learning_rate": 7.254881041288809e-07, |
| "loss": 0.1068, |
| "step": 1020 |
| }, |
| { |
| "epoch": 2.3414425949637216, |
| "grad_norm": 0.39621132612228394, |
| "learning_rate": 7.326007326007327e-07, |
| "loss": 0.1033, |
| "step": 1030 |
| }, |
| { |
| "epoch": 2.3642054346279697, |
| "grad_norm": 0.4059266448020935, |
| "learning_rate": 7.397133610725844e-07, |
| "loss": 0.1058, |
| "step": 1040 |
| }, |
| { |
| "epoch": 2.386968274292218, |
| "grad_norm": 0.3780401945114136, |
| "learning_rate": 7.468259895444362e-07, |
| "loss": 0.105, |
| "step": 1050 |
| }, |
| { |
| "epoch": 2.409731113956466, |
| "grad_norm": 0.39316442608833313, |
| "learning_rate": 7.53938618016288e-07, |
| "loss": 0.1052, |
| "step": 1060 |
| }, |
| { |
| "epoch": 2.432493953620714, |
| "grad_norm": 0.42634522914886475, |
| "learning_rate": 7.610512464881398e-07, |
| "loss": 0.1013, |
| "step": 1070 |
| }, |
| { |
| "epoch": 2.4552567932849625, |
| "grad_norm": 0.42909443378448486, |
| "learning_rate": 7.681638749599915e-07, |
| "loss": 0.1037, |
| "step": 1080 |
| }, |
| { |
| "epoch": 2.4780196329492106, |
| "grad_norm": 0.404904842376709, |
| "learning_rate": 7.752765034318432e-07, |
| "loss": 0.1031, |
| "step": 1090 |
| }, |
| { |
| "epoch": 2.5007824726134587, |
| "grad_norm": 0.39788374304771423, |
| "learning_rate": 7.823891319036951e-07, |
| "loss": 0.1057, |
| "step": 1100 |
| }, |
| { |
| "epoch": 2.5235453122777067, |
| "grad_norm": 0.39511770009994507, |
| "learning_rate": 7.895017603755468e-07, |
| "loss": 0.1051, |
| "step": 1110 |
| }, |
| { |
| "epoch": 2.546308151941955, |
| "grad_norm": 0.3775276839733124, |
| "learning_rate": 7.966143888473986e-07, |
| "loss": 0.1019, |
| "step": 1120 |
| }, |
| { |
| "epoch": 2.569070991606203, |
| "grad_norm": 0.36302006244659424, |
| "learning_rate": 8.037270173192504e-07, |
| "loss": 0.1018, |
| "step": 1130 |
| }, |
| { |
| "epoch": 2.591833831270451, |
| "grad_norm": 0.397919625043869, |
| "learning_rate": 8.108396457911022e-07, |
| "loss": 0.1007, |
| "step": 1140 |
| }, |
| { |
| "epoch": 2.614596670934699, |
| "grad_norm": 0.39391639828681946, |
| "learning_rate": 8.179522742629539e-07, |
| "loss": 0.1035, |
| "step": 1150 |
| }, |
| { |
| "epoch": 2.637359510598947, |
| "grad_norm": 0.3966914117336273, |
| "learning_rate": 8.250649027348056e-07, |
| "loss": 0.1032, |
| "step": 1160 |
| }, |
| { |
| "epoch": 2.660122350263195, |
| "grad_norm": 0.47250422835350037, |
| "learning_rate": 8.321775312066576e-07, |
| "loss": 0.1036, |
| "step": 1170 |
| }, |
| { |
| "epoch": 2.6828851899274433, |
| "grad_norm": 0.41388604044914246, |
| "learning_rate": 8.392901596785092e-07, |
| "loss": 0.0971, |
| "step": 1180 |
| }, |
| { |
| "epoch": 2.705648029591692, |
| "grad_norm": 0.3682123124599457, |
| "learning_rate": 8.46402788150361e-07, |
| "loss": 0.1015, |
| "step": 1190 |
| }, |
| { |
| "epoch": 2.7284108692559395, |
| "grad_norm": 0.49675652384757996, |
| "learning_rate": 8.535154166222129e-07, |
| "loss": 0.104, |
| "step": 1200 |
| }, |
| { |
| "epoch": 2.751173708920188, |
| "grad_norm": 0.3492577075958252, |
| "learning_rate": 8.606280450940646e-07, |
| "loss": 0.1007, |
| "step": 1210 |
| }, |
| { |
| "epoch": 2.773936548584436, |
| "grad_norm": 0.34821173548698425, |
| "learning_rate": 8.677406735659164e-07, |
| "loss": 0.102, |
| "step": 1220 |
| }, |
| { |
| "epoch": 2.796699388248684, |
| "grad_norm": 0.37269824743270874, |
| "learning_rate": 8.748533020377681e-07, |
| "loss": 0.0994, |
| "step": 1230 |
| }, |
| { |
| "epoch": 2.819462227912932, |
| "grad_norm": 0.39633363485336304, |
| "learning_rate": 8.8196593050962e-07, |
| "loss": 0.0972, |
| "step": 1240 |
| }, |
| { |
| "epoch": 2.8422250675771803, |
| "grad_norm": 0.3534165322780609, |
| "learning_rate": 8.890785589814717e-07, |
| "loss": 0.0969, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.8422250675771803, |
| "eval_loss": 0.043736640363931656, |
| "eval_runtime": 3.1459, |
| "eval_samples_per_second": 476.804, |
| "eval_steps_per_second": 7.629, |
| "eval_sts_dev_pearson_cosine": 0.8088895757064547, |
| "eval_sts_dev_pearson_dot": 0.7030740598598051, |
| "eval_sts_dev_pearson_euclidean": 0.7723078699673291, |
| "eval_sts_dev_pearson_manhattan": 0.7732962792712872, |
| "eval_sts_dev_pearson_max": 0.8088895757064547, |
| "eval_sts_dev_spearman_cosine": 0.8185494576067605, |
| "eval_sts_dev_spearman_dot": 0.6845694825698349, |
| "eval_sts_dev_spearman_euclidean": 0.7756955565337718, |
| "eval_sts_dev_spearman_manhattan": 0.7776997356093678, |
| "eval_sts_dev_spearman_max": 0.8185494576067605, |
| "step": 1250 |
| }, |
| { |
| "epoch": 2.8649879072414284, |
| "grad_norm": 0.438899964094162, |
| "learning_rate": 8.961911874533235e-07, |
| "loss": 0.0968, |
| "step": 1260 |
| }, |
| { |
| "epoch": 2.8877507469056765, |
| "grad_norm": 0.3127659261226654, |
| "learning_rate": 9.033038159251752e-07, |
| "loss": 0.1003, |
| "step": 1270 |
| }, |
| { |
| "epoch": 2.9105135865699245, |
| "grad_norm": 0.41025590896606445, |
| "learning_rate": 9.10416444397027e-07, |
| "loss": 0.1036, |
| "step": 1280 |
| }, |
| { |
| "epoch": 2.9332764262341726, |
| "grad_norm": 0.3556434214115143, |
| "learning_rate": 9.175290728688788e-07, |
| "loss": 0.0969, |
| "step": 1290 |
| }, |
| { |
| "epoch": 2.9560392658984207, |
| "grad_norm": 0.3952799439430237, |
| "learning_rate": 9.246417013407305e-07, |
| "loss": 0.0965, |
| "step": 1300 |
| }, |
| { |
| "epoch": 2.978802105562669, |
| "grad_norm": 0.35889461636543274, |
| "learning_rate": 9.317543298125824e-07, |
| "loss": 0.0974, |
| "step": 1310 |
| }, |
| { |
| "epoch": 3.0, |
| "grad_norm": 0.1986362338066101, |
| "learning_rate": 9.388669582844341e-07, |
| "loss": 0.0905, |
| "step": 1320 |
| }, |
| { |
| "epoch": 3.022762839664248, |
| "grad_norm": 0.3811950981616974, |
| "learning_rate": 9.459795867562858e-07, |
| "loss": 0.1006, |
| "step": 1330 |
| }, |
| { |
| "epoch": 3.045525679328496, |
| "grad_norm": 0.3444836735725403, |
| "learning_rate": 9.530922152281376e-07, |
| "loss": 0.0952, |
| "step": 1340 |
| }, |
| { |
| "epoch": 3.0682885189927442, |
| "grad_norm": 0.38668960332870483, |
| "learning_rate": 9.602048436999895e-07, |
| "loss": 0.0971, |
| "step": 1350 |
| }, |
| { |
| "epoch": 3.0910513586569923, |
| "grad_norm": 0.3300378620624542, |
| "learning_rate": 9.673174721718413e-07, |
| "loss": 0.0943, |
| "step": 1360 |
| }, |
| { |
| "epoch": 3.1138141983212404, |
| "grad_norm": 0.35947421193122864, |
| "learning_rate": 9.74430100643693e-07, |
| "loss": 0.0996, |
| "step": 1370 |
| }, |
| { |
| "epoch": 3.1365770379854885, |
| "grad_norm": 0.33226439356803894, |
| "learning_rate": 9.815427291155448e-07, |
| "loss": 0.0971, |
| "step": 1380 |
| }, |
| { |
| "epoch": 3.159339877649737, |
| "grad_norm": 0.34526577591896057, |
| "learning_rate": 9.886553575873965e-07, |
| "loss": 0.097, |
| "step": 1390 |
| }, |
| { |
| "epoch": 3.182102717313985, |
| "grad_norm": 0.3941132128238678, |
| "learning_rate": 9.957679860592482e-07, |
| "loss": 0.0937, |
| "step": 1400 |
| }, |
| { |
| "epoch": 3.204865556978233, |
| "grad_norm": 0.3083663582801819, |
| "learning_rate": 1.0028806145311e-06, |
| "loss": 0.0955, |
| "step": 1410 |
| }, |
| { |
| "epoch": 3.2276283966424812, |
| "grad_norm": 0.33311647176742554, |
| "learning_rate": 1.009993243002952e-06, |
| "loss": 0.0963, |
| "step": 1420 |
| }, |
| { |
| "epoch": 3.2503912363067293, |
| "grad_norm": 0.35510948300361633, |
| "learning_rate": 1.0171058714748037e-06, |
| "loss": 0.0938, |
| "step": 1430 |
| }, |
| { |
| "epoch": 3.2731540759709774, |
| "grad_norm": 0.32245343923568726, |
| "learning_rate": 1.0242184999466554e-06, |
| "loss": 0.0986, |
| "step": 1440 |
| }, |
| { |
| "epoch": 3.2959169156352255, |
| "grad_norm": 0.34923267364501953, |
| "learning_rate": 1.0313311284185071e-06, |
| "loss": 0.0949, |
| "step": 1450 |
| }, |
| { |
| "epoch": 3.3186797552994736, |
| "grad_norm": 0.3278236985206604, |
| "learning_rate": 1.0384437568903589e-06, |
| "loss": 0.0932, |
| "step": 1460 |
| }, |
| { |
| "epoch": 3.3414425949637216, |
| "grad_norm": 0.3266041576862335, |
| "learning_rate": 1.0455563853622106e-06, |
| "loss": 0.096, |
| "step": 1470 |
| }, |
| { |
| "epoch": 3.3642054346279697, |
| "grad_norm": 0.3194010555744171, |
| "learning_rate": 1.0526690138340624e-06, |
| "loss": 0.0919, |
| "step": 1480 |
| }, |
| { |
| "epoch": 3.386968274292218, |
| "grad_norm": 0.2891862988471985, |
| "learning_rate": 1.0597816423059143e-06, |
| "loss": 0.093, |
| "step": 1490 |
| }, |
| { |
| "epoch": 3.409731113956466, |
| "grad_norm": 0.3133799433708191, |
| "learning_rate": 1.066894270777766e-06, |
| "loss": 0.0925, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.409731113956466, |
| "eval_loss": 0.04378899559378624, |
| "eval_runtime": 3.0984, |
| "eval_samples_per_second": 484.121, |
| "eval_steps_per_second": 7.746, |
| "eval_sts_dev_pearson_cosine": 0.811446634517756, |
| "eval_sts_dev_pearson_dot": 0.7028657760262548, |
| "eval_sts_dev_pearson_euclidean": 0.7772158914948109, |
| "eval_sts_dev_pearson_manhattan": 0.7782468091124333, |
| "eval_sts_dev_pearson_max": 0.811446634517756, |
| "eval_sts_dev_spearman_cosine": 0.8200700667481104, |
| "eval_sts_dev_spearman_dot": 0.6837762987069376, |
| "eval_sts_dev_spearman_euclidean": 0.7809451824112935, |
| "eval_sts_dev_spearman_manhattan": 0.782912075510424, |
| "eval_sts_dev_spearman_max": 0.8200700667481104, |
| "step": 1500 |
| }, |
| { |
| "epoch": 3.432493953620714, |
| "grad_norm": 0.35416606068611145, |
| "learning_rate": 1.0740068992496178e-06, |
| "loss": 0.0935, |
| "step": 1510 |
| }, |
| { |
| "epoch": 3.4552567932849625, |
| "grad_norm": 0.30686748027801514, |
| "learning_rate": 1.0811195277214695e-06, |
| "loss": 0.0928, |
| "step": 1520 |
| }, |
| { |
| "epoch": 3.4780196329492106, |
| "grad_norm": 0.35064470767974854, |
| "learning_rate": 1.0882321561933213e-06, |
| "loss": 0.0914, |
| "step": 1530 |
| }, |
| { |
| "epoch": 3.5007824726134587, |
| "grad_norm": 0.29766592383384705, |
| "learning_rate": 1.095344784665173e-06, |
| "loss": 0.0912, |
| "step": 1540 |
| }, |
| { |
| "epoch": 3.5235453122777067, |
| "grad_norm": 0.3228856921195984, |
| "learning_rate": 1.1024574131370248e-06, |
| "loss": 0.091, |
| "step": 1550 |
| }, |
| { |
| "epoch": 3.546308151941955, |
| "grad_norm": 0.3097136616706848, |
| "learning_rate": 1.1095700416088767e-06, |
| "loss": 0.0906, |
| "step": 1560 |
| }, |
| { |
| "epoch": 3.569070991606203, |
| "grad_norm": 0.3007184863090515, |
| "learning_rate": 1.1166826700807284e-06, |
| "loss": 0.0936, |
| "step": 1570 |
| }, |
| { |
| "epoch": 3.591833831270451, |
| "grad_norm": 0.33350813388824463, |
| "learning_rate": 1.1237952985525802e-06, |
| "loss": 0.0943, |
| "step": 1580 |
| }, |
| { |
| "epoch": 3.614596670934699, |
| "grad_norm": 0.3203691840171814, |
| "learning_rate": 1.130907927024432e-06, |
| "loss": 0.0925, |
| "step": 1590 |
| }, |
| { |
| "epoch": 3.637359510598947, |
| "grad_norm": 0.331552118062973, |
| "learning_rate": 1.1380205554962839e-06, |
| "loss": 0.0908, |
| "step": 1600 |
| }, |
| { |
| "epoch": 3.660122350263195, |
| "grad_norm": 0.3371254801750183, |
| "learning_rate": 1.1451331839681356e-06, |
| "loss": 0.0933, |
| "step": 1610 |
| }, |
| { |
| "epoch": 3.6828851899274433, |
| "grad_norm": 0.302493691444397, |
| "learning_rate": 1.1522458124399874e-06, |
| "loss": 0.0917, |
| "step": 1620 |
| }, |
| { |
| "epoch": 3.705648029591692, |
| "grad_norm": 0.3068028390407562, |
| "learning_rate": 1.159358440911839e-06, |
| "loss": 0.0887, |
| "step": 1630 |
| }, |
| { |
| "epoch": 3.7284108692559395, |
| "grad_norm": 0.3035215139389038, |
| "learning_rate": 1.1664710693836908e-06, |
| "loss": 0.0903, |
| "step": 1640 |
| }, |
| { |
| "epoch": 3.751173708920188, |
| "grad_norm": 0.30323877930641174, |
| "learning_rate": 1.1735836978555426e-06, |
| "loss": 0.0934, |
| "step": 1650 |
| }, |
| { |
| "epoch": 3.773936548584436, |
| "grad_norm": 0.3012927770614624, |
| "learning_rate": 1.1806963263273943e-06, |
| "loss": 0.0906, |
| "step": 1660 |
| }, |
| { |
| "epoch": 3.796699388248684, |
| "grad_norm": 0.30301883816719055, |
| "learning_rate": 1.1878089547992463e-06, |
| "loss": 0.0886, |
| "step": 1670 |
| }, |
| { |
| "epoch": 3.819462227912932, |
| "grad_norm": 0.33015456795692444, |
| "learning_rate": 1.194921583271098e-06, |
| "loss": 0.0915, |
| "step": 1680 |
| }, |
| { |
| "epoch": 3.8422250675771803, |
| "grad_norm": 0.3174781799316406, |
| "learning_rate": 1.2020342117429498e-06, |
| "loss": 0.0924, |
| "step": 1690 |
| }, |
| { |
| "epoch": 3.8649879072414284, |
| "grad_norm": 0.4451993405818939, |
| "learning_rate": 1.2091468402148015e-06, |
| "loss": 0.094, |
| "step": 1700 |
| }, |
| { |
| "epoch": 3.8877507469056765, |
| "grad_norm": 0.28383544087409973, |
| "learning_rate": 1.2162594686866532e-06, |
| "loss": 0.0899, |
| "step": 1710 |
| }, |
| { |
| "epoch": 3.9105135865699245, |
| "grad_norm": 0.29724323749542236, |
| "learning_rate": 1.223372097158505e-06, |
| "loss": 0.0881, |
| "step": 1720 |
| }, |
| { |
| "epoch": 3.9332764262341726, |
| "grad_norm": 0.2890814542770386, |
| "learning_rate": 1.2304847256303567e-06, |
| "loss": 0.0884, |
| "step": 1730 |
| }, |
| { |
| "epoch": 3.9560392658984207, |
| "grad_norm": 0.3286956548690796, |
| "learning_rate": 1.2375973541022087e-06, |
| "loss": 0.0894, |
| "step": 1740 |
| }, |
| { |
| "epoch": 3.978802105562669, |
| "grad_norm": 0.3125689923763275, |
| "learning_rate": 1.2447099825740604e-06, |
| "loss": 0.0892, |
| "step": 1750 |
| }, |
| { |
| "epoch": 3.978802105562669, |
| "eval_loss": 0.044054221361875534, |
| "eval_runtime": 3.1571, |
| "eval_samples_per_second": 475.117, |
| "eval_steps_per_second": 7.602, |
| "eval_sts_dev_pearson_cosine": 0.8132413514515704, |
| "eval_sts_dev_pearson_dot": 0.7029474825790806, |
| "eval_sts_dev_pearson_euclidean": 0.7809188333660187, |
| "eval_sts_dev_pearson_manhattan": 0.7819647523352286, |
| "eval_sts_dev_pearson_max": 0.8132413514515704, |
| "eval_sts_dev_spearman_cosine": 0.821487180003909, |
| "eval_sts_dev_spearman_dot": 0.6837898742130047, |
| "eval_sts_dev_spearman_euclidean": 0.7847115974803252, |
| "eval_sts_dev_spearman_manhattan": 0.7867474388257931, |
| "eval_sts_dev_spearman_max": 0.821487180003909, |
| "step": 1750 |
| }, |
| { |
| "epoch": 4.0, |
| "grad_norm": 0.15849126875400543, |
| "learning_rate": 1.2518226110459121e-06, |
| "loss": 0.0812, |
| "step": 1760 |
| }, |
| { |
| "epoch": 4.0227628396642485, |
| "grad_norm": 0.2848968505859375, |
| "learning_rate": 1.2589352395177639e-06, |
| "loss": 0.0878, |
| "step": 1770 |
| }, |
| { |
| "epoch": 4.045525679328496, |
| "grad_norm": 0.2904186546802521, |
| "learning_rate": 1.2660478679896156e-06, |
| "loss": 0.0869, |
| "step": 1780 |
| }, |
| { |
| "epoch": 4.068288518992745, |
| "grad_norm": 0.3993697464466095, |
| "learning_rate": 1.2731604964614674e-06, |
| "loss": 0.09, |
| "step": 1790 |
| }, |
| { |
| "epoch": 4.091051358656992, |
| "grad_norm": 0.28611546754837036, |
| "learning_rate": 1.280273124933319e-06, |
| "loss": 0.0875, |
| "step": 1800 |
| }, |
| { |
| "epoch": 4.113814198321241, |
| "grad_norm": 0.29007813334465027, |
| "learning_rate": 1.2873857534051708e-06, |
| "loss": 0.086, |
| "step": 1810 |
| }, |
| { |
| "epoch": 4.1365770379854885, |
| "grad_norm": 0.32408079504966736, |
| "learning_rate": 1.2944983818770226e-06, |
| "loss": 0.0888, |
| "step": 1820 |
| }, |
| { |
| "epoch": 4.159339877649737, |
| "grad_norm": 0.3043130934238434, |
| "learning_rate": 1.3016110103488747e-06, |
| "loss": 0.086, |
| "step": 1830 |
| }, |
| { |
| "epoch": 4.182102717313985, |
| "grad_norm": 0.27660122513771057, |
| "learning_rate": 1.3087236388207265e-06, |
| "loss": 0.0869, |
| "step": 1840 |
| }, |
| { |
| "epoch": 4.204865556978233, |
| "grad_norm": 0.28952455520629883, |
| "learning_rate": 1.3158362672925782e-06, |
| "loss": 0.0885, |
| "step": 1850 |
| }, |
| { |
| "epoch": 4.227628396642481, |
| "grad_norm": 0.3132406175136566, |
| "learning_rate": 1.32294889576443e-06, |
| "loss": 0.0891, |
| "step": 1860 |
| }, |
| { |
| "epoch": 4.250391236306729, |
| "grad_norm": 0.300589919090271, |
| "learning_rate": 1.3300615242362817e-06, |
| "loss": 0.0853, |
| "step": 1870 |
| }, |
| { |
| "epoch": 4.273154075970977, |
| "grad_norm": 0.28771117329597473, |
| "learning_rate": 1.3371741527081332e-06, |
| "loss": 0.0849, |
| "step": 1880 |
| }, |
| { |
| "epoch": 4.2959169156352255, |
| "grad_norm": 0.29580655694007874, |
| "learning_rate": 1.344286781179985e-06, |
| "loss": 0.0856, |
| "step": 1890 |
| }, |
| { |
| "epoch": 4.318679755299474, |
| "grad_norm": 0.31038954854011536, |
| "learning_rate": 1.3513994096518371e-06, |
| "loss": 0.0863, |
| "step": 1900 |
| }, |
| { |
| "epoch": 4.341442594963722, |
| "grad_norm": 0.28325986862182617, |
| "learning_rate": 1.3585120381236889e-06, |
| "loss": 0.0849, |
| "step": 1910 |
| }, |
| { |
| "epoch": 4.36420543462797, |
| "grad_norm": 0.27782177925109863, |
| "learning_rate": 1.3656246665955406e-06, |
| "loss": 0.0855, |
| "step": 1920 |
| }, |
| { |
| "epoch": 4.386968274292218, |
| "grad_norm": 0.30091577768325806, |
| "learning_rate": 1.3727372950673924e-06, |
| "loss": 0.0841, |
| "step": 1930 |
| }, |
| { |
| "epoch": 4.409731113956466, |
| "grad_norm": 0.3271619379520416, |
| "learning_rate": 1.379849923539244e-06, |
| "loss": 0.0893, |
| "step": 1940 |
| }, |
| { |
| "epoch": 4.432493953620714, |
| "grad_norm": 0.2756374478340149, |
| "learning_rate": 1.3869625520110958e-06, |
| "loss": 0.0847, |
| "step": 1950 |
| }, |
| { |
| "epoch": 4.4552567932849625, |
| "grad_norm": 0.29490333795547485, |
| "learning_rate": 1.3940751804829476e-06, |
| "loss": 0.0866, |
| "step": 1960 |
| }, |
| { |
| "epoch": 4.47801963294921, |
| "grad_norm": 0.318877637386322, |
| "learning_rate": 1.4011878089547993e-06, |
| "loss": 0.0866, |
| "step": 1970 |
| }, |
| { |
| "epoch": 4.500782472613459, |
| "grad_norm": 0.2944406270980835, |
| "learning_rate": 1.4083004374266513e-06, |
| "loss": 0.0844, |
| "step": 1980 |
| }, |
| { |
| "epoch": 4.523545312277706, |
| "grad_norm": 0.2944386601448059, |
| "learning_rate": 1.415413065898503e-06, |
| "loss": 0.0846, |
| "step": 1990 |
| }, |
| { |
| "epoch": 4.546308151941955, |
| "grad_norm": 0.33368054032325745, |
| "learning_rate": 1.4225256943703547e-06, |
| "loss": 0.0847, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.546308151941955, |
| "eval_loss": 0.043504875153303146, |
| "eval_runtime": 3.154, |
| "eval_samples_per_second": 475.589, |
| "eval_steps_per_second": 7.609, |
| "eval_sts_dev_pearson_cosine": 0.8146715006362908, |
| "eval_sts_dev_pearson_dot": 0.7040304873244005, |
| "eval_sts_dev_pearson_euclidean": 0.7833063990865667, |
| "eval_sts_dev_pearson_manhattan": 0.7843736809113127, |
| "eval_sts_dev_pearson_max": 0.8146715006362908, |
| "eval_sts_dev_spearman_cosine": 0.8219541620728356, |
| "eval_sts_dev_spearman_dot": 0.6847507056516472, |
| "eval_sts_dev_spearman_euclidean": 0.7871847822322454, |
| "eval_sts_dev_spearman_manhattan": 0.7889612573133329, |
| "eval_sts_dev_spearman_max": 0.8219541620728356, |
| "step": 2000 |
| }, |
| { |
| "epoch": 4.569070991606203, |
| "grad_norm": 0.27128133177757263, |
| "learning_rate": 1.4296383228422065e-06, |
| "loss": 0.0831, |
| "step": 2010 |
| }, |
| { |
| "epoch": 4.591833831270451, |
| "grad_norm": 0.34261852502822876, |
| "learning_rate": 1.4367509513140582e-06, |
| "loss": 0.0843, |
| "step": 2020 |
| }, |
| { |
| "epoch": 4.614596670934699, |
| "grad_norm": 0.35007432103157043, |
| "learning_rate": 1.44386357978591e-06, |
| "loss": 0.086, |
| "step": 2030 |
| }, |
| { |
| "epoch": 4.637359510598947, |
| "grad_norm": 0.3019850552082062, |
| "learning_rate": 1.4509762082577617e-06, |
| "loss": 0.0851, |
| "step": 2040 |
| }, |
| { |
| "epoch": 4.660122350263196, |
| "grad_norm": 0.2931526303291321, |
| "learning_rate": 1.4580888367296137e-06, |
| "loss": 0.0844, |
| "step": 2050 |
| }, |
| { |
| "epoch": 4.682885189927443, |
| "grad_norm": 0.32966339588165283, |
| "learning_rate": 1.4652014652014654e-06, |
| "loss": 0.0843, |
| "step": 2060 |
| }, |
| { |
| "epoch": 4.705648029591692, |
| "grad_norm": 0.27907735109329224, |
| "learning_rate": 1.4723140936733171e-06, |
| "loss": 0.0854, |
| "step": 2070 |
| }, |
| { |
| "epoch": 4.7284108692559395, |
| "grad_norm": 0.29893922805786133, |
| "learning_rate": 1.4794267221451689e-06, |
| "loss": 0.0851, |
| "step": 2080 |
| }, |
| { |
| "epoch": 4.751173708920188, |
| "grad_norm": 0.34036242961883545, |
| "learning_rate": 1.4865393506170206e-06, |
| "loss": 0.0822, |
| "step": 2090 |
| }, |
| { |
| "epoch": 4.773936548584436, |
| "grad_norm": 0.2901923656463623, |
| "learning_rate": 1.4936519790888724e-06, |
| "loss": 0.0859, |
| "step": 2100 |
| }, |
| { |
| "epoch": 4.796699388248684, |
| "grad_norm": 0.29854685068130493, |
| "learning_rate": 1.500764607560724e-06, |
| "loss": 0.0844, |
| "step": 2110 |
| }, |
| { |
| "epoch": 4.819462227912932, |
| "grad_norm": 0.3369494378566742, |
| "learning_rate": 1.507877236032576e-06, |
| "loss": 0.0853, |
| "step": 2120 |
| }, |
| { |
| "epoch": 4.84222506757718, |
| "grad_norm": 0.2752622067928314, |
| "learning_rate": 1.5149898645044278e-06, |
| "loss": 0.0815, |
| "step": 2130 |
| }, |
| { |
| "epoch": 4.864987907241428, |
| "grad_norm": 0.2834544777870178, |
| "learning_rate": 1.5221024929762795e-06, |
| "loss": 0.0833, |
| "step": 2140 |
| }, |
| { |
| "epoch": 4.8877507469056765, |
| "grad_norm": 0.29377996921539307, |
| "learning_rate": 1.5292151214481313e-06, |
| "loss": 0.0817, |
| "step": 2150 |
| }, |
| { |
| "epoch": 4.910513586569925, |
| "grad_norm": 0.28692808747291565, |
| "learning_rate": 1.536327749919983e-06, |
| "loss": 0.0873, |
| "step": 2160 |
| }, |
| { |
| "epoch": 4.933276426234173, |
| "grad_norm": 0.39381957054138184, |
| "learning_rate": 1.5434403783918347e-06, |
| "loss": 0.0813, |
| "step": 2170 |
| }, |
| { |
| "epoch": 4.956039265898421, |
| "grad_norm": 0.2600520849227905, |
| "learning_rate": 1.5505530068636865e-06, |
| "loss": 0.0829, |
| "step": 2180 |
| }, |
| { |
| "epoch": 4.978802105562669, |
| "grad_norm": 0.2618444859981537, |
| "learning_rate": 1.5576656353355384e-06, |
| "loss": 0.0812, |
| "step": 2190 |
| }, |
| { |
| "epoch": 5.0, |
| "grad_norm": 0.1586732417345047, |
| "learning_rate": 1.5647782638073902e-06, |
| "loss": 0.0776, |
| "step": 2200 |
| }, |
| { |
| "epoch": 5.0227628396642485, |
| "grad_norm": 0.290334016084671, |
| "learning_rate": 1.571890892279242e-06, |
| "loss": 0.083, |
| "step": 2210 |
| }, |
| { |
| "epoch": 5.045525679328496, |
| "grad_norm": 0.27750375866889954, |
| "learning_rate": 1.5790035207510937e-06, |
| "loss": 0.0821, |
| "step": 2220 |
| }, |
| { |
| "epoch": 5.068288518992745, |
| "grad_norm": 0.30981412529945374, |
| "learning_rate": 1.5861161492229454e-06, |
| "loss": 0.0806, |
| "step": 2230 |
| }, |
| { |
| "epoch": 5.091051358656992, |
| "grad_norm": 0.27481886744499207, |
| "learning_rate": 1.5932287776947971e-06, |
| "loss": 0.0809, |
| "step": 2240 |
| }, |
| { |
| "epoch": 5.113814198321241, |
| "grad_norm": 0.4338255226612091, |
| "learning_rate": 1.6003414061666489e-06, |
| "loss": 0.0814, |
| "step": 2250 |
| }, |
| { |
| "epoch": 5.113814198321241, |
| "eval_loss": 0.043074991554021835, |
| "eval_runtime": 3.1278, |
| "eval_samples_per_second": 479.564, |
| "eval_steps_per_second": 7.673, |
| "eval_sts_dev_pearson_cosine": 0.8157779253220427, |
| "eval_sts_dev_pearson_dot": 0.7048364440953621, |
| "eval_sts_dev_pearson_euclidean": 0.7856670451519477, |
| "eval_sts_dev_pearson_manhattan": 0.7867510343221469, |
| "eval_sts_dev_pearson_max": 0.8157779253220427, |
| "eval_sts_dev_spearman_cosine": 0.8225234805140361, |
| "eval_sts_dev_spearman_dot": 0.6855999639965007, |
| "eval_sts_dev_spearman_euclidean": 0.7894583838133404, |
| "eval_sts_dev_spearman_manhattan": 0.791341587911783, |
| "eval_sts_dev_spearman_max": 0.8225234805140361, |
| "step": 2250 |
| }, |
| { |
| "epoch": 5.1365770379854885, |
| "grad_norm": 0.28418755531311035, |
| "learning_rate": 1.6074540346385008e-06, |
| "loss": 0.0808, |
| "step": 2260 |
| }, |
| { |
| "epoch": 5.159339877649737, |
| "grad_norm": 0.2720179259777069, |
| "learning_rate": 1.6145666631103526e-06, |
| "loss": 0.0791, |
| "step": 2270 |
| }, |
| { |
| "epoch": 5.182102717313985, |
| "grad_norm": 0.24729762971401215, |
| "learning_rate": 1.6216792915822043e-06, |
| "loss": 0.0811, |
| "step": 2280 |
| }, |
| { |
| "epoch": 5.204865556978233, |
| "grad_norm": 0.2798333764076233, |
| "learning_rate": 1.628791920054056e-06, |
| "loss": 0.0805, |
| "step": 2290 |
| }, |
| { |
| "epoch": 5.227628396642481, |
| "grad_norm": 0.3200991749763489, |
| "learning_rate": 1.6359045485259078e-06, |
| "loss": 0.0817, |
| "step": 2300 |
| }, |
| { |
| "epoch": 5.250391236306729, |
| "grad_norm": 0.28277549147605896, |
| "learning_rate": 1.6430171769977595e-06, |
| "loss": 0.0772, |
| "step": 2310 |
| }, |
| { |
| "epoch": 5.273154075970977, |
| "grad_norm": 0.2859530746936798, |
| "learning_rate": 1.6501298054696113e-06, |
| "loss": 0.0799, |
| "step": 2320 |
| }, |
| { |
| "epoch": 5.2959169156352255, |
| "grad_norm": 0.30756381154060364, |
| "learning_rate": 1.6572424339414634e-06, |
| "loss": 0.0829, |
| "step": 2330 |
| }, |
| { |
| "epoch": 5.318679755299474, |
| "grad_norm": 0.280272901058197, |
| "learning_rate": 1.6643550624133152e-06, |
| "loss": 0.077, |
| "step": 2340 |
| }, |
| { |
| "epoch": 5.341442594963722, |
| "grad_norm": 0.25623443722724915, |
| "learning_rate": 1.671467690885167e-06, |
| "loss": 0.0801, |
| "step": 2350 |
| }, |
| { |
| "epoch": 5.36420543462797, |
| "grad_norm": 0.26670023798942566, |
| "learning_rate": 1.6785803193570184e-06, |
| "loss": 0.0812, |
| "step": 2360 |
| }, |
| { |
| "epoch": 5.386968274292218, |
| "grad_norm": 0.24578404426574707, |
| "learning_rate": 1.6856929478288702e-06, |
| "loss": 0.0788, |
| "step": 2370 |
| }, |
| { |
| "epoch": 5.409731113956466, |
| "grad_norm": 0.3148477375507355, |
| "learning_rate": 1.692805576300722e-06, |
| "loss": 0.0776, |
| "step": 2380 |
| }, |
| { |
| "epoch": 5.432493953620714, |
| "grad_norm": 0.2843981087207794, |
| "learning_rate": 1.6999182047725737e-06, |
| "loss": 0.0785, |
| "step": 2390 |
| }, |
| { |
| "epoch": 5.4552567932849625, |
| "grad_norm": 0.2720634639263153, |
| "learning_rate": 1.7070308332444258e-06, |
| "loss": 0.0771, |
| "step": 2400 |
| }, |
| { |
| "epoch": 5.47801963294921, |
| "grad_norm": 0.2510247826576233, |
| "learning_rate": 1.7141434617162776e-06, |
| "loss": 0.0788, |
| "step": 2410 |
| }, |
| { |
| "epoch": 5.500782472613459, |
| "grad_norm": 0.2841964662075043, |
| "learning_rate": 1.7212560901881293e-06, |
| "loss": 0.0796, |
| "step": 2420 |
| }, |
| { |
| "epoch": 5.523545312277706, |
| "grad_norm": 0.28437066078186035, |
| "learning_rate": 1.728368718659981e-06, |
| "loss": 0.0793, |
| "step": 2430 |
| }, |
| { |
| "epoch": 5.546308151941955, |
| "grad_norm": 0.26712462306022644, |
| "learning_rate": 1.7354813471318328e-06, |
| "loss": 0.0813, |
| "step": 2440 |
| }, |
| { |
| "epoch": 5.569070991606203, |
| "grad_norm": 0.28166961669921875, |
| "learning_rate": 1.7425939756036845e-06, |
| "loss": 0.0757, |
| "step": 2450 |
| }, |
| { |
| "epoch": 5.591833831270451, |
| "grad_norm": 0.31931671500205994, |
| "learning_rate": 1.7497066040755363e-06, |
| "loss": 0.079, |
| "step": 2460 |
| }, |
| { |
| "epoch": 5.614596670934699, |
| "grad_norm": 0.26701247692108154, |
| "learning_rate": 1.756819232547388e-06, |
| "loss": 0.0797, |
| "step": 2470 |
| }, |
| { |
| "epoch": 5.637359510598947, |
| "grad_norm": 0.31258830428123474, |
| "learning_rate": 1.76393186101924e-06, |
| "loss": 0.0794, |
| "step": 2480 |
| }, |
| { |
| "epoch": 5.660122350263196, |
| "grad_norm": 0.24133124947547913, |
| "learning_rate": 1.7710444894910917e-06, |
| "loss": 0.0808, |
| "step": 2490 |
| }, |
| { |
| "epoch": 5.682885189927443, |
| "grad_norm": 0.321419894695282, |
| "learning_rate": 1.7781571179629434e-06, |
| "loss": 0.0796, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.682885189927443, |
| "eval_loss": 0.042360566556453705, |
| "eval_runtime": 3.1328, |
| "eval_samples_per_second": 478.807, |
| "eval_steps_per_second": 7.661, |
| "eval_sts_dev_pearson_cosine": 0.8169384514842573, |
| "eval_sts_dev_pearson_dot": 0.7053659085797274, |
| "eval_sts_dev_pearson_euclidean": 0.7873338046578867, |
| "eval_sts_dev_pearson_manhattan": 0.7884276721147352, |
| "eval_sts_dev_pearson_max": 0.8169384514842573, |
| "eval_sts_dev_spearman_cosine": 0.8229950220469622, |
| "eval_sts_dev_spearman_dot": 0.6868513283838655, |
| "eval_sts_dev_spearman_euclidean": 0.7910863183517545, |
| "eval_sts_dev_spearman_manhattan": 0.7928925773180566, |
| "eval_sts_dev_spearman_max": 0.8229950220469622, |
| "step": 2500 |
| }, |
| { |
| "epoch": 5.705648029591692, |
| "grad_norm": 0.2824183404445648, |
| "learning_rate": 1.7852697464347952e-06, |
| "loss": 0.0802, |
| "step": 2510 |
| }, |
| { |
| "epoch": 5.7284108692559395, |
| "grad_norm": 0.23221443593502045, |
| "learning_rate": 1.792382374906647e-06, |
| "loss": 0.0799, |
| "step": 2520 |
| }, |
| { |
| "epoch": 5.751173708920188, |
| "grad_norm": 0.3471781313419342, |
| "learning_rate": 1.7994950033784986e-06, |
| "loss": 0.0802, |
| "step": 2530 |
| }, |
| { |
| "epoch": 5.773936548584436, |
| "grad_norm": 0.2657029628753662, |
| "learning_rate": 1.8066076318503504e-06, |
| "loss": 0.0813, |
| "step": 2540 |
| }, |
| { |
| "epoch": 5.796699388248684, |
| "grad_norm": 0.2691391706466675, |
| "learning_rate": 1.8137202603222023e-06, |
| "loss": 0.0772, |
| "step": 2550 |
| }, |
| { |
| "epoch": 5.819462227912932, |
| "grad_norm": 0.2894577383995056, |
| "learning_rate": 1.820832888794054e-06, |
| "loss": 0.0766, |
| "step": 2560 |
| }, |
| { |
| "epoch": 5.84222506757718, |
| "grad_norm": 0.3584199547767639, |
| "learning_rate": 1.8279455172659058e-06, |
| "loss": 0.0778, |
| "step": 2570 |
| }, |
| { |
| "epoch": 5.864987907241428, |
| "grad_norm": 0.29335150122642517, |
| "learning_rate": 1.8350581457377576e-06, |
| "loss": 0.076, |
| "step": 2580 |
| }, |
| { |
| "epoch": 5.8877507469056765, |
| "grad_norm": 0.3056882321834564, |
| "learning_rate": 1.8421707742096093e-06, |
| "loss": 0.0787, |
| "step": 2590 |
| }, |
| { |
| "epoch": 5.910513586569925, |
| "grad_norm": 0.2651148736476898, |
| "learning_rate": 1.849283402681461e-06, |
| "loss": 0.0794, |
| "step": 2600 |
| }, |
| { |
| "epoch": 5.933276426234173, |
| "grad_norm": 0.2723177969455719, |
| "learning_rate": 1.8563960311533128e-06, |
| "loss": 0.076, |
| "step": 2610 |
| }, |
| { |
| "epoch": 5.956039265898421, |
| "grad_norm": 0.26683980226516724, |
| "learning_rate": 1.8635086596251647e-06, |
| "loss": 0.0773, |
| "step": 2620 |
| }, |
| { |
| "epoch": 5.978802105562669, |
| "grad_norm": 0.2798727750778198, |
| "learning_rate": 1.8706212880970165e-06, |
| "loss": 0.0755, |
| "step": 2630 |
| }, |
| { |
| "epoch": 6.0, |
| "grad_norm": 0.1584087312221527, |
| "learning_rate": 1.8777339165688682e-06, |
| "loss": 0.0725, |
| "step": 2640 |
| }, |
| { |
| "epoch": 6.0227628396642485, |
| "grad_norm": 0.2422705441713333, |
| "learning_rate": 1.88484654504072e-06, |
| "loss": 0.0738, |
| "step": 2650 |
| }, |
| { |
| "epoch": 6.045525679328496, |
| "grad_norm": 0.29021552205085754, |
| "learning_rate": 1.8919591735125717e-06, |
| "loss": 0.0762, |
| "step": 2660 |
| }, |
| { |
| "epoch": 6.068288518992745, |
| "grad_norm": 0.24207890033721924, |
| "learning_rate": 1.8990718019844234e-06, |
| "loss": 0.0761, |
| "step": 2670 |
| }, |
| { |
| "epoch": 6.091051358656992, |
| "grad_norm": 0.2542373538017273, |
| "learning_rate": 1.9061844304562752e-06, |
| "loss": 0.0771, |
| "step": 2680 |
| }, |
| { |
| "epoch": 6.113814198321241, |
| "grad_norm": 0.24501535296440125, |
| "learning_rate": 1.913297058928127e-06, |
| "loss": 0.0765, |
| "step": 2690 |
| }, |
| { |
| "epoch": 6.1365770379854885, |
| "grad_norm": 0.2733090817928314, |
| "learning_rate": 1.920409687399979e-06, |
| "loss": 0.0755, |
| "step": 2700 |
| }, |
| { |
| "epoch": 6.159339877649737, |
| "grad_norm": 0.24523428082466125, |
| "learning_rate": 1.9275223158718306e-06, |
| "loss": 0.0771, |
| "step": 2710 |
| }, |
| { |
| "epoch": 6.182102717313985, |
| "grad_norm": 0.2885381877422333, |
| "learning_rate": 1.9346349443436826e-06, |
| "loss": 0.0748, |
| "step": 2720 |
| }, |
| { |
| "epoch": 6.204865556978233, |
| "grad_norm": 0.30437180399894714, |
| "learning_rate": 1.941747572815534e-06, |
| "loss": 0.0768, |
| "step": 2730 |
| }, |
| { |
| "epoch": 6.227628396642481, |
| "grad_norm": 0.26403576135635376, |
| "learning_rate": 1.948860201287386e-06, |
| "loss": 0.0766, |
| "step": 2740 |
| }, |
| { |
| "epoch": 6.250391236306729, |
| "grad_norm": 0.2586285471916199, |
| "learning_rate": 1.9559728297592376e-06, |
| "loss": 0.0766, |
| "step": 2750 |
| }, |
| { |
| "epoch": 6.250391236306729, |
| "eval_loss": 0.04222765937447548, |
| "eval_runtime": 3.1267, |
| "eval_samples_per_second": 479.737, |
| "eval_steps_per_second": 7.676, |
| "eval_sts_dev_pearson_cosine": 0.8179974050572225, |
| "eval_sts_dev_pearson_dot": 0.7047975315002233, |
| "eval_sts_dev_pearson_euclidean": 0.7884393906368459, |
| "eval_sts_dev_pearson_manhattan": 0.7895245481097086, |
| "eval_sts_dev_pearson_max": 0.8179974050572225, |
| "eval_sts_dev_spearman_cosine": 0.823939141053365, |
| "eval_sts_dev_spearman_dot": 0.6866457343802284, |
| "eval_sts_dev_spearman_euclidean": 0.7922252886902459, |
| "eval_sts_dev_spearman_manhattan": 0.7940188158576397, |
| "eval_sts_dev_spearman_max": 0.823939141053365, |
| "step": 2750 |
| }, |
| { |
| "epoch": 6.273154075970977, |
| "grad_norm": 0.3394823968410492, |
| "learning_rate": 1.9630854582310895e-06, |
| "loss": 0.076, |
| "step": 2760 |
| }, |
| { |
| "epoch": 6.2959169156352255, |
| "grad_norm": 0.25371795892715454, |
| "learning_rate": 1.9701980867029415e-06, |
| "loss": 0.0753, |
| "step": 2770 |
| }, |
| { |
| "epoch": 6.318679755299474, |
| "grad_norm": 0.2900475561618805, |
| "learning_rate": 1.977310715174793e-06, |
| "loss": 0.0735, |
| "step": 2780 |
| }, |
| { |
| "epoch": 6.341442594963722, |
| "grad_norm": 0.23057663440704346, |
| "learning_rate": 1.984423343646645e-06, |
| "loss": 0.0751, |
| "step": 2790 |
| }, |
| { |
| "epoch": 6.36420543462797, |
| "grad_norm": 0.3174869418144226, |
| "learning_rate": 1.9915359721184965e-06, |
| "loss": 0.0738, |
| "step": 2800 |
| }, |
| { |
| "epoch": 6.386968274292218, |
| "grad_norm": 0.2789759635925293, |
| "learning_rate": 1.9986486005903484e-06, |
| "loss": 0.0749, |
| "step": 2810 |
| }, |
| { |
| "epoch": 6.409731113956466, |
| "grad_norm": 0.27371764183044434, |
| "learning_rate": 2.0057612290622e-06, |
| "loss": 0.0753, |
| "step": 2820 |
| }, |
| { |
| "epoch": 6.432493953620714, |
| "grad_norm": 0.28073859214782715, |
| "learning_rate": 2.012873857534052e-06, |
| "loss": 0.077, |
| "step": 2830 |
| }, |
| { |
| "epoch": 6.4552567932849625, |
| "grad_norm": 0.2226971834897995, |
| "learning_rate": 2.019986486005904e-06, |
| "loss": 0.0747, |
| "step": 2840 |
| }, |
| { |
| "epoch": 6.47801963294921, |
| "grad_norm": 0.2504160404205322, |
| "learning_rate": 2.0270991144777554e-06, |
| "loss": 0.0722, |
| "step": 2850 |
| }, |
| { |
| "epoch": 6.500782472613459, |
| "grad_norm": 0.24718140065670013, |
| "learning_rate": 2.0342117429496073e-06, |
| "loss": 0.0736, |
| "step": 2860 |
| }, |
| { |
| "epoch": 6.523545312277706, |
| "grad_norm": 0.3116447627544403, |
| "learning_rate": 2.041324371421459e-06, |
| "loss": 0.073, |
| "step": 2870 |
| }, |
| { |
| "epoch": 6.546308151941955, |
| "grad_norm": 0.2456272393465042, |
| "learning_rate": 2.048436999893311e-06, |
| "loss": 0.0774, |
| "step": 2880 |
| }, |
| { |
| "epoch": 6.569070991606203, |
| "grad_norm": 0.2620035409927368, |
| "learning_rate": 2.0555496283651623e-06, |
| "loss": 0.075, |
| "step": 2890 |
| }, |
| { |
| "epoch": 6.591833831270451, |
| "grad_norm": 0.24615773558616638, |
| "learning_rate": 2.0626622568370143e-06, |
| "loss": 0.0718, |
| "step": 2900 |
| }, |
| { |
| "epoch": 6.614596670934699, |
| "grad_norm": 0.24443495273590088, |
| "learning_rate": 2.0697748853088662e-06, |
| "loss": 0.0727, |
| "step": 2910 |
| }, |
| { |
| "epoch": 6.637359510598947, |
| "grad_norm": 0.2751477062702179, |
| "learning_rate": 2.0768875137807178e-06, |
| "loss": 0.0735, |
| "step": 2920 |
| }, |
| { |
| "epoch": 6.660122350263196, |
| "grad_norm": 0.21635128557682037, |
| "learning_rate": 2.0840001422525697e-06, |
| "loss": 0.0726, |
| "step": 2930 |
| }, |
| { |
| "epoch": 6.682885189927443, |
| "grad_norm": 0.26079434156417847, |
| "learning_rate": 2.0911127707244213e-06, |
| "loss": 0.075, |
| "step": 2940 |
| }, |
| { |
| "epoch": 6.705648029591692, |
| "grad_norm": 0.2535637617111206, |
| "learning_rate": 2.098225399196273e-06, |
| "loss": 0.0728, |
| "step": 2950 |
| }, |
| { |
| "epoch": 6.7284108692559395, |
| "grad_norm": 0.28646010160446167, |
| "learning_rate": 2.1053380276681247e-06, |
| "loss": 0.0713, |
| "step": 2960 |
| }, |
| { |
| "epoch": 6.751173708920188, |
| "grad_norm": 0.24261872470378876, |
| "learning_rate": 2.1124506561399767e-06, |
| "loss": 0.0722, |
| "step": 2970 |
| }, |
| { |
| "epoch": 6.773936548584436, |
| "grad_norm": 0.3087233901023865, |
| "learning_rate": 2.1195632846118286e-06, |
| "loss": 0.0753, |
| "step": 2980 |
| }, |
| { |
| "epoch": 6.796699388248684, |
| "grad_norm": 0.26111656427383423, |
| "learning_rate": 2.12667591308368e-06, |
| "loss": 0.0733, |
| "step": 2990 |
| }, |
| { |
| "epoch": 6.819462227912932, |
| "grad_norm": 0.22123539447784424, |
| "learning_rate": 2.133788541555532e-06, |
| "loss": 0.0727, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.819462227912932, |
| "eval_loss": 0.04253983870148659, |
| "eval_runtime": 3.1522, |
| "eval_samples_per_second": 475.855, |
| "eval_steps_per_second": 7.614, |
| "eval_sts_dev_pearson_cosine": 0.8180287893647112, |
| "eval_sts_dev_pearson_dot": 0.7048827437986248, |
| "eval_sts_dev_pearson_euclidean": 0.7892330786682039, |
| "eval_sts_dev_pearson_manhattan": 0.7902606373541703, |
| "eval_sts_dev_pearson_max": 0.8180287893647112, |
| "eval_sts_dev_spearman_cosine": 0.8243152286673464, |
| "eval_sts_dev_spearman_dot": 0.6873609530160721, |
| "eval_sts_dev_spearman_euclidean": 0.7932823760168848, |
| "eval_sts_dev_spearman_manhattan": 0.7950721624458399, |
| "eval_sts_dev_spearman_max": 0.8243152286673464, |
| "step": 3000 |
| }, |
| { |
| "epoch": 6.84222506757718, |
| "grad_norm": 0.25969475507736206, |
| "learning_rate": 2.1409011700273836e-06, |
| "loss": 0.0729, |
| "step": 3010 |
| }, |
| { |
| "epoch": 6.864987907241428, |
| "grad_norm": 0.2635466158390045, |
| "learning_rate": 2.1480137984992356e-06, |
| "loss": 0.073, |
| "step": 3020 |
| }, |
| { |
| "epoch": 6.8877507469056765, |
| "grad_norm": 0.2308150678873062, |
| "learning_rate": 2.155126426971087e-06, |
| "loss": 0.0739, |
| "step": 3030 |
| }, |
| { |
| "epoch": 6.910513586569925, |
| "grad_norm": 0.28183940052986145, |
| "learning_rate": 2.162239055442939e-06, |
| "loss": 0.0717, |
| "step": 3040 |
| }, |
| { |
| "epoch": 6.933276426234173, |
| "grad_norm": 0.2507220506668091, |
| "learning_rate": 2.169351683914791e-06, |
| "loss": 0.0719, |
| "step": 3050 |
| }, |
| { |
| "epoch": 6.956039265898421, |
| "grad_norm": 0.24221095442771912, |
| "learning_rate": 2.1764643123866426e-06, |
| "loss": 0.0712, |
| "step": 3060 |
| }, |
| { |
| "epoch": 6.978802105562669, |
| "grad_norm": 0.2784756124019623, |
| "learning_rate": 2.1835769408584945e-06, |
| "loss": 0.0712, |
| "step": 3070 |
| }, |
| { |
| "epoch": 7.0, |
| "grad_norm": 0.146551251411438, |
| "learning_rate": 2.190689569330346e-06, |
| "loss": 0.0674, |
| "step": 3080 |
| }, |
| { |
| "epoch": 7.0227628396642485, |
| "grad_norm": 0.2597323954105377, |
| "learning_rate": 2.197802197802198e-06, |
| "loss": 0.0729, |
| "step": 3090 |
| }, |
| { |
| "epoch": 7.045525679328496, |
| "grad_norm": 0.23450802266597748, |
| "learning_rate": 2.2049148262740495e-06, |
| "loss": 0.0712, |
| "step": 3100 |
| }, |
| { |
| "epoch": 7.068288518992745, |
| "grad_norm": 0.25625136494636536, |
| "learning_rate": 2.2120274547459015e-06, |
| "loss": 0.0701, |
| "step": 3110 |
| }, |
| { |
| "epoch": 7.091051358656992, |
| "grad_norm": 0.24344876408576965, |
| "learning_rate": 2.2191400832177534e-06, |
| "loss": 0.0699, |
| "step": 3120 |
| }, |
| { |
| "epoch": 7.113814198321241, |
| "grad_norm": 0.26194125413894653, |
| "learning_rate": 2.2262527116896054e-06, |
| "loss": 0.0675, |
| "step": 3130 |
| }, |
| { |
| "epoch": 7.1365770379854885, |
| "grad_norm": 0.30849021673202515, |
| "learning_rate": 2.233365340161457e-06, |
| "loss": 0.0699, |
| "step": 3140 |
| }, |
| { |
| "epoch": 7.159339877649737, |
| "grad_norm": 0.2190207540988922, |
| "learning_rate": 2.2404779686333084e-06, |
| "loss": 0.0716, |
| "step": 3150 |
| }, |
| { |
| "epoch": 7.182102717313985, |
| "grad_norm": 0.24949057400226593, |
| "learning_rate": 2.2475905971051604e-06, |
| "loss": 0.0707, |
| "step": 3160 |
| }, |
| { |
| "epoch": 7.204865556978233, |
| "grad_norm": 0.3089491128921509, |
| "learning_rate": 2.254703225577012e-06, |
| "loss": 0.0717, |
| "step": 3170 |
| }, |
| { |
| "epoch": 7.227628396642481, |
| "grad_norm": 0.24911244213581085, |
| "learning_rate": 2.261815854048864e-06, |
| "loss": 0.0709, |
| "step": 3180 |
| }, |
| { |
| "epoch": 7.250391236306729, |
| "grad_norm": 0.24327076971530914, |
| "learning_rate": 2.268928482520716e-06, |
| "loss": 0.071, |
| "step": 3190 |
| }, |
| { |
| "epoch": 7.273154075970977, |
| "grad_norm": 0.26636022329330444, |
| "learning_rate": 2.2760411109925678e-06, |
| "loss": 0.0722, |
| "step": 3200 |
| }, |
| { |
| "epoch": 7.2959169156352255, |
| "grad_norm": 0.31015798449516296, |
| "learning_rate": 2.2831537394644193e-06, |
| "loss": 0.072, |
| "step": 3210 |
| }, |
| { |
| "epoch": 7.318679755299474, |
| "grad_norm": 0.238671213388443, |
| "learning_rate": 2.2902663679362712e-06, |
| "loss": 0.0729, |
| "step": 3220 |
| }, |
| { |
| "epoch": 7.341442594963722, |
| "grad_norm": 0.2293502241373062, |
| "learning_rate": 2.2973789964081228e-06, |
| "loss": 0.0678, |
| "step": 3230 |
| }, |
| { |
| "epoch": 7.36420543462797, |
| "grad_norm": 0.2628055810928345, |
| "learning_rate": 2.3044916248799747e-06, |
| "loss": 0.0705, |
| "step": 3240 |
| }, |
| { |
| "epoch": 7.386968274292218, |
| "grad_norm": 0.2914957106113434, |
| "learning_rate": 2.3116042533518262e-06, |
| "loss": 0.0715, |
| "step": 3250 |
| }, |
| { |
| "epoch": 7.386968274292218, |
| "eval_loss": 0.04259900003671646, |
| "eval_runtime": 3.1556, |
| "eval_samples_per_second": 475.348, |
| "eval_steps_per_second": 7.606, |
| "eval_sts_dev_pearson_cosine": 0.8190384244394546, |
| "eval_sts_dev_pearson_dot": 0.7052187544623498, |
| "eval_sts_dev_pearson_euclidean": 0.7905914669782148, |
| "eval_sts_dev_pearson_manhattan": 0.7915483272250368, |
| "eval_sts_dev_pearson_max": 0.8190384244394546, |
| "eval_sts_dev_spearman_cosine": 0.825567603424224, |
| "eval_sts_dev_spearman_dot": 0.6876636031299087, |
| "eval_sts_dev_spearman_euclidean": 0.7948957613160437, |
| "eval_sts_dev_spearman_manhattan": 0.7965685334149736, |
| "eval_sts_dev_spearman_max": 0.825567603424224, |
| "step": 3250 |
| }, |
| { |
| "epoch": 7.409731113956466, |
| "grad_norm": 0.2585102915763855, |
| "learning_rate": 2.318716881823678e-06, |
| "loss": 0.0703, |
| "step": 3260 |
| }, |
| { |
| "epoch": 7.432493953620714, |
| "grad_norm": 0.24229033291339874, |
| "learning_rate": 2.32582951029553e-06, |
| "loss": 0.0699, |
| "step": 3270 |
| }, |
| { |
| "epoch": 7.4552567932849625, |
| "grad_norm": 0.26663440465927124, |
| "learning_rate": 2.3329421387673817e-06, |
| "loss": 0.071, |
| "step": 3280 |
| }, |
| { |
| "epoch": 7.47801963294921, |
| "grad_norm": 0.28489962220191956, |
| "learning_rate": 2.3400547672392336e-06, |
| "loss": 0.0692, |
| "step": 3290 |
| }, |
| { |
| "epoch": 7.500782472613459, |
| "grad_norm": 0.3053910732269287, |
| "learning_rate": 2.347167395711085e-06, |
| "loss": 0.0693, |
| "step": 3300 |
| }, |
| { |
| "epoch": 7.523545312277706, |
| "grad_norm": 0.23338255286216736, |
| "learning_rate": 2.354280024182937e-06, |
| "loss": 0.0661, |
| "step": 3310 |
| }, |
| { |
| "epoch": 7.546308151941955, |
| "grad_norm": 0.4110086262226105, |
| "learning_rate": 2.3613926526547886e-06, |
| "loss": 0.0702, |
| "step": 3320 |
| }, |
| { |
| "epoch": 7.569070991606203, |
| "grad_norm": 0.23433230817317963, |
| "learning_rate": 2.3685052811266406e-06, |
| "loss": 0.0697, |
| "step": 3330 |
| }, |
| { |
| "epoch": 7.591833831270451, |
| "grad_norm": 0.26817795634269714, |
| "learning_rate": 2.3756179095984925e-06, |
| "loss": 0.072, |
| "step": 3340 |
| }, |
| { |
| "epoch": 7.614596670934699, |
| "grad_norm": 0.274307519197464, |
| "learning_rate": 2.382730538070344e-06, |
| "loss": 0.0693, |
| "step": 3350 |
| }, |
| { |
| "epoch": 7.637359510598947, |
| "grad_norm": 0.2531595230102539, |
| "learning_rate": 2.389843166542196e-06, |
| "loss": 0.0691, |
| "step": 3360 |
| }, |
| { |
| "epoch": 7.660122350263196, |
| "grad_norm": 0.23066122829914093, |
| "learning_rate": 2.3969557950140475e-06, |
| "loss": 0.0702, |
| "step": 3370 |
| }, |
| { |
| "epoch": 7.682885189927443, |
| "grad_norm": 0.26466798782348633, |
| "learning_rate": 2.4040684234858995e-06, |
| "loss": 0.0672, |
| "step": 3380 |
| }, |
| { |
| "epoch": 7.705648029591692, |
| "grad_norm": 0.27675947546958923, |
| "learning_rate": 2.411181051957751e-06, |
| "loss": 0.0698, |
| "step": 3390 |
| }, |
| { |
| "epoch": 7.7284108692559395, |
| "grad_norm": 0.2792898416519165, |
| "learning_rate": 2.418293680429603e-06, |
| "loss": 0.0687, |
| "step": 3400 |
| }, |
| { |
| "epoch": 7.751173708920188, |
| "grad_norm": 0.3003191351890564, |
| "learning_rate": 2.425406308901455e-06, |
| "loss": 0.0654, |
| "step": 3410 |
| }, |
| { |
| "epoch": 7.773936548584436, |
| "grad_norm": 0.2721976637840271, |
| "learning_rate": 2.4325189373733065e-06, |
| "loss": 0.0687, |
| "step": 3420 |
| }, |
| { |
| "epoch": 7.796699388248684, |
| "grad_norm": 0.23832115530967712, |
| "learning_rate": 2.4396315658451584e-06, |
| "loss": 0.0679, |
| "step": 3430 |
| }, |
| { |
| "epoch": 7.819462227912932, |
| "grad_norm": 0.2580774128437042, |
| "learning_rate": 2.44674419431701e-06, |
| "loss": 0.0713, |
| "step": 3440 |
| }, |
| { |
| "epoch": 7.84222506757718, |
| "grad_norm": 0.2566244900226593, |
| "learning_rate": 2.453856822788862e-06, |
| "loss": 0.0676, |
| "step": 3450 |
| }, |
| { |
| "epoch": 7.864987907241428, |
| "grad_norm": 0.22271563112735748, |
| "learning_rate": 2.4609694512607134e-06, |
| "loss": 0.0708, |
| "step": 3460 |
| }, |
| { |
| "epoch": 7.8877507469056765, |
| "grad_norm": 0.2326367348432541, |
| "learning_rate": 2.4680820797325654e-06, |
| "loss": 0.0666, |
| "step": 3470 |
| }, |
| { |
| "epoch": 7.910513586569925, |
| "grad_norm": 0.2301758974790573, |
| "learning_rate": 2.4751947082044173e-06, |
| "loss": 0.0675, |
| "step": 3480 |
| }, |
| { |
| "epoch": 7.933276426234173, |
| "grad_norm": 0.27631568908691406, |
| "learning_rate": 2.482307336676269e-06, |
| "loss": 0.0693, |
| "step": 3490 |
| }, |
| { |
| "epoch": 7.956039265898421, |
| "grad_norm": 0.2313879281282425, |
| "learning_rate": 2.489419965148121e-06, |
| "loss": 0.0688, |
| "step": 3500 |
| }, |
| { |
| "epoch": 7.956039265898421, |
| "eval_loss": 0.04267411306500435, |
| "eval_runtime": 3.1141, |
| "eval_samples_per_second": 481.677, |
| "eval_steps_per_second": 7.707, |
| "eval_sts_dev_pearson_cosine": 0.8198817503712781, |
| "eval_sts_dev_pearson_dot": 0.7039245984668114, |
| "eval_sts_dev_pearson_euclidean": 0.7917017633196954, |
| "eval_sts_dev_pearson_manhattan": 0.7925964604579336, |
| "eval_sts_dev_pearson_max": 0.8198817503712781, |
| "eval_sts_dev_spearman_cosine": 0.8259962866116397, |
| "eval_sts_dev_spearman_dot": 0.686457485674087, |
| "eval_sts_dev_spearman_euclidean": 0.7960724017615675, |
| "eval_sts_dev_spearman_manhattan": 0.7974479322296961, |
| "eval_sts_dev_spearman_max": 0.8259962866116397, |
| "step": 3500 |
| }, |
| { |
| "epoch": 7.978802105562669, |
| "grad_norm": 0.24964259564876556, |
| "learning_rate": 2.4965325936199723e-06, |
| "loss": 0.068, |
| "step": 3510 |
| }, |
| { |
| "epoch": 8.0, |
| "grad_norm": 0.16819854080677032, |
| "learning_rate": 2.5036452220918243e-06, |
| "loss": 0.063, |
| "step": 3520 |
| }, |
| { |
| "epoch": 8.022762839664248, |
| "grad_norm": 0.271306574344635, |
| "learning_rate": 2.510757850563676e-06, |
| "loss": 0.0659, |
| "step": 3530 |
| }, |
| { |
| "epoch": 8.045525679328497, |
| "grad_norm": 0.2654683291912079, |
| "learning_rate": 2.5178704790355278e-06, |
| "loss": 0.0639, |
| "step": 3540 |
| }, |
| { |
| "epoch": 8.068288518992745, |
| "grad_norm": 0.2875867784023285, |
| "learning_rate": 2.5249831075073793e-06, |
| "loss": 0.0678, |
| "step": 3550 |
| }, |
| { |
| "epoch": 8.091051358656992, |
| "grad_norm": 0.24303793907165527, |
| "learning_rate": 2.5320957359792312e-06, |
| "loss": 0.0689, |
| "step": 3560 |
| }, |
| { |
| "epoch": 8.11381419832124, |
| "grad_norm": 0.2647855281829834, |
| "learning_rate": 2.5392083644510828e-06, |
| "loss": 0.0687, |
| "step": 3570 |
| }, |
| { |
| "epoch": 8.13657703798549, |
| "grad_norm": 0.24669058620929718, |
| "learning_rate": 2.5463209929229347e-06, |
| "loss": 0.0672, |
| "step": 3580 |
| }, |
| { |
| "epoch": 8.159339877649737, |
| "grad_norm": 0.2413569688796997, |
| "learning_rate": 2.553433621394787e-06, |
| "loss": 0.0659, |
| "step": 3590 |
| }, |
| { |
| "epoch": 8.182102717313985, |
| "grad_norm": 0.24042648077011108, |
| "learning_rate": 2.560546249866638e-06, |
| "loss": 0.0658, |
| "step": 3600 |
| }, |
| { |
| "epoch": 8.204865556978232, |
| "grad_norm": 0.2669181525707245, |
| "learning_rate": 2.5676588783384906e-06, |
| "loss": 0.0664, |
| "step": 3610 |
| }, |
| { |
| "epoch": 8.227628396642482, |
| "grad_norm": 0.24161921441555023, |
| "learning_rate": 2.5747715068103417e-06, |
| "loss": 0.0659, |
| "step": 3620 |
| }, |
| { |
| "epoch": 8.25039123630673, |
| "grad_norm": 0.269060879945755, |
| "learning_rate": 2.5818841352821936e-06, |
| "loss": 0.0664, |
| "step": 3630 |
| }, |
| { |
| "epoch": 8.273154075970977, |
| "grad_norm": 0.27089112997055054, |
| "learning_rate": 2.588996763754045e-06, |
| "loss": 0.0652, |
| "step": 3640 |
| }, |
| { |
| "epoch": 8.295916915635225, |
| "grad_norm": 0.2416848987340927, |
| "learning_rate": 2.596109392225897e-06, |
| "loss": 0.0683, |
| "step": 3650 |
| }, |
| { |
| "epoch": 8.318679755299474, |
| "grad_norm": 0.21763497591018677, |
| "learning_rate": 2.6032220206977495e-06, |
| "loss": 0.0641, |
| "step": 3660 |
| }, |
| { |
| "epoch": 8.341442594963722, |
| "grad_norm": 0.23553740978240967, |
| "learning_rate": 2.6103346491696006e-06, |
| "loss": 0.0672, |
| "step": 3670 |
| }, |
| { |
| "epoch": 8.36420543462797, |
| "grad_norm": 0.22709369659423828, |
| "learning_rate": 2.617447277641453e-06, |
| "loss": 0.0655, |
| "step": 3680 |
| }, |
| { |
| "epoch": 8.386968274292219, |
| "grad_norm": 0.22554650902748108, |
| "learning_rate": 2.624559906113304e-06, |
| "loss": 0.0661, |
| "step": 3690 |
| }, |
| { |
| "epoch": 8.409731113956466, |
| "grad_norm": 0.22940964996814728, |
| "learning_rate": 2.6316725345851564e-06, |
| "loss": 0.0638, |
| "step": 3700 |
| }, |
| { |
| "epoch": 8.432493953620714, |
| "grad_norm": 0.22286923229694366, |
| "learning_rate": 2.6387851630570076e-06, |
| "loss": 0.0675, |
| "step": 3710 |
| }, |
| { |
| "epoch": 8.455256793284962, |
| "grad_norm": 0.2673478126525879, |
| "learning_rate": 2.64589779152886e-06, |
| "loss": 0.0648, |
| "step": 3720 |
| }, |
| { |
| "epoch": 8.478019632949211, |
| "grad_norm": 0.3129027485847473, |
| "learning_rate": 2.653010420000712e-06, |
| "loss": 0.067, |
| "step": 3730 |
| }, |
| { |
| "epoch": 8.500782472613459, |
| "grad_norm": 0.2747926712036133, |
| "learning_rate": 2.6601230484725634e-06, |
| "loss": 0.0684, |
| "step": 3740 |
| }, |
| { |
| "epoch": 8.523545312277706, |
| "grad_norm": 0.24347588419914246, |
| "learning_rate": 2.6672356769444154e-06, |
| "loss": 0.0667, |
| "step": 3750 |
| }, |
| { |
| "epoch": 8.523545312277706, |
| "eval_loss": 0.04199772700667381, |
| "eval_runtime": 3.2359, |
| "eval_samples_per_second": 463.553, |
| "eval_steps_per_second": 7.417, |
| "eval_sts_dev_pearson_cosine": 0.8204944023566688, |
| "eval_sts_dev_pearson_dot": 0.7040977870407604, |
| "eval_sts_dev_pearson_euclidean": 0.7919081914053657, |
| "eval_sts_dev_pearson_manhattan": 0.7927432790586826, |
| "eval_sts_dev_pearson_max": 0.8204944023566688, |
| "eval_sts_dev_spearman_cosine": 0.8268027687748516, |
| "eval_sts_dev_spearman_dot": 0.6869146800512241, |
| "eval_sts_dev_spearman_euclidean": 0.7966713636345194, |
| "eval_sts_dev_spearman_manhattan": 0.798229350269622, |
| "eval_sts_dev_spearman_max": 0.8268027687748516, |
| "step": 3750 |
| }, |
| { |
| "epoch": 8.546308151941954, |
| "grad_norm": 0.24286551773548126, |
| "learning_rate": 2.6743483054162665e-06, |
| "loss": 0.0645, |
| "step": 3760 |
| }, |
| { |
| "epoch": 8.569070991606203, |
| "grad_norm": 0.25422462821006775, |
| "learning_rate": 2.681460933888119e-06, |
| "loss": 0.0652, |
| "step": 3770 |
| }, |
| { |
| "epoch": 8.591833831270451, |
| "grad_norm": 0.22316358983516693, |
| "learning_rate": 2.68857356235997e-06, |
| "loss": 0.0633, |
| "step": 3780 |
| }, |
| { |
| "epoch": 8.614596670934699, |
| "grad_norm": 0.2212369740009308, |
| "learning_rate": 2.6956861908318223e-06, |
| "loss": 0.065, |
| "step": 3790 |
| }, |
| { |
| "epoch": 8.637359510598948, |
| "grad_norm": 0.24087338149547577, |
| "learning_rate": 2.7027988193036743e-06, |
| "loss": 0.064, |
| "step": 3800 |
| }, |
| { |
| "epoch": 8.660122350263196, |
| "grad_norm": 0.3741282522678375, |
| "learning_rate": 2.709911447775526e-06, |
| "loss": 0.0677, |
| "step": 3810 |
| }, |
| { |
| "epoch": 8.682885189927443, |
| "grad_norm": 0.3055291771888733, |
| "learning_rate": 2.7170240762473777e-06, |
| "loss": 0.0661, |
| "step": 3820 |
| }, |
| { |
| "epoch": 8.705648029591691, |
| "grad_norm": 0.25181838870048523, |
| "learning_rate": 2.7241367047192293e-06, |
| "loss": 0.0653, |
| "step": 3830 |
| }, |
| { |
| "epoch": 8.72841086925594, |
| "grad_norm": 0.21388790011405945, |
| "learning_rate": 2.7312493331910812e-06, |
| "loss": 0.0625, |
| "step": 3840 |
| }, |
| { |
| "epoch": 8.751173708920188, |
| "grad_norm": 0.23756906390190125, |
| "learning_rate": 2.7383619616629328e-06, |
| "loss": 0.0651, |
| "step": 3850 |
| }, |
| { |
| "epoch": 8.773936548584436, |
| "grad_norm": 0.2743723690509796, |
| "learning_rate": 2.7454745901347847e-06, |
| "loss": 0.0656, |
| "step": 3860 |
| }, |
| { |
| "epoch": 8.796699388248683, |
| "grad_norm": 0.25479790568351746, |
| "learning_rate": 2.7525872186066367e-06, |
| "loss": 0.0636, |
| "step": 3870 |
| }, |
| { |
| "epoch": 8.819462227912933, |
| "grad_norm": 0.23488923907279968, |
| "learning_rate": 2.759699847078488e-06, |
| "loss": 0.0655, |
| "step": 3880 |
| }, |
| { |
| "epoch": 8.84222506757718, |
| "grad_norm": 0.303478866815567, |
| "learning_rate": 2.76681247555034e-06, |
| "loss": 0.0647, |
| "step": 3890 |
| }, |
| { |
| "epoch": 8.864987907241428, |
| "grad_norm": 0.22421741485595703, |
| "learning_rate": 2.7739251040221917e-06, |
| "loss": 0.0638, |
| "step": 3900 |
| }, |
| { |
| "epoch": 8.887750746905677, |
| "grad_norm": 0.2436189502477646, |
| "learning_rate": 2.7810377324940436e-06, |
| "loss": 0.0636, |
| "step": 3910 |
| }, |
| { |
| "epoch": 8.910513586569925, |
| "grad_norm": 0.35646867752075195, |
| "learning_rate": 2.788150360965895e-06, |
| "loss": 0.0666, |
| "step": 3920 |
| }, |
| { |
| "epoch": 8.933276426234173, |
| "grad_norm": 0.2209872603416443, |
| "learning_rate": 2.795262989437747e-06, |
| "loss": 0.062, |
| "step": 3930 |
| }, |
| { |
| "epoch": 8.95603926589842, |
| "grad_norm": 0.25649750232696533, |
| "learning_rate": 2.8023756179095986e-06, |
| "loss": 0.065, |
| "step": 3940 |
| }, |
| { |
| "epoch": 8.97880210556267, |
| "grad_norm": 0.24208350479602814, |
| "learning_rate": 2.8094882463814506e-06, |
| "loss": 0.0643, |
| "step": 3950 |
| }, |
| { |
| "epoch": 9.0, |
| "grad_norm": 0.11922793090343475, |
| "learning_rate": 2.8166008748533025e-06, |
| "loss": 0.0594, |
| "step": 3960 |
| }, |
| { |
| "epoch": 9.022762839664248, |
| "grad_norm": 0.2234543114900589, |
| "learning_rate": 2.823713503325154e-06, |
| "loss": 0.0616, |
| "step": 3970 |
| }, |
| { |
| "epoch": 9.045525679328497, |
| "grad_norm": 0.21580451726913452, |
| "learning_rate": 2.830826131797006e-06, |
| "loss": 0.0638, |
| "step": 3980 |
| }, |
| { |
| "epoch": 9.068288518992745, |
| "grad_norm": 0.2051229476928711, |
| "learning_rate": 2.8379387602688575e-06, |
| "loss": 0.0625, |
| "step": 3990 |
| }, |
| { |
| "epoch": 9.091051358656992, |
| "grad_norm": 0.2237127125263214, |
| "learning_rate": 2.8450513887407095e-06, |
| "loss": 0.0665, |
| "step": 4000 |
| }, |
| { |
| "epoch": 9.091051358656992, |
| "eval_loss": 0.041355252265930176, |
| "eval_runtime": 3.1834, |
| "eval_samples_per_second": 471.199, |
| "eval_steps_per_second": 7.539, |
| "eval_sts_dev_pearson_cosine": 0.8220285778407846, |
| "eval_sts_dev_pearson_dot": 0.7022254885739367, |
| "eval_sts_dev_pearson_euclidean": 0.7933532583617332, |
| "eval_sts_dev_pearson_manhattan": 0.7941338912825391, |
| "eval_sts_dev_pearson_max": 0.8220285778407846, |
| "eval_sts_dev_spearman_cosine": 0.8276471334482826, |
| "eval_sts_dev_spearman_dot": 0.6857559655167198, |
| "eval_sts_dev_spearman_euclidean": 0.7981249234213611, |
| "eval_sts_dev_spearman_manhattan": 0.7997185742063436, |
| "eval_sts_dev_spearman_max": 0.8276471334482826, |
| "step": 4000 |
| }, |
| { |
| "epoch": 9.11381419832124, |
| "grad_norm": 0.23453885316848755, |
| "learning_rate": 2.852164017212561e-06, |
| "loss": 0.0624, |
| "step": 4010 |
| }, |
| { |
| "epoch": 9.13657703798549, |
| "grad_norm": 0.22881363332271576, |
| "learning_rate": 2.859276645684413e-06, |
| "loss": 0.0621, |
| "step": 4020 |
| }, |
| { |
| "epoch": 9.159339877649737, |
| "grad_norm": 0.21634767949581146, |
| "learning_rate": 2.866389274156265e-06, |
| "loss": 0.0648, |
| "step": 4030 |
| }, |
| { |
| "epoch": 9.182102717313985, |
| "grad_norm": 0.2653968334197998, |
| "learning_rate": 2.8735019026281164e-06, |
| "loss": 0.0622, |
| "step": 4040 |
| }, |
| { |
| "epoch": 9.204865556978232, |
| "grad_norm": 0.2806706726551056, |
| "learning_rate": 2.8806145310999684e-06, |
| "loss": 0.0635, |
| "step": 4050 |
| }, |
| { |
| "epoch": 9.227628396642482, |
| "grad_norm": 0.25029635429382324, |
| "learning_rate": 2.88772715957182e-06, |
| "loss": 0.061, |
| "step": 4060 |
| }, |
| { |
| "epoch": 9.25039123630673, |
| "grad_norm": 0.24983397126197815, |
| "learning_rate": 2.894839788043672e-06, |
| "loss": 0.0602, |
| "step": 4070 |
| }, |
| { |
| "epoch": 9.273154075970977, |
| "grad_norm": 0.21316730976104736, |
| "learning_rate": 2.9019524165155234e-06, |
| "loss": 0.0613, |
| "step": 4080 |
| }, |
| { |
| "epoch": 9.295916915635225, |
| "grad_norm": 0.21870028972625732, |
| "learning_rate": 2.9090650449873754e-06, |
| "loss": 0.0604, |
| "step": 4090 |
| }, |
| { |
| "epoch": 9.318679755299474, |
| "grad_norm": 0.21702495217323303, |
| "learning_rate": 2.9161776734592273e-06, |
| "loss": 0.0623, |
| "step": 4100 |
| }, |
| { |
| "epoch": 9.341442594963722, |
| "grad_norm": 0.22777798771858215, |
| "learning_rate": 2.923290301931079e-06, |
| "loss": 0.0641, |
| "step": 4110 |
| }, |
| { |
| "epoch": 9.36420543462797, |
| "grad_norm": 0.2656283378601074, |
| "learning_rate": 2.930402930402931e-06, |
| "loss": 0.0635, |
| "step": 4120 |
| }, |
| { |
| "epoch": 9.386968274292219, |
| "grad_norm": 0.23527038097381592, |
| "learning_rate": 2.9375155588747823e-06, |
| "loss": 0.0608, |
| "step": 4130 |
| }, |
| { |
| "epoch": 9.409731113956466, |
| "grad_norm": 0.21856476366519928, |
| "learning_rate": 2.9446281873466343e-06, |
| "loss": 0.0611, |
| "step": 4140 |
| }, |
| { |
| "epoch": 9.432493953620714, |
| "grad_norm": 0.23688729107379913, |
| "learning_rate": 2.951740815818486e-06, |
| "loss": 0.0607, |
| "step": 4150 |
| }, |
| { |
| "epoch": 9.455256793284962, |
| "grad_norm": 0.26457446813583374, |
| "learning_rate": 2.9588534442903377e-06, |
| "loss": 0.0631, |
| "step": 4160 |
| }, |
| { |
| "epoch": 9.478019632949211, |
| "grad_norm": 0.31578782200813293, |
| "learning_rate": 2.9659660727621897e-06, |
| "loss": 0.0618, |
| "step": 4170 |
| }, |
| { |
| "epoch": 9.500782472613459, |
| "grad_norm": 0.23187491297721863, |
| "learning_rate": 2.9730787012340412e-06, |
| "loss": 0.0609, |
| "step": 4180 |
| }, |
| { |
| "epoch": 9.523545312277706, |
| "grad_norm": 0.24577929079532623, |
| "learning_rate": 2.980191329705893e-06, |
| "loss": 0.0613, |
| "step": 4190 |
| }, |
| { |
| "epoch": 9.546308151941954, |
| "grad_norm": 0.23201169073581696, |
| "learning_rate": 2.9873039581777447e-06, |
| "loss": 0.0606, |
| "step": 4200 |
| }, |
| { |
| "epoch": 9.569070991606203, |
| "grad_norm": 0.2860512137413025, |
| "learning_rate": 2.9944165866495967e-06, |
| "loss": 0.0595, |
| "step": 4210 |
| }, |
| { |
| "epoch": 9.591833831270451, |
| "grad_norm": 0.237753763794899, |
| "learning_rate": 3.001529215121448e-06, |
| "loss": 0.0609, |
| "step": 4220 |
| }, |
| { |
| "epoch": 9.614596670934699, |
| "grad_norm": 0.23422682285308838, |
| "learning_rate": 3.0086418435933e-06, |
| "loss": 0.061, |
| "step": 4230 |
| }, |
| { |
| "epoch": 9.637359510598948, |
| "grad_norm": 0.2497267723083496, |
| "learning_rate": 3.015754472065152e-06, |
| "loss": 0.0616, |
| "step": 4240 |
| }, |
| { |
| "epoch": 9.660122350263196, |
| "grad_norm": 0.2505936622619629, |
| "learning_rate": 3.0228671005370036e-06, |
| "loss": 0.0613, |
| "step": 4250 |
| }, |
| { |
| "epoch": 9.660122350263196, |
| "eval_loss": 0.04175787419080734, |
| "eval_runtime": 3.1427, |
| "eval_samples_per_second": 477.3, |
| "eval_steps_per_second": 7.637, |
| "eval_sts_dev_pearson_cosine": 0.8220874775898197, |
| "eval_sts_dev_pearson_dot": 0.7010536213435227, |
| "eval_sts_dev_pearson_euclidean": 0.7929031352092236, |
| "eval_sts_dev_pearson_manhattan": 0.7936882861676204, |
| "eval_sts_dev_pearson_max": 0.8220874775898197, |
| "eval_sts_dev_spearman_cosine": 0.8282368218808581, |
| "eval_sts_dev_spearman_dot": 0.6844746263331734, |
| "eval_sts_dev_spearman_euclidean": 0.7979913252239026, |
| "eval_sts_dev_spearman_manhattan": 0.7996541111809876, |
| "eval_sts_dev_spearman_max": 0.8282368218808581, |
| "step": 4250 |
| }, |
| { |
| "epoch": 9.682885189927443, |
| "grad_norm": 0.2565889060497284, |
| "learning_rate": 3.0299797290088556e-06, |
| "loss": 0.0623, |
| "step": 4260 |
| }, |
| { |
| "epoch": 9.705648029591691, |
| "grad_norm": 0.2263515293598175, |
| "learning_rate": 3.037092357480707e-06, |
| "loss": 0.0605, |
| "step": 4270 |
| }, |
| { |
| "epoch": 9.72841086925594, |
| "grad_norm": 0.21705535054206848, |
| "learning_rate": 3.044204985952559e-06, |
| "loss": 0.0637, |
| "step": 4280 |
| }, |
| { |
| "epoch": 9.751173708920188, |
| "grad_norm": 0.21649038791656494, |
| "learning_rate": 3.0513176144244106e-06, |
| "loss": 0.0604, |
| "step": 4290 |
| }, |
| { |
| "epoch": 9.773936548584436, |
| "grad_norm": 0.22717022895812988, |
| "learning_rate": 3.0584302428962625e-06, |
| "loss": 0.0606, |
| "step": 4300 |
| }, |
| { |
| "epoch": 9.796699388248683, |
| "grad_norm": 0.23610946536064148, |
| "learning_rate": 3.0655428713681145e-06, |
| "loss": 0.0622, |
| "step": 4310 |
| }, |
| { |
| "epoch": 9.819462227912933, |
| "grad_norm": 0.2080880105495453, |
| "learning_rate": 3.072655499839966e-06, |
| "loss": 0.0598, |
| "step": 4320 |
| }, |
| { |
| "epoch": 9.84222506757718, |
| "grad_norm": 0.2862449884414673, |
| "learning_rate": 3.079768128311818e-06, |
| "loss": 0.0611, |
| "step": 4330 |
| }, |
| { |
| "epoch": 9.864987907241428, |
| "grad_norm": 0.2211073935031891, |
| "learning_rate": 3.0868807567836695e-06, |
| "loss": 0.0604, |
| "step": 4340 |
| }, |
| { |
| "epoch": 9.887750746905677, |
| "grad_norm": 0.2399899959564209, |
| "learning_rate": 3.0939933852555214e-06, |
| "loss": 0.0598, |
| "step": 4350 |
| }, |
| { |
| "epoch": 9.910513586569925, |
| "grad_norm": 0.2330579161643982, |
| "learning_rate": 3.101106013727373e-06, |
| "loss": 0.0626, |
| "step": 4360 |
| }, |
| { |
| "epoch": 9.933276426234173, |
| "grad_norm": 0.23163940012454987, |
| "learning_rate": 3.108218642199225e-06, |
| "loss": 0.0624, |
| "step": 4370 |
| }, |
| { |
| "epoch": 9.95603926589842, |
| "grad_norm": 0.2087012380361557, |
| "learning_rate": 3.115331270671077e-06, |
| "loss": 0.0617, |
| "step": 4380 |
| }, |
| { |
| "epoch": 9.97880210556267, |
| "grad_norm": 0.24286577105522156, |
| "learning_rate": 3.1224438991429284e-06, |
| "loss": 0.0603, |
| "step": 4390 |
| } |
| ], |
| "logging_steps": 10, |
| "max_steps": 4390, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 10, |
| "save_steps": 250, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": true |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 8, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|