| { |
| "best_metric": 0.8299749701259963, |
| "best_model_checkpoint": "output/marbert_simce_EuroBERT-EuroBERT-610M_16_bs_1_e/checkpoint-25000", |
| "epoch": 0.8139208532135841, |
| "eval_steps": 500, |
| "global_step": 29000, |
| "is_hyper_param_search": false, |
| "is_local_process_zero": true, |
| "is_world_process_zero": true, |
| "log_history": [ |
| { |
| "epoch": 0.005613247263541959, |
| "grad_norm": 98309536.0, |
| "learning_rate": 2.8066236317709794e-06, |
| "loss": 7.8472, |
| "step": 200 |
| }, |
| { |
| "epoch": 0.011226494527083918, |
| "grad_norm": 18181398.0, |
| "learning_rate": 5.613247263541959e-06, |
| "loss": 1.8133, |
| "step": 400 |
| }, |
| { |
| "epoch": 0.014033118158854897, |
| "eval_loss": 4.332894802093506, |
| "eval_runtime": 80.7749, |
| "eval_samples_per_second": 81.82, |
| "eval_sequential_score": 0.8033767514947983, |
| "eval_steps_per_second": 1.288, |
| "eval_sts-dev-1152_pearson_cosine": 0.8060511562516744, |
| "eval_sts-dev-1152_spearman_cosine": 0.8033767514947983, |
| "eval_sts-dev-512_pearson_cosine": 0.8032161700580676, |
| "eval_sts-dev-512_spearman_cosine": 0.8009572819138866, |
| "eval_sts-dev-768_pearson_cosine": 0.8037436235908193, |
| "eval_sts-dev-768_spearman_cosine": 0.8010140475354219, |
| "eval_sts-dev-960_pearson_cosine": 0.8052857547543548, |
| "eval_sts-dev-960_spearman_cosine": 0.8024252040181402, |
| "step": 500 |
| }, |
| { |
| "epoch": 0.016839741790625876, |
| "grad_norm": 4002785.75, |
| "learning_rate": 8.41987089531294e-06, |
| "loss": 1.2257, |
| "step": 600 |
| }, |
| { |
| "epoch": 0.022452989054167836, |
| "grad_norm": 14975067.0, |
| "learning_rate": 1.1226494527083917e-05, |
| "loss": 1.0662, |
| "step": 800 |
| }, |
| { |
| "epoch": 0.028066236317709794, |
| "grad_norm": 8259098.0, |
| "learning_rate": 1.4033118158854899e-05, |
| "loss": 1.0452, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.028066236317709794, |
| "eval_loss": 4.0553975105285645, |
| "eval_runtime": 83.8854, |
| "eval_samples_per_second": 78.786, |
| "eval_sequential_score": 0.8152056130630069, |
| "eval_steps_per_second": 1.24, |
| "eval_sts-dev-1152_pearson_cosine": 0.8162027650076662, |
| "eval_sts-dev-1152_spearman_cosine": 0.8152056130630069, |
| "eval_sts-dev-512_pearson_cosine": 0.8109347704147131, |
| "eval_sts-dev-512_spearman_cosine": 0.811300104502657, |
| "eval_sts-dev-768_pearson_cosine": 0.8133183334539484, |
| "eval_sts-dev-768_spearman_cosine": 0.8130045482521145, |
| "eval_sts-dev-960_pearson_cosine": 0.8156636692837823, |
| "eval_sts-dev-960_spearman_cosine": 0.814950405147375, |
| "step": 1000 |
| }, |
| { |
| "epoch": 0.03367948358125175, |
| "grad_norm": 4874028.5, |
| "learning_rate": 1.683974179062588e-05, |
| "loss": 1.0306, |
| "step": 1200 |
| }, |
| { |
| "epoch": 0.03929273084479371, |
| "grad_norm": 4182119.25, |
| "learning_rate": 1.9646365422396855e-05, |
| "loss": 1.0223, |
| "step": 1400 |
| }, |
| { |
| "epoch": 0.042099354476564696, |
| "eval_loss": 4.015018939971924, |
| "eval_runtime": 84.6437, |
| "eval_samples_per_second": 78.08, |
| "eval_sequential_score": 0.7963919935425935, |
| "eval_steps_per_second": 1.229, |
| "eval_sts-dev-1152_pearson_cosine": 0.7984721704385993, |
| "eval_sts-dev-1152_spearman_cosine": 0.7963919935425935, |
| "eval_sts-dev-512_pearson_cosine": 0.7945396929823907, |
| "eval_sts-dev-512_spearman_cosine": 0.7938897942194647, |
| "eval_sts-dev-768_pearson_cosine": 0.796786289376193, |
| "eval_sts-dev-768_spearman_cosine": 0.7957412002954445, |
| "eval_sts-dev-960_pearson_cosine": 0.7984310473849502, |
| "eval_sts-dev-960_spearman_cosine": 0.7964965649245895, |
| "step": 1500 |
| }, |
| { |
| "epoch": 0.04490597810833567, |
| "grad_norm": 4491974.5, |
| "learning_rate": 2.2452989054167835e-05, |
| "loss": 0.9923, |
| "step": 1600 |
| }, |
| { |
| "epoch": 0.050519225371877634, |
| "grad_norm": 2410247.5, |
| "learning_rate": 2.5259612685938815e-05, |
| "loss": 1.007, |
| "step": 1800 |
| }, |
| { |
| "epoch": 0.05613247263541959, |
| "grad_norm": 3212723.25, |
| "learning_rate": 2.8066236317709798e-05, |
| "loss": 0.9898, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.05613247263541959, |
| "eval_loss": 4.562768459320068, |
| "eval_runtime": 84.3142, |
| "eval_samples_per_second": 78.385, |
| "eval_sequential_score": 0.7838450146591204, |
| "eval_steps_per_second": 1.233, |
| "eval_sts-dev-1152_pearson_cosine": 0.7765908851768635, |
| "eval_sts-dev-1152_spearman_cosine": 0.7838450146591204, |
| "eval_sts-dev-512_pearson_cosine": 0.7672097671946088, |
| "eval_sts-dev-512_spearman_cosine": 0.7778618805232163, |
| "eval_sts-dev-768_pearson_cosine": 0.7696489245819802, |
| "eval_sts-dev-768_spearman_cosine": 0.7793358424223233, |
| "eval_sts-dev-960_pearson_cosine": 0.7741670009813553, |
| "eval_sts-dev-960_spearman_cosine": 0.7829158933416259, |
| "step": 2000 |
| }, |
| { |
| "epoch": 0.06174571989896155, |
| "grad_norm": 1835005.25, |
| "learning_rate": 3.087285994948078e-05, |
| "loss": 1.033, |
| "step": 2200 |
| }, |
| { |
| "epoch": 0.0673589671625035, |
| "grad_norm": 2749047.0, |
| "learning_rate": 3.367948358125176e-05, |
| "loss": 1.0091, |
| "step": 2400 |
| }, |
| { |
| "epoch": 0.0701655907942745, |
| "eval_loss": 4.613296985626221, |
| "eval_runtime": 81.7193, |
| "eval_samples_per_second": 80.874, |
| "eval_sequential_score": 0.7854737360223817, |
| "eval_steps_per_second": 1.273, |
| "eval_sts-dev-1152_pearson_cosine": 0.7821361682028354, |
| "eval_sts-dev-1152_spearman_cosine": 0.7854737360223817, |
| "eval_sts-dev-512_pearson_cosine": 0.7723481989731885, |
| "eval_sts-dev-512_spearman_cosine": 0.7767002410536074, |
| "eval_sts-dev-768_pearson_cosine": 0.7752960500176977, |
| "eval_sts-dev-768_spearman_cosine": 0.7801463868858681, |
| "eval_sts-dev-960_pearson_cosine": 0.779209273182729, |
| "eval_sts-dev-960_spearman_cosine": 0.7833584025542436, |
| "step": 2500 |
| }, |
| { |
| "epoch": 0.07297221442604547, |
| "grad_norm": 2484525.5, |
| "learning_rate": 3.648610721302274e-05, |
| "loss": 1.046, |
| "step": 2600 |
| }, |
| { |
| "epoch": 0.07858546168958742, |
| "grad_norm": 2774297.5, |
| "learning_rate": 3.929273084479371e-05, |
| "loss": 1.0212, |
| "step": 2800 |
| }, |
| { |
| "epoch": 0.08419870895312939, |
| "grad_norm": 3957846.75, |
| "learning_rate": 4.20993544765647e-05, |
| "loss": 1.0923, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.08419870895312939, |
| "eval_loss": 5.038168430328369, |
| "eval_runtime": 81.5206, |
| "eval_samples_per_second": 81.071, |
| "eval_sequential_score": 0.769819909731699, |
| "eval_steps_per_second": 1.276, |
| "eval_sts-dev-1152_pearson_cosine": 0.7639981739632917, |
| "eval_sts-dev-1152_spearman_cosine": 0.769819909731699, |
| "eval_sts-dev-512_pearson_cosine": 0.7557090629225184, |
| "eval_sts-dev-512_spearman_cosine": 0.7642757401766183, |
| "eval_sts-dev-768_pearson_cosine": 0.7573996144114894, |
| "eval_sts-dev-768_spearman_cosine": 0.7655547584963449, |
| "eval_sts-dev-960_pearson_cosine": 0.761317538183819, |
| "eval_sts-dev-960_spearman_cosine": 0.7681871634317281, |
| "step": 3000 |
| }, |
| { |
| "epoch": 0.08981195621667135, |
| "grad_norm": 2094610.625, |
| "learning_rate": 4.490597810833567e-05, |
| "loss": 1.0542, |
| "step": 3200 |
| }, |
| { |
| "epoch": 0.0954252034802133, |
| "grad_norm": 1429313.5, |
| "learning_rate": 4.7712601740106656e-05, |
| "loss": 1.025, |
| "step": 3400 |
| }, |
| { |
| "epoch": 0.09823182711198428, |
| "eval_loss": 4.955362319946289, |
| "eval_runtime": 78.4429, |
| "eval_samples_per_second": 84.252, |
| "eval_sequential_score": 0.773224539195293, |
| "eval_steps_per_second": 1.326, |
| "eval_sts-dev-1152_pearson_cosine": 0.7657713900430686, |
| "eval_sts-dev-1152_spearman_cosine": 0.773224539195293, |
| "eval_sts-dev-512_pearson_cosine": 0.7600635376400412, |
| "eval_sts-dev-512_spearman_cosine": 0.7680689406591721, |
| "eval_sts-dev-768_pearson_cosine": 0.7604851132447141, |
| "eval_sts-dev-768_spearman_cosine": 0.7693172932298855, |
| "eval_sts-dev-960_pearson_cosine": 0.7631668495431749, |
| "eval_sts-dev-960_spearman_cosine": 0.7712486101715305, |
| "step": 3500 |
| }, |
| { |
| "epoch": 0.10103845074375527, |
| "grad_norm": 993321.0, |
| "learning_rate": 4.99423082920136e-05, |
| "loss": 1.0056, |
| "step": 3600 |
| }, |
| { |
| "epoch": 0.10665169800729722, |
| "grad_norm": 13974978.0, |
| "learning_rate": 4.963046122181682e-05, |
| "loss": 1.0689, |
| "step": 3800 |
| }, |
| { |
| "epoch": 0.11226494527083918, |
| "grad_norm": 6447055.0, |
| "learning_rate": 4.931861415162005e-05, |
| "loss": 1.0453, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.11226494527083918, |
| "eval_loss": 5.834151268005371, |
| "eval_runtime": 77.2575, |
| "eval_samples_per_second": 85.545, |
| "eval_sequential_score": 0.7656099368624604, |
| "eval_steps_per_second": 1.346, |
| "eval_sts-dev-1152_pearson_cosine": 0.7552591908965304, |
| "eval_sts-dev-1152_spearman_cosine": 0.7656099368624604, |
| "eval_sts-dev-512_pearson_cosine": 0.7502607892289657, |
| "eval_sts-dev-512_spearman_cosine": 0.7606979870614468, |
| "eval_sts-dev-768_pearson_cosine": 0.7475751433884098, |
| "eval_sts-dev-768_spearman_cosine": 0.7607444598882842, |
| "eval_sts-dev-960_pearson_cosine": 0.7529520081710266, |
| "eval_sts-dev-960_spearman_cosine": 0.764507705472108, |
| "step": 4000 |
| }, |
| { |
| "epoch": 0.11787819253438114, |
| "grad_norm": 1890051.125, |
| "learning_rate": 4.9006767081423274e-05, |
| "loss": 1.0874, |
| "step": 4200 |
| }, |
| { |
| "epoch": 0.1234914397979231, |
| "grad_norm": 1684058.0, |
| "learning_rate": 4.8694920011226495e-05, |
| "loss": 1.0051, |
| "step": 4400 |
| }, |
| { |
| "epoch": 0.12629806342969407, |
| "eval_loss": 5.076698303222656, |
| "eval_runtime": 79.9271, |
| "eval_samples_per_second": 82.688, |
| "eval_sequential_score": 0.7752075113901122, |
| "eval_steps_per_second": 1.301, |
| "eval_sts-dev-1152_pearson_cosine": 0.7719939114671934, |
| "eval_sts-dev-1152_spearman_cosine": 0.7752075113901122, |
| "eval_sts-dev-512_pearson_cosine": 0.7652586024520893, |
| "eval_sts-dev-512_spearman_cosine": 0.7706670879702195, |
| "eval_sts-dev-768_pearson_cosine": 0.7663537315286835, |
| "eval_sts-dev-768_spearman_cosine": 0.771176148682848, |
| "eval_sts-dev-960_pearson_cosine": 0.7691859699812915, |
| "eval_sts-dev-960_spearman_cosine": 0.7728489604857174, |
| "step": 4500 |
| }, |
| { |
| "epoch": 0.12910468706146505, |
| "grad_norm": 1788628.75, |
| "learning_rate": 4.838307294102972e-05, |
| "loss": 1.0007, |
| "step": 4600 |
| }, |
| { |
| "epoch": 0.134717934325007, |
| "grad_norm": 1246448.375, |
| "learning_rate": 4.807122587083295e-05, |
| "loss": 0.9307, |
| "step": 4800 |
| }, |
| { |
| "epoch": 0.140331181588549, |
| "grad_norm": 1078949.875, |
| "learning_rate": 4.775937880063617e-05, |
| "loss": 0.9642, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.140331181588549, |
| "eval_loss": 5.198572158813477, |
| "eval_runtime": 77.2033, |
| "eval_samples_per_second": 85.605, |
| "eval_sequential_score": 0.7683338860401957, |
| "eval_steps_per_second": 1.347, |
| "eval_sts-dev-1152_pearson_cosine": 0.7579755842587974, |
| "eval_sts-dev-1152_spearman_cosine": 0.7683338860401957, |
| "eval_sts-dev-512_pearson_cosine": 0.7497028852140948, |
| "eval_sts-dev-512_spearman_cosine": 0.761778428268311, |
| "eval_sts-dev-768_pearson_cosine": 0.7527948972243363, |
| "eval_sts-dev-768_spearman_cosine": 0.7652442137002148, |
| "eval_sts-dev-960_pearson_cosine": 0.7555515440882907, |
| "eval_sts-dev-960_spearman_cosine": 0.7666530937388959, |
| "step": 5000 |
| }, |
| { |
| "epoch": 0.14594442885209094, |
| "grad_norm": 1531033.125, |
| "learning_rate": 4.744753173043939e-05, |
| "loss": 0.9259, |
| "step": 5200 |
| }, |
| { |
| "epoch": 0.1515576761156329, |
| "grad_norm": 2660688.5, |
| "learning_rate": 4.713568466024262e-05, |
| "loss": 0.8908, |
| "step": 5400 |
| }, |
| { |
| "epoch": 0.15436429974740387, |
| "eval_loss": 5.210824966430664, |
| "eval_runtime": 78.072, |
| "eval_samples_per_second": 84.653, |
| "eval_sequential_score": 0.7761786001654587, |
| "eval_steps_per_second": 1.332, |
| "eval_sts-dev-1152_pearson_cosine": 0.7628982988651709, |
| "eval_sts-dev-1152_spearman_cosine": 0.7761786001654587, |
| "eval_sts-dev-512_pearson_cosine": 0.7563657029921103, |
| "eval_sts-dev-512_spearman_cosine": 0.7711187793718405, |
| "eval_sts-dev-768_pearson_cosine": 0.7545503313040781, |
| "eval_sts-dev-768_spearman_cosine": 0.7713354514018652, |
| "eval_sts-dev-960_pearson_cosine": 0.7595387577495225, |
| "eval_sts-dev-960_spearman_cosine": 0.774137594546881, |
| "step": 5500 |
| }, |
| { |
| "epoch": 0.15717092337917485, |
| "grad_norm": 1493755.375, |
| "learning_rate": 4.6823837590045846e-05, |
| "loss": 0.8812, |
| "step": 5600 |
| }, |
| { |
| "epoch": 0.1627841706427168, |
| "grad_norm": 1185704.125, |
| "learning_rate": 4.651199051984907e-05, |
| "loss": 0.8544, |
| "step": 5800 |
| }, |
| { |
| "epoch": 0.16839741790625878, |
| "grad_norm": 1171420.375, |
| "learning_rate": 4.6200143449652295e-05, |
| "loss": 0.8314, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.16839741790625878, |
| "eval_loss": 5.24008321762085, |
| "eval_runtime": 77.1769, |
| "eval_samples_per_second": 85.634, |
| "eval_sequential_score": 0.7731812487805466, |
| "eval_steps_per_second": 1.348, |
| "eval_sts-dev-1152_pearson_cosine": 0.7629915610098019, |
| "eval_sts-dev-1152_spearman_cosine": 0.7731812487805466, |
| "eval_sts-dev-512_pearson_cosine": 0.7548517081547739, |
| "eval_sts-dev-512_spearman_cosine": 0.7660070542727266, |
| "eval_sts-dev-768_pearson_cosine": 0.7552724107119573, |
| "eval_sts-dev-768_spearman_cosine": 0.7688482677049882, |
| "eval_sts-dev-960_pearson_cosine": 0.7593839964325895, |
| "eval_sts-dev-960_spearman_cosine": 0.7709090473847962, |
| "step": 6000 |
| }, |
| { |
| "epoch": 0.17401066516980074, |
| "grad_norm": 20138014.0, |
| "learning_rate": 4.588829637945552e-05, |
| "loss": 0.8258, |
| "step": 6200 |
| }, |
| { |
| "epoch": 0.1796239124333427, |
| "grad_norm": 936058.875, |
| "learning_rate": 4.557644930925874e-05, |
| "loss": 0.8083, |
| "step": 6400 |
| }, |
| { |
| "epoch": 0.18243053606511367, |
| "eval_loss": 5.270838260650635, |
| "eval_runtime": 76.7742, |
| "eval_samples_per_second": 86.084, |
| "eval_sequential_score": 0.7680196935178831, |
| "eval_steps_per_second": 1.355, |
| "eval_sts-dev-1152_pearson_cosine": 0.7542454277081971, |
| "eval_sts-dev-1152_spearman_cosine": 0.7680196935178831, |
| "eval_sts-dev-512_pearson_cosine": 0.7491425293129472, |
| "eval_sts-dev-512_spearman_cosine": 0.7637887014241396, |
| "eval_sts-dev-768_pearson_cosine": 0.7462311896234177, |
| "eval_sts-dev-768_spearman_cosine": 0.7621924407912392, |
| "eval_sts-dev-960_pearson_cosine": 0.7506447400007437, |
| "eval_sts-dev-960_spearman_cosine": 0.7656016598904541, |
| "step": 6500 |
| }, |
| { |
| "epoch": 0.18523715969688465, |
| "grad_norm": 15407098.0, |
| "learning_rate": 4.5264602239061963e-05, |
| "loss": 0.8373, |
| "step": 6600 |
| }, |
| { |
| "epoch": 0.1908504069604266, |
| "grad_norm": 1347636.375, |
| "learning_rate": 4.495275516886519e-05, |
| "loss": 0.8031, |
| "step": 6800 |
| }, |
| { |
| "epoch": 0.19646365422396855, |
| "grad_norm": 1346039.0, |
| "learning_rate": 4.464090809866842e-05, |
| "loss": 0.7375, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.19646365422396855, |
| "eval_loss": 5.154874801635742, |
| "eval_runtime": 77.428, |
| "eval_samples_per_second": 85.357, |
| "eval_sequential_score": 0.776258910277576, |
| "eval_steps_per_second": 1.343, |
| "eval_sts-dev-1152_pearson_cosine": 0.7608005418896666, |
| "eval_sts-dev-1152_spearman_cosine": 0.776258910277576, |
| "eval_sts-dev-512_pearson_cosine": 0.7589220526161604, |
| "eval_sts-dev-512_spearman_cosine": 0.773786883290433, |
| "eval_sts-dev-768_pearson_cosine": 0.7534825416262227, |
| "eval_sts-dev-768_spearman_cosine": 0.7718540709899384, |
| "eval_sts-dev-960_pearson_cosine": 0.7570206619012192, |
| "eval_sts-dev-960_spearman_cosine": 0.7739587544161404, |
| "step": 7000 |
| }, |
| { |
| "epoch": 0.20207690148751054, |
| "grad_norm": 1730871.5, |
| "learning_rate": 4.432906102847164e-05, |
| "loss": 0.743, |
| "step": 7200 |
| }, |
| { |
| "epoch": 0.2076901487510525, |
| "grad_norm": 610056.0, |
| "learning_rate": 4.4017213958274867e-05, |
| "loss": 0.739, |
| "step": 7400 |
| }, |
| { |
| "epoch": 0.21049677238282347, |
| "eval_loss": 4.818795204162598, |
| "eval_runtime": 78.238, |
| "eval_samples_per_second": 84.473, |
| "eval_sequential_score": 0.7867819228719439, |
| "eval_steps_per_second": 1.329, |
| "eval_sts-dev-1152_pearson_cosine": 0.7777460591194046, |
| "eval_sts-dev-1152_spearman_cosine": 0.7867819228719439, |
| "eval_sts-dev-512_pearson_cosine": 0.7742009652343147, |
| "eval_sts-dev-512_spearman_cosine": 0.7834916540309068, |
| "eval_sts-dev-768_pearson_cosine": 0.771136418007053, |
| "eval_sts-dev-768_spearman_cosine": 0.7825233109168519, |
| "eval_sts-dev-960_pearson_cosine": 0.7749317385070862, |
| "eval_sts-dev-960_spearman_cosine": 0.7849087778447466, |
| "step": 7500 |
| }, |
| { |
| "epoch": 0.21330339601459444, |
| "grad_norm": 816702.6875, |
| "learning_rate": 4.370536688807809e-05, |
| "loss": 0.7399, |
| "step": 7600 |
| }, |
| { |
| "epoch": 0.2189166432781364, |
| "grad_norm": 984849.5625, |
| "learning_rate": 4.3393519817881315e-05, |
| "loss": 0.6723, |
| "step": 7800 |
| }, |
| { |
| "epoch": 0.22452989054167835, |
| "grad_norm": 2291592.5, |
| "learning_rate": 4.3081672747684535e-05, |
| "loss": 0.6866, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.22452989054167835, |
| "eval_loss": 5.077595233917236, |
| "eval_runtime": 81.793, |
| "eval_samples_per_second": 80.802, |
| "eval_sequential_score": 0.7714443289674112, |
| "eval_steps_per_second": 1.272, |
| "eval_sts-dev-1152_pearson_cosine": 0.7519731587462412, |
| "eval_sts-dev-1152_spearman_cosine": 0.7714443289674112, |
| "eval_sts-dev-512_pearson_cosine": 0.74485948560521, |
| "eval_sts-dev-512_spearman_cosine": 0.7666552681351647, |
| "eval_sts-dev-768_pearson_cosine": 0.7434046002477062, |
| "eval_sts-dev-768_spearman_cosine": 0.7663030810953583, |
| "eval_sts-dev-960_pearson_cosine": 0.7477923465698348, |
| "eval_sts-dev-960_spearman_cosine": 0.7687824273352952, |
| "step": 8000 |
| }, |
| { |
| "epoch": 0.23014313780522033, |
| "grad_norm": 854538.25, |
| "learning_rate": 4.276982567748776e-05, |
| "loss": 0.6556, |
| "step": 8200 |
| }, |
| { |
| "epoch": 0.2357563850687623, |
| "grad_norm": 567974.3125, |
| "learning_rate": 4.245797860729099e-05, |
| "loss": 0.6886, |
| "step": 8400 |
| }, |
| { |
| "epoch": 0.23856300870053326, |
| "eval_loss": 4.77580451965332, |
| "eval_runtime": 81.3533, |
| "eval_samples_per_second": 81.238, |
| "eval_sequential_score": 0.7845369317488485, |
| "eval_steps_per_second": 1.278, |
| "eval_sts-dev-1152_pearson_cosine": 0.7759169223036488, |
| "eval_sts-dev-1152_spearman_cosine": 0.7845369317488485, |
| "eval_sts-dev-512_pearson_cosine": 0.7701848660015407, |
| "eval_sts-dev-512_spearman_cosine": 0.7810897586341158, |
| "eval_sts-dev-768_pearson_cosine": 0.7706226329945012, |
| "eval_sts-dev-768_spearman_cosine": 0.7807681848391878, |
| "eval_sts-dev-960_pearson_cosine": 0.7736197337766615, |
| "eval_sts-dev-960_spearman_cosine": 0.7827940121873471, |
| "step": 8500 |
| }, |
| { |
| "epoch": 0.24136963233230424, |
| "grad_norm": 984615.125, |
| "learning_rate": 4.214613153709421e-05, |
| "loss": 0.685, |
| "step": 8600 |
| }, |
| { |
| "epoch": 0.2469828795958462, |
| "grad_norm": 891299.0625, |
| "learning_rate": 4.183428446689743e-05, |
| "loss": 0.6401, |
| "step": 8800 |
| }, |
| { |
| "epoch": 0.25259612685938815, |
| "grad_norm": 815193.5, |
| "learning_rate": 4.152243739670066e-05, |
| "loss": 0.6617, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.25259612685938815, |
| "eval_loss": 4.602816581726074, |
| "eval_runtime": 78.6877, |
| "eval_samples_per_second": 83.99, |
| "eval_sequential_score": 0.7821317387888623, |
| "eval_steps_per_second": 1.322, |
| "eval_sts-dev-1152_pearson_cosine": 0.7751221600500908, |
| "eval_sts-dev-1152_spearman_cosine": 0.7821317387888623, |
| "eval_sts-dev-512_pearson_cosine": 0.7694106022891276, |
| "eval_sts-dev-512_spearman_cosine": 0.7776661813002868, |
| "eval_sts-dev-768_pearson_cosine": 0.7688507047470482, |
| "eval_sts-dev-768_spearman_cosine": 0.7773842606816174, |
| "eval_sts-dev-960_pearson_cosine": 0.772897246519623, |
| "eval_sts-dev-960_spearman_cosine": 0.7804687323409016, |
| "step": 9000 |
| }, |
| { |
| "epoch": 0.2582093741229301, |
| "grad_norm": 677073.1875, |
| "learning_rate": 4.121059032650389e-05, |
| "loss": 0.6208, |
| "step": 9200 |
| }, |
| { |
| "epoch": 0.26382262138647206, |
| "grad_norm": 998130.3125, |
| "learning_rate": 4.089874325630711e-05, |
| "loss": 0.6307, |
| "step": 9400 |
| }, |
| { |
| "epoch": 0.26662924501824303, |
| "eval_loss": 4.539032459259033, |
| "eval_runtime": 78.0206, |
| "eval_samples_per_second": 84.708, |
| "eval_sequential_score": 0.7853296979011097, |
| "eval_steps_per_second": 1.333, |
| "eval_sts-dev-1152_pearson_cosine": 0.7765914965051797, |
| "eval_sts-dev-1152_spearman_cosine": 0.7853296979011097, |
| "eval_sts-dev-512_pearson_cosine": 0.7723537190754219, |
| "eval_sts-dev-512_spearman_cosine": 0.7823374065712144, |
| "eval_sts-dev-768_pearson_cosine": 0.7715991971997825, |
| "eval_sts-dev-768_spearman_cosine": 0.7821482216403839, |
| "eval_sts-dev-960_pearson_cosine": 0.7750935828664101, |
| "eval_sts-dev-960_spearman_cosine": 0.7843683074967508, |
| "step": 9500 |
| }, |
| { |
| "epoch": 0.269435868650014, |
| "grad_norm": 4926974.0, |
| "learning_rate": 4.0586896186110335e-05, |
| "loss": 0.6557, |
| "step": 9600 |
| }, |
| { |
| "epoch": 0.275049115913556, |
| "grad_norm": 1349007.875, |
| "learning_rate": 4.027504911591356e-05, |
| "loss": 0.6102, |
| "step": 9800 |
| }, |
| { |
| "epoch": 0.280662363177098, |
| "grad_norm": 4552381.5, |
| "learning_rate": 3.996320204571678e-05, |
| "loss": 0.5917, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.280662363177098, |
| "eval_loss": 4.608828067779541, |
| "eval_runtime": 77.5783, |
| "eval_samples_per_second": 85.191, |
| "eval_sequential_score": 0.7826250058709692, |
| "eval_steps_per_second": 1.341, |
| "eval_sts-dev-1152_pearson_cosine": 0.7759367710047786, |
| "eval_sts-dev-1152_spearman_cosine": 0.7826250058709692, |
| "eval_sts-dev-512_pearson_cosine": 0.7683710236695287, |
| "eval_sts-dev-512_spearman_cosine": 0.7770467944017624, |
| "eval_sts-dev-768_pearson_cosine": 0.7700346176363122, |
| "eval_sts-dev-768_spearman_cosine": 0.7779312583550618, |
| "eval_sts-dev-960_pearson_cosine": 0.7737155642846232, |
| "eval_sts-dev-960_spearman_cosine": 0.7807350237124752, |
| "step": 10000 |
| }, |
| { |
| "epoch": 0.2862756104406399, |
| "grad_norm": 836897.8125, |
| "learning_rate": 3.9651354975520004e-05, |
| "loss": 0.5845, |
| "step": 10200 |
| }, |
| { |
| "epoch": 0.2918888577041819, |
| "grad_norm": 735510.0625, |
| "learning_rate": 3.933950790532323e-05, |
| "loss": 0.6018, |
| "step": 10400 |
| }, |
| { |
| "epoch": 0.29469548133595286, |
| "eval_loss": 4.563432216644287, |
| "eval_runtime": 77.4091, |
| "eval_samples_per_second": 85.378, |
| "eval_sequential_score": 0.7901603432215759, |
| "eval_steps_per_second": 1.344, |
| "eval_sts-dev-1152_pearson_cosine": 0.7810780782124218, |
| "eval_sts-dev-1152_spearman_cosine": 0.7901603432215759, |
| "eval_sts-dev-512_pearson_cosine": 0.7767016322826037, |
| "eval_sts-dev-512_spearman_cosine": 0.787071277210133, |
| "eval_sts-dev-768_pearson_cosine": 0.7756406085198688, |
| "eval_sts-dev-768_spearman_cosine": 0.786612173354875, |
| "eval_sts-dev-960_pearson_cosine": 0.7789839662649704, |
| "eval_sts-dev-960_spearman_cosine": 0.7885719593916782, |
| "step": 10500 |
| }, |
| { |
| "epoch": 0.29750210496772383, |
| "grad_norm": 1244390.5, |
| "learning_rate": 3.902766083512646e-05, |
| "loss": 0.5859, |
| "step": 10600 |
| }, |
| { |
| "epoch": 0.3031153522312658, |
| "grad_norm": 1826701.25, |
| "learning_rate": 3.871581376492968e-05, |
| "loss": 0.5933, |
| "step": 10800 |
| }, |
| { |
| "epoch": 0.30872859949480774, |
| "grad_norm": 1036837.75, |
| "learning_rate": 3.840396669473291e-05, |
| "loss": 0.5717, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.30872859949480774, |
| "eval_loss": 4.405139446258545, |
| "eval_runtime": 80.2702, |
| "eval_samples_per_second": 82.334, |
| "eval_sequential_score": 0.7930013510501455, |
| "eval_steps_per_second": 1.296, |
| "eval_sts-dev-1152_pearson_cosine": 0.7842500678425964, |
| "eval_sts-dev-1152_spearman_cosine": 0.7930013510501455, |
| "eval_sts-dev-512_pearson_cosine": 0.7804560477931245, |
| "eval_sts-dev-512_spearman_cosine": 0.790194108314055, |
| "eval_sts-dev-768_pearson_cosine": 0.7795286908922767, |
| "eval_sts-dev-768_spearman_cosine": 0.7903488938053814, |
| "eval_sts-dev-960_pearson_cosine": 0.7823368063838988, |
| "eval_sts-dev-960_spearman_cosine": 0.7916644882542199, |
| "step": 11000 |
| }, |
| { |
| "epoch": 0.3143418467583497, |
| "grad_norm": 505930.4375, |
| "learning_rate": 3.809211962453613e-05, |
| "loss": 0.5719, |
| "step": 11200 |
| }, |
| { |
| "epoch": 0.31995509402189165, |
| "grad_norm": 573586.3125, |
| "learning_rate": 3.7780272554339355e-05, |
| "loss": 0.5422, |
| "step": 11400 |
| }, |
| { |
| "epoch": 0.3227617176536626, |
| "eval_loss": 4.44298791885376, |
| "eval_runtime": 79.3492, |
| "eval_samples_per_second": 83.29, |
| "eval_sequential_score": 0.7942242704582879, |
| "eval_steps_per_second": 1.311, |
| "eval_sts-dev-1152_pearson_cosine": 0.7875744329285601, |
| "eval_sts-dev-1152_spearman_cosine": 0.7942242704582879, |
| "eval_sts-dev-512_pearson_cosine": 0.7840443572822089, |
| "eval_sts-dev-512_spearman_cosine": 0.7920209001098614, |
| "eval_sts-dev-768_pearson_cosine": 0.7834004871669387, |
| "eval_sts-dev-768_spearman_cosine": 0.7917739294182365, |
| "eval_sts-dev-960_pearson_cosine": 0.7857857853856216, |
| "eval_sts-dev-960_spearman_cosine": 0.7931912176322746, |
| "step": 11500 |
| }, |
| { |
| "epoch": 0.3255683412854336, |
| "grad_norm": 2376617.5, |
| "learning_rate": 3.7468425484142576e-05, |
| "loss": 0.527, |
| "step": 11600 |
| }, |
| { |
| "epoch": 0.33118158854897556, |
| "grad_norm": 1004661.5, |
| "learning_rate": 3.7156578413945803e-05, |
| "loss": 0.5291, |
| "step": 11800 |
| }, |
| { |
| "epoch": 0.33679483581251757, |
| "grad_norm": 681331.25, |
| "learning_rate": 3.684473134374903e-05, |
| "loss": 0.542, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.33679483581251757, |
| "eval_loss": 4.3000807762146, |
| "eval_runtime": 78.0165, |
| "eval_samples_per_second": 84.713, |
| "eval_sequential_score": 0.7933961511062919, |
| "eval_steps_per_second": 1.333, |
| "eval_sts-dev-1152_pearson_cosine": 0.7901723077330912, |
| "eval_sts-dev-1152_spearman_cosine": 0.7933961511062919, |
| "eval_sts-dev-512_pearson_cosine": 0.7863821340291279, |
| "eval_sts-dev-512_spearman_cosine": 0.7907396171652296, |
| "eval_sts-dev-768_pearson_cosine": 0.7861497979708226, |
| "eval_sts-dev-768_spearman_cosine": 0.7905983485083213, |
| "eval_sts-dev-960_pearson_cosine": 0.7892627814382922, |
| "eval_sts-dev-960_spearman_cosine": 0.7927711835546136, |
| "step": 12000 |
| }, |
| { |
| "epoch": 0.3424080830760595, |
| "grad_norm": 339770.1875, |
| "learning_rate": 3.653288427355225e-05, |
| "loss": 0.5213, |
| "step": 12200 |
| }, |
| { |
| "epoch": 0.3480213303396015, |
| "grad_norm": 16329254.0, |
| "learning_rate": 3.622103720335547e-05, |
| "loss": 0.5226, |
| "step": 12400 |
| }, |
| { |
| "epoch": 0.35082795397137245, |
| "eval_loss": 4.8531880378723145, |
| "eval_runtime": 78.4497, |
| "eval_samples_per_second": 84.245, |
| "eval_sequential_score": 0.7723953575556768, |
| "eval_steps_per_second": 1.326, |
| "eval_sts-dev-1152_pearson_cosine": 0.7559096984425644, |
| "eval_sts-dev-1152_spearman_cosine": 0.7723953575556768, |
| "eval_sts-dev-512_pearson_cosine": 0.7513107904922858, |
| "eval_sts-dev-512_spearman_cosine": 0.7687285653830139, |
| "eval_sts-dev-768_pearson_cosine": 0.7485837494255871, |
| "eval_sts-dev-768_spearman_cosine": 0.7673974329389454, |
| "eval_sts-dev-960_pearson_cosine": 0.7527733497014006, |
| "eval_sts-dev-960_spearman_cosine": 0.7700910811459013, |
| "step": 12500 |
| }, |
| { |
| "epoch": 0.35363457760314343, |
| "grad_norm": 751577.375, |
| "learning_rate": 3.59091901331587e-05, |
| "loss": 0.5111, |
| "step": 12600 |
| }, |
| { |
| "epoch": 0.3592478248666854, |
| "grad_norm": 1020267.75, |
| "learning_rate": 3.559734306296193e-05, |
| "loss": 0.51, |
| "step": 12800 |
| }, |
| { |
| "epoch": 0.36486107213022734, |
| "grad_norm": 2245223.25, |
| "learning_rate": 3.528549599276515e-05, |
| "loss": 0.5439, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.36486107213022734, |
| "eval_loss": 4.562457084655762, |
| "eval_runtime": 76.5025, |
| "eval_samples_per_second": 86.389, |
| "eval_sequential_score": 0.7871283857415448, |
| "eval_steps_per_second": 1.359, |
| "eval_sts-dev-1152_pearson_cosine": 0.7813516950726106, |
| "eval_sts-dev-1152_spearman_cosine": 0.7871283857415448, |
| "eval_sts-dev-512_pearson_cosine": 0.7790901108872998, |
| "eval_sts-dev-512_spearman_cosine": 0.785904654893658, |
| "eval_sts-dev-768_pearson_cosine": 0.7768149257929241, |
| "eval_sts-dev-768_spearman_cosine": 0.7839641875290246, |
| "eval_sts-dev-960_pearson_cosine": 0.7799312425424749, |
| "eval_sts-dev-960_spearman_cosine": 0.7859198424326749, |
| "step": 13000 |
| }, |
| { |
| "epoch": 0.3704743193937693, |
| "grad_norm": 295381.71875, |
| "learning_rate": 3.4973648922568375e-05, |
| "loss": 0.4944, |
| "step": 13200 |
| }, |
| { |
| "epoch": 0.37608756665731125, |
| "grad_norm": 258496.15625, |
| "learning_rate": 3.46618018523716e-05, |
| "loss": 0.5055, |
| "step": 13400 |
| }, |
| { |
| "epoch": 0.3788941902890822, |
| "eval_loss": 4.690097808837891, |
| "eval_runtime": 77.0112, |
| "eval_samples_per_second": 85.819, |
| "eval_sequential_score": 0.7716690451444436, |
| "eval_steps_per_second": 1.35, |
| "eval_sts-dev-1152_pearson_cosine": 0.7550740608400445, |
| "eval_sts-dev-1152_spearman_cosine": 0.7716690451444436, |
| "eval_sts-dev-512_pearson_cosine": 0.7512508042826231, |
| "eval_sts-dev-512_spearman_cosine": 0.7686861357730667, |
| "eval_sts-dev-768_pearson_cosine": 0.7485196351380123, |
| "eval_sts-dev-768_spearman_cosine": 0.7674469031229442, |
| "eval_sts-dev-960_pearson_cosine": 0.7531056155361794, |
| "eval_sts-dev-960_spearman_cosine": 0.770399492414998, |
| "step": 13500 |
| }, |
| { |
| "epoch": 0.3817008139208532, |
| "grad_norm": 1523134.625, |
| "learning_rate": 3.4349954782174824e-05, |
| "loss": 0.4914, |
| "step": 13600 |
| }, |
| { |
| "epoch": 0.38731406118439515, |
| "grad_norm": 824847.5625, |
| "learning_rate": 3.4038107711978044e-05, |
| "loss": 0.4832, |
| "step": 13800 |
| }, |
| { |
| "epoch": 0.3929273084479371, |
| "grad_norm": 942315.875, |
| "learning_rate": 3.372626064178127e-05, |
| "loss": 0.4974, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.3929273084479371, |
| "eval_loss": 4.3223676681518555, |
| "eval_runtime": 76.7925, |
| "eval_samples_per_second": 86.063, |
| "eval_sequential_score": 0.7833165592743722, |
| "eval_steps_per_second": 1.354, |
| "eval_sts-dev-1152_pearson_cosine": 0.7744088760069867, |
| "eval_sts-dev-1152_spearman_cosine": 0.7833165592743722, |
| "eval_sts-dev-512_pearson_cosine": 0.7698343591511576, |
| "eval_sts-dev-512_spearman_cosine": 0.7802103790467256, |
| "eval_sts-dev-768_pearson_cosine": 0.7695304076449843, |
| "eval_sts-dev-768_spearman_cosine": 0.780087860180158, |
| "eval_sts-dev-960_pearson_cosine": 0.7727501854173389, |
| "eval_sts-dev-960_spearman_cosine": 0.7819591085796292, |
| "step": 14000 |
| }, |
| { |
| "epoch": 0.3985405557114791, |
| "grad_norm": 755688.0625, |
| "learning_rate": 3.34144135715845e-05, |
| "loss": 0.4834, |
| "step": 14200 |
| }, |
| { |
| "epoch": 0.40415380297502107, |
| "grad_norm": 1176346.25, |
| "learning_rate": 3.310256650138772e-05, |
| "loss": 0.4526, |
| "step": 14400 |
| }, |
| { |
| "epoch": 0.40696042660679205, |
| "eval_loss": 4.603125095367432, |
| "eval_runtime": 77.0079, |
| "eval_samples_per_second": 85.822, |
| "eval_sequential_score": 0.7798168210294973, |
| "eval_steps_per_second": 1.351, |
| "eval_sts-dev-1152_pearson_cosine": 0.7695286357100837, |
| "eval_sts-dev-1152_spearman_cosine": 0.7798168210294973, |
| "eval_sts-dev-512_pearson_cosine": 0.7680031892756588, |
| "eval_sts-dev-512_spearman_cosine": 0.7793198654141038, |
| "eval_sts-dev-768_pearson_cosine": 0.765316531020936, |
| "eval_sts-dev-768_spearman_cosine": 0.7771469239892022, |
| "eval_sts-dev-960_pearson_cosine": 0.7678963030870366, |
| "eval_sts-dev-960_spearman_cosine": 0.7788543197202844, |
| "step": 14500 |
| }, |
| { |
| "epoch": 0.409767050238563, |
| "grad_norm": 555539.0625, |
| "learning_rate": 3.279071943119095e-05, |
| "loss": 0.4621, |
| "step": 14600 |
| }, |
| { |
| "epoch": 0.415380297502105, |
| "grad_norm": 293199.1875, |
| "learning_rate": 3.2478872360994175e-05, |
| "loss": 0.4483, |
| "step": 14800 |
| }, |
| { |
| "epoch": 0.42099354476564693, |
| "grad_norm": 1106311.5, |
| "learning_rate": 3.2167025290797396e-05, |
| "loss": 0.4422, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.42099354476564693, |
| "eval_loss": 4.340238094329834, |
| "eval_runtime": 78.5719, |
| "eval_samples_per_second": 84.114, |
| "eval_sequential_score": 0.7852896447445069, |
| "eval_steps_per_second": 1.324, |
| "eval_sts-dev-1152_pearson_cosine": 0.7790739260952181, |
| "eval_sts-dev-1152_spearman_cosine": 0.7852896447445069, |
| "eval_sts-dev-512_pearson_cosine": 0.7764628401440794, |
| "eval_sts-dev-512_spearman_cosine": 0.7835744067282309, |
| "eval_sts-dev-768_pearson_cosine": 0.7743919883086495, |
| "eval_sts-dev-768_spearman_cosine": 0.7820714131385429, |
| "eval_sts-dev-960_pearson_cosine": 0.7773513056662522, |
| "eval_sts-dev-960_spearman_cosine": 0.7840212510775064, |
| "step": 15000 |
| }, |
| { |
| "epoch": 0.4266067920291889, |
| "grad_norm": 610679.5625, |
| "learning_rate": 3.1855178220600616e-05, |
| "loss": 0.4144, |
| "step": 15200 |
| }, |
| { |
| "epoch": 0.43222003929273084, |
| "grad_norm": 687958.4375, |
| "learning_rate": 3.1543331150403844e-05, |
| "loss": 0.4099, |
| "step": 15400 |
| }, |
| { |
| "epoch": 0.4350266629245018, |
| "eval_loss": 4.453821182250977, |
| "eval_runtime": 77.8538, |
| "eval_samples_per_second": 84.89, |
| "eval_sequential_score": 0.7885469182471105, |
| "eval_steps_per_second": 1.336, |
| "eval_sts-dev-1152_pearson_cosine": 0.7803450152456344, |
| "eval_sts-dev-1152_spearman_cosine": 0.7885469182471105, |
| "eval_sts-dev-512_pearson_cosine": 0.7769126522889329, |
| "eval_sts-dev-512_spearman_cosine": 0.7860781907940958, |
| "eval_sts-dev-768_pearson_cosine": 0.7768788991918325, |
| "eval_sts-dev-768_spearman_cosine": 0.7860822710268899, |
| "eval_sts-dev-960_pearson_cosine": 0.7790539464640599, |
| "eval_sts-dev-960_spearman_cosine": 0.7873534072847509, |
| "step": 15500 |
| }, |
| { |
| "epoch": 0.4378332865562728, |
| "grad_norm": 465139.34375, |
| "learning_rate": 3.123148408020707e-05, |
| "loss": 0.4196, |
| "step": 15600 |
| }, |
| { |
| "epoch": 0.44344653381981475, |
| "grad_norm": 1473414.0, |
| "learning_rate": 3.091963701001029e-05, |
| "loss": 0.4273, |
| "step": 15800 |
| }, |
| { |
| "epoch": 0.4490597810833567, |
| "grad_norm": 1426157.25, |
| "learning_rate": 3.060778993981352e-05, |
| "loss": 1.9924, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.4490597810833567, |
| "eval_loss": 3.5090487003326416, |
| "eval_runtime": 79.6383, |
| "eval_samples_per_second": 82.988, |
| "eval_sequential_score": 0.7927361346499643, |
| "eval_steps_per_second": 1.306, |
| "eval_sts-dev-1152_pearson_cosine": 0.790420349742375, |
| "eval_sts-dev-1152_spearman_cosine": 0.7927361346499643, |
| "eval_sts-dev-512_pearson_cosine": 0.7865016349246065, |
| "eval_sts-dev-512_spearman_cosine": 0.7896668024220279, |
| "eval_sts-dev-768_pearson_cosine": 0.7865658784180989, |
| "eval_sts-dev-768_spearman_cosine": 0.7894374189283365, |
| "eval_sts-dev-960_pearson_cosine": 0.78835917820683, |
| "eval_sts-dev-960_spearman_cosine": 0.7907010125477784, |
| "step": 16000 |
| }, |
| { |
| "epoch": 0.45467302834689866, |
| "grad_norm": 2128073.0, |
| "learning_rate": 3.029594286961674e-05, |
| "loss": 2.0174, |
| "step": 16200 |
| }, |
| { |
| "epoch": 0.46028627561044066, |
| "grad_norm": 1494404.25, |
| "learning_rate": 2.9984095799419964e-05, |
| "loss": 1.9566, |
| "step": 16400 |
| }, |
| { |
| "epoch": 0.46309289924221164, |
| "eval_loss": 3.305478572845459, |
| "eval_runtime": 76.7558, |
| "eval_samples_per_second": 86.104, |
| "eval_sequential_score": 0.8035377612749389, |
| "eval_steps_per_second": 1.355, |
| "eval_sts-dev-1152_pearson_cosine": 0.7997045755932278, |
| "eval_sts-dev-1152_spearman_cosine": 0.8035377612749389, |
| "eval_sts-dev-512_pearson_cosine": 0.7962257322892368, |
| "eval_sts-dev-512_spearman_cosine": 0.8010941658538488, |
| "eval_sts-dev-768_pearson_cosine": 0.7967126438007915, |
| "eval_sts-dev-768_spearman_cosine": 0.8016654322418212, |
| "eval_sts-dev-960_pearson_cosine": 0.7985639882269593, |
| "eval_sts-dev-960_spearman_cosine": 0.8025996694295968, |
| "step": 16500 |
| }, |
| { |
| "epoch": 0.4658995228739826, |
| "grad_norm": 21955176.0, |
| "learning_rate": 2.967224872922319e-05, |
| "loss": 1.8733, |
| "step": 16600 |
| }, |
| { |
| "epoch": 0.4715127701375246, |
| "grad_norm": 1634486.5, |
| "learning_rate": 2.9360401659026416e-05, |
| "loss": 1.8465, |
| "step": 16800 |
| }, |
| { |
| "epoch": 0.4771260174010665, |
| "grad_norm": 1099246.25, |
| "learning_rate": 2.904855458882964e-05, |
| "loss": 1.8083, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.4771260174010665, |
| "eval_loss": 3.1462347507476807, |
| "eval_runtime": 76.7838, |
| "eval_samples_per_second": 86.073, |
| "eval_sequential_score": 0.8044901468397658, |
| "eval_steps_per_second": 1.354, |
| "eval_sts-dev-1152_pearson_cosine": 0.8003150062678777, |
| "eval_sts-dev-1152_spearman_cosine": 0.8044901468397658, |
| "eval_sts-dev-512_pearson_cosine": 0.7967573482929446, |
| "eval_sts-dev-512_spearman_cosine": 0.8015237418575695, |
| "eval_sts-dev-768_pearson_cosine": 0.7971621831925142, |
| "eval_sts-dev-768_spearman_cosine": 0.801918123779033, |
| "eval_sts-dev-960_pearson_cosine": 0.7989525078627395, |
| "eval_sts-dev-960_spearman_cosine": 0.8033675149464842, |
| "step": 17000 |
| }, |
| { |
| "epoch": 0.4827392646646085, |
| "grad_norm": 1532188.125, |
| "learning_rate": 2.8736707518632867e-05, |
| "loss": 1.7193, |
| "step": 17200 |
| }, |
| { |
| "epoch": 0.48835251192815043, |
| "grad_norm": 1495786.125, |
| "learning_rate": 2.8424860448436085e-05, |
| "loss": 1.7423, |
| "step": 17400 |
| }, |
| { |
| "epoch": 0.4911591355599214, |
| "eval_loss": 3.0544025897979736, |
| "eval_runtime": 77.4424, |
| "eval_samples_per_second": 85.341, |
| "eval_sequential_score": 0.8085294051469485, |
| "eval_steps_per_second": 1.343, |
| "eval_sts-dev-1152_pearson_cosine": 0.806796528773235, |
| "eval_sts-dev-1152_spearman_cosine": 0.8085294051469485, |
| "eval_sts-dev-512_pearson_cosine": 0.8042370140263899, |
| "eval_sts-dev-512_spearman_cosine": 0.8066761351903039, |
| "eval_sts-dev-768_pearson_cosine": 0.8033611078675769, |
| "eval_sts-dev-768_spearman_cosine": 0.8060225357046799, |
| "eval_sts-dev-960_pearson_cosine": 0.8053597943880864, |
| "eval_sts-dev-960_spearman_cosine": 0.8073196764122358, |
| "step": 17500 |
| }, |
| { |
| "epoch": 0.4939657591916924, |
| "grad_norm": 1554646.625, |
| "learning_rate": 2.8113013378239312e-05, |
| "loss": 1.6114, |
| "step": 17600 |
| }, |
| { |
| "epoch": 0.49957900645523434, |
| "grad_norm": 1555168.625, |
| "learning_rate": 2.7801166308042536e-05, |
| "loss": 1.6524, |
| "step": 17800 |
| }, |
| { |
| "epoch": 0.5051922537187763, |
| "grad_norm": 2316977.25, |
| "learning_rate": 2.748931923784576e-05, |
| "loss": 1.568, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.5051922537187763, |
| "eval_loss": 3.023185968399048, |
| "eval_runtime": 77.2548, |
| "eval_samples_per_second": 85.548, |
| "eval_sequential_score": 0.8160116208449028, |
| "eval_steps_per_second": 1.346, |
| "eval_sts-dev-1152_pearson_cosine": 0.8098685109159951, |
| "eval_sts-dev-1152_spearman_cosine": 0.8160116208449028, |
| "eval_sts-dev-512_pearson_cosine": 0.8067482146442919, |
| "eval_sts-dev-512_spearman_cosine": 0.8138083394885887, |
| "eval_sts-dev-768_pearson_cosine": 0.8066934851514658, |
| "eval_sts-dev-768_spearman_cosine": 0.8137348986202628, |
| "eval_sts-dev-960_pearson_cosine": 0.8086043862820518, |
| "eval_sts-dev-960_spearman_cosine": 0.8147544112729422, |
| "step": 18000 |
| }, |
| { |
| "epoch": 0.5108055009823183, |
| "grad_norm": 1329637.625, |
| "learning_rate": 2.7177472167648988e-05, |
| "loss": 1.5263, |
| "step": 18200 |
| }, |
| { |
| "epoch": 0.5164187482458602, |
| "grad_norm": 1966328.0, |
| "learning_rate": 2.6865625097452212e-05, |
| "loss": 1.5547, |
| "step": 18400 |
| }, |
| { |
| "epoch": 0.5192253718776312, |
| "eval_loss": 2.870816469192505, |
| "eval_runtime": 76.6326, |
| "eval_samples_per_second": 86.243, |
| "eval_sequential_score": 0.8176598949005246, |
| "eval_steps_per_second": 1.357, |
| "eval_sts-dev-1152_pearson_cosine": 0.814666190335647, |
| "eval_sts-dev-1152_spearman_cosine": 0.8176598949005246, |
| "eval_sts-dev-512_pearson_cosine": 0.8121636324942605, |
| "eval_sts-dev-512_spearman_cosine": 0.8159107971728017, |
| "eval_sts-dev-768_pearson_cosine": 0.8122018182896737, |
| "eval_sts-dev-768_spearman_cosine": 0.816248481623874, |
| "eval_sts-dev-960_pearson_cosine": 0.813853882558624, |
| "eval_sts-dev-960_spearman_cosine": 0.8171316174695632, |
| "step": 18500 |
| }, |
| { |
| "epoch": 0.5220319955094022, |
| "grad_norm": 1405333.625, |
| "learning_rate": 2.6553778027255433e-05, |
| "loss": 1.5059, |
| "step": 18600 |
| }, |
| { |
| "epoch": 0.5276452427729441, |
| "grad_norm": 2138761.0, |
| "learning_rate": 2.6241930957058657e-05, |
| "loss": 1.4385, |
| "step": 18800 |
| }, |
| { |
| "epoch": 0.5332584900364861, |
| "grad_norm": 2403917.0, |
| "learning_rate": 2.5930083886861884e-05, |
| "loss": 1.476, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.5332584900364861, |
| "eval_loss": 2.9468226432800293, |
| "eval_runtime": 76.877, |
| "eval_samples_per_second": 85.969, |
| "eval_sequential_score": 0.81019252718923, |
| "eval_steps_per_second": 1.353, |
| "eval_sts-dev-1152_pearson_cosine": 0.8023306627696717, |
| "eval_sts-dev-1152_spearman_cosine": 0.81019252718923, |
| "eval_sts-dev-512_pearson_cosine": 0.8017622030828988, |
| "eval_sts-dev-512_spearman_cosine": 0.8093692797674851, |
| "eval_sts-dev-768_pearson_cosine": 0.8003993423781782, |
| "eval_sts-dev-768_spearman_cosine": 0.8091560440850984, |
| "eval_sts-dev-960_pearson_cosine": 0.8019211195358515, |
| "eval_sts-dev-960_spearman_cosine": 0.8101363718932728, |
| "step": 19000 |
| }, |
| { |
| "epoch": 0.538871737300028, |
| "grad_norm": 1055316.25, |
| "learning_rate": 2.561823681666511e-05, |
| "loss": 1.4558, |
| "step": 19200 |
| }, |
| { |
| "epoch": 0.54448498456357, |
| "grad_norm": 1079136.125, |
| "learning_rate": 2.5306389746468333e-05, |
| "loss": 1.4557, |
| "step": 19400 |
| }, |
| { |
| "epoch": 0.547291608195341, |
| "eval_loss": 2.8981781005859375, |
| "eval_runtime": 79.4411, |
| "eval_samples_per_second": 83.194, |
| "eval_sequential_score": 0.8094444352298047, |
| "eval_steps_per_second": 1.309, |
| "eval_sts-dev-1152_pearson_cosine": 0.8031328356570406, |
| "eval_sts-dev-1152_spearman_cosine": 0.8094444352298047, |
| "eval_sts-dev-512_pearson_cosine": 0.8001098459602658, |
| "eval_sts-dev-512_spearman_cosine": 0.8070996808860118, |
| "eval_sts-dev-768_pearson_cosine": 0.8001671724511775, |
| "eval_sts-dev-768_spearman_cosine": 0.807121764709248, |
| "eval_sts-dev-960_pearson_cosine": 0.8020898505862861, |
| "eval_sts-dev-960_spearman_cosine": 0.8084113769099328, |
| "step": 19500 |
| }, |
| { |
| "epoch": 0.550098231827112, |
| "grad_norm": 1276695.375, |
| "learning_rate": 2.4994542676271557e-05, |
| "loss": 1.4552, |
| "step": 19600 |
| }, |
| { |
| "epoch": 0.555711479090654, |
| "grad_norm": 1328590.75, |
| "learning_rate": 2.468269560607478e-05, |
| "loss": 1.4342, |
| "step": 19800 |
| }, |
| { |
| "epoch": 0.561324726354196, |
| "grad_norm": 1148225.75, |
| "learning_rate": 2.4370848535878008e-05, |
| "loss": 1.4503, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.561324726354196, |
| "eval_loss": 2.807321310043335, |
| "eval_runtime": 77.894, |
| "eval_samples_per_second": 84.846, |
| "eval_sequential_score": 0.8179010934691895, |
| "eval_steps_per_second": 1.335, |
| "eval_sts-dev-1152_pearson_cosine": 0.8140052747106419, |
| "eval_sts-dev-1152_spearman_cosine": 0.8179010934691895, |
| "eval_sts-dev-512_pearson_cosine": 0.8113404216022915, |
| "eval_sts-dev-512_spearman_cosine": 0.8159118696426358, |
| "eval_sts-dev-768_pearson_cosine": 0.8108446270228379, |
| "eval_sts-dev-768_spearman_cosine": 0.815575862581075, |
| "eval_sts-dev-960_pearson_cosine": 0.8128306079700292, |
| "eval_sts-dev-960_spearman_cosine": 0.8169448135786315, |
| "step": 20000 |
| }, |
| { |
| "epoch": 0.5669379736177379, |
| "grad_norm": 1494490.25, |
| "learning_rate": 2.405900146568123e-05, |
| "loss": 1.391, |
| "step": 20200 |
| }, |
| { |
| "epoch": 0.5725512208812799, |
| "grad_norm": 1219068.5, |
| "learning_rate": 2.3747154395484456e-05, |
| "loss": 1.3529, |
| "step": 20400 |
| }, |
| { |
| "epoch": 0.5753578445130508, |
| "eval_loss": 2.697373151779175, |
| "eval_runtime": 77.2603, |
| "eval_samples_per_second": 85.542, |
| "eval_sequential_score": 0.818387454814582, |
| "eval_steps_per_second": 1.346, |
| "eval_sts-dev-1152_pearson_cosine": 0.814425658636787, |
| "eval_sts-dev-1152_spearman_cosine": 0.818387454814582, |
| "eval_sts-dev-512_pearson_cosine": 0.8114864131532289, |
| "eval_sts-dev-512_spearman_cosine": 0.816474256076468, |
| "eval_sts-dev-768_pearson_cosine": 0.8111751402507601, |
| "eval_sts-dev-768_spearman_cosine": 0.8161965909973262, |
| "eval_sts-dev-960_pearson_cosine": 0.8133366326386466, |
| "eval_sts-dev-960_spearman_cosine": 0.817509136299924, |
| "step": 20500 |
| }, |
| { |
| "epoch": 0.5781644681448218, |
| "grad_norm": 1434187.0, |
| "learning_rate": 2.343530732528768e-05, |
| "loss": 1.3428, |
| "step": 20600 |
| }, |
| { |
| "epoch": 0.5837777154083638, |
| "grad_norm": 9602563.0, |
| "learning_rate": 2.3123460255090905e-05, |
| "loss": 1.3401, |
| "step": 20800 |
| }, |
| { |
| "epoch": 0.5893909626719057, |
| "grad_norm": 1751803.25, |
| "learning_rate": 2.281161318489413e-05, |
| "loss": 1.3809, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.5893909626719057, |
| "eval_loss": 2.703920602798462, |
| "eval_runtime": 77.5938, |
| "eval_samples_per_second": 85.174, |
| "eval_sequential_score": 0.8188296877385577, |
| "eval_steps_per_second": 1.34, |
| "eval_sts-dev-1152_pearson_cosine": 0.8145212969625442, |
| "eval_sts-dev-1152_spearman_cosine": 0.8188296877385577, |
| "eval_sts-dev-512_pearson_cosine": 0.8121156149968475, |
| "eval_sts-dev-512_spearman_cosine": 0.8170665731423932, |
| "eval_sts-dev-768_pearson_cosine": 0.8113063721736813, |
| "eval_sts-dev-768_spearman_cosine": 0.8162345284905963, |
| "eval_sts-dev-960_pearson_cosine": 0.813392544100018, |
| "eval_sts-dev-960_spearman_cosine": 0.8177012935138736, |
| "step": 21000 |
| }, |
| { |
| "epoch": 0.5950042099354477, |
| "grad_norm": 1005006.3125, |
| "learning_rate": 2.2499766114697353e-05, |
| "loss": 1.3193, |
| "step": 21200 |
| }, |
| { |
| "epoch": 0.6006174571989896, |
| "grad_norm": 2510840.0, |
| "learning_rate": 2.2187919044500577e-05, |
| "loss": 1.2531, |
| "step": 21400 |
| }, |
| { |
| "epoch": 0.6034240808307606, |
| "eval_loss": 2.6553146839141846, |
| "eval_runtime": 76.7412, |
| "eval_samples_per_second": 86.121, |
| "eval_sequential_score": 0.8132629704436432, |
| "eval_steps_per_second": 1.355, |
| "eval_sts-dev-1152_pearson_cosine": 0.8100792978741007, |
| "eval_sts-dev-1152_spearman_cosine": 0.8132629704436432, |
| "eval_sts-dev-512_pearson_cosine": 0.8064513413918455, |
| "eval_sts-dev-512_spearman_cosine": 0.810722844279224, |
| "eval_sts-dev-768_pearson_cosine": 0.8070561573065372, |
| "eval_sts-dev-768_spearman_cosine": 0.8113323371597876, |
| "eval_sts-dev-960_pearson_cosine": 0.8088718794978185, |
| "eval_sts-dev-960_spearman_cosine": 0.812389308544286, |
| "step": 21500 |
| }, |
| { |
| "epoch": 0.6062307044625316, |
| "grad_norm": 919804.125, |
| "learning_rate": 2.18760719743038e-05, |
| "loss": 1.3294, |
| "step": 21600 |
| }, |
| { |
| "epoch": 0.6118439517260735, |
| "grad_norm": 1363194.375, |
| "learning_rate": 2.156422490410703e-05, |
| "loss": 1.3076, |
| "step": 21800 |
| }, |
| { |
| "epoch": 0.6174571989896155, |
| "grad_norm": 1170667.75, |
| "learning_rate": 2.125237783391025e-05, |
| "loss": 1.2634, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.6174571989896155, |
| "eval_loss": 2.615736484527588, |
| "eval_runtime": 77.1191, |
| "eval_samples_per_second": 85.699, |
| "eval_sequential_score": 0.8135000785581838, |
| "eval_steps_per_second": 1.349, |
| "eval_sts-dev-1152_pearson_cosine": 0.8105061703368251, |
| "eval_sts-dev-1152_spearman_cosine": 0.8135000785581838, |
| "eval_sts-dev-512_pearson_cosine": 0.8074183216846054, |
| "eval_sts-dev-512_spearman_cosine": 0.8123168589567965, |
| "eval_sts-dev-768_pearson_cosine": 0.808323777710555, |
| "eval_sts-dev-768_spearman_cosine": 0.8122644439071114, |
| "eval_sts-dev-960_pearson_cosine": 0.809931025467389, |
| "eval_sts-dev-960_spearman_cosine": 0.8131210124611652, |
| "step": 22000 |
| }, |
| { |
| "epoch": 0.6230704462531574, |
| "grad_norm": 2690514.75, |
| "learning_rate": 2.0940530763713477e-05, |
| "loss": 1.242, |
| "step": 22200 |
| }, |
| { |
| "epoch": 0.6286836935166994, |
| "grad_norm": 687846.625, |
| "learning_rate": 2.06286836935167e-05, |
| "loss": 1.2545, |
| "step": 22400 |
| }, |
| { |
| "epoch": 0.6314903171484704, |
| "eval_loss": 2.6083288192749023, |
| "eval_runtime": 76.9345, |
| "eval_samples_per_second": 85.904, |
| "eval_sequential_score": 0.8198803113021629, |
| "eval_steps_per_second": 1.352, |
| "eval_sts-dev-1152_pearson_cosine": 0.8155088381997109, |
| "eval_sts-dev-1152_spearman_cosine": 0.8198803113021629, |
| "eval_sts-dev-512_pearson_cosine": 0.8119264111774269, |
| "eval_sts-dev-512_spearman_cosine": 0.817475857375689, |
| "eval_sts-dev-768_pearson_cosine": 0.8125582626459574, |
| "eval_sts-dev-768_spearman_cosine": 0.8178761844864308, |
| "eval_sts-dev-960_pearson_cosine": 0.8147256189246097, |
| "eval_sts-dev-960_spearman_cosine": 0.8192721867604403, |
| "step": 22500 |
| }, |
| { |
| "epoch": 0.6342969407802413, |
| "grad_norm": 1860284.25, |
| "learning_rate": 2.0316836623319925e-05, |
| "loss": 1.2362, |
| "step": 22600 |
| }, |
| { |
| "epoch": 0.6399101880437833, |
| "grad_norm": 7078587.0, |
| "learning_rate": 2.000498955312315e-05, |
| "loss": 1.1474, |
| "step": 22800 |
| }, |
| { |
| "epoch": 0.6455234353073253, |
| "grad_norm": 1408995.125, |
| "learning_rate": 1.9693142482926373e-05, |
| "loss": 1.2125, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.6455234353073253, |
| "eval_loss": 2.5617470741271973, |
| "eval_runtime": 76.5184, |
| "eval_samples_per_second": 86.371, |
| "eval_sequential_score": 0.8207731554186944, |
| "eval_steps_per_second": 1.359, |
| "eval_sts-dev-1152_pearson_cosine": 0.8178959248835184, |
| "eval_sts-dev-1152_spearman_cosine": 0.8207731554186944, |
| "eval_sts-dev-512_pearson_cosine": 0.8140061268760344, |
| "eval_sts-dev-512_spearman_cosine": 0.8180611571485459, |
| "eval_sts-dev-768_pearson_cosine": 0.8150538202678299, |
| "eval_sts-dev-768_spearman_cosine": 0.8188299877250856, |
| "eval_sts-dev-960_pearson_cosine": 0.8170381399447351, |
| "eval_sts-dev-960_spearman_cosine": 0.8200353073212047, |
| "step": 23000 |
| }, |
| { |
| "epoch": 0.6511366825708672, |
| "grad_norm": 1224838.5, |
| "learning_rate": 1.9381295412729597e-05, |
| "loss": 1.206, |
| "step": 23200 |
| }, |
| { |
| "epoch": 0.6567499298344092, |
| "grad_norm": 855258.6875, |
| "learning_rate": 1.906944834253282e-05, |
| "loss": 1.1236, |
| "step": 23400 |
| }, |
| { |
| "epoch": 0.6595565534661801, |
| "eval_loss": 2.5598337650299072, |
| "eval_runtime": 77.1811, |
| "eval_samples_per_second": 85.63, |
| "eval_sequential_score": 0.8251304266663447, |
| "eval_steps_per_second": 1.347, |
| "eval_sts-dev-1152_pearson_cosine": 0.8201666525971009, |
| "eval_sts-dev-1152_spearman_cosine": 0.8251304266663447, |
| "eval_sts-dev-512_pearson_cosine": 0.8165225072856572, |
| "eval_sts-dev-512_spearman_cosine": 0.8231945111392679, |
| "eval_sts-dev-768_pearson_cosine": 0.8174329120885295, |
| "eval_sts-dev-768_spearman_cosine": 0.8234756819083271, |
| "eval_sts-dev-960_pearson_cosine": 0.8193489194006733, |
| "eval_sts-dev-960_spearman_cosine": 0.8246625530393321, |
| "step": 23500 |
| }, |
| { |
| "epoch": 0.6623631770979511, |
| "grad_norm": 919016.75, |
| "learning_rate": 1.875760127233605e-05, |
| "loss": 1.1785, |
| "step": 23600 |
| }, |
| { |
| "epoch": 0.6679764243614931, |
| "grad_norm": 757121.4375, |
| "learning_rate": 1.844575420213927e-05, |
| "loss": 1.1376, |
| "step": 23800 |
| }, |
| { |
| "epoch": 0.6735896716250351, |
| "grad_norm": 844441.125, |
| "learning_rate": 1.8133907131942497e-05, |
| "loss": 1.1386, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.6735896716250351, |
| "eval_loss": 2.522897958755493, |
| "eval_runtime": 79.1821, |
| "eval_samples_per_second": 83.466, |
| "eval_sequential_score": 0.8207794501178992, |
| "eval_steps_per_second": 1.313, |
| "eval_sts-dev-1152_pearson_cosine": 0.8186757389035366, |
| "eval_sts-dev-1152_spearman_cosine": 0.8207794501178992, |
| "eval_sts-dev-512_pearson_cosine": 0.8149579009500032, |
| "eval_sts-dev-512_spearman_cosine": 0.8186514991034529, |
| "eval_sts-dev-768_pearson_cosine": 0.8158079152733716, |
| "eval_sts-dev-768_spearman_cosine": 0.8186101448904235, |
| "eval_sts-dev-960_pearson_cosine": 0.8173995719285795, |
| "eval_sts-dev-960_spearman_cosine": 0.8198375969222076, |
| "step": 24000 |
| }, |
| { |
| "epoch": 0.6792029188885771, |
| "grad_norm": 2238345.25, |
| "learning_rate": 1.782206006174572e-05, |
| "loss": 1.1293, |
| "step": 24200 |
| }, |
| { |
| "epoch": 0.684816166152119, |
| "grad_norm": 1067979.625, |
| "learning_rate": 1.7510212991548945e-05, |
| "loss": 1.101, |
| "step": 24400 |
| }, |
| { |
| "epoch": 0.68762278978389, |
| "eval_loss": 2.542306661605835, |
| "eval_runtime": 78.4755, |
| "eval_samples_per_second": 84.217, |
| "eval_sequential_score": 0.8273983158949195, |
| "eval_steps_per_second": 1.325, |
| "eval_sts-dev-1152_pearson_cosine": 0.8233467985109526, |
| "eval_sts-dev-1152_spearman_cosine": 0.8273983158949195, |
| "eval_sts-dev-512_pearson_cosine": 0.8191791525417332, |
| "eval_sts-dev-512_spearman_cosine": 0.8241280875045953, |
| "eval_sts-dev-768_pearson_cosine": 0.8205748238510661, |
| "eval_sts-dev-768_spearman_cosine": 0.8253162521368189, |
| "eval_sts-dev-960_pearson_cosine": 0.8223154809807092, |
| "eval_sts-dev-960_spearman_cosine": 0.8263287206559005, |
| "step": 24500 |
| }, |
| { |
| "epoch": 0.690429413415661, |
| "grad_norm": 1075801.875, |
| "learning_rate": 1.719836592135217e-05, |
| "loss": 1.1306, |
| "step": 24600 |
| }, |
| { |
| "epoch": 0.696042660679203, |
| "grad_norm": 1117327.75, |
| "learning_rate": 1.6886518851155393e-05, |
| "loss": 1.0517, |
| "step": 24800 |
| }, |
| { |
| "epoch": 0.7016559079427449, |
| "grad_norm": 1052429.25, |
| "learning_rate": 1.657467178095862e-05, |
| "loss": 1.0617, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.7016559079427449, |
| "eval_loss": 2.4987776279449463, |
| "eval_runtime": 78.7361, |
| "eval_samples_per_second": 83.939, |
| "eval_sequential_score": 0.8318737027912343, |
| "eval_steps_per_second": 1.321, |
| "eval_sts-dev-1152_pearson_cosine": 0.828742240805668, |
| "eval_sts-dev-1152_spearman_cosine": 0.8318737027912343, |
| "eval_sts-dev-512_pearson_cosine": 0.8253460176937367, |
| "eval_sts-dev-512_spearman_cosine": 0.8296126954497343, |
| "eval_sts-dev-768_pearson_cosine": 0.8260069313893207, |
| "eval_sts-dev-768_spearman_cosine": 0.8299749701259963, |
| "eval_sts-dev-960_pearson_cosine": 0.8277316254972338, |
| "eval_sts-dev-960_spearman_cosine": 0.8309505358919553, |
| "step": 25000 |
| }, |
| { |
| "epoch": 0.7072691552062869, |
| "grad_norm": 1933842.375, |
| "learning_rate": 1.626282471076184e-05, |
| "loss": 1.0408, |
| "step": 25200 |
| }, |
| { |
| "epoch": 0.7128824024698288, |
| "grad_norm": 1313519.75, |
| "learning_rate": 1.595097764056507e-05, |
| "loss": 1.0741, |
| "step": 25400 |
| }, |
| { |
| "epoch": 0.7156890261015998, |
| "eval_loss": 2.4365484714508057, |
| "eval_runtime": 76.9868, |
| "eval_samples_per_second": 85.846, |
| "eval_sequential_score": 0.8275808733889652, |
| "eval_steps_per_second": 1.351, |
| "eval_sts-dev-1152_pearson_cosine": 0.8235693092621815, |
| "eval_sts-dev-1152_spearman_cosine": 0.8275808733889652, |
| "eval_sts-dev-512_pearson_cosine": 0.8200900106962739, |
| "eval_sts-dev-512_spearman_cosine": 0.8251150183246924, |
| "eval_sts-dev-768_pearson_cosine": 0.8206274312682788, |
| "eval_sts-dev-768_spearman_cosine": 0.8253381212332793, |
| "eval_sts-dev-960_pearson_cosine": 0.8226055577499416, |
| "eval_sts-dev-960_spearman_cosine": 0.8267093188750975, |
| "step": 25500 |
| }, |
| { |
| "epoch": 0.7184956497333708, |
| "grad_norm": 2527256.75, |
| "learning_rate": 1.5639130570368293e-05, |
| "loss": 1.0373, |
| "step": 25600 |
| }, |
| { |
| "epoch": 0.7241088969969127, |
| "grad_norm": 1548847.25, |
| "learning_rate": 1.5327283500171517e-05, |
| "loss": 1.0239, |
| "step": 25800 |
| }, |
| { |
| "epoch": 0.7297221442604547, |
| "grad_norm": 771091.3125, |
| "learning_rate": 1.5015436429974741e-05, |
| "loss": 0.9982, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.7297221442604547, |
| "eval_loss": 2.417414903640747, |
| "eval_runtime": 76.5981, |
| "eval_samples_per_second": 86.282, |
| "eval_sequential_score": 0.8253319450226069, |
| "eval_steps_per_second": 1.358, |
| "eval_sts-dev-1152_pearson_cosine": 0.8229695273001155, |
| "eval_sts-dev-1152_spearman_cosine": 0.8253319450226069, |
| "eval_sts-dev-512_pearson_cosine": 0.8186723943926887, |
| "eval_sts-dev-512_spearman_cosine": 0.8223105621184139, |
| "eval_sts-dev-768_pearson_cosine": 0.8198835227077859, |
| "eval_sts-dev-768_spearman_cosine": 0.8231779055377496, |
| "eval_sts-dev-960_pearson_cosine": 0.8221140652235788, |
| "eval_sts-dev-960_spearman_cosine": 0.8246685858536678, |
| "step": 26000 |
| }, |
| { |
| "epoch": 0.7353353915239966, |
| "grad_norm": 1151759.625, |
| "learning_rate": 1.4703589359777967e-05, |
| "loss": 0.9829, |
| "step": 26200 |
| }, |
| { |
| "epoch": 0.7409486387875386, |
| "grad_norm": 1203075.5, |
| "learning_rate": 1.439174228958119e-05, |
| "loss": 0.9758, |
| "step": 26400 |
| }, |
| { |
| "epoch": 0.7437552624193096, |
| "eval_loss": 2.421391010284424, |
| "eval_runtime": 78.2685, |
| "eval_samples_per_second": 84.44, |
| "eval_sequential_score": 0.8273907944767567, |
| "eval_steps_per_second": 1.329, |
| "eval_sts-dev-1152_pearson_cosine": 0.8245632242048817, |
| "eval_sts-dev-1152_spearman_cosine": 0.8273907944767567, |
| "eval_sts-dev-512_pearson_cosine": 0.821320018207968, |
| "eval_sts-dev-512_spearman_cosine": 0.8249051957336987, |
| "eval_sts-dev-768_pearson_cosine": 0.8222668265396057, |
| "eval_sts-dev-768_spearman_cosine": 0.8258229540823162, |
| "eval_sts-dev-960_pearson_cosine": 0.8239329103520061, |
| "eval_sts-dev-960_spearman_cosine": 0.8268971547689833, |
| "step": 26500 |
| }, |
| { |
| "epoch": 0.7465618860510805, |
| "grad_norm": 873702.5625, |
| "learning_rate": 1.4079895219384415e-05, |
| "loss": 1.0123, |
| "step": 26600 |
| }, |
| { |
| "epoch": 0.7521751333146225, |
| "grad_norm": 1191796.125, |
| "learning_rate": 1.3768048149187641e-05, |
| "loss": 1.0156, |
| "step": 26800 |
| }, |
| { |
| "epoch": 0.7577883805781644, |
| "grad_norm": 3614394.75, |
| "learning_rate": 1.3456201078990863e-05, |
| "loss": 0.9687, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.7577883805781644, |
| "eval_loss": 2.381603240966797, |
| "eval_runtime": 76.1545, |
| "eval_samples_per_second": 86.784, |
| "eval_sequential_score": 0.822164187171298, |
| "eval_steps_per_second": 1.366, |
| "eval_sts-dev-1152_pearson_cosine": 0.8179990780257289, |
| "eval_sts-dev-1152_spearman_cosine": 0.822164187171298, |
| "eval_sts-dev-512_pearson_cosine": 0.8141681847358285, |
| "eval_sts-dev-512_spearman_cosine": 0.8190636570312585, |
| "eval_sts-dev-768_pearson_cosine": 0.8154464750993287, |
| "eval_sts-dev-768_spearman_cosine": 0.8202098023000759, |
| "eval_sts-dev-960_pearson_cosine": 0.8171115628702965, |
| "eval_sts-dev-960_spearman_cosine": 0.821364481930215, |
| "step": 27000 |
| }, |
| { |
| "epoch": 0.7634016278417064, |
| "grad_norm": 1148854.5, |
| "learning_rate": 1.3144354008794087e-05, |
| "loss": 0.9569, |
| "step": 27200 |
| }, |
| { |
| "epoch": 0.7690148751052484, |
| "grad_norm": 1114442.0, |
| "learning_rate": 1.2832506938597313e-05, |
| "loss": 0.9543, |
| "step": 27400 |
| }, |
| { |
| "epoch": 0.7718214987370193, |
| "eval_loss": 2.355195999145508, |
| "eval_runtime": 76.6074, |
| "eval_samples_per_second": 86.271, |
| "eval_sequential_score": 0.8289062223349991, |
| "eval_steps_per_second": 1.358, |
| "eval_sts-dev-1152_pearson_cosine": 0.8234371744631814, |
| "eval_sts-dev-1152_spearman_cosine": 0.8289062223349991, |
| "eval_sts-dev-512_pearson_cosine": 0.8207871259825303, |
| "eval_sts-dev-512_spearman_cosine": 0.8272360451735147, |
| "eval_sts-dev-768_pearson_cosine": 0.8214215565214141, |
| "eval_sts-dev-768_spearman_cosine": 0.8275606625119857, |
| "eval_sts-dev-960_pearson_cosine": 0.8229212299901374, |
| "eval_sts-dev-960_spearman_cosine": 0.8283389770018872, |
| "step": 27500 |
| }, |
| { |
| "epoch": 0.7746281223687903, |
| "grad_norm": 1473106.125, |
| "learning_rate": 1.2520659868400536e-05, |
| "loss": 0.9453, |
| "step": 27600 |
| }, |
| { |
| "epoch": 0.7802413696323323, |
| "grad_norm": 1301560.0, |
| "learning_rate": 1.2208812798203761e-05, |
| "loss": 0.9948, |
| "step": 27800 |
| }, |
| { |
| "epoch": 0.7858546168958742, |
| "grad_norm": 689391.125, |
| "learning_rate": 1.1896965728006985e-05, |
| "loss": 0.9874, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.7858546168958742, |
| "eval_loss": 2.3867998123168945, |
| "eval_runtime": 77.0268, |
| "eval_samples_per_second": 85.801, |
| "eval_sequential_score": 0.8280466360566167, |
| "eval_steps_per_second": 1.35, |
| "eval_sts-dev-1152_pearson_cosine": 0.8240164476876701, |
| "eval_sts-dev-1152_spearman_cosine": 0.8280466360566167, |
| "eval_sts-dev-512_pearson_cosine": 0.8215847717028831, |
| "eval_sts-dev-512_spearman_cosine": 0.8264438807620287, |
| "eval_sts-dev-768_pearson_cosine": 0.8221793287179034, |
| "eval_sts-dev-768_spearman_cosine": 0.8268230282109075, |
| "eval_sts-dev-960_pearson_cosine": 0.8233492812551204, |
| "eval_sts-dev-960_spearman_cosine": 0.8274233448566846, |
| "step": 28000 |
| }, |
| { |
| "epoch": 0.7914678641594162, |
| "grad_norm": 1482000.5, |
| "learning_rate": 1.1585118657810211e-05, |
| "loss": 0.8872, |
| "step": 28200 |
| }, |
| { |
| "epoch": 0.7970811114229582, |
| "grad_norm": 11877957.0, |
| "learning_rate": 1.1273271587613435e-05, |
| "loss": 0.9327, |
| "step": 28400 |
| }, |
| { |
| "epoch": 0.7998877350547292, |
| "eval_loss": 2.3834006786346436, |
| "eval_runtime": 76.4589, |
| "eval_samples_per_second": 86.439, |
| "eval_sequential_score": 0.8250187145214471, |
| "eval_steps_per_second": 1.36, |
| "eval_sts-dev-1152_pearson_cosine": 0.821012856551093, |
| "eval_sts-dev-1152_spearman_cosine": 0.8250187145214471, |
| "eval_sts-dev-512_pearson_cosine": 0.8184871654411439, |
| "eval_sts-dev-512_spearman_cosine": 0.823483460862761, |
| "eval_sts-dev-768_pearson_cosine": 0.8190353483169376, |
| "eval_sts-dev-768_spearman_cosine": 0.8237308290024404, |
| "eval_sts-dev-960_pearson_cosine": 0.8205974063302174, |
| "eval_sts-dev-960_spearman_cosine": 0.8245795527826257, |
| "step": 28500 |
| }, |
| { |
| "epoch": 0.8026943586865002, |
| "grad_norm": 993543.625, |
| "learning_rate": 1.096142451741666e-05, |
| "loss": 0.8715, |
| "step": 28600 |
| }, |
| { |
| "epoch": 0.8083076059500421, |
| "grad_norm": 1650682.25, |
| "learning_rate": 1.0649577447219884e-05, |
| "loss": 0.9566, |
| "step": 28800 |
| }, |
| { |
| "epoch": 0.8139208532135841, |
| "grad_norm": 1086581.625, |
| "learning_rate": 1.0337730377023108e-05, |
| "loss": 0.9265, |
| "step": 29000 |
| }, |
| { |
| "epoch": 0.8139208532135841, |
| "eval_loss": 2.345508337020874, |
| "eval_runtime": 78.2528, |
| "eval_samples_per_second": 84.457, |
| "eval_sequential_score": 0.8306689922163598, |
| "eval_steps_per_second": 1.329, |
| "eval_sts-dev-1152_pearson_cosine": 0.8264443610084379, |
| "eval_sts-dev-1152_spearman_cosine": 0.8306689922163598, |
| "eval_sts-dev-512_pearson_cosine": 0.8238103920299558, |
| "eval_sts-dev-512_spearman_cosine": 0.8293245725151981, |
| "eval_sts-dev-768_pearson_cosine": 0.8243518007889306, |
| "eval_sts-dev-768_spearman_cosine": 0.8293091429698137, |
| "eval_sts-dev-960_pearson_cosine": 0.8258566703064338, |
| "eval_sts-dev-960_spearman_cosine": 0.830247434103489, |
| "step": 29000 |
| } |
| ], |
| "logging_steps": 200, |
| "max_steps": 35630, |
| "num_input_tokens_seen": 0, |
| "num_train_epochs": 1, |
| "save_steps": 500, |
| "stateful_callbacks": { |
| "TrainerControl": { |
| "args": { |
| "should_epoch_stop": false, |
| "should_evaluate": false, |
| "should_log": false, |
| "should_save": true, |
| "should_training_stop": false |
| }, |
| "attributes": {} |
| } |
| }, |
| "total_flos": 0.0, |
| "train_batch_size": 64, |
| "trial_name": null, |
| "trial_params": null |
| } |
|
|