{ "best_global_step": null, "best_metric": null, "best_model_checkpoint": null, "epoch": 2.5542672061934395, "eval_steps": 500, "global_step": 582000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0020957857939255743, "grad_norm": 0.6932975053787231, "learning_rate": 9.980000000000001e-06, "loss": 0.1815, "step": 500 }, { "epoch": 0.0020957857939255743, "eval_runtime": 348.3762, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.2909454305069534, "eval_sts_eval_spearman_cosine": 0.19080497648254555, "step": 500 }, { "epoch": 0.004191571587851149, "grad_norm": 1.0086381435394287, "learning_rate": 1.9980000000000002e-05, "loss": 0.0526, "step": 1000 }, { "epoch": 0.004191571587851149, "eval_runtime": 341.6014, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.360402528594094, "eval_sts_eval_spearman_cosine": 0.2282384851993861, "step": 1000 }, { "epoch": 0.0062873573817767234, "grad_norm": 1.2526216506958008, "learning_rate": 1.9989531014631404e-05, "loss": 0.0508, "step": 1500 }, { "epoch": 0.0062873573817767234, "eval_runtime": 343.2253, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.3451540778343026, "eval_sts_eval_spearman_cosine": 0.21371644086161254, "step": 1500 }, { "epoch": 0.008383143175702297, "grad_norm": 1.0197882652282715, "learning_rate": 1.997904104933221e-05, "loss": 0.0521, "step": 2000 }, { "epoch": 0.008383143175702297, "eval_runtime": 342.8062, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.3735936853884094, "eval_sts_eval_spearman_cosine": 0.22825118385748758, "step": 2000 }, { "epoch": 0.010478928969627873, "grad_norm": 7.507567405700684, "learning_rate": 1.9968551084033017e-05, "loss": 0.0499, "step": 2500 }, { "epoch": 0.010478928969627873, "eval_runtime": 349.6421, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.395542812257435, "eval_sts_eval_spearman_cosine": 0.23788459862119793, "step": 2500 }, { "epoch": 0.012574714763553447, "grad_norm": 0.6680296659469604, "learning_rate": 1.995806111873382e-05, "loss": 0.0508, "step": 3000 }, { "epoch": 0.012574714763553447, "eval_runtime": 339.6674, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.400554934575168, "eval_sts_eval_spearman_cosine": 0.24847867586530295, "step": 3000 }, { "epoch": 0.01467050055747902, "grad_norm": 1.0880173444747925, "learning_rate": 1.9947571153434627e-05, "loss": 0.0516, "step": 3500 }, { "epoch": 0.01467050055747902, "eval_runtime": 343.4069, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.38294777392495327, "eval_sts_eval_spearman_cosine": 0.23016512788798368, "step": 3500 }, { "epoch": 0.016766286351404595, "grad_norm": 1.3657889366149902, "learning_rate": 1.993708118813543e-05, "loss": 0.0495, "step": 4000 }, { "epoch": 0.016766286351404595, "eval_runtime": 341.8601, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.39609521632487044, "eval_sts_eval_spearman_cosine": 0.23900126870119787, "step": 4000 }, { "epoch": 0.01886207214533017, "grad_norm": 0.8786454796791077, "learning_rate": 1.9926591222836237e-05, "loss": 0.0503, "step": 4500 }, { "epoch": 0.01886207214533017, "eval_runtime": 340.2199, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.3964852975444805, "eval_sts_eval_spearman_cosine": 0.23972930128684, "step": 4500 }, { "epoch": 0.020957857939255746, "grad_norm": 1.2659673690795898, "learning_rate": 1.991610125753704e-05, "loss": 0.0509, "step": 5000 }, { "epoch": 0.020957857939255746, "eval_runtime": 350.9812, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.3865956574763358, "eval_sts_eval_spearman_cosine": 0.2421840091695498, "step": 5000 }, { "epoch": 0.02305364373318132, "grad_norm": 0.3289322257041931, "learning_rate": 1.9905611292237847e-05, "loss": 0.0514, "step": 5500 }, { "epoch": 0.02305364373318132, "eval_runtime": 349.7549, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4161430364624649, "eval_sts_eval_spearman_cosine": 0.2513699351455699, "step": 5500 }, { "epoch": 0.025149429527106894, "grad_norm": 1.000472903251648, "learning_rate": 1.989512132693865e-05, "loss": 0.0506, "step": 6000 }, { "epoch": 0.025149429527106894, "eval_runtime": 342.6386, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4029184838077335, "eval_sts_eval_spearman_cosine": 0.24133842933702113, "step": 6000 }, { "epoch": 0.027245215321032468, "grad_norm": 0.5057510733604431, "learning_rate": 1.9884631361639457e-05, "loss": 0.0472, "step": 6500 }, { "epoch": 0.027245215321032468, "eval_runtime": 347.3922, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4224852791758297, "eval_sts_eval_spearman_cosine": 0.25366758114154564, "step": 6500 }, { "epoch": 0.02934100111495804, "grad_norm": 0.7144165635108948, "learning_rate": 1.9874141396340264e-05, "loss": 0.0475, "step": 7000 }, { "epoch": 0.02934100111495804, "eval_runtime": 339.9324, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4120217050716396, "eval_sts_eval_spearman_cosine": 0.24771726549141482, "step": 7000 }, { "epoch": 0.031436786908883615, "grad_norm": 0.4331231415271759, "learning_rate": 1.9863651431041067e-05, "loss": 0.0497, "step": 7500 }, { "epoch": 0.031436786908883615, "eval_runtime": 343.2368, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4226288834790758, "eval_sts_eval_spearman_cosine": 0.2566988587171999, "step": 7500 }, { "epoch": 0.03353257270280919, "grad_norm": 0.49721041321754456, "learning_rate": 1.9853161465741874e-05, "loss": 0.0511, "step": 8000 }, { "epoch": 0.03353257270280919, "eval_runtime": 339.9876, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4211803796737199, "eval_sts_eval_spearman_cosine": 0.2532930037006367, "step": 8000 }, { "epoch": 0.03562835849673476, "grad_norm": 0.4282959997653961, "learning_rate": 1.9842671500442677e-05, "loss": 0.051, "step": 8500 }, { "epoch": 0.03562835849673476, "eval_runtime": 337.379, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.42856751199612286, "eval_sts_eval_spearman_cosine": 0.2573611566742001, "step": 8500 }, { "epoch": 0.03772414429066034, "grad_norm": 0.8110671043395996, "learning_rate": 1.9832181535143484e-05, "loss": 0.0484, "step": 9000 }, { "epoch": 0.03772414429066034, "eval_runtime": 343.182, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.40234368861852254, "eval_sts_eval_spearman_cosine": 0.2411479242979506, "step": 9000 }, { "epoch": 0.03981993008458592, "grad_norm": 4.15521764755249, "learning_rate": 1.9821691569844287e-05, "loss": 0.049, "step": 9500 }, { "epoch": 0.03981993008458592, "eval_runtime": 342.3983, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4174447945953029, "eval_sts_eval_spearman_cosine": 0.251069224935445, "step": 9500 }, { "epoch": 0.04191571587851149, "grad_norm": 0.9809379577636719, "learning_rate": 1.9811201604545094e-05, "loss": 0.051, "step": 10000 }, { "epoch": 0.04191571587851149, "eval_runtime": 342.9756, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.40352376633479803, "eval_sts_eval_spearman_cosine": 0.2397405710373801, "step": 10000 }, { "epoch": 0.044011501672437066, "grad_norm": 0.13379138708114624, "learning_rate": 1.98007116392459e-05, "loss": 0.047, "step": 10500 }, { "epoch": 0.044011501672437066, "eval_runtime": 347.33, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.42162260007219515, "eval_sts_eval_spearman_cosine": 0.2534602551674713, "step": 10500 }, { "epoch": 0.04610728746636264, "grad_norm": 0.3253801167011261, "learning_rate": 1.9790221673946704e-05, "loss": 0.0503, "step": 11000 }, { "epoch": 0.04610728746636264, "eval_runtime": 337.7247, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.41671735926849013, "eval_sts_eval_spearman_cosine": 0.2514131808734932, "step": 11000 }, { "epoch": 0.048203073260288214, "grad_norm": 0.3967018723487854, "learning_rate": 1.977973170864751e-05, "loss": 0.0489, "step": 11500 }, { "epoch": 0.048203073260288214, "eval_runtime": 343.8675, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4232337473397235, "eval_sts_eval_spearman_cosine": 0.2513405950814912, "step": 11500 }, { "epoch": 0.05029885905421379, "grad_norm": 1.0883729457855225, "learning_rate": 1.9769241743348314e-05, "loss": 0.0479, "step": 12000 }, { "epoch": 0.05029885905421379, "eval_runtime": 343.2174, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4174763031191874, "eval_sts_eval_spearman_cosine": 0.24768755559483227, "step": 12000 }, { "epoch": 0.05239464484813936, "grad_norm": 0.49014097452163696, "learning_rate": 1.9758751778049118e-05, "loss": 0.048, "step": 12500 }, { "epoch": 0.05239464484813936, "eval_runtime": 339.0921, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.42584536637077786, "eval_sts_eval_spearman_cosine": 0.2500550182574066, "step": 12500 }, { "epoch": 0.054490430642064935, "grad_norm": 0.42660441994667053, "learning_rate": 1.9748261812749924e-05, "loss": 0.0521, "step": 13000 }, { "epoch": 0.054490430642064935, "eval_runtime": 343.3648, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4260018526254466, "eval_sts_eval_spearman_cosine": 0.256835422663195, "step": 13000 }, { "epoch": 0.05658621643599051, "grad_norm": 0.3051619529724121, "learning_rate": 1.973777184745073e-05, "loss": 0.0478, "step": 13500 }, { "epoch": 0.05658621643599051, "eval_runtime": 342.7987, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.43334389722574734, "eval_sts_eval_spearman_cosine": 0.25739578169620453, "step": 13500 }, { "epoch": 0.05868200222991608, "grad_norm": 0.3471350371837616, "learning_rate": 1.9727281882151534e-05, "loss": 0.0495, "step": 14000 }, { "epoch": 0.05868200222991608, "eval_runtime": 342.0235, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.41310236152941326, "eval_sts_eval_spearman_cosine": 0.2499463357814277, "step": 14000 }, { "epoch": 0.06077778802384166, "grad_norm": 0.4968419075012207, "learning_rate": 1.971679191685234e-05, "loss": 0.0542, "step": 14500 }, { "epoch": 0.06077778802384166, "eval_runtime": 344.0591, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4355748655054854, "eval_sts_eval_spearman_cosine": 0.25737909556531063, "step": 14500 }, { "epoch": 0.06287357381776723, "grad_norm": 0.2557956874370575, "learning_rate": 1.9706301951553148e-05, "loss": 0.0501, "step": 15000 }, { "epoch": 0.06287357381776723, "eval_runtime": 347.2993, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4311685478091839, "eval_sts_eval_spearman_cosine": 0.251425769244764, "step": 15000 }, { "epoch": 0.06496935961169281, "grad_norm": 0.8080115914344788, "learning_rate": 1.969581198625395e-05, "loss": 0.0464, "step": 15500 }, { "epoch": 0.06496935961169281, "eval_runtime": 341.6427, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4412137512684416, "eval_sts_eval_spearman_cosine": 0.25960237914450285, "step": 15500 }, { "epoch": 0.06706514540561838, "grad_norm": 0.6723015308380127, "learning_rate": 1.9685322020954754e-05, "loss": 0.0477, "step": 16000 }, { "epoch": 0.06706514540561838, "eval_runtime": 341.3922, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.43698787577001114, "eval_sts_eval_spearman_cosine": 0.2599578571600709, "step": 16000 }, { "epoch": 0.06916093119954396, "grad_norm": 1.298898696899414, "learning_rate": 1.967483205565556e-05, "loss": 0.0493, "step": 16500 }, { "epoch": 0.06916093119954396, "eval_runtime": 345.9487, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.43196060783777573, "eval_sts_eval_spearman_cosine": 0.2562605062163696, "step": 16500 }, { "epoch": 0.07125671699346953, "grad_norm": 0.35999351739883423, "learning_rate": 1.9664342090356368e-05, "loss": 0.05, "step": 17000 }, { "epoch": 0.07125671699346953, "eval_runtime": 335.6876, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.43835726636462274, "eval_sts_eval_spearman_cosine": 0.2621247001486648, "step": 17000 }, { "epoch": 0.07335250278739511, "grad_norm": 0.4840575158596039, "learning_rate": 1.965385212505717e-05, "loss": 0.0493, "step": 17500 }, { "epoch": 0.07335250278739511, "eval_runtime": 334.9137, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.43542151931061157, "eval_sts_eval_spearman_cosine": 0.2624588264113512, "step": 17500 }, { "epoch": 0.07544828858132067, "grad_norm": 0.2812293767929077, "learning_rate": 1.9643362159757978e-05, "loss": 0.0479, "step": 18000 }, { "epoch": 0.07544828858132067, "eval_runtime": 338.7171, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.43243354975862824, "eval_sts_eval_spearman_cosine": 0.2575510361622727, "step": 18000 }, { "epoch": 0.07754407437524626, "grad_norm": 0.3048178553581238, "learning_rate": 1.9632872194458785e-05, "loss": 0.0487, "step": 18500 }, { "epoch": 0.07754407437524626, "eval_runtime": 339.976, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4362558077695027, "eval_sts_eval_spearman_cosine": 0.2588512553939992, "step": 18500 }, { "epoch": 0.07963986016917184, "grad_norm": 0.33140528202056885, "learning_rate": 1.9622382229159588e-05, "loss": 0.0468, "step": 19000 }, { "epoch": 0.07963986016917184, "eval_runtime": 339.8773, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.43718316260078444, "eval_sts_eval_spearman_cosine": 0.2519644557776237, "step": 19000 }, { "epoch": 0.0817356459630974, "grad_norm": 0.49007290601730347, "learning_rate": 1.961189226386039e-05, "loss": 0.0486, "step": 19500 }, { "epoch": 0.0817356459630974, "eval_runtime": 334.801, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44421971660493154, "eval_sts_eval_spearman_cosine": 0.2617400427108499, "step": 19500 }, { "epoch": 0.08383143175702298, "grad_norm": 0.37855038046836853, "learning_rate": 1.9601402298561198e-05, "loss": 0.0475, "step": 20000 }, { "epoch": 0.08383143175702298, "eval_runtime": 342.2845, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44149299758221483, "eval_sts_eval_spearman_cosine": 0.2602507327570822, "step": 20000 }, { "epoch": 0.08592721755094855, "grad_norm": 0.2980867922306061, "learning_rate": 1.9590912333262e-05, "loss": 0.0478, "step": 20500 }, { "epoch": 0.08592721755094855, "eval_runtime": 339.2215, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4412258505802059, "eval_sts_eval_spearman_cosine": 0.2616215802350853, "step": 20500 }, { "epoch": 0.08802300334487413, "grad_norm": 0.5150878429412842, "learning_rate": 1.9580422367962808e-05, "loss": 0.0498, "step": 21000 }, { "epoch": 0.08802300334487413, "eval_runtime": 337.3704, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4389377913108178, "eval_sts_eval_spearman_cosine": 0.26058656369187216, "step": 21000 }, { "epoch": 0.0901187891387997, "grad_norm": 0.7273392081260681, "learning_rate": 1.9569932402663615e-05, "loss": 0.0486, "step": 21500 }, { "epoch": 0.0901187891387997, "eval_runtime": 339.7503, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4122910994637134, "eval_sts_eval_spearman_cosine": 0.25030233539871855, "step": 21500 }, { "epoch": 0.09221457493272528, "grad_norm": 0.4513995945453644, "learning_rate": 1.955944243736442e-05, "loss": 0.0487, "step": 22000 }, { "epoch": 0.09221457493272528, "eval_runtime": 334.4768, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4397282592570656, "eval_sts_eval_spearman_cosine": 0.2641706649781271, "step": 22000 }, { "epoch": 0.09431036072665085, "grad_norm": 0.5422834753990173, "learning_rate": 1.9548952472065225e-05, "loss": 0.0492, "step": 22500 }, { "epoch": 0.09431036072665085, "eval_runtime": 336.8817, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44716545652601647, "eval_sts_eval_spearman_cosine": 0.2689488451751612, "step": 22500 }, { "epoch": 0.09640614652057643, "grad_norm": 0.7466606497764587, "learning_rate": 1.9538462506766028e-05, "loss": 0.046, "step": 23000 }, { "epoch": 0.09640614652057643, "eval_runtime": 339.9961, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4386385430873794, "eval_sts_eval_spearman_cosine": 0.26711427090838047, "step": 23000 }, { "epoch": 0.098501932314502, "grad_norm": 0.9236578941345215, "learning_rate": 1.9527972541466835e-05, "loss": 0.0464, "step": 23500 }, { "epoch": 0.098501932314502, "eval_runtime": 342.1072, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44425402073826004, "eval_sts_eval_spearman_cosine": 0.26634888355939335, "step": 23500 }, { "epoch": 0.10059771810842758, "grad_norm": 0.4924934506416321, "learning_rate": 1.9517482576167638e-05, "loss": 0.047, "step": 24000 }, { "epoch": 0.10059771810842758, "eval_runtime": 342.0035, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45709790911834247, "eval_sts_eval_spearman_cosine": 0.2737052658788197, "step": 24000 }, { "epoch": 0.10269350390235314, "grad_norm": 0.3284561038017273, "learning_rate": 1.9506992610868445e-05, "loss": 0.0464, "step": 24500 }, { "epoch": 0.10269350390235314, "eval_runtime": 341.5378, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44898052946628325, "eval_sts_eval_spearman_cosine": 0.26656637354845736, "step": 24500 }, { "epoch": 0.10478928969627872, "grad_norm": 0.40734410285949707, "learning_rate": 1.949650264556925e-05, "loss": 0.0456, "step": 25000 }, { "epoch": 0.10478928969627872, "eval_runtime": 339.8957, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.445947649561943, "eval_sts_eval_spearman_cosine": 0.26308104220903084, "step": 25000 }, { "epoch": 0.1068850754902043, "grad_norm": 0.5639435648918152, "learning_rate": 1.9486012680270055e-05, "loss": 0.0473, "step": 25500 }, { "epoch": 0.1068850754902043, "eval_runtime": 342.3653, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4408670949655098, "eval_sts_eval_spearman_cosine": 0.25545911465245225, "step": 25500 }, { "epoch": 0.10898086128412987, "grad_norm": 1.0850768089294434, "learning_rate": 1.9475522714970862e-05, "loss": 0.0472, "step": 26000 }, { "epoch": 0.10898086128412987, "eval_runtime": 338.6989, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4426827047790798, "eval_sts_eval_spearman_cosine": 0.2586257604204867, "step": 26000 }, { "epoch": 0.11107664707805545, "grad_norm": 0.45907062292099, "learning_rate": 1.9465032749671665e-05, "loss": 0.0482, "step": 26500 }, { "epoch": 0.11107664707805545, "eval_runtime": 339.364, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4402542590259104, "eval_sts_eval_spearman_cosine": 0.2605261505289138, "step": 26500 }, { "epoch": 0.11317243287198102, "grad_norm": 0.33141788840293884, "learning_rate": 1.945454278437247e-05, "loss": 0.0477, "step": 27000 }, { "epoch": 0.11317243287198102, "eval_runtime": 337.7124, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4521352288341381, "eval_sts_eval_spearman_cosine": 0.27289086513767563, "step": 27000 }, { "epoch": 0.1152682186659066, "grad_norm": 0.4580657184123993, "learning_rate": 1.9444052819073275e-05, "loss": 0.0494, "step": 27500 }, { "epoch": 0.1152682186659066, "eval_runtime": 337.8905, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44746591009732417, "eval_sts_eval_spearman_cosine": 0.26493673455396866, "step": 27500 }, { "epoch": 0.11736400445983217, "grad_norm": 0.2567861080169678, "learning_rate": 1.9433562853774082e-05, "loss": 0.0473, "step": 28000 }, { "epoch": 0.11736400445983217, "eval_runtime": 342.1918, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45050588944238457, "eval_sts_eval_spearman_cosine": 0.26671484494724956, "step": 28000 }, { "epoch": 0.11945979025375775, "grad_norm": 0.2028268277645111, "learning_rate": 1.942307288847489e-05, "loss": 0.0474, "step": 28500 }, { "epoch": 0.11945979025375775, "eval_runtime": 336.8275, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44729049729338544, "eval_sts_eval_spearman_cosine": 0.26432708965422613, "step": 28500 }, { "epoch": 0.12155557604768331, "grad_norm": 0.2951292097568512, "learning_rate": 1.9412582923175692e-05, "loss": 0.0467, "step": 29000 }, { "epoch": 0.12155557604768331, "eval_runtime": 342.9256, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4438657025810523, "eval_sts_eval_spearman_cosine": 0.25989708247152815, "step": 29000 }, { "epoch": 0.1236513618416089, "grad_norm": 0.3810523450374603, "learning_rate": 1.94020929578765e-05, "loss": 0.0461, "step": 29500 }, { "epoch": 0.1236513618416089, "eval_runtime": 338.8538, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4472718756762472, "eval_sts_eval_spearman_cosine": 0.2609917611260882, "step": 29500 }, { "epoch": 0.12574714763553446, "grad_norm": 0.7794930934906006, "learning_rate": 1.9391602992577302e-05, "loss": 0.0473, "step": 30000 }, { "epoch": 0.12574714763553446, "eval_runtime": 339.017, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4427659201597099, "eval_sts_eval_spearman_cosine": 0.2580044219347961, "step": 30000 }, { "epoch": 0.12784293342946004, "grad_norm": 0.5840896368026733, "learning_rate": 1.9381113027278105e-05, "loss": 0.049, "step": 30500 }, { "epoch": 0.12784293342946004, "eval_runtime": 338.2747, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44224054667388013, "eval_sts_eval_spearman_cosine": 0.2612493711584714, "step": 30500 }, { "epoch": 0.12993871922338562, "grad_norm": 0.32866010069847107, "learning_rate": 1.9370623061978912e-05, "loss": 0.047, "step": 31000 }, { "epoch": 0.12993871922338562, "eval_runtime": 339.1953, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.43740317304013204, "eval_sts_eval_spearman_cosine": 0.25575071615403866, "step": 31000 }, { "epoch": 0.1320345050173112, "grad_norm": 0.3385889232158661, "learning_rate": 1.936013309667972e-05, "loss": 0.0469, "step": 31500 }, { "epoch": 0.1320345050173112, "eval_runtime": 337.8127, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44541443921587665, "eval_sts_eval_spearman_cosine": 0.2616946173792524, "step": 31500 }, { "epoch": 0.13413029081123676, "grad_norm": 0.7898842692375183, "learning_rate": 1.9349643131380522e-05, "loss": 0.0489, "step": 32000 }, { "epoch": 0.13413029081123676, "eval_runtime": 344.3325, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4552813572412773, "eval_sts_eval_spearman_cosine": 0.26517605060557997, "step": 32000 }, { "epoch": 0.13622607660516234, "grad_norm": 0.9938085675239563, "learning_rate": 1.933915316608133e-05, "loss": 0.0478, "step": 32500 }, { "epoch": 0.13622607660516234, "eval_runtime": 343.4184, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4444114305945971, "eval_sts_eval_spearman_cosine": 0.26085178151263844, "step": 32500 }, { "epoch": 0.13832186239908792, "grad_norm": 0.4296727478504181, "learning_rate": 1.9328663200782135e-05, "loss": 0.0486, "step": 33000 }, { "epoch": 0.13832186239908792, "eval_runtime": 338.8704, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44777549834041086, "eval_sts_eval_spearman_cosine": 0.26373337999867896, "step": 33000 }, { "epoch": 0.1404176481930135, "grad_norm": 0.9105572700500488, "learning_rate": 1.931817323548294e-05, "loss": 0.0485, "step": 33500 }, { "epoch": 0.1404176481930135, "eval_runtime": 339.8098, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4399068179623664, "eval_sts_eval_spearman_cosine": 0.26207731782615873, "step": 33500 }, { "epoch": 0.14251343398693905, "grad_norm": 0.3035168945789337, "learning_rate": 1.9307683270183742e-05, "loss": 0.048, "step": 34000 }, { "epoch": 0.14251343398693905, "eval_runtime": 333.0821, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.43088706272088545, "eval_sts_eval_spearman_cosine": 0.26311186805378234, "step": 34000 }, { "epoch": 0.14460921978086463, "grad_norm": 0.27919381856918335, "learning_rate": 1.929719330488455e-05, "loss": 0.048, "step": 34500 }, { "epoch": 0.14460921978086463, "eval_runtime": 342.3981, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45801059213971973, "eval_sts_eval_spearman_cosine": 0.2706508623909724, "step": 34500 }, { "epoch": 0.14670500557479021, "grad_norm": 0.3119344711303711, "learning_rate": 1.9286703339585352e-05, "loss": 0.0484, "step": 35000 }, { "epoch": 0.14670500557479021, "eval_runtime": 349.4101, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4534628976940601, "eval_sts_eval_spearman_cosine": 0.2706809959564022, "step": 35000 }, { "epoch": 0.1488007913687158, "grad_norm": 0.7175905704498291, "learning_rate": 1.927621337428616e-05, "loss": 0.0472, "step": 35500 }, { "epoch": 0.1488007913687158, "eval_runtime": 355.9232, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44957017355312734, "eval_sts_eval_spearman_cosine": 0.26292275016007366, "step": 35500 }, { "epoch": 0.15089657716264135, "grad_norm": 0.31285443902015686, "learning_rate": 1.9265723408986966e-05, "loss": 0.0479, "step": 36000 }, { "epoch": 0.15089657716264135, "eval_runtime": 340.0595, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4499148869509332, "eval_sts_eval_spearman_cosine": 0.2640133574175953, "step": 36000 }, { "epoch": 0.15299236295656693, "grad_norm": 1.521690845489502, "learning_rate": 1.9255233443687772e-05, "loss": 0.0468, "step": 36500 }, { "epoch": 0.15299236295656693, "eval_runtime": 340.142, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44505814411516414, "eval_sts_eval_spearman_cosine": 0.2661552147966051, "step": 36500 }, { "epoch": 0.1550881487504925, "grad_norm": 0.7089241147041321, "learning_rate": 1.9244743478388576e-05, "loss": 0.0494, "step": 37000 }, { "epoch": 0.1550881487504925, "eval_runtime": 337.3069, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4494158077781873, "eval_sts_eval_spearman_cosine": 0.2663163553125806, "step": 37000 }, { "epoch": 0.1571839345444181, "grad_norm": 0.5163518190383911, "learning_rate": 1.923425351308938e-05, "loss": 0.0503, "step": 37500 }, { "epoch": 0.1571839345444181, "eval_runtime": 342.0593, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4484683519620878, "eval_sts_eval_spearman_cosine": 0.26714417547343056, "step": 37500 }, { "epoch": 0.15927972033834367, "grad_norm": 0.49407804012298584, "learning_rate": 1.9223763547790186e-05, "loss": 0.0497, "step": 38000 }, { "epoch": 0.15927972033834367, "eval_runtime": 338.9417, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45237495071039724, "eval_sts_eval_spearman_cosine": 0.2701074771352738, "step": 38000 }, { "epoch": 0.16137550613226923, "grad_norm": 1.0480082035064697, "learning_rate": 1.921327358249099e-05, "loss": 0.0502, "step": 38500 }, { "epoch": 0.16137550613226923, "eval_runtime": 340.338, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4475961302074136, "eval_sts_eval_spearman_cosine": 0.26764528114725794, "step": 38500 }, { "epoch": 0.1634712919261948, "grad_norm": 0.4279385209083557, "learning_rate": 1.9202783617191796e-05, "loss": 0.0493, "step": 39000 }, { "epoch": 0.1634712919261948, "eval_runtime": 339.9318, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45457950324888374, "eval_sts_eval_spearman_cosine": 0.27012908871823244, "step": 39000 }, { "epoch": 0.1655670777201204, "grad_norm": 0.3919336795806885, "learning_rate": 1.9192293651892602e-05, "loss": 0.0464, "step": 39500 }, { "epoch": 0.1655670777201204, "eval_runtime": 336.8222, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4441983424104144, "eval_sts_eval_spearman_cosine": 0.2647264331849491, "step": 39500 }, { "epoch": 0.16766286351404597, "grad_norm": 0.2185303121805191, "learning_rate": 1.9181803686593406e-05, "loss": 0.0445, "step": 40000 }, { "epoch": 0.16766286351404597, "eval_runtime": 337.3471, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4480837994730781, "eval_sts_eval_spearman_cosine": 0.26198464770120794, "step": 40000 }, { "epoch": 0.16975864930797152, "grad_norm": 0.557152509689331, "learning_rate": 1.9171313721294213e-05, "loss": 0.0473, "step": 40500 }, { "epoch": 0.16975864930797152, "eval_runtime": 336.0129, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4438604221838804, "eval_sts_eval_spearman_cosine": 0.2587145673583128, "step": 40500 }, { "epoch": 0.1718544351018971, "grad_norm": 0.3796949088573456, "learning_rate": 1.9160823755995016e-05, "loss": 0.0462, "step": 41000 }, { "epoch": 0.1718544351018971, "eval_runtime": 337.1235, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4521035261767795, "eval_sts_eval_spearman_cosine": 0.26358421653502634, "step": 41000 }, { "epoch": 0.17395022089582268, "grad_norm": 0.5144481062889099, "learning_rate": 1.9150333790695823e-05, "loss": 0.0451, "step": 41500 }, { "epoch": 0.17395022089582268, "eval_runtime": 337.2358, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45000273795039925, "eval_sts_eval_spearman_cosine": 0.2641742796707658, "step": 41500 }, { "epoch": 0.17604600668974826, "grad_norm": 0.5425313115119934, "learning_rate": 1.9139843825396626e-05, "loss": 0.0469, "step": 42000 }, { "epoch": 0.17604600668974826, "eval_runtime": 336.0425, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4474757364996772, "eval_sts_eval_spearman_cosine": 0.264786152396235, "step": 42000 }, { "epoch": 0.17814179248367382, "grad_norm": 0.4734934866428375, "learning_rate": 1.9129353860097433e-05, "loss": 0.0487, "step": 42500 }, { "epoch": 0.17814179248367382, "eval_runtime": 340.7786, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44456692067317294, "eval_sts_eval_spearman_cosine": 0.2584306205207322, "step": 42500 }, { "epoch": 0.1802375782775994, "grad_norm": 0.6385965943336487, "learning_rate": 1.911886389479824e-05, "loss": 0.0492, "step": 43000 }, { "epoch": 0.1802375782775994, "eval_runtime": 349.4686, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4448936987230867, "eval_sts_eval_spearman_cosine": 0.26181483708948056, "step": 43000 }, { "epoch": 0.18233336407152498, "grad_norm": 0.30056899785995483, "learning_rate": 1.9108373929499043e-05, "loss": 0.0466, "step": 43500 }, { "epoch": 0.18233336407152498, "eval_runtime": 340.9671, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4392933634607715, "eval_sts_eval_spearman_cosine": 0.2566618557376792, "step": 43500 }, { "epoch": 0.18442914986545056, "grad_norm": 0.7003186345100403, "learning_rate": 1.909788396419985e-05, "loss": 0.0458, "step": 44000 }, { "epoch": 0.18442914986545056, "eval_runtime": 341.791, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44888714906941574, "eval_sts_eval_spearman_cosine": 0.2570510927545476, "step": 44000 }, { "epoch": 0.18652493565937614, "grad_norm": 0.3971806764602661, "learning_rate": 1.9087393998900653e-05, "loss": 0.0444, "step": 44500 }, { "epoch": 0.18652493565937614, "eval_runtime": 340.9187, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4453661050881699, "eval_sts_eval_spearman_cosine": 0.26063218237269625, "step": 44500 }, { "epoch": 0.1886207214533017, "grad_norm": 0.276896208524704, "learning_rate": 1.9076904033601456e-05, "loss": 0.0482, "step": 45000 }, { "epoch": 0.1886207214533017, "eval_runtime": 337.8038, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4443341382293888, "eval_sts_eval_spearman_cosine": 0.2588268101400388, "step": 45000 }, { "epoch": 0.19071650724722727, "grad_norm": 0.4351238012313843, "learning_rate": 1.9066414068302263e-05, "loss": 0.0476, "step": 45500 }, { "epoch": 0.19071650724722727, "eval_runtime": 348.0867, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44909680070935876, "eval_sts_eval_spearman_cosine": 0.258192202657196, "step": 45500 }, { "epoch": 0.19281229304115285, "grad_norm": 0.3994763195514679, "learning_rate": 1.905592410300307e-05, "loss": 0.0461, "step": 46000 }, { "epoch": 0.19281229304115285, "eval_runtime": 340.8737, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4500463295252902, "eval_sts_eval_spearman_cosine": 0.26060275419800166, "step": 46000 }, { "epoch": 0.19490807883507844, "grad_norm": 0.3078446090221405, "learning_rate": 1.9045434137703873e-05, "loss": 0.0451, "step": 46500 }, { "epoch": 0.19490807883507844, "eval_runtime": 347.8626, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44205264425993956, "eval_sts_eval_spearman_cosine": 0.2541560550925476, "step": 46500 }, { "epoch": 0.197003864629004, "grad_norm": 0.31960633397102356, "learning_rate": 1.903494417240468e-05, "loss": 0.0488, "step": 47000 }, { "epoch": 0.197003864629004, "eval_runtime": 340.635, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45065272094964826, "eval_sts_eval_spearman_cosine": 0.2598847704692066, "step": 47000 }, { "epoch": 0.19909965042292957, "grad_norm": 0.415128231048584, "learning_rate": 1.9024454207105486e-05, "loss": 0.0498, "step": 47500 }, { "epoch": 0.19909965042292957, "eval_runtime": 338.3837, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45138927320634414, "eval_sts_eval_spearman_cosine": 0.26135169488359056, "step": 47500 }, { "epoch": 0.20119543621685515, "grad_norm": 0.2819114923477173, "learning_rate": 1.901396424180629e-05, "loss": 0.0477, "step": 48000 }, { "epoch": 0.20119543621685515, "eval_runtime": 344.2006, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4494398217690626, "eval_sts_eval_spearman_cosine": 0.26235654160901856, "step": 48000 }, { "epoch": 0.20329122201078073, "grad_norm": 0.6809430122375488, "learning_rate": 1.9003474276507093e-05, "loss": 0.0462, "step": 48500 }, { "epoch": 0.20329122201078073, "eval_runtime": 340.8038, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45066512909271356, "eval_sts_eval_spearman_cosine": 0.26654040086525005, "step": 48500 }, { "epoch": 0.20538700780470628, "grad_norm": 0.3669719994068146, "learning_rate": 1.89929843112079e-05, "loss": 0.043, "step": 49000 }, { "epoch": 0.20538700780470628, "eval_runtime": 339.2981, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4457283854626467, "eval_sts_eval_spearman_cosine": 0.26041017359085866, "step": 49000 }, { "epoch": 0.20748279359863186, "grad_norm": 0.4737715721130371, "learning_rate": 1.8982494345908706e-05, "loss": 0.0477, "step": 49500 }, { "epoch": 0.20748279359863186, "eval_runtime": 336.5166, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4556312650766483, "eval_sts_eval_spearman_cosine": 0.2640695349942812, "step": 49500 }, { "epoch": 0.20957857939255745, "grad_norm": 0.49677714705467224, "learning_rate": 1.897200438060951e-05, "loss": 0.0479, "step": 50000 }, { "epoch": 0.20957857939255745, "eval_runtime": 337.3909, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4500329590529354, "eval_sts_eval_spearman_cosine": 0.26123640216810107, "step": 50000 }, { "epoch": 0.21167436518648303, "grad_norm": 0.5639549493789673, "learning_rate": 1.8961514415310316e-05, "loss": 0.0458, "step": 50500 }, { "epoch": 0.21167436518648303, "eval_runtime": 344.5448, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44313611033471045, "eval_sts_eval_spearman_cosine": 0.2591664126487338, "step": 50500 }, { "epoch": 0.2137701509804086, "grad_norm": 0.359203040599823, "learning_rate": 1.8951024450011123e-05, "loss": 0.0483, "step": 51000 }, { "epoch": 0.2137701509804086, "eval_runtime": 344.7789, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.44068292427335354, "eval_sts_eval_spearman_cosine": 0.2551957260489125, "step": 51000 }, { "epoch": 0.21586593677433416, "grad_norm": 0.19245052337646484, "learning_rate": 1.8940534484711926e-05, "loss": 0.0467, "step": 51500 }, { "epoch": 0.21586593677433416, "eval_runtime": 338.5251, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4502135312908011, "eval_sts_eval_spearman_cosine": 0.2605712413860504, "step": 51500 }, { "epoch": 0.21796172256825974, "grad_norm": 1.190847635269165, "learning_rate": 1.893004451941273e-05, "loss": 0.0474, "step": 52000 }, { "epoch": 0.21796172256825974, "eval_runtime": 341.9192, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45481759449031217, "eval_sts_eval_spearman_cosine": 0.26110866429697666, "step": 52000 }, { "epoch": 0.22005750836218532, "grad_norm": 0.4441085755825043, "learning_rate": 1.8919554554113536e-05, "loss": 0.046, "step": 52500 }, { "epoch": 0.22005750836218532, "eval_runtime": 343.8323, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45667077467534434, "eval_sts_eval_spearman_cosine": 0.2647237120347806, "step": 52500 }, { "epoch": 0.2221532941561109, "grad_norm": 0.34636542201042175, "learning_rate": 1.890906458881434e-05, "loss": 0.0467, "step": 53000 }, { "epoch": 0.2221532941561109, "eval_runtime": 340.2865, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45648889431652745, "eval_sts_eval_spearman_cosine": 0.2664298725432066, "step": 53000 }, { "epoch": 0.22424907995003646, "grad_norm": 0.5705260634422302, "learning_rate": 1.8898574623515147e-05, "loss": 0.0485, "step": 53500 }, { "epoch": 0.22424907995003646, "eval_runtime": 339.299, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4570148676567805, "eval_sts_eval_spearman_cosine": 0.2639067904674141, "step": 53500 }, { "epoch": 0.22634486574396204, "grad_norm": 0.49348214268684387, "learning_rate": 1.8888084658215953e-05, "loss": 0.0446, "step": 54000 }, { "epoch": 0.22634486574396204, "eval_runtime": 336.732, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.457701736196621, "eval_sts_eval_spearman_cosine": 0.2693994678350602, "step": 54000 }, { "epoch": 0.22844065153788762, "grad_norm": 0.34465575218200684, "learning_rate": 1.8877594692916757e-05, "loss": 0.0472, "step": 54500 }, { "epoch": 0.22844065153788762, "eval_runtime": 339.5535, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45266312544157006, "eval_sts_eval_spearman_cosine": 0.26824750531254465, "step": 54500 }, { "epoch": 0.2305364373318132, "grad_norm": 0.452663391828537, "learning_rate": 1.8867104727617563e-05, "loss": 0.0495, "step": 55000 }, { "epoch": 0.2305364373318132, "eval_runtime": 338.7742, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45647187842304715, "eval_sts_eval_spearman_cosine": 0.264849968710597, "step": 55000 }, { "epoch": 0.23263222312573875, "grad_norm": 0.6373937726020813, "learning_rate": 1.8856614762318367e-05, "loss": 0.0485, "step": 55500 }, { "epoch": 0.23263222312573875, "eval_runtime": 335.958, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46083420731355956, "eval_sts_eval_spearman_cosine": 0.26879825907222543, "step": 55500 }, { "epoch": 0.23472800891966433, "grad_norm": 0.47966334223747253, "learning_rate": 1.8846124797019173e-05, "loss": 0.0464, "step": 56000 }, { "epoch": 0.23472800891966433, "eval_runtime": 338.7608, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4581371106971205, "eval_sts_eval_spearman_cosine": 0.2656489063980585, "step": 56000 }, { "epoch": 0.2368237947135899, "grad_norm": 0.9081245064735413, "learning_rate": 1.8835634831719977e-05, "loss": 0.0469, "step": 56500 }, { "epoch": 0.2368237947135899, "eval_runtime": 345.2108, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45191881291494673, "eval_sts_eval_spearman_cosine": 0.2621934302121334, "step": 56500 }, { "epoch": 0.2389195805075155, "grad_norm": 0.4270106256008148, "learning_rate": 1.8825144866420783e-05, "loss": 0.0474, "step": 57000 }, { "epoch": 0.2389195805075155, "eval_runtime": 343.3558, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4609667483529351, "eval_sts_eval_spearman_cosine": 0.26601310989020205, "step": 57000 }, { "epoch": 0.24101536630144108, "grad_norm": 0.24430905282497406, "learning_rate": 1.881465490112159e-05, "loss": 0.0454, "step": 57500 }, { "epoch": 0.24101536630144108, "eval_runtime": 342.6044, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46036525459981814, "eval_sts_eval_spearman_cosine": 0.26780150300173383, "step": 57500 }, { "epoch": 0.24311115209536663, "grad_norm": 0.3330262303352356, "learning_rate": 1.8804164935822393e-05, "loss": 0.0482, "step": 58000 }, { "epoch": 0.24311115209536663, "eval_runtime": 343.0287, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45428565236477536, "eval_sts_eval_spearman_cosine": 0.263593304645589, "step": 58000 }, { "epoch": 0.2452069378892922, "grad_norm": 0.32041916251182556, "learning_rate": 1.87936749705232e-05, "loss": 0.0463, "step": 58500 }, { "epoch": 0.2452069378892922, "eval_runtime": 341.4234, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4577820688406681, "eval_sts_eval_spearman_cosine": 0.26413517757913746, "step": 58500 }, { "epoch": 0.2473027236832178, "grad_norm": 0.5235299468040466, "learning_rate": 1.8783185005224004e-05, "loss": 0.0462, "step": 59000 }, { "epoch": 0.2473027236832178, "eval_runtime": 344.4739, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46319110358428206, "eval_sts_eval_spearman_cosine": 0.26733839082265276, "step": 59000 }, { "epoch": 0.24939850947714337, "grad_norm": 0.19928471744060516, "learning_rate": 1.877269503992481e-05, "loss": 0.0452, "step": 59500 }, { "epoch": 0.24939850947714337, "eval_runtime": 337.8174, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46113987839474907, "eval_sts_eval_spearman_cosine": 0.2711928272780907, "step": 59500 }, { "epoch": 0.2514942952710689, "grad_norm": 0.3985499143600464, "learning_rate": 1.8762205074625614e-05, "loss": 0.048, "step": 60000 }, { "epoch": 0.2514942952710689, "eval_runtime": 336.311, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4484191941544729, "eval_sts_eval_spearman_cosine": 0.26093738709759023, "step": 60000 }, { "epoch": 0.25359008106499453, "grad_norm": 0.4283686578273773, "learning_rate": 1.875171510932642e-05, "loss": 0.0471, "step": 60500 }, { "epoch": 0.25359008106499453, "eval_runtime": 339.163, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45584581266780366, "eval_sts_eval_spearman_cosine": 0.2680626176959015, "step": 60500 }, { "epoch": 0.2556858668589201, "grad_norm": 0.34197545051574707, "learning_rate": 1.8741225144027224e-05, "loss": 0.0489, "step": 61000 }, { "epoch": 0.2556858668589201, "eval_runtime": 333.6283, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45851657161222814, "eval_sts_eval_spearman_cosine": 0.26968624243762407, "step": 61000 }, { "epoch": 0.25778165265284564, "grad_norm": 0.18788769841194153, "learning_rate": 1.873073517872803e-05, "loss": 0.0468, "step": 61500 }, { "epoch": 0.25778165265284564, "eval_runtime": 340.5071, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4556590131676169, "eval_sts_eval_spearman_cosine": 0.2670438948160231, "step": 61500 }, { "epoch": 0.25987743844677125, "grad_norm": 0.493650883436203, "learning_rate": 1.8720245213428837e-05, "loss": 0.047, "step": 62000 }, { "epoch": 0.25987743844677125, "eval_runtime": 349.9441, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4518687788231237, "eval_sts_eval_spearman_cosine": 0.26879230777027613, "step": 62000 }, { "epoch": 0.2619732242406968, "grad_norm": 0.815079391002655, "learning_rate": 1.870975524812964e-05, "loss": 0.0454, "step": 62500 }, { "epoch": 0.2619732242406968, "eval_runtime": 340.671, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46214359757803203, "eval_sts_eval_spearman_cosine": 0.2714711287177641, "step": 62500 }, { "epoch": 0.2640690100346224, "grad_norm": 3.0666303634643555, "learning_rate": 1.8699265282830447e-05, "loss": 0.0459, "step": 63000 }, { "epoch": 0.2640690100346224, "eval_runtime": 332.5861, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45700111125069, "eval_sts_eval_spearman_cosine": 0.2696201749575335, "step": 63000 }, { "epoch": 0.26616479582854796, "grad_norm": 1.108765721321106, "learning_rate": 1.868877531753125e-05, "loss": 0.0485, "step": 63500 }, { "epoch": 0.26616479582854796, "eval_runtime": 341.7587, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4647307791236681, "eval_sts_eval_spearman_cosine": 0.26993075878119266, "step": 63500 }, { "epoch": 0.2682605816224735, "grad_norm": 0.46058839559555054, "learning_rate": 1.8678285352232057e-05, "loss": 0.0483, "step": 64000 }, { "epoch": 0.2682605816224735, "eval_runtime": 337.6833, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4535176063382498, "eval_sts_eval_spearman_cosine": 0.26434247269076455, "step": 64000 }, { "epoch": 0.583319767757339, "grad_norm": 2.075059652328491, "learning_rate": 1.7122132284292488e-05, "loss": 0.0439, "step": 64500 }, { "epoch": 0.583319767757339, "eval_runtime": 344.9064, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45822043743820173, "eval_sts_eval_spearman_cosine": 0.2666989490990656, "step": 64500 }, { "epoch": 0.5878416264221246, "grad_norm": 0.9772994518280029, "learning_rate": 1.7099471551716325e-05, "loss": 0.044, "step": 65000 }, { "epoch": 0.5878416264221246, "eval_runtime": 345.646, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46288454298619824, "eval_sts_eval_spearman_cosine": 0.2719717741875009, "step": 65000 }, { "epoch": 0.5923634850869102, "grad_norm": 0.24728697538375854, "learning_rate": 1.7076810819140162e-05, "loss": 0.0429, "step": 65500 }, { "epoch": 0.5923634850869102, "eval_runtime": 346.425, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4688961106065947, "eval_sts_eval_spearman_cosine": 0.27444318157215, "step": 65500 }, { "epoch": 0.5968853437516957, "grad_norm": 0.9094905257225037, "learning_rate": 1.7054150086564003e-05, "loss": 0.0474, "step": 66000 }, { "epoch": 0.5968853437516957, "eval_runtime": 344.1504, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46671489582863623, "eval_sts_eval_spearman_cosine": 0.2745626969039024, "step": 66000 }, { "epoch": 0.6014072024164813, "grad_norm": 0.3827674984931946, "learning_rate": 1.7031489353987836e-05, "loss": 0.0442, "step": 66500 }, { "epoch": 0.6014072024164813, "eval_runtime": 343.4757, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47081681987930996, "eval_sts_eval_spearman_cosine": 0.2757111208978273, "step": 66500 }, { "epoch": 0.6059290610812669, "grad_norm": 0.36521339416503906, "learning_rate": 1.7008828621411673e-05, "loss": 0.043, "step": 67000 }, { "epoch": 0.6059290610812669, "eval_runtime": 344.8757, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4690431532298536, "eval_sts_eval_spearman_cosine": 0.2731779206466308, "step": 67000 }, { "epoch": 0.6104509197460524, "grad_norm": 0.2846500277519226, "learning_rate": 1.698616788883551e-05, "loss": 0.0443, "step": 67500 }, { "epoch": 0.6104509197460524, "eval_runtime": 345.5968, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4727687783925766, "eval_sts_eval_spearman_cosine": 0.2766166202975483, "step": 67500 }, { "epoch": 0.614972778410838, "grad_norm": 0.8947261571884155, "learning_rate": 1.6963507156259348e-05, "loss": 0.0457, "step": 68000 }, { "epoch": 0.614972778410838, "eval_runtime": 349.4023, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4704530278251583, "eval_sts_eval_spearman_cosine": 0.27125019091164215, "step": 68000 }, { "epoch": 0.6194946370756236, "grad_norm": 0.38364651799201965, "learning_rate": 1.6940846423683185e-05, "loss": 0.0446, "step": 68500 }, { "epoch": 0.6194946370756236, "eval_runtime": 350.8617, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46779124698310753, "eval_sts_eval_spearman_cosine": 0.2678404320353407, "step": 68500 }, { "epoch": 0.6240164957404092, "grad_norm": 0.5186108946800232, "learning_rate": 1.6918185691107022e-05, "loss": 0.0432, "step": 69000 }, { "epoch": 0.6240164957404092, "eval_runtime": 350.1606, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4636033741912878, "eval_sts_eval_spearman_cosine": 0.26892000445898917, "step": 69000 }, { "epoch": 0.6285383544051947, "grad_norm": 1.1911261081695557, "learning_rate": 1.689552495853086e-05, "loss": 0.0456, "step": 69500 }, { "epoch": 0.6285383544051947, "eval_runtime": 347.4134, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46225531288469346, "eval_sts_eval_spearman_cosine": 0.26734142228003843, "step": 69500 }, { "epoch": 0.6330602130699803, "grad_norm": 0.41027602553367615, "learning_rate": 1.6872864225954696e-05, "loss": 0.0443, "step": 70000 }, { "epoch": 0.6330602130699803, "eval_runtime": 346.6981, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46672010694779265, "eval_sts_eval_spearman_cosine": 0.2669844490247406, "step": 70000 }, { "epoch": 0.4527182358758332, "grad_norm": 0.30712175369262695, "learning_rate": 1.7743547739500525e-05, "loss": 0.0451, "step": 70500 }, { "epoch": 0.4527182358758332, "eval_runtime": 347.3582, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4628803794075834, "eval_sts_eval_spearman_cosine": 0.2689064855693454, "step": 70500 }, { "epoch": 0.4559290035061582, "grad_norm": 0.5855965614318848, "learning_rate": 1.7727487352445195e-05, "loss": 0.0466, "step": 71000 }, { "epoch": 0.4559290035061582, "eval_runtime": 343.4729, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46319245871557635, "eval_sts_eval_spearman_cosine": 0.26779162636351556, "step": 71000 }, { "epoch": 0.4591397711364833, "grad_norm": 0.39146557450294495, "learning_rate": 1.7711426965389866e-05, "loss": 0.0452, "step": 71500 }, { "epoch": 0.4591397711364833, "eval_runtime": 351.9998, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4635937757873226, "eval_sts_eval_spearman_cosine": 0.2683592675390492, "step": 71500 }, { "epoch": 0.46235053876680837, "grad_norm": 0.3798949122428894, "learning_rate": 1.7695366578334537e-05, "loss": 0.0458, "step": 72000 }, { "epoch": 0.46235053876680837, "eval_runtime": 346.2702, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4605400968087333, "eval_sts_eval_spearman_cosine": 0.2689833049472947, "step": 72000 }, { "epoch": 0.46556130639713345, "grad_norm": 0.29719600081443787, "learning_rate": 1.767930619127921e-05, "loss": 0.0435, "step": 72500 }, { "epoch": 0.46556130639713345, "eval_runtime": 347.625, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45951635903810295, "eval_sts_eval_spearman_cosine": 0.2663982862724518, "step": 72500 }, { "epoch": 0.46877207402745846, "grad_norm": 0.49266085028648376, "learning_rate": 1.7663245804223882e-05, "loss": 0.0446, "step": 73000 }, { "epoch": 0.46877207402745846, "eval_runtime": 344.2244, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46177660323495817, "eval_sts_eval_spearman_cosine": 0.2655927562285823, "step": 73000 }, { "epoch": 0.47198284165778354, "grad_norm": 0.2423386573791504, "learning_rate": 1.7647185417168556e-05, "loss": 0.0428, "step": 73500 }, { "epoch": 0.47198284165778354, "eval_runtime": 343.3661, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45703133542055974, "eval_sts_eval_spearman_cosine": 0.26595783245237525, "step": 73500 }, { "epoch": 0.4751936092881086, "grad_norm": 0.4229910969734192, "learning_rate": 1.7631125030113227e-05, "loss": 0.0445, "step": 74000 }, { "epoch": 0.4751936092881086, "eval_runtime": 339.8683, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4644684757589911, "eval_sts_eval_spearman_cosine": 0.26727197290814453, "step": 74000 }, { "epoch": 0.4784043769184337, "grad_norm": 0.8358988761901855, "learning_rate": 1.76150646430579e-05, "loss": 0.0439, "step": 74500 }, { "epoch": 0.4784043769184337, "eval_runtime": 345.4191, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.45985234310165035, "eval_sts_eval_spearman_cosine": 0.26436627598065215, "step": 74500 }, { "epoch": 0.4816151445487587, "grad_norm": 0.2180459201335907, "learning_rate": 1.7599004256002572e-05, "loss": 0.0441, "step": 75000 }, { "epoch": 0.4816151445487587, "eval_runtime": 341.1852, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.465738124354375, "eval_sts_eval_spearman_cosine": 0.26937733022168964, "step": 75000 }, { "epoch": 0.4848259121790838, "grad_norm": 0.679786741733551, "learning_rate": 1.7582943868947243e-05, "loss": 0.0434, "step": 75500 }, { "epoch": 0.4848259121790838, "eval_runtime": 344.0105, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46572761024773474, "eval_sts_eval_spearman_cosine": 0.26887397384052586, "step": 75500 }, { "epoch": 0.48803667980940885, "grad_norm": 0.41084718704223633, "learning_rate": 1.7566883481891914e-05, "loss": 0.0463, "step": 76000 }, { "epoch": 0.48803667980940885, "eval_runtime": 347.5491, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4636901266559994, "eval_sts_eval_spearman_cosine": 0.2676799428117979, "step": 76000 }, { "epoch": 0.49124744743973386, "grad_norm": 0.5301337838172913, "learning_rate": 1.7550823094836588e-05, "loss": 0.0427, "step": 76500 }, { "epoch": 0.49124744743973386, "eval_runtime": 348.3985, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46560725142672826, "eval_sts_eval_spearman_cosine": 0.2692058738450744, "step": 76500 }, { "epoch": 0.49445821507005894, "grad_norm": 7.734156608581543, "learning_rate": 1.753476270778126e-05, "loss": 0.0437, "step": 77000 }, { "epoch": 0.49445821507005894, "eval_runtime": 347.8764, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4638793783305779, "eval_sts_eval_spearman_cosine": 0.2648514825294853, "step": 77000 }, { "epoch": 0.497668982700384, "grad_norm": 0.21175654232501984, "learning_rate": 1.751870232072593e-05, "loss": 0.044, "step": 77500 }, { "epoch": 0.497668982700384, "eval_runtime": 351.8794, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4640394512917372, "eval_sts_eval_spearman_cosine": 0.26941320067029395, "step": 77500 }, { "epoch": 0.5008797503307091, "grad_norm": 0.5250917077064514, "learning_rate": 1.7502641933670604e-05, "loss": 0.0423, "step": 78000 }, { "epoch": 0.5008797503307091, "eval_runtime": 352.3033, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46725853182374755, "eval_sts_eval_spearman_cosine": 0.2712327653385633, "step": 78000 }, { "epoch": 0.5040905179610341, "grad_norm": 0.5282318592071533, "learning_rate": 1.7486581546615275e-05, "loss": 0.0443, "step": 78500 }, { "epoch": 0.5040905179610341, "eval_runtime": 355.8972, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47290899287409766, "eval_sts_eval_spearman_cosine": 0.2728568609491143, "step": 78500 }, { "epoch": 0.5073012855913592, "grad_norm": 0.2974955439567566, "learning_rate": 1.747052115955995e-05, "loss": 0.0448, "step": 79000 }, { "epoch": 0.5073012855913592, "eval_runtime": 355.1428, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47443853510881395, "eval_sts_eval_spearman_cosine": 0.2736658456783792, "step": 79000 }, { "epoch": 0.5105120532216842, "grad_norm": 0.39728039503097534, "learning_rate": 1.7454460772504616e-05, "loss": 0.0435, "step": 79500 }, { "epoch": 0.5105120532216842, "eval_runtime": 355.9779, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4757678988913061, "eval_sts_eval_spearman_cosine": 0.2820097708977893, "step": 79500 }, { "epoch": 0.5137228208520093, "grad_norm": 0.38706257939338684, "learning_rate": 1.743840038544929e-05, "loss": 0.0431, "step": 80000 }, { "epoch": 0.5137228208520093, "eval_runtime": 359.9987, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47330621195637657, "eval_sts_eval_spearman_cosine": 0.27963278189746404, "step": 80000 }, { "epoch": 0.5169335884823344, "grad_norm": 0.3191979229450226, "learning_rate": 1.742233999839396e-05, "loss": 0.0466, "step": 80500 }, { "epoch": 0.5169335884823344, "eval_runtime": 351.5813, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.473744942713387, "eval_sts_eval_spearman_cosine": 0.27823949145469434, "step": 80500 }, { "epoch": 0.5201443561126594, "grad_norm": 0.3576139211654663, "learning_rate": 1.7406279611338636e-05, "loss": 0.0452, "step": 81000 }, { "epoch": 0.5201443561126594, "eval_runtime": 347.9056, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4608150641566586, "eval_sts_eval_spearman_cosine": 0.2684142956620662, "step": 81000 }, { "epoch": 0.5233551237429844, "grad_norm": 0.19141677021980286, "learning_rate": 1.7390219224283306e-05, "loss": 0.0439, "step": 81500 }, { "epoch": 0.5233551237429844, "eval_runtime": 350.6581, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46476430975811067, "eval_sts_eval_spearman_cosine": 0.27262869543698814, "step": 81500 }, { "epoch": 0.5265658913733096, "grad_norm": 0.9887452125549316, "learning_rate": 1.737415883722798e-05, "loss": 0.0447, "step": 82000 }, { "epoch": 0.5265658913733096, "eval_runtime": 350.2612, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46117109568538445, "eval_sts_eval_spearman_cosine": 0.27068983028657323, "step": 82000 }, { "epoch": 0.5297766590036346, "grad_norm": 0.47332170605659485, "learning_rate": 1.735809845017265e-05, "loss": 0.0468, "step": 82500 }, { "epoch": 0.5297766590036346, "eval_runtime": 349.9598, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.469059328221574, "eval_sts_eval_spearman_cosine": 0.27523073067008536, "step": 82500 }, { "epoch": 0.5329874266339596, "grad_norm": 0.4340454936027527, "learning_rate": 1.7342038063117322e-05, "loss": 0.0438, "step": 83000 }, { "epoch": 0.5329874266339596, "eval_runtime": 356.8639, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47196369823798523, "eval_sts_eval_spearman_cosine": 0.27450566899354834, "step": 83000 }, { "epoch": 0.5361981942642847, "grad_norm": 0.3562575876712799, "learning_rate": 1.7325977676061993e-05, "loss": 0.0459, "step": 83500 }, { "epoch": 0.5361981942642847, "eval_runtime": 346.7412, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4755607973440591, "eval_sts_eval_spearman_cosine": 0.2764005340581686, "step": 83500 }, { "epoch": 0.5394089618946097, "grad_norm": 0.5740967988967896, "learning_rate": 1.7309917289006667e-05, "loss": 0.0433, "step": 84000 }, { "epoch": 0.5394089618946097, "eval_runtime": 353.0306, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47524206625740384, "eval_sts_eval_spearman_cosine": 0.2779296472475131, "step": 84000 }, { "epoch": 0.5426197295249349, "grad_norm": 0.1839008331298828, "learning_rate": 1.7293856901951338e-05, "loss": 0.0417, "step": 84500 }, { "epoch": 0.5426197295249349, "eval_runtime": 355.3481, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46961540047640865, "eval_sts_eval_spearman_cosine": 0.270701313858848, "step": 84500 }, { "epoch": 0.5458304971552599, "grad_norm": 0.449339896440506, "learning_rate": 1.727779651489601e-05, "loss": 0.0454, "step": 85000 }, { "epoch": 0.5458304971552599, "eval_runtime": 356.6489, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4769232338601671, "eval_sts_eval_spearman_cosine": 0.2788912638647163, "step": 85000 }, { "epoch": 0.5490412647855849, "grad_norm": 0.38551419973373413, "learning_rate": 1.7261736127840683e-05, "loss": 0.0438, "step": 85500 }, { "epoch": 0.5490412647855849, "eval_runtime": 351.5643, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4715708432505386, "eval_sts_eval_spearman_cosine": 0.273216570366565, "step": 85500 }, { "epoch": 0.55225203241591, "grad_norm": 0.4938383102416992, "learning_rate": 1.7245675740785354e-05, "loss": 0.0462, "step": 86000 }, { "epoch": 0.55225203241591, "eval_runtime": 352.6926, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47257172497916444, "eval_sts_eval_spearman_cosine": 0.27274037210153873, "step": 86000 }, { "epoch": 0.555462800046235, "grad_norm": 0.44991201162338257, "learning_rate": 1.722961535373003e-05, "loss": 0.045, "step": 86500 }, { "epoch": 0.555462800046235, "eval_runtime": 350.7982, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4704646767711806, "eval_sts_eval_spearman_cosine": 0.2705496062578908, "step": 86500 }, { "epoch": 0.5586735676765601, "grad_norm": 0.19991353154182434, "learning_rate": 1.72135549666747e-05, "loss": 0.0427, "step": 87000 }, { "epoch": 0.5586735676765601, "eval_runtime": 347.5934, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4730149706329284, "eval_sts_eval_spearman_cosine": 0.27316385579130154, "step": 87000 }, { "epoch": 0.5618843353068852, "grad_norm": 0.19858591258525848, "learning_rate": 1.719749457961937e-05, "loss": 0.0463, "step": 87500 }, { "epoch": 0.5618843353068852, "eval_runtime": 349.1212, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47256927615175537, "eval_sts_eval_spearman_cosine": 0.2741095375038477, "step": 87500 }, { "epoch": 0.5650951029372102, "grad_norm": 0.9973406195640564, "learning_rate": 1.718143419256404e-05, "loss": 0.0452, "step": 88000 }, { "epoch": 0.5650951029372102, "eval_runtime": 348.1479, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47645789534945443, "eval_sts_eval_spearman_cosine": 0.2743891596344515, "step": 88000 }, { "epoch": 0.5683058705675352, "grad_norm": 0.39851826429367065, "learning_rate": 1.7165373805508715e-05, "loss": 0.0451, "step": 88500 }, { "epoch": 0.5683058705675352, "eval_runtime": 343.6687, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47425630103124783, "eval_sts_eval_spearman_cosine": 0.2710973569898065, "step": 88500 }, { "epoch": 0.5715166381978604, "grad_norm": 0.4525317847728729, "learning_rate": 1.7149313418453386e-05, "loss": 0.0475, "step": 89000 }, { "epoch": 0.5715166381978604, "eval_runtime": 344.018, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4732385180328227, "eval_sts_eval_spearman_cosine": 0.27070120241484114, "step": 89000 }, { "epoch": 0.5747274058281854, "grad_norm": 0.6527873277664185, "learning_rate": 1.7133253031398057e-05, "loss": 0.0451, "step": 89500 }, { "epoch": 0.5747274058281854, "eval_runtime": 345.018, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47060856519620364, "eval_sts_eval_spearman_cosine": 0.26820191893411294, "step": 89500 }, { "epoch": 0.5779381734585105, "grad_norm": 0.29684481024742126, "learning_rate": 1.711719264434273e-05, "loss": 0.0443, "step": 90000 }, { "epoch": 0.5779381734585105, "eval_runtime": 343.1423, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4746864415358376, "eval_sts_eval_spearman_cosine": 0.27312518765490756, "step": 90000 }, { "epoch": 0.5811489410888355, "grad_norm": 0.4218785762786865, "learning_rate": 1.7101132257287402e-05, "loss": 0.0432, "step": 90500 }, { "epoch": 0.5811489410888355, "eval_runtime": 347.8976, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4762448423443765, "eval_sts_eval_spearman_cosine": 0.2746532578443328, "step": 90500 }, { "epoch": 0.5843597087191605, "grad_norm": 0.38961222767829895, "learning_rate": 1.7085071870232076e-05, "loss": 0.0453, "step": 91000 }, { "epoch": 0.5843597087191605, "eval_runtime": 348.2753, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4754641189689519, "eval_sts_eval_spearman_cosine": 0.2763278336548382, "step": 91000 }, { "epoch": 0.5875704763494857, "grad_norm": 0.8080690503120422, "learning_rate": 1.7069011483176743e-05, "loss": 0.0459, "step": 91500 }, { "epoch": 0.5875704763494857, "eval_runtime": 353.0106, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4793385600520937, "eval_sts_eval_spearman_cosine": 0.27929328035960593, "step": 91500 }, { "epoch": 0.5907812439798107, "grad_norm": 0.4822283089160919, "learning_rate": 1.7052951096121418e-05, "loss": 0.0466, "step": 92000 }, { "epoch": 0.5907812439798107, "eval_runtime": 343.1695, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47641056589041914, "eval_sts_eval_spearman_cosine": 0.2761405903635882, "step": 92000 }, { "epoch": 0.5939920116101357, "grad_norm": 0.21969841420650482, "learning_rate": 1.703689070906609e-05, "loss": 0.0466, "step": 92500 }, { "epoch": 0.5939920116101357, "eval_runtime": 346.1791, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4749186235786941, "eval_sts_eval_spearman_cosine": 0.27878616222337294, "step": 92500 }, { "epoch": 0.5972027792404608, "grad_norm": 0.4392305314540863, "learning_rate": 1.7020830322010763e-05, "loss": 0.0451, "step": 93000 }, { "epoch": 0.5972027792404608, "eval_runtime": 352.6396, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4764752181118148, "eval_sts_eval_spearman_cosine": 0.275212289230118, "step": 93000 }, { "epoch": 0.6004135468707859, "grad_norm": 0.6342678070068359, "learning_rate": 1.7004769934955433e-05, "loss": 0.0429, "step": 93500 }, { "epoch": 0.6004135468707859, "eval_runtime": 348.8057, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4744487709212541, "eval_sts_eval_spearman_cosine": 0.2760971440213833, "step": 93500 }, { "epoch": 0.6036243145011109, "grad_norm": 0.45058977603912354, "learning_rate": 1.6988709547900108e-05, "loss": 0.0451, "step": 94000 }, { "epoch": 0.6036243145011109, "eval_runtime": 3120.1268, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.472286294965952, "eval_sts_eval_spearman_cosine": 0.2743795918522128, "step": 94000 }, { "epoch": 0.46712340955601034, "grad_norm": 0.6003493666648865, "learning_rate": 1.7669855146275314e-05, "loss": 0.043, "step": 94500 }, { "epoch": 0.46712340955601034, "eval_runtime": 348.8582, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47061381008476433, "eval_sts_eval_spearman_cosine": 0.27048970734818206, "step": 94500 }, { "epoch": 0.4695949619875236, "grad_norm": 0.4507014751434326, "learning_rate": 1.7657493503956962e-05, "loss": 0.0425, "step": 95000 }, { "epoch": 0.4695949619875236, "eval_runtime": 351.6197, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47596595379098067, "eval_sts_eval_spearman_cosine": 0.2707911954568496, "step": 95000 }, { "epoch": 0.4720665144190369, "grad_norm": 0.5137360095977783, "learning_rate": 1.764513186163861e-05, "loss": 0.0428, "step": 95500 }, { "epoch": 0.4720665144190369, "eval_runtime": 349.6737, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4714390480066494, "eval_sts_eval_spearman_cosine": 0.2703213722181371, "step": 95500 }, { "epoch": 0.47453806685055017, "grad_norm": 0.6501486897468567, "learning_rate": 1.763277021932026e-05, "loss": 0.0411, "step": 96000 }, { "epoch": 0.47453806685055017, "eval_runtime": 356.4351, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4727042879456572, "eval_sts_eval_spearman_cosine": 0.2689072599157799, "step": 96000 }, { "epoch": 0.4770096192820634, "grad_norm": 0.44074612855911255, "learning_rate": 1.762040857700191e-05, "loss": 0.0434, "step": 96500 }, { "epoch": 0.4770096192820634, "eval_runtime": 352.6796, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4717289265400968, "eval_sts_eval_spearman_cosine": 0.27603987468652486, "step": 96500 }, { "epoch": 0.47948117171357674, "grad_norm": 0.1666240245103836, "learning_rate": 1.7608046934683555e-05, "loss": 0.0407, "step": 97000 }, { "epoch": 0.47948117171357674, "eval_runtime": 359.2374, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47405185900245117, "eval_sts_eval_spearman_cosine": 0.2732695339246637, "step": 97000 }, { "epoch": 0.48195272414509, "grad_norm": 0.4110087752342224, "learning_rate": 1.7595685292365203e-05, "loss": 0.0445, "step": 97500 }, { "epoch": 0.48195272414509, "eval_runtime": 359.8996, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4742531062267226, "eval_sts_eval_spearman_cosine": 0.2740101129025145, "step": 97500 }, { "epoch": 0.4844242765766033, "grad_norm": 1.1226764917373657, "learning_rate": 1.7583323650046854e-05, "loss": 0.0447, "step": 98000 }, { "epoch": 0.4844242765766033, "eval_runtime": 362.7028, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4720524791868722, "eval_sts_eval_spearman_cosine": 0.2693524916301518, "step": 98000 }, { "epoch": 0.48689582900811657, "grad_norm": 0.26226094365119934, "learning_rate": 1.75709620077285e-05, "loss": 0.044, "step": 98500 }, { "epoch": 0.48689582900811657, "eval_runtime": 351.5922, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4780929221421183, "eval_sts_eval_spearman_cosine": 0.2755192001132513, "step": 98500 }, { "epoch": 0.4893673814396299, "grad_norm": 0.24040096998214722, "learning_rate": 1.7558600365410147e-05, "loss": 0.0425, "step": 99000 }, { "epoch": 0.4893673814396299, "eval_runtime": 351.5046, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.478234519508142, "eval_sts_eval_spearman_cosine": 0.2704981053414561, "step": 99000 }, { "epoch": 0.49183893387114314, "grad_norm": 0.23684976994991302, "learning_rate": 1.75462387230918e-05, "loss": 0.0444, "step": 99500 }, { "epoch": 0.49183893387114314, "eval_runtime": 351.7723, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47756936289059754, "eval_sts_eval_spearman_cosine": 0.2729231899656772, "step": 99500 }, { "epoch": 0.49431048630265645, "grad_norm": 0.16446171700954437, "learning_rate": 1.7533877080773443e-05, "loss": 0.0448, "step": 100000 }, { "epoch": 0.49431048630265645, "eval_runtime": 348.4735, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4721086221517329, "eval_sts_eval_spearman_cosine": 0.2690262174419515, "step": 100000 }, { "epoch": 0.4967820387341697, "grad_norm": 0.4354134202003479, "learning_rate": 1.752151543845509e-05, "loss": 0.0432, "step": 100500 }, { "epoch": 0.4967820387341697, "eval_runtime": 347.9725, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4761007883135632, "eval_sts_eval_spearman_cosine": 0.2746545893815913, "step": 100500 }, { "epoch": 0.49925359116568296, "grad_norm": 0.45141008496284485, "learning_rate": 1.7509153796136743e-05, "loss": 0.0426, "step": 101000 }, { "epoch": 0.49925359116568296, "eval_runtime": 358.734, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.475320300437908, "eval_sts_eval_spearman_cosine": 0.2739335566477747, "step": 101000 }, { "epoch": 0.5017251435971962, "grad_norm": 0.2849339246749878, "learning_rate": 1.7496792153818388e-05, "loss": 0.043, "step": 101500 }, { "epoch": 0.5017251435971962, "eval_runtime": 348.7329, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4746668259328616, "eval_sts_eval_spearman_cosine": 0.27730528466135707, "step": 101500 }, { "epoch": 0.5041966960287095, "grad_norm": 0.4919917583465576, "learning_rate": 1.7484430511500036e-05, "loss": 0.0432, "step": 102000 }, { "epoch": 0.5041966960287095, "eval_runtime": 350.5986, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4755222984129452, "eval_sts_eval_spearman_cosine": 0.2774316976611839, "step": 102000 }, { "epoch": 0.5066682484602228, "grad_norm": 0.5823877453804016, "learning_rate": 1.7472068869181687e-05, "loss": 0.042, "step": 102500 }, { "epoch": 0.5066682484602228, "eval_runtime": 346.9064, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47750438907486104, "eval_sts_eval_spearman_cosine": 0.27764826040692653, "step": 102500 }, { "epoch": 0.5091398008917362, "grad_norm": 0.35131126642227173, "learning_rate": 1.7459707226863335e-05, "loss": 0.0441, "step": 103000 }, { "epoch": 0.5091398008917362, "eval_runtime": 346.944, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47532735456012026, "eval_sts_eval_spearman_cosine": 0.2754806244919039, "step": 103000 }, { "epoch": 0.5116113533232494, "grad_norm": 0.27014538645744324, "learning_rate": 1.744734558454498e-05, "loss": 0.0441, "step": 103500 }, { "epoch": 0.5116113533232494, "eval_runtime": 353.1054, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4771363509734021, "eval_sts_eval_spearman_cosine": 0.2781881001375129, "step": 103500 }, { "epoch": 0.5140829057547627, "grad_norm": 0.3019545078277588, "learning_rate": 1.743498394222663e-05, "loss": 0.0436, "step": 104000 }, { "epoch": 0.5140829057547627, "eval_runtime": 345.7203, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4808563495292716, "eval_sts_eval_spearman_cosine": 0.27838772803371714, "step": 104000 }, { "epoch": 0.516554458186276, "grad_norm": 0.40374359488487244, "learning_rate": 1.742262229990828e-05, "loss": 0.0446, "step": 104500 }, { "epoch": 0.516554458186276, "eval_runtime": 349.3393, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4759553774757468, "eval_sts_eval_spearman_cosine": 0.2763126890222426, "step": 104500 }, { "epoch": 0.5190260106177892, "grad_norm": 0.33097535371780396, "learning_rate": 1.7410260657589924e-05, "loss": 0.0435, "step": 105000 }, { "epoch": 0.5190260106177892, "eval_runtime": 347.4137, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4774125842128709, "eval_sts_eval_spearman_cosine": 0.2779317632393117, "step": 105000 }, { "epoch": 0.5214975630493025, "grad_norm": 0.15432271361351013, "learning_rate": 1.7397899015271576e-05, "loss": 0.0434, "step": 105500 }, { "epoch": 0.5214975630493025, "eval_runtime": 346.1339, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4812145491411003, "eval_sts_eval_spearman_cosine": 0.2763383745550038, "step": 105500 }, { "epoch": 0.5239691154808158, "grad_norm": 0.21999186277389526, "learning_rate": 1.7385537372953224e-05, "loss": 0.0426, "step": 106000 }, { "epoch": 0.5239691154808158, "eval_runtime": 350.3504, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47655930886678244, "eval_sts_eval_spearman_cosine": 0.2743768022532169, "step": 106000 }, { "epoch": 0.5264406679123291, "grad_norm": 0.6331019401550293, "learning_rate": 1.737317573063487e-05, "loss": 0.0442, "step": 106500 }, { "epoch": 0.5264406679123291, "eval_runtime": 343.3127, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4769005799248368, "eval_sts_eval_spearman_cosine": 0.2748487642392616, "step": 106500 }, { "epoch": 0.5289122203438423, "grad_norm": 0.5318840146064758, "learning_rate": 1.7360814088316517e-05, "loss": 0.0468, "step": 107000 }, { "epoch": 0.5289122203438423, "eval_runtime": 344.6121, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47679404922194163, "eval_sts_eval_spearman_cosine": 0.2743102043714479, "step": 107000 }, { "epoch": 0.5313837727753556, "grad_norm": 0.5366224646568298, "learning_rate": 1.734845244599817e-05, "loss": 0.0428, "step": 107500 }, { "epoch": 0.5313837727753556, "eval_runtime": 346.9276, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4713599885178066, "eval_sts_eval_spearman_cosine": 0.2684253022599424, "step": 107500 }, { "epoch": 0.533855325206869, "grad_norm": 0.32612520456314087, "learning_rate": 1.7336090803679813e-05, "loss": 0.042, "step": 108000 }, { "epoch": 0.533855325206869, "eval_runtime": 351.8722, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47639094346776945, "eval_sts_eval_spearman_cosine": 0.27194295428534837, "step": 108000 }, { "epoch": 0.5363268776383823, "grad_norm": 0.6419724822044373, "learning_rate": 1.732372916136146e-05, "loss": 0.0435, "step": 108500 }, { "epoch": 0.5363268776383823, "eval_runtime": 349.7295, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4772499149476767, "eval_sts_eval_spearman_cosine": 0.27607806136571705, "step": 108500 }, { "epoch": 0.5387984300698955, "grad_norm": 0.3064916133880615, "learning_rate": 1.7311367519043113e-05, "loss": 0.0438, "step": 109000 }, { "epoch": 0.5387984300698955, "eval_runtime": 352.6395, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4745617692148967, "eval_sts_eval_spearman_cosine": 0.2765052061261612, "step": 109000 }, { "epoch": 0.5412699825014088, "grad_norm": 0.3791288435459137, "learning_rate": 1.729900587672476e-05, "loss": 0.0442, "step": 109500 }, { "epoch": 0.5412699825014088, "eval_runtime": 357.7868, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4721537925917805, "eval_sts_eval_spearman_cosine": 0.2730974199248271, "step": 109500 }, { "epoch": 0.5437415349329221, "grad_norm": 0.7005060315132141, "learning_rate": 1.7286644234406406e-05, "loss": 0.0443, "step": 110000 }, { "epoch": 0.5437415349329221, "eval_runtime": 365.9416, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4681211774953873, "eval_sts_eval_spearman_cosine": 0.2718547350610707, "step": 110000 }, { "epoch": 0.5462130873644353, "grad_norm": 0.4645637273788452, "learning_rate": 1.7274282592088057e-05, "loss": 0.0411, "step": 110500 }, { "epoch": 0.5462130873644353, "eval_runtime": 354.6244, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4705518795834941, "eval_sts_eval_spearman_cosine": 0.2720621303110948, "step": 110500 }, { "epoch": 0.5486846397959486, "grad_norm": 0.3828817903995514, "learning_rate": 1.7261920949769705e-05, "loss": 0.0431, "step": 111000 }, { "epoch": 0.5486846397959486, "eval_runtime": 354.1812, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47778642253275416, "eval_sts_eval_spearman_cosine": 0.27521758826409737, "step": 111000 }, { "epoch": 0.5511561922274619, "grad_norm": 0.1714819222688675, "learning_rate": 1.724955930745135e-05, "loss": 0.0435, "step": 111500 }, { "epoch": 0.5511561922274619, "eval_runtime": 353.1025, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.473374245464836, "eval_sts_eval_spearman_cosine": 0.27425478670083997, "step": 111500 }, { "epoch": 0.5536277446589752, "grad_norm": 0.5929749608039856, "learning_rate": 1.7237197665133e-05, "loss": 0.0444, "step": 112000 }, { "epoch": 0.5536277446589752, "eval_runtime": 353.0782, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47090576911868537, "eval_sts_eval_spearman_cosine": 0.27009417050488094, "step": 112000 }, { "epoch": 0.5560992970904884, "grad_norm": 0.31440436840057373, "learning_rate": 1.722483602281465e-05, "loss": 0.0423, "step": 112500 }, { "epoch": 0.5560992970904884, "eval_runtime": 350.6381, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4720633929586935, "eval_sts_eval_spearman_cosine": 0.27200179555180753, "step": 112500 }, { "epoch": 0.5585708495220018, "grad_norm": 0.3018611967563629, "learning_rate": 1.7212474380496294e-05, "loss": 0.0446, "step": 113000 }, { "epoch": 0.5585708495220018, "eval_runtime": 357.9762, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47368691342759917, "eval_sts_eval_spearman_cosine": 0.27279901647553306, "step": 113000 }, { "epoch": 0.5610424019535151, "grad_norm": 0.4143754839897156, "learning_rate": 1.7200112738177946e-05, "loss": 0.042, "step": 113500 }, { "epoch": 0.5610424019535151, "eval_runtime": 353.2197, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4758254176707042, "eval_sts_eval_spearman_cosine": 0.2734399104263918, "step": 113500 }, { "epoch": 0.5635139543850283, "grad_norm": 0.5375134348869324, "learning_rate": 1.7187751095859594e-05, "loss": 0.0438, "step": 114000 }, { "epoch": 0.5635139543850283, "eval_runtime": 353.6461, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47648808465754533, "eval_sts_eval_spearman_cosine": 0.27628418499613566, "step": 114000 }, { "epoch": 0.5659855068165416, "grad_norm": 0.20166069269180298, "learning_rate": 1.7175389453541242e-05, "loss": 0.0412, "step": 114500 }, { "epoch": 0.5659855068165416, "eval_runtime": 357.0724, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47310592133809937, "eval_sts_eval_spearman_cosine": 0.27735204314775597, "step": 114500 }, { "epoch": 0.5684570592480549, "grad_norm": 0.596878170967102, "learning_rate": 1.716302781122289e-05, "loss": 0.0417, "step": 115000 }, { "epoch": 0.5684570592480549, "eval_runtime": 348.6849, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47789215864446555, "eval_sts_eval_spearman_cosine": 0.27866297440974647, "step": 115000 }, { "epoch": 0.5709286116795682, "grad_norm": 0.7349388599395752, "learning_rate": 1.7150666168904538e-05, "loss": 0.0448, "step": 115500 }, { "epoch": 0.5709286116795682, "eval_runtime": 370.4334, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47537636134056893, "eval_sts_eval_spearman_cosine": 0.2774549132277889, "step": 115500 }, { "epoch": 0.5734001641110814, "grad_norm": 0.28028154373168945, "learning_rate": 1.7138304526586186e-05, "loss": 0.0446, "step": 116000 }, { "epoch": 0.5734001641110814, "eval_runtime": 360.3886, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47051430099347447, "eval_sts_eval_spearman_cosine": 0.27546678922904744, "step": 116000 }, { "epoch": 0.5758717165425947, "grad_norm": 0.2971450686454773, "learning_rate": 1.7125942884267834e-05, "loss": 0.0419, "step": 116500 }, { "epoch": 0.5758717165425947, "eval_runtime": 356.864, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4773068802978553, "eval_sts_eval_spearman_cosine": 0.27684638606894435, "step": 116500 }, { "epoch": 0.578343268974108, "grad_norm": 0.3874066174030304, "learning_rate": 1.7113581241949482e-05, "loss": 0.0435, "step": 117000 }, { "epoch": 0.578343268974108, "eval_runtime": 350.3127, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4738501974219631, "eval_sts_eval_spearman_cosine": 0.2752639462941418, "step": 117000 }, { "epoch": 0.5808148214056214, "grad_norm": 0.43897515535354614, "learning_rate": 1.710121959963113e-05, "loss": 0.0451, "step": 117500 }, { "epoch": 0.5808148214056214, "eval_runtime": 351.5572, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47107801861270926, "eval_sts_eval_spearman_cosine": 0.27471615796269955, "step": 117500 }, { "epoch": 0.5832863738371346, "grad_norm": 0.4214724898338318, "learning_rate": 1.708885795731278e-05, "loss": 0.0439, "step": 118000 }, { "epoch": 0.5832863738371346, "eval_runtime": 348.212, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4742046989414999, "eval_sts_eval_spearman_cosine": 0.2767745994400087, "step": 118000 }, { "epoch": 0.5857579262686479, "grad_norm": 0.34276074171066284, "learning_rate": 1.7076496314994427e-05, "loss": 0.0438, "step": 118500 }, { "epoch": 0.5857579262686479, "eval_runtime": 360.3665, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4749690465131123, "eval_sts_eval_spearman_cosine": 0.27733540458955797, "step": 118500 }, { "epoch": 0.5882294787001612, "grad_norm": 0.3796885907649994, "learning_rate": 1.7064134672676075e-05, "loss": 0.043, "step": 119000 }, { "epoch": 0.5882294787001612, "eval_runtime": 361.6953, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4743958702070106, "eval_sts_eval_spearman_cosine": 0.27697371975849006, "step": 119000 }, { "epoch": 0.5907010311316744, "grad_norm": 0.3283519446849823, "learning_rate": 1.7051773030357723e-05, "loss": 0.0458, "step": 119500 }, { "epoch": 0.5907010311316744, "eval_runtime": 354.4337, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47551764666089147, "eval_sts_eval_spearman_cosine": 0.27362045817756925, "step": 119500 }, { "epoch": 0.5931725835631877, "grad_norm": 0.6150197982788086, "learning_rate": 1.703941138803937e-05, "loss": 0.0419, "step": 120000 }, { "epoch": 0.5931725835631877, "eval_runtime": 350.4075, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47440642807831124, "eval_sts_eval_spearman_cosine": 0.274634556845175, "step": 120000 }, { "epoch": 0.595644135994701, "grad_norm": 0.3073731064796448, "learning_rate": 1.702704974572102e-05, "loss": 0.0441, "step": 120500 }, { "epoch": 0.595644135994701, "eval_runtime": 356.5712, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47685837338179815, "eval_sts_eval_spearman_cosine": 0.2730438042231458, "step": 120500 }, { "epoch": 0.5981156884262143, "grad_norm": 0.4049532115459442, "learning_rate": 1.7014688103402667e-05, "loss": 0.0425, "step": 121000 }, { "epoch": 0.5981156884262143, "eval_runtime": 354.7946, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4767033050063385, "eval_sts_eval_spearman_cosine": 0.27522264047358447, "step": 121000 }, { "epoch": 0.6005872408577275, "grad_norm": 0.27307021617889404, "learning_rate": 1.7002326461084315e-05, "loss": 0.0414, "step": 121500 }, { "epoch": 0.6005872408577275, "eval_runtime": 352.2369, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47174638937736557, "eval_sts_eval_spearman_cosine": 0.2733033612847581, "step": 121500 }, { "epoch": 0.6030587932892408, "grad_norm": 0.19267737865447998, "learning_rate": 1.6989964818765963e-05, "loss": 0.0407, "step": 122000 }, { "epoch": 0.6030587932892408, "eval_runtime": 357.4407, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47351730871544534, "eval_sts_eval_spearman_cosine": 0.2732189691675332, "step": 122000 }, { "epoch": 0.6055303457207541, "grad_norm": 0.3801515996456146, "learning_rate": 1.697760317644761e-05, "loss": 0.0445, "step": 122500 }, { "epoch": 0.6055303457207541, "eval_runtime": 358.1247, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47165910616123774, "eval_sts_eval_spearman_cosine": 0.2731844125330328, "step": 122500 }, { "epoch": 0.6080018981522674, "grad_norm": 0.22224166989326477, "learning_rate": 1.696524153412926e-05, "loss": 0.0434, "step": 123000 }, { "epoch": 0.6080018981522674, "eval_runtime": 354.0787, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47400636715403816, "eval_sts_eval_spearman_cosine": 0.27134460123985926, "step": 123000 }, { "epoch": 0.6104734505837807, "grad_norm": 1.1925078630447388, "learning_rate": 1.6952879891810908e-05, "loss": 0.0439, "step": 123500 }, { "epoch": 0.6104734505837807, "eval_runtime": 357.0677, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47002522241205036, "eval_sts_eval_spearman_cosine": 0.27203670302352057, "step": 123500 }, { "epoch": 0.612945003015294, "grad_norm": 0.7482672929763794, "learning_rate": 1.6940518249492556e-05, "loss": 0.0438, "step": 124000 }, { "epoch": 0.612945003015294, "eval_runtime": 350.015, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47121754709945546, "eval_sts_eval_spearman_cosine": 0.27610912826012535, "step": 124000 }, { "epoch": 0.6154165554468073, "grad_norm": 0.506924033164978, "learning_rate": 1.6928156607174204e-05, "loss": 0.0418, "step": 124500 }, { "epoch": 0.6154165554468073, "eval_runtime": 355.6755, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4713879732548388, "eval_sts_eval_spearman_cosine": 0.27544190518233325, "step": 124500 }, { "epoch": 0.6178881078783205, "grad_norm": 0.7116819620132446, "learning_rate": 1.6915794964855852e-05, "loss": 0.0423, "step": 125000 }, { "epoch": 0.6178881078783205, "eval_runtime": 345.5613, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46918085234439666, "eval_sts_eval_spearman_cosine": 0.276273314715805, "step": 125000 }, { "epoch": 0.6203596603098338, "grad_norm": 0.33908677101135254, "learning_rate": 1.69034333225375e-05, "loss": 0.0426, "step": 125500 }, { "epoch": 0.6203596603098338, "eval_runtime": 347.5763, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4741948869312397, "eval_sts_eval_spearman_cosine": 0.277327910535926, "step": 125500 }, { "epoch": 0.6228312127413471, "grad_norm": 0.38708099722862244, "learning_rate": 1.6891071680219148e-05, "loss": 0.0448, "step": 126000 }, { "epoch": 0.6228312127413471, "eval_runtime": 357.9942, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46728430633479723, "eval_sts_eval_spearman_cosine": 0.2732222426400965, "step": 126000 }, { "epoch": 0.6253027651728604, "grad_norm": 0.3043486177921295, "learning_rate": 1.6878710037900796e-05, "loss": 0.0424, "step": 126500 }, { "epoch": 0.6253027651728604, "eval_runtime": 343.9587, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4668242598107478, "eval_sts_eval_spearman_cosine": 0.26913826604327384, "step": 126500 }, { "epoch": 0.6277743176043736, "grad_norm": 0.5621631741523743, "learning_rate": 1.6866348395582445e-05, "loss": 0.0451, "step": 127000 }, { "epoch": 0.6277743176043736, "eval_runtime": 356.7019, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47606892133555256, "eval_sts_eval_spearman_cosine": 0.2759886978958406, "step": 127000 }, { "epoch": 0.630245870035887, "grad_norm": 0.21794866025447845, "learning_rate": 1.6853986753264093e-05, "loss": 0.0437, "step": 127500 }, { "epoch": 0.630245870035887, "eval_runtime": 350.5451, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47389231138966403, "eval_sts_eval_spearman_cosine": 0.2712984811174458, "step": 127500 }, { "epoch": 0.6327174224674003, "grad_norm": 0.3935137093067169, "learning_rate": 1.684162511094574e-05, "loss": 0.0429, "step": 128000 }, { "epoch": 0.6327174224674003, "eval_runtime": 349.4843, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47186889523742714, "eval_sts_eval_spearman_cosine": 0.26897313626666514, "step": 128000 }, { "epoch": 0.6351889748989135, "grad_norm": 0.3976909816265106, "learning_rate": 1.682926346862739e-05, "loss": 0.0439, "step": 128500 }, { "epoch": 0.6351889748989135, "eval_runtime": 363.7196, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47510241004720527, "eval_sts_eval_spearman_cosine": 0.2690851994496032, "step": 128500 }, { "epoch": 0.6376605273304268, "grad_norm": 0.4250900149345398, "learning_rate": 1.6816901826309037e-05, "loss": 0.0442, "step": 129000 }, { "epoch": 0.6376605273304268, "eval_runtime": 364.1594, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4800557397740977, "eval_sts_eval_spearman_cosine": 0.2768933309161896, "step": 129000 }, { "epoch": 0.6401320797619401, "grad_norm": 0.17380313575267792, "learning_rate": 1.6804540183990685e-05, "loss": 0.042, "step": 129500 }, { "epoch": 0.6401320797619401, "eval_runtime": 355.1295, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48064096107538556, "eval_sts_eval_spearman_cosine": 0.2749553196086838, "step": 129500 }, { "epoch": 0.6426036321934534, "grad_norm": 0.3765636384487152, "learning_rate": 1.6792178541672337e-05, "loss": 0.0462, "step": 130000 }, { "epoch": 0.6426036321934534, "eval_runtime": 353.5088, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4783103111283838, "eval_sts_eval_spearman_cosine": 0.2730936391754123, "step": 130000 }, { "epoch": 0.6450751846249666, "grad_norm": 0.40914735198020935, "learning_rate": 1.677981689935398e-05, "loss": 0.043, "step": 130500 }, { "epoch": 0.6450751846249666, "eval_runtime": 358.5396, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4753525233061729, "eval_sts_eval_spearman_cosine": 0.2750321706177921, "step": 130500 }, { "epoch": 0.6475467370564799, "grad_norm": 0.5730130672454834, "learning_rate": 1.676745525703563e-05, "loss": 0.0445, "step": 131000 }, { "epoch": 0.6475467370564799, "eval_runtime": 354.923, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47738624078287145, "eval_sts_eval_spearman_cosine": 0.27704348353465424, "step": 131000 }, { "epoch": 0.6500182894879932, "grad_norm": 0.26606285572052, "learning_rate": 1.675509361471728e-05, "loss": 0.0424, "step": 131500 }, { "epoch": 0.6500182894879932, "eval_runtime": 357.185, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4754174736681618, "eval_sts_eval_spearman_cosine": 0.2753348281865302, "step": 131500 }, { "epoch": 0.6524898419195064, "grad_norm": 0.30445486307144165, "learning_rate": 1.6742731972398926e-05, "loss": 0.0451, "step": 132000 }, { "epoch": 0.6524898419195064, "eval_runtime": 351.0032, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4752834298354018, "eval_sts_eval_spearman_cosine": 0.27340534370153263, "step": 132000 }, { "epoch": 0.6549613943510197, "grad_norm": 0.4701381325721741, "learning_rate": 1.6730370330080574e-05, "loss": 0.0453, "step": 132500 }, { "epoch": 0.6549613943510197, "eval_runtime": 352.3515, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4785094905361482, "eval_sts_eval_spearman_cosine": 0.2768395773970143, "step": 132500 }, { "epoch": 0.6574329467825331, "grad_norm": 0.3265584409236908, "learning_rate": 1.6718008687762222e-05, "loss": 0.0453, "step": 133000 }, { "epoch": 0.6574329467825331, "eval_runtime": 349.4095, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4817755272107253, "eval_sts_eval_spearman_cosine": 0.28203421604530277, "step": 133000 }, { "epoch": 0.6599044992140464, "grad_norm": 0.4641675055027008, "learning_rate": 1.670564704544387e-05, "loss": 0.0424, "step": 133500 }, { "epoch": 0.6599044992140464, "eval_runtime": 350.9989, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4834859945666823, "eval_sts_eval_spearman_cosine": 0.28206930787978296, "step": 133500 }, { "epoch": 0.6623760516455596, "grad_norm": 0.1821311116218567, "learning_rate": 1.6693285403125518e-05, "loss": 0.0446, "step": 134000 }, { "epoch": 0.6623760516455596, "eval_runtime": 346.5094, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4817876660061913, "eval_sts_eval_spearman_cosine": 0.28064886443953285, "step": 134000 }, { "epoch": 0.6648476040770729, "grad_norm": 0.30296289920806885, "learning_rate": 1.6680923760807166e-05, "loss": 0.0433, "step": 134500 }, { "epoch": 0.6648476040770729, "eval_runtime": 348.7524, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47874594480443455, "eval_sts_eval_spearman_cosine": 0.2780935581564944, "step": 134500 }, { "epoch": 0.6673191565085862, "grad_norm": 0.46374350786209106, "learning_rate": 1.6668562118488814e-05, "loss": 0.0443, "step": 135000 }, { "epoch": 0.6673191565085862, "eval_runtime": 349.867, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48013606840552253, "eval_sts_eval_spearman_cosine": 0.27919226455735185, "step": 135000 }, { "epoch": 0.6697907089400995, "grad_norm": 0.4830170273780823, "learning_rate": 1.6656200476170462e-05, "loss": 0.0447, "step": 135500 }, { "epoch": 0.6697907089400995, "eval_runtime": 364.1254, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4778147179256511, "eval_sts_eval_spearman_cosine": 0.27696030231950486, "step": 135500 }, { "epoch": 0.6722622613716127, "grad_norm": 0.303434282541275, "learning_rate": 1.664383883385211e-05, "loss": 0.0396, "step": 136000 }, { "epoch": 0.6722622613716127, "eval_runtime": 353.6391, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47189119017260983, "eval_sts_eval_spearman_cosine": 0.2718464466613731, "step": 136000 }, { "epoch": 0.674733813803126, "grad_norm": 0.22661001980304718, "learning_rate": 1.6631477191533762e-05, "loss": 0.0418, "step": 136500 }, { "epoch": 0.674733813803126, "eval_runtime": 348.5249, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4787379646367328, "eval_sts_eval_spearman_cosine": 0.2736115216638369, "step": 136500 }, { "epoch": 0.6772053662346393, "grad_norm": 0.454865425825119, "learning_rate": 1.6619115549215407e-05, "loss": 0.0428, "step": 137000 }, { "epoch": 0.6772053662346393, "eval_runtime": 349.009, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47932365799697796, "eval_sts_eval_spearman_cosine": 0.27743792199907236, "step": 137000 }, { "epoch": 0.6796769186661525, "grad_norm": 0.27750155329704285, "learning_rate": 1.6606753906897055e-05, "loss": 0.0444, "step": 137500 }, { "epoch": 0.6796769186661525, "eval_runtime": 349.7751, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4774058634158337, "eval_sts_eval_spearman_cosine": 0.2762139383537864, "step": 137500 }, { "epoch": 0.6821484710976659, "grad_norm": 0.1724577695131302, "learning_rate": 1.6594392264578706e-05, "loss": 0.0409, "step": 138000 }, { "epoch": 0.6821484710976659, "eval_runtime": 348.7992, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.46956752564496146, "eval_sts_eval_spearman_cosine": 0.27248475865607674, "step": 138000 }, { "epoch": 0.6846200235291792, "grad_norm": 0.5488813519477844, "learning_rate": 1.658203062226035e-05, "loss": 0.0429, "step": 138500 }, { "epoch": 0.6846200235291792, "eval_runtime": 361.5618, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4775566792086831, "eval_sts_eval_spearman_cosine": 0.2730840218459975, "step": 138500 }, { "epoch": 0.6870915759606925, "grad_norm": 0.38838642835617065, "learning_rate": 1.6569668979942e-05, "loss": 0.0447, "step": 139000 }, { "epoch": 0.6870915759606925, "eval_runtime": 356.0353, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4771161828761703, "eval_sts_eval_spearman_cosine": 0.2768546147214538, "step": 139000 }, { "epoch": 0.6895631283922057, "grad_norm": 0.5702659487724304, "learning_rate": 1.655730733762365e-05, "loss": 0.0454, "step": 139500 }, { "epoch": 0.6895631283922057, "eval_runtime": 348.7968, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4778773240359122, "eval_sts_eval_spearman_cosine": 0.2743791269293609, "step": 139500 }, { "epoch": 0.692034680823719, "grad_norm": 0.30302563309669495, "learning_rate": 1.6544945695305295e-05, "loss": 0.0443, "step": 140000 }, { "epoch": 0.692034680823719, "eval_runtime": 351.3301, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4846824429365133, "eval_sts_eval_spearman_cosine": 0.2822210539748703, "step": 140000 }, { "epoch": 0.6945062332552323, "grad_norm": 0.4959500730037689, "learning_rate": 1.6532584052986943e-05, "loss": 0.045, "step": 140500 }, { "epoch": 0.6945062332552323, "eval_runtime": 353.3686, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4752001783528622, "eval_sts_eval_spearman_cosine": 0.2752855172418256, "step": 140500 }, { "epoch": 0.6969777856867455, "grad_norm": 0.2711613178253174, "learning_rate": 1.6520222410668595e-05, "loss": 0.043, "step": 141000 }, { "epoch": 0.6969777856867455, "eval_runtime": 353.9564, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48345439140146595, "eval_sts_eval_spearman_cosine": 0.2780323217823716, "step": 141000 }, { "epoch": 0.6994493381182588, "grad_norm": 0.306177020072937, "learning_rate": 1.650786076835024e-05, "loss": 0.042, "step": 141500 }, { "epoch": 0.6994493381182588, "eval_runtime": 355.6927, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48267366960149294, "eval_sts_eval_spearman_cosine": 0.27647749414727496, "step": 141500 }, { "epoch": 0.7019208905497721, "grad_norm": 0.46190640330314636, "learning_rate": 1.6495499126031888e-05, "loss": 0.0427, "step": 142000 }, { "epoch": 0.7019208905497721, "eval_runtime": 352.7512, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4823986977857132, "eval_sts_eval_spearman_cosine": 0.2762322953592656, "step": 142000 }, { "epoch": 0.7043924429812854, "grad_norm": 0.2072329819202423, "learning_rate": 1.648313748371354e-05, "loss": 0.0404, "step": 142500 }, { "epoch": 0.7043924429812854, "eval_runtime": 366.6881, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4837086276156223, "eval_sts_eval_spearman_cosine": 0.2809026856104047, "step": 142500 }, { "epoch": 0.7068639954127987, "grad_norm": 0.20465226471424103, "learning_rate": 1.6470775841395187e-05, "loss": 0.045, "step": 143000 }, { "epoch": 0.7068639954127987, "eval_runtime": 356.9271, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4831013375043555, "eval_sts_eval_spearman_cosine": 0.2784365984877317, "step": 143000 }, { "epoch": 0.709335547844312, "grad_norm": 0.2385605126619339, "learning_rate": 1.6458414199076832e-05, "loss": 0.046, "step": 143500 }, { "epoch": 0.709335547844312, "eval_runtime": 353.3877, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48239207226209047, "eval_sts_eval_spearman_cosine": 0.27809957469886687, "step": 143500 }, { "epoch": 0.7118071002758253, "grad_norm": 0.28685086965560913, "learning_rate": 1.6446052556758484e-05, "loss": 0.0449, "step": 144000 }, { "epoch": 0.7118071002758253, "eval_runtime": 357.4805, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48136449201058673, "eval_sts_eval_spearman_cosine": 0.2732797632077566, "step": 144000 }, { "epoch": 0.7142786527073385, "grad_norm": 0.2466755509376526, "learning_rate": 1.643369091444013e-05, "loss": 0.0414, "step": 144500 }, { "epoch": 0.7142786527073385, "eval_runtime": 351.1781, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4827740023837088, "eval_sts_eval_spearman_cosine": 0.2736058462814855, "step": 144500 }, { "epoch": 0.7167502051388518, "grad_norm": 1.0304739475250244, "learning_rate": 1.6421329272121776e-05, "loss": 0.0472, "step": 145000 }, { "epoch": 0.7167502051388518, "eval_runtime": 356.6422, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48044812147188143, "eval_sts_eval_spearman_cosine": 0.2751432177794927, "step": 145000 }, { "epoch": 0.7192217575703651, "grad_norm": 0.6099510788917542, "learning_rate": 1.6408967629803428e-05, "loss": 0.0429, "step": 145500 }, { "epoch": 0.7192217575703651, "eval_runtime": 347.6735, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48042434287625346, "eval_sts_eval_spearman_cosine": 0.27820623525347016, "step": 145500 }, { "epoch": 0.7216933100018784, "grad_norm": 0.3327592611312866, "learning_rate": 1.6396605987485076e-05, "loss": 0.0429, "step": 146000 }, { "epoch": 0.7216933100018784, "eval_runtime": 351.8106, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48050230338662725, "eval_sts_eval_spearman_cosine": 0.2781249258464364, "step": 146000 }, { "epoch": 0.7241648624333916, "grad_norm": 0.49182677268981934, "learning_rate": 1.638424434516672e-05, "loss": 0.0446, "step": 146500 }, { "epoch": 0.7241648624333916, "eval_runtime": 347.1096, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.474296799155447, "eval_sts_eval_spearman_cosine": 0.2750337247223636, "step": 146500 }, { "epoch": 0.7266364148649049, "grad_norm": 0.40488386154174805, "learning_rate": 1.6371882702848372e-05, "loss": 0.0429, "step": 147000 }, { "epoch": 0.7266364148649049, "eval_runtime": 354.2681, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4797581799906488, "eval_sts_eval_spearman_cosine": 0.27730872770757026, "step": 147000 }, { "epoch": 0.7291079672964182, "grad_norm": 0.5303768515586853, "learning_rate": 1.635952106053002e-05, "loss": 0.0484, "step": 147500 }, { "epoch": 0.7291079672964182, "eval_runtime": 346.3275, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48146665213289364, "eval_sts_eval_spearman_cosine": 0.2807505196926688, "step": 147500 }, { "epoch": 0.7315795197279316, "grad_norm": 0.45398592948913574, "learning_rate": 1.6347159418211665e-05, "loss": 0.0439, "step": 148000 }, { "epoch": 0.7315795197279316, "eval_runtime": 348.2478, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47888269139728484, "eval_sts_eval_spearman_cosine": 0.27587552150117806, "step": 148000 }, { "epoch": 0.7340510721594448, "grad_norm": 0.16634605824947357, "learning_rate": 1.6334797775893316e-05, "loss": 0.0429, "step": 148500 }, { "epoch": 0.7340510721594448, "eval_runtime": 348.1449, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4817241532270239, "eval_sts_eval_spearman_cosine": 0.2763647893491263, "step": 148500 }, { "epoch": 0.7365226245909581, "grad_norm": 0.1774919480085373, "learning_rate": 1.6322436133574965e-05, "loss": 0.0453, "step": 149000 }, { "epoch": 0.7365226245909581, "eval_runtime": 345.1224, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.481196182119923, "eval_sts_eval_spearman_cosine": 0.2784780476362978, "step": 149000 }, { "epoch": 0.7389941770224714, "grad_norm": 0.47423601150512695, "learning_rate": 1.6310074491256613e-05, "loss": 0.043, "step": 149500 }, { "epoch": 0.7389941770224714, "eval_runtime": 346.6342, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4787825818205537, "eval_sts_eval_spearman_cosine": 0.27555525233783046, "step": 149500 }, { "epoch": 0.7414657294539846, "grad_norm": 0.2921883165836334, "learning_rate": 1.629771284893826e-05, "loss": 0.0438, "step": 150000 }, { "epoch": 0.7414657294539846, "eval_runtime": 360.9342, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48062039166981635, "eval_sts_eval_spearman_cosine": 0.27650049065284626, "step": 150000 }, { "epoch": 0.7439372818854979, "grad_norm": 0.36444535851478577, "learning_rate": 1.628535120661991e-05, "loss": 0.0446, "step": 150500 }, { "epoch": 0.7439372818854979, "eval_runtime": 356.3961, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4774976314983115, "eval_sts_eval_spearman_cosine": 0.2731187076329645, "step": 150500 }, { "epoch": 0.7464088343170112, "grad_norm": 0.30662378668785095, "learning_rate": 1.6272989564301557e-05, "loss": 0.0443, "step": 151000 }, { "epoch": 0.7464088343170112, "eval_runtime": 353.2854, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48231631205392345, "eval_sts_eval_spearman_cosine": 0.2758666632282848, "step": 151000 }, { "epoch": 0.7488803867485245, "grad_norm": 0.763900637626648, "learning_rate": 1.6260627921983205e-05, "loss": 0.0438, "step": 151500 }, { "epoch": 0.7488803867485245, "eval_runtime": 354.002, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4752564131176146, "eval_sts_eval_spearman_cosine": 0.27248349183918086, "step": 151500 }, { "epoch": 0.7513519391800377, "grad_norm": 0.3983304798603058, "learning_rate": 1.6248266279664853e-05, "loss": 0.0463, "step": 152000 }, { "epoch": 0.7513519391800377, "eval_runtime": 352.74, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4778514841006647, "eval_sts_eval_spearman_cosine": 0.2755914450635911, "step": 152000 }, { "epoch": 0.753823491611551, "grad_norm": 0.28362348675727844, "learning_rate": 1.62359046373465e-05, "loss": 0.046, "step": 152500 }, { "epoch": 0.753823491611551, "eval_runtime": 356.1546, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47936682853569745, "eval_sts_eval_spearman_cosine": 0.277400514428421, "step": 152500 }, { "epoch": 0.7562950440430644, "grad_norm": 0.3205190896987915, "learning_rate": 1.622354299502815e-05, "loss": 0.0423, "step": 153000 }, { "epoch": 0.7562950440430644, "eval_runtime": 352.8994, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4802152380447401, "eval_sts_eval_spearman_cosine": 0.27685750963246364, "step": 153000 }, { "epoch": 0.7587665964745776, "grad_norm": 0.5925592184066772, "learning_rate": 1.6211181352709798e-05, "loss": 0.0453, "step": 153500 }, { "epoch": 0.7587665964745776, "eval_runtime": 352.1803, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47893673841694023, "eval_sts_eval_spearman_cosine": 0.2752126501446172, "step": 153500 }, { "epoch": 0.7612381489060909, "grad_norm": 0.258529931306839, "learning_rate": 1.6198819710391446e-05, "loss": 0.046, "step": 154000 }, { "epoch": 0.7612381489060909, "eval_runtime": 355.9249, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4751180084909587, "eval_sts_eval_spearman_cosine": 0.2725672239915469, "step": 154000 }, { "epoch": 0.7637097013376042, "grad_norm": 0.30937460064888, "learning_rate": 1.6186458068073094e-05, "loss": 0.0432, "step": 154500 }, { "epoch": 0.7637097013376042, "eval_runtime": 354.9083, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48241532297430045, "eval_sts_eval_spearman_cosine": 0.27626330233001395, "step": 154500 }, { "epoch": 0.7661812537691175, "grad_norm": 0.4443804919719696, "learning_rate": 1.6174096425754742e-05, "loss": 0.0462, "step": 155000 }, { "epoch": 0.7661812537691175, "eval_runtime": 360.3303, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48717464771755575, "eval_sts_eval_spearman_cosine": 0.27861440659073966, "step": 155000 }, { "epoch": 0.7686528062006307, "grad_norm": 0.2178599089384079, "learning_rate": 1.616173478343639e-05, "loss": 0.0455, "step": 155500 }, { "epoch": 0.7686528062006307, "eval_runtime": 359.8296, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48369128736967226, "eval_sts_eval_spearman_cosine": 0.2775429795404087, "step": 155500 }, { "epoch": 0.771124358632144, "grad_norm": 0.21428875625133514, "learning_rate": 1.6149373141118038e-05, "loss": 0.043, "step": 156000 }, { "epoch": 0.771124358632144, "eval_runtime": 347.2659, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4834798591618791, "eval_sts_eval_spearman_cosine": 0.27829576132668243, "step": 156000 }, { "epoch": 0.7735959110636573, "grad_norm": 0.35430580377578735, "learning_rate": 1.6137011498799686e-05, "loss": 0.0442, "step": 156500 }, { "epoch": 0.7735959110636573, "eval_runtime": 361.6383, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4842239837891406, "eval_sts_eval_spearman_cosine": 0.27837275109777904, "step": 156500 }, { "epoch": 0.7760674634951706, "grad_norm": 0.2756548523902893, "learning_rate": 1.6124649856481334e-05, "loss": 0.0437, "step": 157000 }, { "epoch": 0.7760674634951706, "eval_runtime": 356.0734, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47886054825673, "eval_sts_eval_spearman_cosine": 0.2769340741175428, "step": 157000 }, { "epoch": 0.7785390159266838, "grad_norm": 0.36924052238464355, "learning_rate": 1.6112288214162982e-05, "loss": 0.044, "step": 157500 }, { "epoch": 0.7785390159266838, "eval_runtime": 354.555, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4840896418467861, "eval_sts_eval_spearman_cosine": 0.2812133448327937, "step": 157500 }, { "epoch": 0.7810105683581972, "grad_norm": 0.26707127690315247, "learning_rate": 1.609992657184463e-05, "loss": 0.0443, "step": 158000 }, { "epoch": 0.7810105683581972, "eval_runtime": 348.8358, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48473133611494834, "eval_sts_eval_spearman_cosine": 0.27970379441934345, "step": 158000 }, { "epoch": 0.7834821207897105, "grad_norm": 0.5884903073310852, "learning_rate": 1.608756492952628e-05, "loss": 0.0436, "step": 158500 }, { "epoch": 0.7834821207897105, "eval_runtime": 360.183, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.481973110257658, "eval_sts_eval_spearman_cosine": 0.2783423981707349, "step": 158500 }, { "epoch": 0.7859536732212237, "grad_norm": 0.3798254728317261, "learning_rate": 1.6075203287207927e-05, "loss": 0.0435, "step": 159000 }, { "epoch": 0.7859536732212237, "eval_runtime": 353.4645, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4874572296674145, "eval_sts_eval_spearman_cosine": 0.28473710218178966, "step": 159000 }, { "epoch": 0.788425225652737, "grad_norm": 0.6774258017539978, "learning_rate": 1.6062841644889575e-05, "loss": 0.0438, "step": 159500 }, { "epoch": 0.788425225652737, "eval_runtime": 352.2807, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48412394085916877, "eval_sts_eval_spearman_cosine": 0.2834928703387605, "step": 159500 }, { "epoch": 0.7908967780842503, "grad_norm": 0.5371294617652893, "learning_rate": 1.6050480002571223e-05, "loss": 0.0446, "step": 160000 }, { "epoch": 0.7908967780842503, "eval_runtime": 359.0288, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4788351239524581, "eval_sts_eval_spearman_cosine": 0.2814846411855885, "step": 160000 }, { "epoch": 0.7933683305157636, "grad_norm": 0.2468501329421997, "learning_rate": 1.603811836025287e-05, "loss": 0.0434, "step": 160500 }, { "epoch": 0.7933683305157636, "eval_runtime": 356.7618, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48595859468011865, "eval_sts_eval_spearman_cosine": 0.2840254955625868, "step": 160500 }, { "epoch": 0.7958398829472768, "grad_norm": 0.3102988600730896, "learning_rate": 1.602575671793452e-05, "loss": 0.0455, "step": 161000 }, { "epoch": 0.7958398829472768, "eval_runtime": 355.5722, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4840169723888127, "eval_sts_eval_spearman_cosine": 0.28329477371364664, "step": 161000 }, { "epoch": 0.7983114353787901, "grad_norm": 0.39537060260772705, "learning_rate": 1.6013395075616167e-05, "loss": 0.043, "step": 161500 }, { "epoch": 0.7983114353787901, "eval_runtime": 365.3219, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4861271377610934, "eval_sts_eval_spearman_cosine": 0.2844674508970958, "step": 161500 }, { "epoch": 0.8007829878103034, "grad_norm": 0.46431612968444824, "learning_rate": 1.6001033433297815e-05, "loss": 0.0436, "step": 162000 }, { "epoch": 0.8007829878103034, "eval_runtime": 353.7005, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.486257256659239, "eval_sts_eval_spearman_cosine": 0.2844752004495755, "step": 162000 }, { "epoch": 0.8032545402418166, "grad_norm": 0.4711023271083832, "learning_rate": 1.5988671790979463e-05, "loss": 0.0443, "step": 162500 }, { "epoch": 0.8032545402418166, "eval_runtime": 362.1103, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48338537306951634, "eval_sts_eval_spearman_cosine": 0.2823247540101375, "step": 162500 }, { "epoch": 0.80572609267333, "grad_norm": 0.2862902879714966, "learning_rate": 1.597631014866111e-05, "loss": 0.0441, "step": 163000 }, { "epoch": 0.80572609267333, "eval_runtime": 358.1417, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48291432685815194, "eval_sts_eval_spearman_cosine": 0.281182681579739, "step": 163000 }, { "epoch": 0.8081976451048433, "grad_norm": 0.4142751395702362, "learning_rate": 1.596394850634276e-05, "loss": 0.0435, "step": 163500 }, { "epoch": 0.8081976451048433, "eval_runtime": 353.3421, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48000109093555254, "eval_sts_eval_spearman_cosine": 0.27772185702864965, "step": 163500 }, { "epoch": 0.8106691975363566, "grad_norm": 0.29578280448913574, "learning_rate": 1.5951586864024408e-05, "loss": 0.0421, "step": 164000 }, { "epoch": 0.8106691975363566, "eval_runtime": 351.0756, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4788784643614026, "eval_sts_eval_spearman_cosine": 0.27399654965567477, "step": 164000 }, { "epoch": 0.8131407499678698, "grad_norm": 0.5704227089881897, "learning_rate": 1.5939225221706056e-05, "loss": 0.0437, "step": 164500 }, { "epoch": 0.8131407499678698, "eval_runtime": 353.7547, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4716137997354264, "eval_sts_eval_spearman_cosine": 0.27378063012548537, "step": 164500 }, { "epoch": 0.8156123023993831, "grad_norm": 0.4035494923591614, "learning_rate": 1.5926863579387704e-05, "loss": 0.0457, "step": 165000 }, { "epoch": 0.8156123023993831, "eval_runtime": 353.919, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47935107701735785, "eval_sts_eval_spearman_cosine": 0.2744611359896148, "step": 165000 }, { "epoch": 0.8180838548308964, "grad_norm": 0.2153516560792923, "learning_rate": 1.5914501937069352e-05, "loss": 0.0453, "step": 165500 }, { "epoch": 0.8180838548308964, "eval_runtime": 350.0689, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4852095103270528, "eval_sts_eval_spearman_cosine": 0.28147348457312, "step": 165500 }, { "epoch": 0.8205554072624097, "grad_norm": 0.17788711190223694, "learning_rate": 1.5902140294751e-05, "loss": 0.0427, "step": 166000 }, { "epoch": 0.8205554072624097, "eval_runtime": 354.8475, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48161171381224954, "eval_sts_eval_spearman_cosine": 0.27880799944756124, "step": 166000 }, { "epoch": 0.8230269596939229, "grad_norm": 0.3010837435722351, "learning_rate": 1.5889778652432648e-05, "loss": 0.045, "step": 166500 }, { "epoch": 0.8230269596939229, "eval_runtime": 353.1149, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4831672504786356, "eval_sts_eval_spearman_cosine": 0.28093595990246467, "step": 166500 }, { "epoch": 0.8254985121254362, "grad_norm": 0.35212332010269165, "learning_rate": 1.5877417010114296e-05, "loss": 0.0439, "step": 167000 }, { "epoch": 0.8254985121254362, "eval_runtime": 351.5706, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48305749960852484, "eval_sts_eval_spearman_cosine": 0.2817931546003782, "step": 167000 }, { "epoch": 0.8279700645569495, "grad_norm": 0.4728643596172333, "learning_rate": 1.5865055367795945e-05, "loss": 0.045, "step": 167500 }, { "epoch": 0.8279700645569495, "eval_runtime": 353.7284, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48234526794576216, "eval_sts_eval_spearman_cosine": 0.2794543519039015, "step": 167500 }, { "epoch": 0.8304416169884627, "grad_norm": 0.4030551314353943, "learning_rate": 1.5852693725477593e-05, "loss": 0.0422, "step": 168000 }, { "epoch": 0.8304416169884627, "eval_runtime": 367.88, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48333707883357363, "eval_sts_eval_spearman_cosine": 0.28019353055534224, "step": 168000 }, { "epoch": 0.8329131694199761, "grad_norm": 0.4918217658996582, "learning_rate": 1.584033208315924e-05, "loss": 0.0449, "step": 168500 }, { "epoch": 0.8329131694199761, "eval_runtime": 354.6742, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4823532370926877, "eval_sts_eval_spearman_cosine": 0.2782552647716708, "step": 168500 }, { "epoch": 0.8353847218514894, "grad_norm": 0.5661600232124329, "learning_rate": 1.582797044084089e-05, "loss": 0.0437, "step": 169000 }, { "epoch": 0.8353847218514894, "eval_runtime": 354.0639, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4810812330080455, "eval_sts_eval_spearman_cosine": 0.27650228946681016, "step": 169000 }, { "epoch": 0.8378562742830027, "grad_norm": 0.11885937303304672, "learning_rate": 1.5815608798522537e-05, "loss": 0.0445, "step": 169500 }, { "epoch": 0.8378562742830027, "eval_runtime": 355.004, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48364908187667605, "eval_sts_eval_spearman_cosine": 0.2787804737481844, "step": 169500 }, { "epoch": 0.8403278267145159, "grad_norm": 0.6810688376426697, "learning_rate": 1.5803247156204185e-05, "loss": 0.0419, "step": 170000 }, { "epoch": 0.8403278267145159, "eval_runtime": 361.3515, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48646268542353543, "eval_sts_eval_spearman_cosine": 0.28317459899990616, "step": 170000 }, { "epoch": 0.8427993791460292, "grad_norm": 0.21923783421516418, "learning_rate": 1.5790885513885833e-05, "loss": 0.0423, "step": 170500 }, { "epoch": 0.8427993791460292, "eval_runtime": 350.2224, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48023109121004875, "eval_sts_eval_spearman_cosine": 0.2775060941849577, "step": 170500 }, { "epoch": 0.8452709315775425, "grad_norm": 0.31895536184310913, "learning_rate": 1.577852387156748e-05, "loss": 0.0411, "step": 171000 }, { "epoch": 0.8452709315775425, "eval_runtime": 353.8542, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48595448060673885, "eval_sts_eval_spearman_cosine": 0.2803977553863038, "step": 171000 }, { "epoch": 0.8477424840090557, "grad_norm": 0.47445449233055115, "learning_rate": 1.5766162229249133e-05, "loss": 0.0437, "step": 171500 }, { "epoch": 0.8477424840090557, "eval_runtime": 349.8617, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4808313429856045, "eval_sts_eval_spearman_cosine": 0.2754943689944077, "step": 171500 }, { "epoch": 0.850214036440569, "grad_norm": 0.37713077664375305, "learning_rate": 1.5753800586930777e-05, "loss": 0.044, "step": 172000 }, { "epoch": 0.850214036440569, "eval_runtime": 351.3471, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48380373754004946, "eval_sts_eval_spearman_cosine": 0.2774187674487188, "step": 172000 }, { "epoch": 0.8526855888720823, "grad_norm": 0.2000829577445984, "learning_rate": 1.5741438944612426e-05, "loss": 0.0447, "step": 172500 }, { "epoch": 0.8526855888720823, "eval_runtime": 369.7828, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47612550383068, "eval_sts_eval_spearman_cosine": 0.27404052976639764, "step": 172500 }, { "epoch": 0.8551571413035957, "grad_norm": 0.4180271029472351, "learning_rate": 1.5729077302294077e-05, "loss": 0.0444, "step": 173000 }, { "epoch": 0.8551571413035957, "eval_runtime": 356.5521, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4823572520671673, "eval_sts_eval_spearman_cosine": 0.2757143998568574, "step": 173000 }, { "epoch": 0.8576286937351089, "grad_norm": 0.44467222690582275, "learning_rate": 1.5716715659975722e-05, "loss": 0.0419, "step": 173500 }, { "epoch": 0.8576286937351089, "eval_runtime": 350.935, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4810414360663209, "eval_sts_eval_spearman_cosine": 0.27495303162089246, "step": 173500 }, { "epoch": 0.8601002461666222, "grad_norm": 0.320420503616333, "learning_rate": 1.570435401765737e-05, "loss": 0.0461, "step": 174000 }, { "epoch": 0.8601002461666222, "eval_runtime": 353.5673, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48111918816854327, "eval_sts_eval_spearman_cosine": 0.2743493217578695, "step": 174000 }, { "epoch": 0.8625717985981355, "grad_norm": 0.24482694268226624, "learning_rate": 1.569199237533902e-05, "loss": 0.0455, "step": 174500 }, { "epoch": 0.8625717985981355, "eval_runtime": 350.9825, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4831021349697924, "eval_sts_eval_spearman_cosine": 0.2761126510883435, "step": 174500 }, { "epoch": 0.8650433510296487, "grad_norm": 0.488921582698822, "learning_rate": 1.5679630733020666e-05, "loss": 0.042, "step": 175000 }, { "epoch": 0.8650433510296487, "eval_runtime": 359.3337, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4804747995959108, "eval_sts_eval_spearman_cosine": 0.27447131272224484, "step": 175000 }, { "epoch": 0.867514903461162, "grad_norm": 0.19869692623615265, "learning_rate": 1.5667269090702314e-05, "loss": 0.0466, "step": 175500 }, { "epoch": 0.867514903461162, "eval_runtime": 348.5777, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4820153442980401, "eval_sts_eval_spearman_cosine": 0.2757074846224293, "step": 175500 }, { "epoch": 0.8699864558926753, "grad_norm": 0.6080918312072754, "learning_rate": 1.5654907448383966e-05, "loss": 0.0439, "step": 176000 }, { "epoch": 0.8699864558926753, "eval_runtime": 359.3727, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4836276498121003, "eval_sts_eval_spearman_cosine": 0.27443700226529744, "step": 176000 }, { "epoch": 0.8724580083241886, "grad_norm": 0.3558516800403595, "learning_rate": 1.5642545806065614e-05, "loss": 0.0423, "step": 176500 }, { "epoch": 0.8724580083241886, "eval_runtime": 350.9028, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4839601276939695, "eval_sts_eval_spearman_cosine": 0.27708217383154315, "step": 176500 }, { "epoch": 0.8749295607557018, "grad_norm": 0.24701793491840363, "learning_rate": 1.563018416374726e-05, "loss": 0.0438, "step": 177000 }, { "epoch": 0.8749295607557018, "eval_runtime": 361.3415, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4810765426812144, "eval_sts_eval_spearman_cosine": 0.27234030164676715, "step": 177000 }, { "epoch": 0.8774011131872151, "grad_norm": 0.4917159676551819, "learning_rate": 1.561782252142891e-05, "loss": 0.0438, "step": 177500 }, { "epoch": 0.8774011131872151, "eval_runtime": 354.1616, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48141021514805704, "eval_sts_eval_spearman_cosine": 0.2770835710191359, "step": 177500 }, { "epoch": 0.8798726656187285, "grad_norm": 0.3336072564125061, "learning_rate": 1.5605460879110558e-05, "loss": 0.0417, "step": 178000 }, { "epoch": 0.8798726656187285, "eval_runtime": 345.174, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4842368321163971, "eval_sts_eval_spearman_cosine": 0.2777268304306239, "step": 178000 }, { "epoch": 0.8823442180502418, "grad_norm": 0.5589418411254883, "learning_rate": 1.5593099236792203e-05, "loss": 0.044, "step": 178500 }, { "epoch": 0.8823442180502418, "eval_runtime": 354.5013, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4856300339192928, "eval_sts_eval_spearman_cosine": 0.2780074330908306, "step": 178500 }, { "epoch": 0.884815770481755, "grad_norm": 0.5769656896591187, "learning_rate": 1.5580737594473854e-05, "loss": 0.0426, "step": 179000 }, { "epoch": 0.884815770481755, "eval_runtime": 350.6325, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48322422984759716, "eval_sts_eval_spearman_cosine": 0.2746212728547049, "step": 179000 }, { "epoch": 0.8872873229132683, "grad_norm": 0.49219727516174316, "learning_rate": 1.5568375952155502e-05, "loss": 0.0446, "step": 179500 }, { "epoch": 0.8872873229132683, "eval_runtime": 356.3307, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48435045914053587, "eval_sts_eval_spearman_cosine": 0.27578940020350107, "step": 179500 }, { "epoch": 0.8897588753447816, "grad_norm": 0.2873639166355133, "learning_rate": 1.5556014309837147e-05, "loss": 0.0451, "step": 180000 }, { "epoch": 0.8897588753447816, "eval_runtime": 349.9279, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48365705773927453, "eval_sts_eval_spearman_cosine": 0.27670281614439496, "step": 180000 }, { "epoch": 0.8922304277762948, "grad_norm": 0.4848998188972473, "learning_rate": 1.55436526675188e-05, "loss": 0.0432, "step": 180500 }, { "epoch": 0.8922304277762948, "eval_runtime": 361.776, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4868306203962824, "eval_sts_eval_spearman_cosine": 0.2770082458573038, "step": 180500 }, { "epoch": 0.8947019802078081, "grad_norm": 0.2671701908111572, "learning_rate": 1.5531291025200447e-05, "loss": 0.0425, "step": 181000 }, { "epoch": 0.8947019802078081, "eval_runtime": 349.8905, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4853832642217757, "eval_sts_eval_spearman_cosine": 0.27488297415690577, "step": 181000 }, { "epoch": 0.8971735326393214, "grad_norm": 0.1412971019744873, "learning_rate": 1.551892938288209e-05, "loss": 0.0447, "step": 181500 }, { "epoch": 0.8971735326393214, "eval_runtime": 352.1754, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4790821579628606, "eval_sts_eval_spearman_cosine": 0.27583051524204666, "step": 181500 }, { "epoch": 0.8996450850708347, "grad_norm": 0.36510348320007324, "learning_rate": 1.5506567740563743e-05, "loss": 0.0422, "step": 182000 }, { "epoch": 0.8996450850708347, "eval_runtime": 358.4574, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4848606703778922, "eval_sts_eval_spearman_cosine": 0.2797740395513707, "step": 182000 }, { "epoch": 0.9021166375023479, "grad_norm": 0.44640928506851196, "learning_rate": 1.549420609824539e-05, "loss": 0.045, "step": 182500 }, { "epoch": 0.9021166375023479, "eval_runtime": 350.8195, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4893319367133637, "eval_sts_eval_spearman_cosine": 0.27892531273198756, "step": 182500 }, { "epoch": 0.9045881899338613, "grad_norm": 0.6036549806594849, "learning_rate": 1.548184445592704e-05, "loss": 0.044, "step": 183000 }, { "epoch": 0.9045881899338613, "eval_runtime": 352.0295, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48865679894831415, "eval_sts_eval_spearman_cosine": 0.27864935026290305, "step": 183000 }, { "epoch": 0.9070597423653746, "grad_norm": 0.7666980624198914, "learning_rate": 1.5469482813608684e-05, "loss": 0.0436, "step": 183500 }, { "epoch": 0.9070597423653746, "eval_runtime": 352.7254, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4875518553171948, "eval_sts_eval_spearman_cosine": 0.2781325025217399, "step": 183500 }, { "epoch": 0.9095312947968878, "grad_norm": 0.5617075562477112, "learning_rate": 1.5457121171290335e-05, "loss": 0.046, "step": 184000 }, { "epoch": 0.9095312947968878, "eval_runtime": 351.7145, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48852702278166804, "eval_sts_eval_spearman_cosine": 0.2776914760584351, "step": 184000 }, { "epoch": 0.9120028472284011, "grad_norm": 0.5808592438697815, "learning_rate": 1.5444759528971984e-05, "loss": 0.0443, "step": 184500 }, { "epoch": 0.9120028472284011, "eval_runtime": 354.6022, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48699771875394404, "eval_sts_eval_spearman_cosine": 0.2773118932679343, "step": 184500 }, { "epoch": 0.9144743996599144, "grad_norm": 0.4502893388271332, "learning_rate": 1.5432397886653628e-05, "loss": 0.0445, "step": 185000 }, { "epoch": 0.9144743996599144, "eval_runtime": 350.7683, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4857649971477108, "eval_sts_eval_spearman_cosine": 0.2752740244347658, "step": 185000 }, { "epoch": 0.9169459520914277, "grad_norm": 0.2359243631362915, "learning_rate": 1.542003624433528e-05, "loss": 0.043, "step": 185500 }, { "epoch": 0.9169459520914277, "eval_runtime": 351.8687, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48804461577673686, "eval_sts_eval_spearman_cosine": 0.2767089006487716, "step": 185500 }, { "epoch": 0.9194175045229409, "grad_norm": 0.1460631638765335, "learning_rate": 1.5407674602016928e-05, "loss": 0.0454, "step": 186000 }, { "epoch": 0.9194175045229409, "eval_runtime": 353.7367, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4844943144419428, "eval_sts_eval_spearman_cosine": 0.27430901484219716, "step": 186000 }, { "epoch": 0.9218890569544542, "grad_norm": 0.1740436553955078, "learning_rate": 1.5395312959698573e-05, "loss": 0.0433, "step": 186500 }, { "epoch": 0.9218890569544542, "eval_runtime": 360.8371, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48676066617022984, "eval_sts_eval_spearman_cosine": 0.27754437511566243, "step": 186500 }, { "epoch": 0.9243606093859675, "grad_norm": 0.2961822748184204, "learning_rate": 1.5382951317380224e-05, "loss": 0.0443, "step": 187000 }, { "epoch": 0.9243606093859675, "eval_runtime": 348.4783, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4888586006321719, "eval_sts_eval_spearman_cosine": 0.2775395114133613, "step": 187000 }, { "epoch": 0.9268321618174808, "grad_norm": 0.5247482061386108, "learning_rate": 1.5370589675061872e-05, "loss": 0.0432, "step": 187500 }, { "epoch": 0.9268321618174808, "eval_runtime": 359.2449, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4854807347380232, "eval_sts_eval_spearman_cosine": 0.27654282151531956, "step": 187500 }, { "epoch": 0.929303714248994, "grad_norm": 0.3431486487388611, "learning_rate": 1.5358228032743517e-05, "loss": 0.0434, "step": 188000 }, { "epoch": 0.929303714248994, "eval_runtime": 351.5102, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4851395982139297, "eval_sts_eval_spearman_cosine": 0.27926092999044083, "step": 188000 }, { "epoch": 0.9317752666805074, "grad_norm": 0.21631205081939697, "learning_rate": 1.534586639042517e-05, "loss": 0.0463, "step": 188500 }, { "epoch": 0.9317752666805074, "eval_runtime": 352.5934, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48676838104800646, "eval_sts_eval_spearman_cosine": 0.28007444159996925, "step": 188500 }, { "epoch": 0.9342468191120207, "grad_norm": 0.2833181917667389, "learning_rate": 1.5333504748106816e-05, "loss": 0.0439, "step": 189000 }, { "epoch": 0.9342468191120207, "eval_runtime": 357.6268, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48510605494174464, "eval_sts_eval_spearman_cosine": 0.2795046951486951, "step": 189000 }, { "epoch": 0.9367183715435339, "grad_norm": 0.1693524420261383, "learning_rate": 1.5321143105788465e-05, "loss": 0.0423, "step": 189500 }, { "epoch": 0.9367183715435339, "eval_runtime": 352.1207, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48972879188105933, "eval_sts_eval_spearman_cosine": 0.2812036497390371, "step": 189500 }, { "epoch": 0.9391899239750472, "grad_norm": 0.38071319460868835, "learning_rate": 1.5308781463470113e-05, "loss": 0.0441, "step": 190000 }, { "epoch": 0.9391899239750472, "eval_runtime": 354.6811, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48589271912110843, "eval_sts_eval_spearman_cosine": 0.2768248485559499, "step": 190000 }, { "epoch": 0.9416614764065605, "grad_norm": 0.7885337471961975, "learning_rate": 1.529641982115176e-05, "loss": 0.0446, "step": 190500 }, { "epoch": 0.9416614764065605, "eval_runtime": 359.0813, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48274001601179634, "eval_sts_eval_spearman_cosine": 0.2754070839753513, "step": 190500 }, { "epoch": 0.9441330288380738, "grad_norm": 0.37679627537727356, "learning_rate": 1.528405817883341e-05, "loss": 0.0436, "step": 191000 }, { "epoch": 0.9441330288380738, "eval_runtime": 363.8641, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4875010848448704, "eval_sts_eval_spearman_cosine": 0.2814489909053816, "step": 191000 }, { "epoch": 0.946604581269587, "grad_norm": 0.2987896502017975, "learning_rate": 1.5271696536515057e-05, "loss": 0.045, "step": 191500 }, { "epoch": 0.946604581269587, "eval_runtime": 350.1708, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48731008291929906, "eval_sts_eval_spearman_cosine": 0.2795376070167327, "step": 191500 }, { "epoch": 0.9490761337011003, "grad_norm": 0.49409791827201843, "learning_rate": 1.5259334894196705e-05, "loss": 0.0445, "step": 192000 }, { "epoch": 0.9490761337011003, "eval_runtime": 356.2028, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4817091910355563, "eval_sts_eval_spearman_cosine": 0.2794496699681267, "step": 192000 }, { "epoch": 0.9515476861326136, "grad_norm": 0.16187487542629242, "learning_rate": 1.5246973251878352e-05, "loss": 0.0429, "step": 192500 }, { "epoch": 0.9515476861326136, "eval_runtime": 354.7445, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48855852786317133, "eval_sts_eval_spearman_cosine": 0.28269655941200594, "step": 192500 }, { "epoch": 0.9540192385641268, "grad_norm": 0.30470192432403564, "learning_rate": 1.5234611609560001e-05, "loss": 0.043, "step": 193000 }, { "epoch": 0.9540192385641268, "eval_runtime": 348.5033, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4875605557649797, "eval_sts_eval_spearman_cosine": 0.2814633276062691, "step": 193000 }, { "epoch": 0.9564907909956402, "grad_norm": 0.4054383337497711, "learning_rate": 1.522224996724165e-05, "loss": 0.0446, "step": 193500 }, { "epoch": 0.9564907909956402, "eval_runtime": 352.75, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4893620327939022, "eval_sts_eval_spearman_cosine": 0.28265963423717566, "step": 193500 }, { "epoch": 0.9589623434271535, "grad_norm": 0.576188325881958, "learning_rate": 1.5209888324923296e-05, "loss": 0.0456, "step": 194000 }, { "epoch": 0.9589623434271535, "eval_runtime": 346.069, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4852737235533939, "eval_sts_eval_spearman_cosine": 0.2821725728970222, "step": 194000 }, { "epoch": 0.9614338958586668, "grad_norm": 0.2855347692966461, "learning_rate": 1.5197526682604946e-05, "loss": 0.0406, "step": 194500 }, { "epoch": 0.9614338958586668, "eval_runtime": 351.9401, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49009876187674406, "eval_sts_eval_spearman_cosine": 0.282763012150424, "step": 194500 }, { "epoch": 0.96390544829018, "grad_norm": 0.4514446556568146, "learning_rate": 1.5185165040286594e-05, "loss": 0.0444, "step": 195000 }, { "epoch": 0.96390544829018, "eval_runtime": 343.4656, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4894141554914524, "eval_sts_eval_spearman_cosine": 0.284443079771587, "step": 195000 }, { "epoch": 0.9663770007216933, "grad_norm": 1.045225977897644, "learning_rate": 1.517280339796824e-05, "loss": 0.0448, "step": 195500 }, { "epoch": 0.9663770007216933, "eval_runtime": 348.9679, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4876594534235396, "eval_sts_eval_spearman_cosine": 0.278511672449215, "step": 195500 }, { "epoch": 0.9688485531532066, "grad_norm": 0.252397745847702, "learning_rate": 1.516044175564989e-05, "loss": 0.0427, "step": 196000 }, { "epoch": 0.9688485531532066, "eval_runtime": 345.9059, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48686594545124817, "eval_sts_eval_spearman_cosine": 0.2783949903648262, "step": 196000 }, { "epoch": 0.9713201055847199, "grad_norm": 0.7091432213783264, "learning_rate": 1.5148080113331538e-05, "loss": 0.0453, "step": 196500 }, { "epoch": 0.9713201055847199, "eval_runtime": 348.2132, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4838711304065382, "eval_sts_eval_spearman_cosine": 0.27882503944345266, "step": 196500 }, { "epoch": 0.9737916580162331, "grad_norm": 0.4237551987171173, "learning_rate": 1.5135718471013186e-05, "loss": 0.0443, "step": 197000 }, { "epoch": 0.9737916580162331, "eval_runtime": 343.9869, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4811513569118576, "eval_sts_eval_spearman_cosine": 0.27505441397681957, "step": 197000 }, { "epoch": 0.9762632104477464, "grad_norm": 0.39256131649017334, "learning_rate": 1.5123356828694836e-05, "loss": 0.0444, "step": 197500 }, { "epoch": 0.9762632104477464, "eval_runtime": 349.7757, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4853654706739, "eval_sts_eval_spearman_cosine": 0.2753627957542618, "step": 197500 }, { "epoch": 0.9787347628792598, "grad_norm": 0.6198240518569946, "learning_rate": 1.5110995186376482e-05, "loss": 0.0448, "step": 198000 }, { "epoch": 0.9787347628792598, "eval_runtime": 348.4994, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4814411768119215, "eval_sts_eval_spearman_cosine": 0.274538891697605, "step": 198000 }, { "epoch": 0.981206315310773, "grad_norm": 0.13083238899707794, "learning_rate": 1.509863354405813e-05, "loss": 0.0445, "step": 198500 }, { "epoch": 0.981206315310773, "eval_runtime": 350.3367, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48463619290275417, "eval_sts_eval_spearman_cosine": 0.2751544519161882, "step": 198500 }, { "epoch": 0.9836778677422863, "grad_norm": 0.5370303988456726, "learning_rate": 1.508627190173978e-05, "loss": 0.046, "step": 199000 }, { "epoch": 0.9836778677422863, "eval_runtime": 347.9216, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4789604627425077, "eval_sts_eval_spearman_cosine": 0.27099134997680385, "step": 199000 }, { "epoch": 0.9861494201737996, "grad_norm": 0.34955519437789917, "learning_rate": 1.5073910259421427e-05, "loss": 0.0459, "step": 199500 }, { "epoch": 0.9861494201737996, "eval_runtime": 349.7841, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48520087681593516, "eval_sts_eval_spearman_cosine": 0.27317248003665495, "step": 199500 }, { "epoch": 0.9886209726053129, "grad_norm": 0.3296341598033905, "learning_rate": 1.5061548617103075e-05, "loss": 0.0394, "step": 200000 }, { "epoch": 0.9886209726053129, "eval_runtime": 350.4619, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4848787411182726, "eval_sts_eval_spearman_cosine": 0.27292869014038024, "step": 200000 }, { "epoch": 0.9910925250368261, "grad_norm": 0.23168236017227173, "learning_rate": 1.5049186974784725e-05, "loss": 0.045, "step": 200500 }, { "epoch": 0.9910925250368261, "eval_runtime": 348.2002, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48448790338877523, "eval_sts_eval_spearman_cosine": 0.27371783340324, "step": 200500 }, { "epoch": 0.9935640774683394, "grad_norm": 0.5017878413200378, "learning_rate": 1.5036825332466371e-05, "loss": 0.0434, "step": 201000 }, { "epoch": 0.9935640774683394, "eval_runtime": 349.6117, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4881462240344604, "eval_sts_eval_spearman_cosine": 0.27534044853770423, "step": 201000 }, { "epoch": 0.9960356298998527, "grad_norm": 0.45280563831329346, "learning_rate": 1.5024463690148019e-05, "loss": 0.0465, "step": 201500 }, { "epoch": 0.9960356298998527, "eval_runtime": 344.7899, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4879919660088833, "eval_sts_eval_spearman_cosine": 0.27706113149507156, "step": 201500 }, { "epoch": 0.9985071823313659, "grad_norm": 0.42820578813552856, "learning_rate": 1.5012102047829669e-05, "loss": 0.0443, "step": 202000 }, { "epoch": 0.9985071823313659, "eval_runtime": 347.0139, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4858684883031994, "eval_sts_eval_spearman_cosine": 0.27548101752875465, "step": 202000 }, { "epoch": 1.0009787347628794, "grad_norm": 0.4900229573249817, "learning_rate": 1.4999740405511315e-05, "loss": 0.0406, "step": 202500 }, { "epoch": 1.0009787347628794, "eval_runtime": 347.4167, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48205307189910496, "eval_sts_eval_spearman_cosine": 0.2745885742266806, "step": 202500 }, { "epoch": 1.0034502871943924, "grad_norm": 0.3422326445579529, "learning_rate": 1.4987378763192963e-05, "loss": 0.0358, "step": 203000 }, { "epoch": 1.0034502871943924, "eval_runtime": 350.7214, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4866708703476673, "eval_sts_eval_spearman_cosine": 0.27513118375957296, "step": 203000 }, { "epoch": 1.0059218396259058, "grad_norm": 0.4834200441837311, "learning_rate": 1.4975017120874613e-05, "loss": 0.039, "step": 203500 }, { "epoch": 1.0059218396259058, "eval_runtime": 355.4487, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4821359092901655, "eval_sts_eval_spearman_cosine": 0.27392100909381967, "step": 203500 }, { "epoch": 1.008393392057419, "grad_norm": 0.5293706655502319, "learning_rate": 1.4962655478556261e-05, "loss": 0.0389, "step": 204000 }, { "epoch": 1.008393392057419, "eval_runtime": 349.3012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4834337911398694, "eval_sts_eval_spearman_cosine": 0.27396569969894286, "step": 204000 }, { "epoch": 1.0108649444889324, "grad_norm": 0.26698869466781616, "learning_rate": 1.4950293836237908e-05, "loss": 0.0382, "step": 204500 }, { "epoch": 1.0108649444889324, "eval_runtime": 349.4803, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48482360012823356, "eval_sts_eval_spearman_cosine": 0.2735692333175773, "step": 204500 }, { "epoch": 1.0133364969204457, "grad_norm": 0.35508427023887634, "learning_rate": 1.4937932193919558e-05, "loss": 0.0374, "step": 205000 }, { "epoch": 1.0133364969204457, "eval_runtime": 349.2045, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4841002048709283, "eval_sts_eval_spearman_cosine": 0.2714161117127289, "step": 205000 }, { "epoch": 1.015808049351959, "grad_norm": 0.21066102385520935, "learning_rate": 1.4925570551601206e-05, "loss": 0.0393, "step": 205500 }, { "epoch": 1.015808049351959, "eval_runtime": 350.3979, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48613570360008457, "eval_sts_eval_spearman_cosine": 0.2745290305358181, "step": 205500 }, { "epoch": 1.0182796017834723, "grad_norm": 0.2642689049243927, "learning_rate": 1.4913208909282852e-05, "loss": 0.0388, "step": 206000 }, { "epoch": 1.0182796017834723, "eval_runtime": 350.8236, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4868098202526381, "eval_sts_eval_spearman_cosine": 0.27588020299546784, "step": 206000 }, { "epoch": 1.0207511542149854, "grad_norm": 0.3832593262195587, "learning_rate": 1.4900847266964502e-05, "loss": 0.0398, "step": 206500 }, { "epoch": 1.0207511542149854, "eval_runtime": 350.1628, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48579560202107697, "eval_sts_eval_spearman_cosine": 0.2764651656516823, "step": 206500 }, { "epoch": 1.0232227066464987, "grad_norm": 0.21136637032032013, "learning_rate": 1.488848562464615e-05, "loss": 0.0399, "step": 207000 }, { "epoch": 1.0232227066464987, "eval_runtime": 352.089, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4856511809082517, "eval_sts_eval_spearman_cosine": 0.2771527339199119, "step": 207000 }, { "epoch": 1.025694259078012, "grad_norm": 0.4501872956752777, "learning_rate": 1.4876123982327796e-05, "loss": 0.0403, "step": 207500 }, { "epoch": 1.025694259078012, "eval_runtime": 348.9865, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48599556672533384, "eval_sts_eval_spearman_cosine": 0.2756545311991507, "step": 207500 }, { "epoch": 1.0281658115095254, "grad_norm": 0.26018786430358887, "learning_rate": 1.4863762340009444e-05, "loss": 0.0383, "step": 208000 }, { "epoch": 1.0281658115095254, "eval_runtime": 350.3283, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4884948325834411, "eval_sts_eval_spearman_cosine": 0.2786323964535252, "step": 208000 }, { "epoch": 1.0306373639410387, "grad_norm": 0.30717191100120544, "learning_rate": 1.4851400697691094e-05, "loss": 0.0376, "step": 208500 }, { "epoch": 1.0306373639410387, "eval_runtime": 353.2126, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.488440383409237, "eval_sts_eval_spearman_cosine": 0.27706797574783293, "step": 208500 }, { "epoch": 1.033108916372552, "grad_norm": 0.36191171407699585, "learning_rate": 1.483903905537274e-05, "loss": 0.0418, "step": 209000 }, { "epoch": 1.033108916372552, "eval_runtime": 353.2329, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48326397670825283, "eval_sts_eval_spearman_cosine": 0.27608890367921657, "step": 209000 }, { "epoch": 1.0355804688040653, "grad_norm": 0.8149949908256531, "learning_rate": 1.4826677413054389e-05, "loss": 0.0381, "step": 209500 }, { "epoch": 1.0355804688040653, "eval_runtime": 361.8908, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4835542224419461, "eval_sts_eval_spearman_cosine": 0.2768399872588871, "step": 209500 }, { "epoch": 1.0380520212355786, "grad_norm": 0.3276541233062744, "learning_rate": 1.4814315770736039e-05, "loss": 0.038, "step": 210000 }, { "epoch": 1.0380520212355786, "eval_runtime": 348.5501, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48343553628054775, "eval_sts_eval_spearman_cosine": 0.27609571295880986, "step": 210000 }, { "epoch": 1.0405235736670917, "grad_norm": 0.2675011456012726, "learning_rate": 1.4801954128417687e-05, "loss": 0.0386, "step": 210500 }, { "epoch": 1.0405235736670917, "eval_runtime": 358.9494, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4839813650448361, "eval_sts_eval_spearman_cosine": 0.2734967356035507, "step": 210500 }, { "epoch": 1.042995126098605, "grad_norm": 0.3917083144187927, "learning_rate": 1.4789592486099333e-05, "loss": 0.0378, "step": 211000 }, { "epoch": 1.042995126098605, "eval_runtime": 349.8485, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4846604652576775, "eval_sts_eval_spearman_cosine": 0.27683759035473876, "step": 211000 }, { "epoch": 1.0454666785301183, "grad_norm": 0.4404506981372833, "learning_rate": 1.4777230843780983e-05, "loss": 0.0389, "step": 211500 }, { "epoch": 1.0454666785301183, "eval_runtime": 358.752, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4819160700064963, "eval_sts_eval_spearman_cosine": 0.2764039063215482, "step": 211500 }, { "epoch": 1.0479382309616316, "grad_norm": 0.5715792179107666, "learning_rate": 1.4764869201462631e-05, "loss": 0.0378, "step": 212000 }, { "epoch": 1.0479382309616316, "eval_runtime": 364.7207, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4841226131031762, "eval_sts_eval_spearman_cosine": 0.2756589829089453, "step": 212000 }, { "epoch": 1.050409783393145, "grad_norm": 0.2439395934343338, "learning_rate": 1.4752507559144277e-05, "loss": 0.039, "step": 212500 }, { "epoch": 1.050409783393145, "eval_runtime": 355.1416, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4855482173603827, "eval_sts_eval_spearman_cosine": 0.27433866028167847, "step": 212500 }, { "epoch": 1.0528813358246583, "grad_norm": 0.5039857625961304, "learning_rate": 1.4740145916825927e-05, "loss": 0.0367, "step": 213000 }, { "epoch": 1.0528813358246583, "eval_runtime": 356.3108, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4859781893402894, "eval_sts_eval_spearman_cosine": 0.27492621644323223, "step": 213000 }, { "epoch": 1.0553528882561716, "grad_norm": 0.5950500965118408, "learning_rate": 1.4727784274507575e-05, "loss": 0.0394, "step": 213500 }, { "epoch": 1.0553528882561716, "eval_runtime": 350.1329, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4839083700219428, "eval_sts_eval_spearman_cosine": 0.27466401633782295, "step": 213500 }, { "epoch": 1.0578244406876847, "grad_norm": 0.38654768466949463, "learning_rate": 1.4715422632189222e-05, "loss": 0.0372, "step": 214000 }, { "epoch": 1.0578244406876847, "eval_runtime": 349.3862, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48258359751516716, "eval_sts_eval_spearman_cosine": 0.27395115601522063, "step": 214000 }, { "epoch": 1.060295993119198, "grad_norm": 0.4911021888256073, "learning_rate": 1.4703060989870872e-05, "loss": 0.039, "step": 214500 }, { "epoch": 1.060295993119198, "eval_runtime": 353.1062, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48474845737143224, "eval_sts_eval_spearman_cosine": 0.27574449954789376, "step": 214500 }, { "epoch": 1.0627675455507113, "grad_norm": 0.39190831780433655, "learning_rate": 1.469069934755252e-05, "loss": 0.0396, "step": 215000 }, { "epoch": 1.0627675455507113, "eval_runtime": 364.0323, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4882569917544888, "eval_sts_eval_spearman_cosine": 0.28130507552900413, "step": 215000 }, { "epoch": 1.0652390979822246, "grad_norm": 0.3026171028614044, "learning_rate": 1.4678337705234166e-05, "loss": 0.0403, "step": 215500 }, { "epoch": 1.0652390979822246, "eval_runtime": 360.1728, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48887922702264236, "eval_sts_eval_spearman_cosine": 0.2793945087778732, "step": 215500 }, { "epoch": 1.067710650413738, "grad_norm": 0.4823678433895111, "learning_rate": 1.4665976062915816e-05, "loss": 0.0387, "step": 216000 }, { "epoch": 1.067710650413738, "eval_runtime": 359.268, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48722223303827683, "eval_sts_eval_spearman_cosine": 0.2771371802346117, "step": 216000 }, { "epoch": 1.0701822028452512, "grad_norm": 0.23465578258037567, "learning_rate": 1.4653614420597464e-05, "loss": 0.0381, "step": 216500 }, { "epoch": 1.0701822028452512, "eval_runtime": 349.0101, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48675623336004037, "eval_sts_eval_spearman_cosine": 0.2732595586824066, "step": 216500 }, { "epoch": 1.0726537552767645, "grad_norm": 0.6707727313041687, "learning_rate": 1.4641252778279112e-05, "loss": 0.0406, "step": 217000 }, { "epoch": 1.0726537552767645, "eval_runtime": 350.0705, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4830048253368938, "eval_sts_eval_spearman_cosine": 0.27172011085592074, "step": 217000 }, { "epoch": 1.0751253077082776, "grad_norm": 0.32880496978759766, "learning_rate": 1.462889113596076e-05, "loss": 0.0408, "step": 217500 }, { "epoch": 1.0751253077082776, "eval_runtime": 347.3135, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48612566178879973, "eval_sts_eval_spearman_cosine": 0.27494253740259306, "step": 217500 }, { "epoch": 1.077596860139791, "grad_norm": 0.4801892340183258, "learning_rate": 1.4616529493642408e-05, "loss": 0.0401, "step": 218000 }, { "epoch": 1.077596860139791, "eval_runtime": 344.93, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48561872986603283, "eval_sts_eval_spearman_cosine": 0.27495814494638415, "step": 218000 }, { "epoch": 1.0800684125713043, "grad_norm": 0.4894276261329651, "learning_rate": 1.4604167851324056e-05, "loss": 0.0363, "step": 218500 }, { "epoch": 1.0800684125713043, "eval_runtime": 344.2462, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4838358185914938, "eval_sts_eval_spearman_cosine": 0.2724340631769381, "step": 218500 }, { "epoch": 1.0825399650028176, "grad_norm": 3.374525308609009, "learning_rate": 1.4591806209005706e-05, "loss": 0.0392, "step": 219000 }, { "epoch": 1.0825399650028176, "eval_runtime": 353.9176, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4864805463616196, "eval_sts_eval_spearman_cosine": 0.274527532948549, "step": 219000 }, { "epoch": 1.0850115174343309, "grad_norm": 0.2474554032087326, "learning_rate": 1.4579444566687353e-05, "loss": 0.0386, "step": 219500 }, { "epoch": 1.0850115174343309, "eval_runtime": 355.5717, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48491171301784847, "eval_sts_eval_spearman_cosine": 0.27262203308893995, "step": 219500 }, { "epoch": 1.0874830698658442, "grad_norm": 0.8335526585578918, "learning_rate": 1.4567082924369e-05, "loss": 0.0413, "step": 220000 }, { "epoch": 1.0874830698658442, "eval_runtime": 355.5962, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48474630414355296, "eval_sts_eval_spearman_cosine": 0.2741090698054634, "step": 220000 }, { "epoch": 1.0899546222973575, "grad_norm": 0.4168035686016083, "learning_rate": 1.455472128205065e-05, "loss": 0.04, "step": 220500 }, { "epoch": 1.0899546222973575, "eval_runtime": 352.6462, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4847979345214749, "eval_sts_eval_spearman_cosine": 0.2753488452613981, "step": 220500 }, { "epoch": 1.0924261747288706, "grad_norm": 0.48848238587379456, "learning_rate": 1.4542359639732297e-05, "loss": 0.0371, "step": 221000 }, { "epoch": 1.0924261747288706, "eval_runtime": 363.6472, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4863893345721244, "eval_sts_eval_spearman_cosine": 0.2771631548748941, "step": 221000 }, { "epoch": 1.094897727160384, "grad_norm": 0.22858481109142303, "learning_rate": 1.4529997997413945e-05, "loss": 0.0392, "step": 221500 }, { "epoch": 1.094897727160384, "eval_runtime": 358.3861, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4840562109845138, "eval_sts_eval_spearman_cosine": 0.27340118122717394, "step": 221500 }, { "epoch": 1.0973692795918972, "grad_norm": 0.29817822575569153, "learning_rate": 1.4517636355095595e-05, "loss": 0.0397, "step": 222000 }, { "epoch": 1.0973692795918972, "eval_runtime": 352.9803, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48669550015820795, "eval_sts_eval_spearman_cosine": 0.2764420941810833, "step": 222000 }, { "epoch": 1.0998408320234105, "grad_norm": 0.17806734144687653, "learning_rate": 1.4505274712777241e-05, "loss": 0.0406, "step": 222500 }, { "epoch": 1.0998408320234105, "eval_runtime": 355.2021, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48133386758742236, "eval_sts_eval_spearman_cosine": 0.2732123173271213, "step": 222500 }, { "epoch": 1.1023123844549239, "grad_norm": 0.3055611550807953, "learning_rate": 1.449291307045889e-05, "loss": 0.0396, "step": 223000 }, { "epoch": 1.1023123844549239, "eval_runtime": 353.7378, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4810618564800368, "eval_sts_eval_spearman_cosine": 0.2729998383523284, "step": 223000 }, { "epoch": 1.1047839368864372, "grad_norm": 0.6122519969940186, "learning_rate": 1.448055142814054e-05, "loss": 0.0396, "step": 223500 }, { "epoch": 1.1047839368864372, "eval_runtime": 351.4161, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4860355320647418, "eval_sts_eval_spearman_cosine": 0.2755915525844246, "step": 223500 }, { "epoch": 1.1072554893179505, "grad_norm": 0.5115017890930176, "learning_rate": 1.4468189785822187e-05, "loss": 0.0389, "step": 224000 }, { "epoch": 1.1072554893179505, "eval_runtime": 360.4724, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48727804391660967, "eval_sts_eval_spearman_cosine": 0.2771000411298181, "step": 224000 }, { "epoch": 1.1097270417494636, "grad_norm": 0.6970833539962769, "learning_rate": 1.4455828143503834e-05, "loss": 0.0402, "step": 224500 }, { "epoch": 1.1097270417494636, "eval_runtime": 351.2312, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4875739822827565, "eval_sts_eval_spearman_cosine": 0.27664043620019296, "step": 224500 }, { "epoch": 1.1121985941809769, "grad_norm": 0.45335033535957336, "learning_rate": 1.4443466501185483e-05, "loss": 0.0386, "step": 225000 }, { "epoch": 1.1121985941809769, "eval_runtime": 363.7467, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48733697206297644, "eval_sts_eval_spearman_cosine": 0.2773706367717391, "step": 225000 }, { "epoch": 1.1146701466124902, "grad_norm": 0.28834986686706543, "learning_rate": 1.4431104858867132e-05, "loss": 0.0389, "step": 225500 }, { "epoch": 1.1146701466124902, "eval_runtime": 357.4509, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49097296453105005, "eval_sts_eval_spearman_cosine": 0.2781833640928888, "step": 225500 }, { "epoch": 1.1171416990440035, "grad_norm": 0.38632699847221375, "learning_rate": 1.4418743216548778e-05, "loss": 0.0372, "step": 226000 }, { "epoch": 1.1171416990440035, "eval_runtime": 353.3098, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48934495205337536, "eval_sts_eval_spearman_cosine": 0.27676218793090096, "step": 226000 }, { "epoch": 1.1196132514755168, "grad_norm": 0.6793394684791565, "learning_rate": 1.4406381574230428e-05, "loss": 0.0384, "step": 226500 }, { "epoch": 1.1196132514755168, "eval_runtime": 355.827, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48471067984681493, "eval_sts_eval_spearman_cosine": 0.2725666354643282, "step": 226500 }, { "epoch": 1.1220848039070301, "grad_norm": 0.4329316318035126, "learning_rate": 1.4394019931912076e-05, "loss": 0.0424, "step": 227000 }, { "epoch": 1.1220848039070301, "eval_runtime": 357.4062, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4814185494399289, "eval_sts_eval_spearman_cosine": 0.273379053501804, "step": 227000 }, { "epoch": 1.1245563563385435, "grad_norm": 0.539086103439331, "learning_rate": 1.4381658289593722e-05, "loss": 0.041, "step": 227500 }, { "epoch": 1.1245563563385435, "eval_runtime": 362.6027, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4786737958043541, "eval_sts_eval_spearman_cosine": 0.2731561012132798, "step": 227500 }, { "epoch": 1.1270279087700565, "grad_norm": 0.3397796154022217, "learning_rate": 1.4369296647275372e-05, "loss": 0.0392, "step": 228000 }, { "epoch": 1.1270279087700565, "eval_runtime": 354.9915, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4774926131911946, "eval_sts_eval_spearman_cosine": 0.27168413872369374, "step": 228000 }, { "epoch": 1.1294994612015699, "grad_norm": 0.7545970678329468, "learning_rate": 1.435693500495702e-05, "loss": 0.039, "step": 228500 }, { "epoch": 1.1294994612015699, "eval_runtime": 349.9164, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4829904827599877, "eval_sts_eval_spearman_cosine": 0.2742829532722294, "step": 228500 }, { "epoch": 1.1319710136330832, "grad_norm": 0.5405087471008301, "learning_rate": 1.4344573362638667e-05, "loss": 0.0402, "step": 229000 }, { "epoch": 1.1319710136330832, "eval_runtime": 353.2722, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4766578995483797, "eval_sts_eval_spearman_cosine": 0.2720770695448359, "step": 229000 }, { "epoch": 1.1344425660645965, "grad_norm": 0.48583173751831055, "learning_rate": 1.4332211720320316e-05, "loss": 0.0403, "step": 229500 }, { "epoch": 1.1344425660645965, "eval_runtime": 355.1137, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.480732413773417, "eval_sts_eval_spearman_cosine": 0.27334172750795344, "step": 229500 }, { "epoch": 1.1369141184961098, "grad_norm": 0.22091498970985413, "learning_rate": 1.4319850078001965e-05, "loss": 0.0393, "step": 230000 }, { "epoch": 1.1369141184961098, "eval_runtime": 363.6232, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4790738338253233, "eval_sts_eval_spearman_cosine": 0.2727223107539659, "step": 230000 }, { "epoch": 1.139385670927623, "grad_norm": 0.35182541608810425, "learning_rate": 1.4307488435683613e-05, "loss": 0.039, "step": 230500 }, { "epoch": 1.139385670927623, "eval_runtime": 361.2546, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48228350951251353, "eval_sts_eval_spearman_cosine": 0.2754688870340037, "step": 230500 }, { "epoch": 1.1418572233591364, "grad_norm": 0.6107409000396729, "learning_rate": 1.429512679336526e-05, "loss": 0.0382, "step": 231000 }, { "epoch": 1.1418572233591364, "eval_runtime": 353.2514, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4849699121386843, "eval_sts_eval_spearman_cosine": 0.27571047190096, "step": 231000 }, { "epoch": 1.1443287757906497, "grad_norm": 0.574751615524292, "learning_rate": 1.4282765151046909e-05, "loss": 0.036, "step": 231500 }, { "epoch": 1.1443287757906497, "eval_runtime": 356.9059, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48394783734295205, "eval_sts_eval_spearman_cosine": 0.27598293841313987, "step": 231500 }, { "epoch": 1.1468003282221628, "grad_norm": 1.5330328941345215, "learning_rate": 1.4270403508728557e-05, "loss": 0.0408, "step": 232000 }, { "epoch": 1.1468003282221628, "eval_runtime": 357.5988, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4854151325657986, "eval_sts_eval_spearman_cosine": 0.27619606541985114, "step": 232000 }, { "epoch": 1.1492718806536761, "grad_norm": 0.3572717607021332, "learning_rate": 1.4258041866410207e-05, "loss": 0.0393, "step": 232500 }, { "epoch": 1.1492718806536761, "eval_runtime": 356.2865, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4809761586960802, "eval_sts_eval_spearman_cosine": 0.27331326993055155, "step": 232500 }, { "epoch": 1.1517434330851894, "grad_norm": 0.33052122592926025, "learning_rate": 1.4245680224091853e-05, "loss": 0.0385, "step": 233000 }, { "epoch": 1.1517434330851894, "eval_runtime": 346.0803, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4812514453006271, "eval_sts_eval_spearman_cosine": 0.27499802011567775, "step": 233000 }, { "epoch": 1.1542149855167028, "grad_norm": 0.3633083701133728, "learning_rate": 1.4233318581773501e-05, "loss": 0.0398, "step": 233500 }, { "epoch": 1.1542149855167028, "eval_runtime": 347.1634, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4811825207608612, "eval_sts_eval_spearman_cosine": 0.2772043983246158, "step": 233500 }, { "epoch": 1.156686537948216, "grad_norm": 0.24222052097320557, "learning_rate": 1.4220956939455148e-05, "loss": 0.0411, "step": 234000 }, { "epoch": 1.156686537948216, "eval_runtime": 350.8593, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4813622636921158, "eval_sts_eval_spearman_cosine": 0.27509721770864465, "step": 234000 }, { "epoch": 1.1591580903797294, "grad_norm": 0.33518186211586, "learning_rate": 1.4208595297136798e-05, "loss": 0.0404, "step": 234500 }, { "epoch": 1.1591580903797294, "eval_runtime": 349.0297, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48173789520256594, "eval_sts_eval_spearman_cosine": 0.27474900735708513, "step": 234500 }, { "epoch": 1.1616296428112425, "grad_norm": 0.2773831784725189, "learning_rate": 1.4196233654818446e-05, "loss": 0.0393, "step": 235000 }, { "epoch": 1.1616296428112425, "eval_runtime": 355.7496, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4839657728622226, "eval_sts_eval_spearman_cosine": 0.27649223019535785, "step": 235000 }, { "epoch": 1.1641011952427558, "grad_norm": 0.4159078299999237, "learning_rate": 1.4183872012500092e-05, "loss": 0.0389, "step": 235500 }, { "epoch": 1.1641011952427558, "eval_runtime": 346.6131, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48074112584452566, "eval_sts_eval_spearman_cosine": 0.2715412842408497, "step": 235500 }, { "epoch": 1.166572747674269, "grad_norm": 0.8826048374176025, "learning_rate": 1.4171510370181742e-05, "loss": 0.0379, "step": 236000 }, { "epoch": 1.166572747674269, "eval_runtime": 354.198, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4846583937779474, "eval_sts_eval_spearman_cosine": 0.2759112654413909, "step": 236000 }, { "epoch": 1.1690443001057824, "grad_norm": 0.3525620996952057, "learning_rate": 1.415914872786339e-05, "loss": 0.0392, "step": 236500 }, { "epoch": 1.1690443001057824, "eval_runtime": 353.9673, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48277527355587835, "eval_sts_eval_spearman_cosine": 0.274022621494111, "step": 236500 }, { "epoch": 1.1715158525372957, "grad_norm": 0.3730442523956299, "learning_rate": 1.4146787085545038e-05, "loss": 0.039, "step": 237000 }, { "epoch": 1.1715158525372957, "eval_runtime": 363.1075, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48205234449736517, "eval_sts_eval_spearman_cosine": 0.27321940534874056, "step": 237000 }, { "epoch": 1.173987404968809, "grad_norm": 0.37293675541877747, "learning_rate": 1.4134425443226686e-05, "loss": 0.041, "step": 237500 }, { "epoch": 1.173987404968809, "eval_runtime": 365.5928, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47926507488370396, "eval_sts_eval_spearman_cosine": 0.2703483793606203, "step": 237500 }, { "epoch": 1.1764589574003224, "grad_norm": 0.32277989387512207, "learning_rate": 1.4122063800908334e-05, "loss": 0.0403, "step": 238000 }, { "epoch": 1.1764589574003224, "eval_runtime": 360.8936, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47891782481095946, "eval_sts_eval_spearman_cosine": 0.2747512020243968, "step": 238000 }, { "epoch": 1.1789305098318357, "grad_norm": 0.21185632050037384, "learning_rate": 1.4109702158589982e-05, "loss": 0.0388, "step": 238500 }, { "epoch": 1.1789305098318357, "eval_runtime": 358.6859, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4818595783530937, "eval_sts_eval_spearman_cosine": 0.27527300570152846, "step": 238500 }, { "epoch": 1.1814020622633488, "grad_norm": 0.5380460619926453, "learning_rate": 1.4097340516271632e-05, "loss": 0.0405, "step": 239000 }, { "epoch": 1.1814020622633488, "eval_runtime": 353.2301, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4827215872077783, "eval_sts_eval_spearman_cosine": 0.2744238390384825, "step": 239000 }, { "epoch": 1.183873614694862, "grad_norm": 0.6487014889717102, "learning_rate": 1.4084978873953279e-05, "loss": 0.039, "step": 239500 }, { "epoch": 1.183873614694862, "eval_runtime": 352.6559, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48203147960890247, "eval_sts_eval_spearman_cosine": 0.27689321713901327, "step": 239500 }, { "epoch": 1.1863451671263754, "grad_norm": 0.2271808236837387, "learning_rate": 1.4072617231634927e-05, "loss": 0.0405, "step": 240000 }, { "epoch": 1.1863451671263754, "eval_runtime": 350.9678, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48150383202711905, "eval_sts_eval_spearman_cosine": 0.27461361618305474, "step": 240000 }, { "epoch": 1.1888167195578887, "grad_norm": 0.30215132236480713, "learning_rate": 1.4060255589316576e-05, "loss": 0.0389, "step": 240500 }, { "epoch": 1.1888167195578887, "eval_runtime": 349.0965, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48400357664984395, "eval_sts_eval_spearman_cosine": 0.2737539985931217, "step": 240500 }, { "epoch": 1.191288271989402, "grad_norm": 0.5929027199745178, "learning_rate": 1.4047893946998223e-05, "loss": 0.0393, "step": 241000 }, { "epoch": 1.191288271989402, "eval_runtime": 352.7177, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48731443384976636, "eval_sts_eval_spearman_cosine": 0.27814307498355995, "step": 241000 }, { "epoch": 1.1937598244209153, "grad_norm": 0.2281690537929535, "learning_rate": 1.4035532304679871e-05, "loss": 0.0374, "step": 241500 }, { "epoch": 1.1937598244209153, "eval_runtime": 346.1592, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48574386596134217, "eval_sts_eval_spearman_cosine": 0.2793685535082663, "step": 241500 }, { "epoch": 1.1962313768524286, "grad_norm": 0.6423130035400391, "learning_rate": 1.402317066236152e-05, "loss": 0.0404, "step": 242000 }, { "epoch": 1.1962313768524286, "eval_runtime": 345.073, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48394359281183286, "eval_sts_eval_spearman_cosine": 0.27467843511106765, "step": 242000 }, { "epoch": 1.1987029292839417, "grad_norm": 0.2347993105649948, "learning_rate": 1.4010809020043167e-05, "loss": 0.0388, "step": 242500 }, { "epoch": 1.1987029292839417, "eval_runtime": 351.1988, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4822791487161432, "eval_sts_eval_spearman_cosine": 0.27626350777085545, "step": 242500 }, { "epoch": 1.201174481715455, "grad_norm": 0.27961108088493347, "learning_rate": 1.3998447377724815e-05, "loss": 0.0387, "step": 243000 }, { "epoch": 1.201174481715455, "eval_runtime": 347.3171, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48734247806925457, "eval_sts_eval_spearman_cosine": 0.27747276652889424, "step": 243000 }, { "epoch": 1.2036460341469684, "grad_norm": 0.3533613085746765, "learning_rate": 1.3986085735406465e-05, "loss": 0.0401, "step": 243500 }, { "epoch": 1.2036460341469684, "eval_runtime": 352.2281, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4816796656675827, "eval_sts_eval_spearman_cosine": 0.27227737308842026, "step": 243500 }, { "epoch": 1.2061175865784817, "grad_norm": 0.30374521017074585, "learning_rate": 1.3973724093088113e-05, "loss": 0.0394, "step": 244000 }, { "epoch": 1.2061175865784817, "eval_runtime": 350.9065, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47725454983816384, "eval_sts_eval_spearman_cosine": 0.26948354128026536, "step": 244000 }, { "epoch": 1.208589139009995, "grad_norm": 1.085617184638977, "learning_rate": 1.396136245076976e-05, "loss": 0.0405, "step": 244500 }, { "epoch": 1.208589139009995, "eval_runtime": 349.3181, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4823512823889138, "eval_sts_eval_spearman_cosine": 0.27349311326450604, "step": 244500 }, { "epoch": 1.2110606914415083, "grad_norm": 0.6804192662239075, "learning_rate": 1.394900080845141e-05, "loss": 0.0408, "step": 245000 }, { "epoch": 1.2110606914415083, "eval_runtime": 349.9787, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4844989819524338, "eval_sts_eval_spearman_cosine": 0.27540115343781035, "step": 245000 }, { "epoch": 1.2135322438730216, "grad_norm": 0.1907181292772293, "learning_rate": 1.3936639166133058e-05, "loss": 0.0388, "step": 245500 }, { "epoch": 1.2135322438730216, "eval_runtime": 354.6722, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47890172742684384, "eval_sts_eval_spearman_cosine": 0.2708446888297242, "step": 245500 }, { "epoch": 1.216003796304535, "grad_norm": 0.2587830126285553, "learning_rate": 1.3924277523814704e-05, "loss": 0.0383, "step": 246000 }, { "epoch": 1.216003796304535, "eval_runtime": 348.2051, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4810382739580044, "eval_sts_eval_spearman_cosine": 0.27381638972734207, "step": 246000 }, { "epoch": 1.218475348736048, "grad_norm": 0.4203026294708252, "learning_rate": 1.3911915881496354e-05, "loss": 0.0416, "step": 246500 }, { "epoch": 1.218475348736048, "eval_runtime": 350.8315, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4798402284007003, "eval_sts_eval_spearman_cosine": 0.2736204427027475, "step": 246500 }, { "epoch": 1.2209469011675613, "grad_norm": 0.6034480929374695, "learning_rate": 1.3899554239178002e-05, "loss": 0.0379, "step": 247000 }, { "epoch": 1.2209469011675613, "eval_runtime": 356.7869, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4828106541051123, "eval_sts_eval_spearman_cosine": 0.2763477570949008, "step": 247000 }, { "epoch": 1.2234184535990746, "grad_norm": 0.6489719152450562, "learning_rate": 1.3887192596859648e-05, "loss": 0.0415, "step": 247500 }, { "epoch": 1.2234184535990746, "eval_runtime": 352.1577, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48313147674067614, "eval_sts_eval_spearman_cosine": 0.275620359150208, "step": 247500 }, { "epoch": 1.225890006030588, "grad_norm": 0.2531510591506958, "learning_rate": 1.3874830954541298e-05, "loss": 0.0378, "step": 248000 }, { "epoch": 1.225890006030588, "eval_runtime": 344.449, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48459914086526035, "eval_sts_eval_spearman_cosine": 0.2754164740735851, "step": 248000 }, { "epoch": 1.2283615584621013, "grad_norm": 0.4041600525379181, "learning_rate": 1.3862469312222946e-05, "loss": 0.0392, "step": 248500 }, { "epoch": 1.2283615584621013, "eval_runtime": 345.9082, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4839851687743848, "eval_sts_eval_spearman_cosine": 0.2771513488438261, "step": 248500 }, { "epoch": 1.2308331108936146, "grad_norm": 0.2569056451320648, "learning_rate": 1.3850107669904593e-05, "loss": 0.0391, "step": 249000 }, { "epoch": 1.2308331108936146, "eval_runtime": 350.2463, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48298088862167476, "eval_sts_eval_spearman_cosine": 0.27574384044950745, "step": 249000 }, { "epoch": 1.2333046633251277, "grad_norm": 0.19898466765880585, "learning_rate": 1.3837746027586242e-05, "loss": 0.0386, "step": 249500 }, { "epoch": 1.2333046633251277, "eval_runtime": 357.6651, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4812737463699898, "eval_sts_eval_spearman_cosine": 0.27165574512675084, "step": 249500 }, { "epoch": 1.235776215756641, "grad_norm": 0.4130780100822449, "learning_rate": 1.382538438526789e-05, "loss": 0.0416, "step": 250000 }, { "epoch": 1.235776215756641, "eval_runtime": 347.8476, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4853862383266641, "eval_sts_eval_spearman_cosine": 0.2769492520416516, "step": 250000 }, { "epoch": 1.2382477681881543, "grad_norm": 0.19831153750419617, "learning_rate": 1.3813022742949539e-05, "loss": 0.0404, "step": 250500 }, { "epoch": 1.2382477681881543, "eval_runtime": 351.4098, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4799412656348255, "eval_sts_eval_spearman_cosine": 0.2734250106751099, "step": 250500 }, { "epoch": 1.2407193206196676, "grad_norm": 0.4829835593700409, "learning_rate": 1.3800661100631187e-05, "loss": 0.0379, "step": 251000 }, { "epoch": 1.2407193206196676, "eval_runtime": 350.4099, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4803912421186088, "eval_sts_eval_spearman_cosine": 0.2748867287847875, "step": 251000 }, { "epoch": 1.243190873051181, "grad_norm": 0.7996386885643005, "learning_rate": 1.3788299458312835e-05, "loss": 0.0387, "step": 251500 }, { "epoch": 1.243190873051181, "eval_runtime": 346.1936, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48008841702449306, "eval_sts_eval_spearman_cosine": 0.27429996961881425, "step": 251500 }, { "epoch": 1.2456624254826942, "grad_norm": 0.4876335859298706, "learning_rate": 1.3775937815994483e-05, "loss": 0.0421, "step": 252000 }, { "epoch": 1.2456624254826942, "eval_runtime": 352.5733, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48026247420844775, "eval_sts_eval_spearman_cosine": 0.27511460944689603, "step": 252000 }, { "epoch": 1.2481339779142075, "grad_norm": 0.2890876531600952, "learning_rate": 1.3763576173676133e-05, "loss": 0.0391, "step": 252500 }, { "epoch": 1.2481339779142075, "eval_runtime": 345.1572, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48097485431804443, "eval_sts_eval_spearman_cosine": 0.2753214636425288, "step": 252500 }, { "epoch": 1.2506055303457209, "grad_norm": 0.4436376690864563, "learning_rate": 1.3751214531357779e-05, "loss": 0.039, "step": 253000 }, { "epoch": 1.2506055303457209, "eval_runtime": 347.3478, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4792561288004625, "eval_sts_eval_spearman_cosine": 0.27554883791578005, "step": 253000 }, { "epoch": 1.2530770827772342, "grad_norm": 0.6802436709403992, "learning_rate": 1.3738852889039427e-05, "loss": 0.042, "step": 253500 }, { "epoch": 1.2530770827772342, "eval_runtime": 346.4347, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47818431164541775, "eval_sts_eval_spearman_cosine": 0.27252383509249606, "step": 253500 }, { "epoch": 1.2555486352087473, "grad_norm": 0.39708811044692993, "learning_rate": 1.3726491246721077e-05, "loss": 0.0394, "step": 254000 }, { "epoch": 1.2555486352087473, "eval_runtime": 354.862, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4812229876134099, "eval_sts_eval_spearman_cosine": 0.273087068777367, "step": 254000 }, { "epoch": 1.2580201876402606, "grad_norm": 0.5617344975471497, "learning_rate": 1.3714129604402723e-05, "loss": 0.0398, "step": 254500 }, { "epoch": 1.2580201876402606, "eval_runtime": 346.3082, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4845169941718917, "eval_sts_eval_spearman_cosine": 0.27583479207148404, "step": 254500 }, { "epoch": 1.260491740071774, "grad_norm": 0.19840459525585175, "learning_rate": 1.3701767962084372e-05, "loss": 0.0404, "step": 255000 }, { "epoch": 1.260491740071774, "eval_runtime": 356.4249, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48407587146604775, "eval_sts_eval_spearman_cosine": 0.2785653842477047, "step": 255000 }, { "epoch": 1.2629632925032872, "grad_norm": 0.3369232416152954, "learning_rate": 1.3689406319766021e-05, "loss": 0.0398, "step": 255500 }, { "epoch": 1.2629632925032872, "eval_runtime": 349.8251, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4836575773848697, "eval_sts_eval_spearman_cosine": 0.27826915912853095, "step": 255500 }, { "epoch": 1.2654348449348005, "grad_norm": 0.2666911482810974, "learning_rate": 1.3677044677447668e-05, "loss": 0.0392, "step": 256000 }, { "epoch": 1.2654348449348005, "eval_runtime": 359.2282, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4813634530698778, "eval_sts_eval_spearman_cosine": 0.2779291824574844, "step": 256000 }, { "epoch": 1.2679063973663136, "grad_norm": 0.2603444457054138, "learning_rate": 1.3664683035129316e-05, "loss": 0.0386, "step": 256500 }, { "epoch": 1.2679063973663136, "eval_runtime": 362.5487, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4842785545636979, "eval_sts_eval_spearman_cosine": 0.27850734223611145, "step": 256500 }, { "epoch": 1.270377949797827, "grad_norm": 0.39274969696998596, "learning_rate": 1.3652321392810966e-05, "loss": 0.0402, "step": 257000 }, { "epoch": 1.270377949797827, "eval_runtime": 357.0544, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4816903338113793, "eval_sts_eval_spearman_cosine": 0.27638227359518364, "step": 257000 }, { "epoch": 1.2728495022293402, "grad_norm": 0.20876644551753998, "learning_rate": 1.3639959750492612e-05, "loss": 0.0376, "step": 257500 }, { "epoch": 1.2728495022293402, "eval_runtime": 348.1853, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4840713024765724, "eval_sts_eval_spearman_cosine": 0.2792330328182005, "step": 257500 }, { "epoch": 1.2753210546608535, "grad_norm": 0.4255581796169281, "learning_rate": 1.362759810817426e-05, "loss": 0.0387, "step": 258000 }, { "epoch": 1.2753210546608535, "eval_runtime": 353.2356, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4874952136473901, "eval_sts_eval_spearman_cosine": 0.2790533354975876, "step": 258000 }, { "epoch": 1.2777926070923669, "grad_norm": 0.7375885248184204, "learning_rate": 1.361523646585591e-05, "loss": 0.0397, "step": 258500 }, { "epoch": 1.2777926070923669, "eval_runtime": 355.7356, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4882498490360949, "eval_sts_eval_spearman_cosine": 0.28081430589377565, "step": 258500 }, { "epoch": 1.2802641595238802, "grad_norm": 0.328334778547287, "learning_rate": 1.3602874823537558e-05, "loss": 0.038, "step": 259000 }, { "epoch": 1.2802641595238802, "eval_runtime": 350.5168, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4858936654560781, "eval_sts_eval_spearman_cosine": 0.28018586279570806, "step": 259000 }, { "epoch": 1.2827357119553935, "grad_norm": 0.44859379529953003, "learning_rate": 1.3590513181219205e-05, "loss": 0.0389, "step": 259500 }, { "epoch": 1.2827357119553935, "eval_runtime": 357.4669, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48623997471083114, "eval_sts_eval_spearman_cosine": 0.2794839082185696, "step": 259500 }, { "epoch": 1.2852072643869068, "grad_norm": 0.48245733976364136, "learning_rate": 1.3578151538900853e-05, "loss": 0.0412, "step": 260000 }, { "epoch": 1.2852072643869068, "eval_runtime": 353.5552, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48522261312828696, "eval_sts_eval_spearman_cosine": 0.2771339970759204, "step": 260000 }, { "epoch": 1.2876788168184201, "grad_norm": 0.3965257406234741, "learning_rate": 1.3565789896582502e-05, "loss": 0.0394, "step": 260500 }, { "epoch": 1.2876788168184201, "eval_runtime": 351.6896, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4877114474505789, "eval_sts_eval_spearman_cosine": 0.2776711648940105, "step": 260500 }, { "epoch": 1.2901503692499332, "grad_norm": 0.4103907644748688, "learning_rate": 1.3553428254264149e-05, "loss": 0.0426, "step": 261000 }, { "epoch": 1.2901503692499332, "eval_runtime": 359.5548, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48893166057292214, "eval_sts_eval_spearman_cosine": 0.2791975316080028, "step": 261000 }, { "epoch": 1.2926219216814465, "grad_norm": 0.28913941979408264, "learning_rate": 1.3541066611945797e-05, "loss": 0.0391, "step": 261500 }, { "epoch": 1.2926219216814465, "eval_runtime": 351.6675, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4853332330087716, "eval_sts_eval_spearman_cosine": 0.27717390480472814, "step": 261500 }, { "epoch": 1.2950934741129598, "grad_norm": 0.44531798362731934, "learning_rate": 1.3528704969627447e-05, "loss": 0.0382, "step": 262000 }, { "epoch": 1.2950934741129598, "eval_runtime": 349.353, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48554273211642934, "eval_sts_eval_spearman_cosine": 0.27830424752320787, "step": 262000 }, { "epoch": 1.2975650265444731, "grad_norm": 0.3695474863052368, "learning_rate": 1.3516343327309093e-05, "loss": 0.0385, "step": 262500 }, { "epoch": 1.2975650265444731, "eval_runtime": 347.5194, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48575916593393037, "eval_sts_eval_spearman_cosine": 0.27892592949518896, "step": 262500 }, { "epoch": 1.3000365789759865, "grad_norm": 1.490763783454895, "learning_rate": 1.3503981684990741e-05, "loss": 0.0401, "step": 263000 }, { "epoch": 1.3000365789759865, "eval_runtime": 349.019, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4863911726486893, "eval_sts_eval_spearman_cosine": 0.2811663373447271, "step": 263000 }, { "epoch": 1.3025081314074995, "grad_norm": 0.2651903033256531, "learning_rate": 1.3491620042672391e-05, "loss": 0.0392, "step": 263500 }, { "epoch": 1.3025081314074995, "eval_runtime": 345.7587, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49112205117827623, "eval_sts_eval_spearman_cosine": 0.28262153496166487, "step": 263500 }, { "epoch": 1.3049796838390129, "grad_norm": 0.2613411545753479, "learning_rate": 1.347925840035404e-05, "loss": 0.0403, "step": 264000 }, { "epoch": 1.3049796838390129, "eval_runtime": 347.9541, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48743459414154455, "eval_sts_eval_spearman_cosine": 0.28125744872352404, "step": 264000 }, { "epoch": 1.3074512362705262, "grad_norm": 0.49021056294441223, "learning_rate": 1.3466896758035686e-05, "loss": 0.0394, "step": 264500 }, { "epoch": 1.3074512362705262, "eval_runtime": 347.8956, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48442231676367176, "eval_sts_eval_spearman_cosine": 0.2778861962253242, "step": 264500 }, { "epoch": 1.3099227887020395, "grad_norm": 0.5672814249992371, "learning_rate": 1.3454535115717335e-05, "loss": 0.0397, "step": 265000 }, { "epoch": 1.3099227887020395, "eval_runtime": 348.9645, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49033731849644047, "eval_sts_eval_spearman_cosine": 0.2831557469704379, "step": 265000 }, { "epoch": 1.3123943411335528, "grad_norm": 0.5467795133590698, "learning_rate": 1.3442173473398983e-05, "loss": 0.0407, "step": 265500 }, { "epoch": 1.3123943411335528, "eval_runtime": 349.9203, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4862037531099124, "eval_sts_eval_spearman_cosine": 0.2785288842911162, "step": 265500 }, { "epoch": 1.3148658935650661, "grad_norm": 0.25407466292381287, "learning_rate": 1.342981183108063e-05, "loss": 0.0412, "step": 266000 }, { "epoch": 1.3148658935650661, "eval_runtime": 355.222, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48666909211557813, "eval_sts_eval_spearman_cosine": 0.2809089996267956, "step": 266000 }, { "epoch": 1.3173374459965794, "grad_norm": 0.3590388894081116, "learning_rate": 1.341745018876228e-05, "loss": 0.0399, "step": 266500 }, { "epoch": 1.3173374459965794, "eval_runtime": 363.3042, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48868102415178194, "eval_sts_eval_spearman_cosine": 0.2805410406836989, "step": 266500 }, { "epoch": 1.3198089984280927, "grad_norm": 0.3524695336818695, "learning_rate": 1.3405088546443928e-05, "loss": 0.0406, "step": 267000 }, { "epoch": 1.3198089984280927, "eval_runtime": 360.6007, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.487236764450788, "eval_sts_eval_spearman_cosine": 0.280336116293852, "step": 267000 }, { "epoch": 1.322280550859606, "grad_norm": 0.24538561701774597, "learning_rate": 1.3392726904125574e-05, "loss": 0.0397, "step": 267500 }, { "epoch": 1.322280550859606, "eval_runtime": 352.1715, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48878281533355533, "eval_sts_eval_spearman_cosine": 0.28121590676779595, "step": 267500 }, { "epoch": 1.3247521032911191, "grad_norm": 0.3969873785972595, "learning_rate": 1.3380365261807224e-05, "loss": 0.0413, "step": 268000 }, { "epoch": 1.3247521032911191, "eval_runtime": 353.3435, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48857194316758823, "eval_sts_eval_spearman_cosine": 0.2818715461755403, "step": 268000 }, { "epoch": 1.3272236557226325, "grad_norm": 0.5750746130943298, "learning_rate": 1.3368003619488872e-05, "loss": 0.0398, "step": 268500 }, { "epoch": 1.3272236557226325, "eval_runtime": 347.6977, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48974967353770377, "eval_sts_eval_spearman_cosine": 0.27876546233776395, "step": 268500 }, { "epoch": 1.3296952081541458, "grad_norm": 0.5421162247657776, "learning_rate": 1.3355641977170519e-05, "loss": 0.0402, "step": 269000 }, { "epoch": 1.3296952081541458, "eval_runtime": 352.1218, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4900307028917339, "eval_sts_eval_spearman_cosine": 0.28143386722813396, "step": 269000 }, { "epoch": 1.332166760585659, "grad_norm": 0.40396296977996826, "learning_rate": 1.3343280334852168e-05, "loss": 0.0387, "step": 269500 }, { "epoch": 1.332166760585659, "eval_runtime": 347.7074, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49341762536482187, "eval_sts_eval_spearman_cosine": 0.2824937135165987, "step": 269500 }, { "epoch": 1.3346383130171724, "grad_norm": 0.35122135281562805, "learning_rate": 1.3330918692533816e-05, "loss": 0.0425, "step": 270000 }, { "epoch": 1.3346383130171724, "eval_runtime": 350.7806, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4897431371262505, "eval_sts_eval_spearman_cosine": 0.2789012605664985, "step": 270000 }, { "epoch": 1.3371098654486857, "grad_norm": 0.2701301574707031, "learning_rate": 1.3318557050215465e-05, "loss": 0.038, "step": 270500 }, { "epoch": 1.3371098654486857, "eval_runtime": 347.3608, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49226097375456435, "eval_sts_eval_spearman_cosine": 0.27925116254183613, "step": 270500 }, { "epoch": 1.3395814178801988, "grad_norm": 0.5786111950874329, "learning_rate": 1.3306195407897113e-05, "loss": 0.0377, "step": 271000 }, { "epoch": 1.3395814178801988, "eval_runtime": 348.5959, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4867884953038548, "eval_sts_eval_spearman_cosine": 0.27745225869479223, "step": 271000 }, { "epoch": 1.3420529703117121, "grad_norm": 0.3567834794521332, "learning_rate": 1.329383376557876e-05, "loss": 0.0414, "step": 271500 }, { "epoch": 1.3420529703117121, "eval_runtime": 349.309, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48785505256369727, "eval_sts_eval_spearman_cosine": 0.2768917926772637, "step": 271500 }, { "epoch": 1.3445245227432254, "grad_norm": 0.3123275935649872, "learning_rate": 1.3281472123260409e-05, "loss": 0.0389, "step": 272000 }, { "epoch": 1.3445245227432254, "eval_runtime": 342.0991, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48484846333026843, "eval_sts_eval_spearman_cosine": 0.27354174059069597, "step": 272000 }, { "epoch": 1.3469960751747387, "grad_norm": 0.3960287272930145, "learning_rate": 1.3269110480942059e-05, "loss": 0.0386, "step": 272500 }, { "epoch": 1.3469960751747387, "eval_runtime": 353.4317, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49094332191628487, "eval_sts_eval_spearman_cosine": 0.27847732570341766, "step": 272500 }, { "epoch": 1.349467627606252, "grad_norm": 0.22524785995483398, "learning_rate": 1.3256748838623705e-05, "loss": 0.0401, "step": 273000 }, { "epoch": 1.349467627606252, "eval_runtime": 346.8949, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49368260506013545, "eval_sts_eval_spearman_cosine": 0.2812792299677162, "step": 273000 }, { "epoch": 1.3519391800377654, "grad_norm": 0.24267370998859406, "learning_rate": 1.3244387196305353e-05, "loss": 0.0383, "step": 273500 }, { "epoch": 1.3519391800377654, "eval_runtime": 344.3038, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4924434179122473, "eval_sts_eval_spearman_cosine": 0.28009264755025615, "step": 273500 }, { "epoch": 1.3544107324692787, "grad_norm": 0.6457669138908386, "learning_rate": 1.3232025553987003e-05, "loss": 0.0396, "step": 274000 }, { "epoch": 1.3544107324692787, "eval_runtime": 346.8521, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49204726174045066, "eval_sts_eval_spearman_cosine": 0.27964933458281105, "step": 274000 }, { "epoch": 1.356882284900792, "grad_norm": 0.3294457495212555, "learning_rate": 1.321966391166865e-05, "loss": 0.0396, "step": 274500 }, { "epoch": 1.356882284900792, "eval_runtime": 342.3395, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49254786939717654, "eval_sts_eval_spearman_cosine": 0.27928991132772163, "step": 274500 }, { "epoch": 1.3593538373323053, "grad_norm": 0.18210884928703308, "learning_rate": 1.3207302269350297e-05, "loss": 0.0424, "step": 275000 }, { "epoch": 1.3593538373323053, "eval_runtime": 349.7401, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49193035639001026, "eval_sts_eval_spearman_cosine": 0.28137557362368076, "step": 275000 }, { "epoch": 1.3618253897638184, "grad_norm": 0.39346539974212646, "learning_rate": 1.3194940627031947e-05, "loss": 0.0418, "step": 275500 }, { "epoch": 1.3618253897638184, "eval_runtime": 348.3761, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49153005005587846, "eval_sts_eval_spearman_cosine": 0.28142244606458255, "step": 275500 }, { "epoch": 1.3642969421953317, "grad_norm": 0.2743365168571472, "learning_rate": 1.3182578984713594e-05, "loss": 0.0383, "step": 276000 }, { "epoch": 1.3642969421953317, "eval_runtime": 349.6833, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4890509727768181, "eval_sts_eval_spearman_cosine": 0.2787146900689481, "step": 276000 }, { "epoch": 1.366768494626845, "grad_norm": 0.38647884130477905, "learning_rate": 1.3170217342395242e-05, "loss": 0.04, "step": 276500 }, { "epoch": 1.366768494626845, "eval_runtime": 347.693, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4865230973919724, "eval_sts_eval_spearman_cosine": 0.2796788855059967, "step": 276500 }, { "epoch": 1.3692400470583583, "grad_norm": 0.6228724122047424, "learning_rate": 1.3157855700076892e-05, "loss": 0.0414, "step": 277000 }, { "epoch": 1.3692400470583583, "eval_runtime": 355.9927, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4880448105197327, "eval_sts_eval_spearman_cosine": 0.2810465013549945, "step": 277000 }, { "epoch": 1.3717115994898716, "grad_norm": 0.36229875683784485, "learning_rate": 1.3145494057758538e-05, "loss": 0.0379, "step": 277500 }, { "epoch": 1.3717115994898716, "eval_runtime": 356.4231, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4919176695503781, "eval_sts_eval_spearman_cosine": 0.28483259394655597, "step": 277500 }, { "epoch": 1.3741831519213847, "grad_norm": 0.4776243269443512, "learning_rate": 1.3133132415440186e-05, "loss": 0.0381, "step": 278000 }, { "epoch": 1.3741831519213847, "eval_runtime": 353.3588, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.490451665115032, "eval_sts_eval_spearman_cosine": 0.28459775122687014, "step": 278000 }, { "epoch": 1.376654704352898, "grad_norm": 0.2857702672481537, "learning_rate": 1.3120770773121836e-05, "loss": 0.0383, "step": 278500 }, { "epoch": 1.376654704352898, "eval_runtime": 359.2797, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4865789416385806, "eval_sts_eval_spearman_cosine": 0.28135301280628405, "step": 278500 }, { "epoch": 1.3791262567844114, "grad_norm": 0.2266262322664261, "learning_rate": 1.3108409130803484e-05, "loss": 0.039, "step": 279000 }, { "epoch": 1.3791262567844114, "eval_runtime": 361.2384, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4867206968555685, "eval_sts_eval_spearman_cosine": 0.28181754298704265, "step": 279000 }, { "epoch": 1.3815978092159247, "grad_norm": 0.6787658333778381, "learning_rate": 1.309604748848513e-05, "loss": 0.0388, "step": 279500 }, { "epoch": 1.3815978092159247, "eval_runtime": 357.0963, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48498897621014897, "eval_sts_eval_spearman_cosine": 0.2792393350659415, "step": 279500 }, { "epoch": 1.384069361647438, "grad_norm": 0.3144626021385193, "learning_rate": 1.308368584616678e-05, "loss": 0.0408, "step": 280000 }, { "epoch": 1.384069361647438, "eval_runtime": 353.5985, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48312245428936407, "eval_sts_eval_spearman_cosine": 0.2783963908472425, "step": 280000 }, { "epoch": 1.3865409140789513, "grad_norm": 0.45114994049072266, "learning_rate": 1.3071324203848428e-05, "loss": 0.0389, "step": 280500 }, { "epoch": 1.3865409140789513, "eval_runtime": 351.5503, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4902154526555842, "eval_sts_eval_spearman_cosine": 0.281446870326939, "step": 280500 }, { "epoch": 1.3890124665104646, "grad_norm": 0.5589633584022522, "learning_rate": 1.3058962561530075e-05, "loss": 0.0426, "step": 281000 }, { "epoch": 1.3890124665104646, "eval_runtime": 350.003, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48585123869036756, "eval_sts_eval_spearman_cosine": 0.2794433315557342, "step": 281000 }, { "epoch": 1.391484018941978, "grad_norm": 0.31355416774749756, "learning_rate": 1.3046600919211725e-05, "loss": 0.0392, "step": 281500 }, { "epoch": 1.391484018941978, "eval_runtime": 356.7766, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4867158759010777, "eval_sts_eval_spearman_cosine": 0.2779976886822273, "step": 281500 }, { "epoch": 1.3939555713734912, "grad_norm": 0.3041801154613495, "learning_rate": 1.3034239276893373e-05, "loss": 0.0405, "step": 282000 }, { "epoch": 1.3939555713734912, "eval_runtime": 351.5001, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4866455823545288, "eval_sts_eval_spearman_cosine": 0.277782571384575, "step": 282000 }, { "epoch": 1.3964271238050043, "grad_norm": 0.44024696946144104, "learning_rate": 1.3021877634575019e-05, "loss": 0.0407, "step": 282500 }, { "epoch": 1.3964271238050043, "eval_runtime": 348.1814, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48430725301106586, "eval_sts_eval_spearman_cosine": 0.27685967810893675, "step": 282500 }, { "epoch": 1.3988986762365176, "grad_norm": 0.5151857733726501, "learning_rate": 1.3009515992256669e-05, "loss": 0.0396, "step": 283000 }, { "epoch": 1.3988986762365176, "eval_runtime": 357.5463, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48423521981530676, "eval_sts_eval_spearman_cosine": 0.2730369833182661, "step": 283000 }, { "epoch": 1.401370228668031, "grad_norm": 0.10243994742631912, "learning_rate": 1.2997154349938317e-05, "loss": 0.0376, "step": 283500 }, { "epoch": 1.401370228668031, "eval_runtime": 347.0694, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48528557263873134, "eval_sts_eval_spearman_cosine": 0.2769600647391519, "step": 283500 }, { "epoch": 1.4038417810995443, "grad_norm": 0.280739426612854, "learning_rate": 1.2984792707619963e-05, "loss": 0.0399, "step": 284000 }, { "epoch": 1.4038417810995443, "eval_runtime": 360.6086, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4876447646777588, "eval_sts_eval_spearman_cosine": 0.2790960396517198, "step": 284000 }, { "epoch": 1.4063133335310576, "grad_norm": 0.5125046968460083, "learning_rate": 1.2972431065301612e-05, "loss": 0.0405, "step": 284500 }, { "epoch": 1.4063133335310576, "eval_runtime": 346.4518, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48870435247589294, "eval_sts_eval_spearman_cosine": 0.2790928996492, "step": 284500 }, { "epoch": 1.4087848859625707, "grad_norm": 0.40609678626060486, "learning_rate": 1.2960069422983261e-05, "loss": 0.0382, "step": 285000 }, { "epoch": 1.4087848859625707, "eval_runtime": 355.077, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4892622693580057, "eval_sts_eval_spearman_cosine": 0.28039914492723106, "step": 285000 }, { "epoch": 1.411256438394084, "grad_norm": 0.3934013545513153, "learning_rate": 1.294770778066491e-05, "loss": 0.0388, "step": 285500 }, { "epoch": 1.411256438394084, "eval_runtime": 352.265, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4940208212755607, "eval_sts_eval_spearman_cosine": 0.28347000477619533, "step": 285500 }, { "epoch": 1.4137279908255973, "grad_norm": 0.23169732093811035, "learning_rate": 1.2935346138346556e-05, "loss": 0.0394, "step": 286000 }, { "epoch": 1.4137279908255973, "eval_runtime": 356.3088, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4897861725236174, "eval_sts_eval_spearman_cosine": 0.27842670145628373, "step": 286000 }, { "epoch": 1.4161995432571106, "grad_norm": 0.3829602003097534, "learning_rate": 1.2922984496028206e-05, "loss": 0.0388, "step": 286500 }, { "epoch": 1.4161995432571106, "eval_runtime": 358.9338, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49341012918914035, "eval_sts_eval_spearman_cosine": 0.28128676179902634, "step": 286500 }, { "epoch": 1.418671095688624, "grad_norm": 0.3277010917663574, "learning_rate": 1.2910622853709854e-05, "loss": 0.0397, "step": 287000 }, { "epoch": 1.418671095688624, "eval_runtime": 350.0352, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4919289017910807, "eval_sts_eval_spearman_cosine": 0.2812679132821718, "step": 287000 }, { "epoch": 1.4211426481201372, "grad_norm": 0.3177352845668793, "learning_rate": 1.28982612113915e-05, "loss": 0.0404, "step": 287500 }, { "epoch": 1.4211426481201372, "eval_runtime": 353.597, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.494060188337331, "eval_sts_eval_spearman_cosine": 0.28079182301122607, "step": 287500 }, { "epoch": 1.4236142005516506, "grad_norm": 0.4910931885242462, "learning_rate": 1.288589956907315e-05, "loss": 0.0374, "step": 288000 }, { "epoch": 1.4236142005516506, "eval_runtime": 350.8543, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4931009389529387, "eval_sts_eval_spearman_cosine": 0.2792493017068831, "step": 288000 }, { "epoch": 1.4260857529831639, "grad_norm": 0.28777363896369934, "learning_rate": 1.2873537926754798e-05, "loss": 0.041, "step": 288500 }, { "epoch": 1.4260857529831639, "eval_runtime": 366.3902, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4853632329806369, "eval_sts_eval_spearman_cosine": 0.2723723886263026, "step": 288500 }, { "epoch": 1.4285573054146772, "grad_norm": 0.2960756719112396, "learning_rate": 1.2861176284436444e-05, "loss": 0.0409, "step": 289000 }, { "epoch": 1.4285573054146772, "eval_runtime": 362.1506, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4917928274889169, "eval_sts_eval_spearman_cosine": 0.2769621804772828, "step": 289000 }, { "epoch": 1.4310288578461903, "grad_norm": 0.3329346477985382, "learning_rate": 1.2848814642118094e-05, "loss": 0.04, "step": 289500 }, { "epoch": 1.4310288578461903, "eval_runtime": 361.3334, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49323567581579403, "eval_sts_eval_spearman_cosine": 0.27892146403858603, "step": 289500 }, { "epoch": 1.4335004102777036, "grad_norm": 0.4108819365501404, "learning_rate": 1.2836452999799742e-05, "loss": 0.0412, "step": 290000 }, { "epoch": 1.4335004102777036, "eval_runtime": 352.3282, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4899510851954545, "eval_sts_eval_spearman_cosine": 0.2754064594353126, "step": 290000 }, { "epoch": 1.435971962709217, "grad_norm": 0.14604155719280243, "learning_rate": 1.282409135748139e-05, "loss": 0.0404, "step": 290500 }, { "epoch": 1.435971962709217, "eval_runtime": 346.2623, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49222606313555634, "eval_sts_eval_spearman_cosine": 0.2779603196102458, "step": 290500 }, { "epoch": 1.4384435151407302, "grad_norm": 0.5445525646209717, "learning_rate": 1.2811729715163039e-05, "loss": 0.0406, "step": 291000 }, { "epoch": 1.4384435151407302, "eval_runtime": 349.128, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49400961499210627, "eval_sts_eval_spearman_cosine": 0.27941609224320946, "step": 291000 }, { "epoch": 1.4409150675722435, "grad_norm": 0.45638760924339294, "learning_rate": 1.2799368072844687e-05, "loss": 0.0387, "step": 291500 }, { "epoch": 1.4409150675722435, "eval_runtime": 346.2375, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49311568027555674, "eval_sts_eval_spearman_cosine": 0.2775814247887888, "step": 291500 }, { "epoch": 1.4433866200037568, "grad_norm": 0.2408657670021057, "learning_rate": 1.2787006430526335e-05, "loss": 0.037, "step": 292000 }, { "epoch": 1.4433866200037568, "eval_runtime": 349.3213, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4921297632901436, "eval_sts_eval_spearman_cosine": 0.28014725278647706, "step": 292000 }, { "epoch": 1.44585817243527, "grad_norm": 0.17430691421031952, "learning_rate": 1.2774644788207985e-05, "loss": 0.0394, "step": 292500 }, { "epoch": 1.44585817243527, "eval_runtime": 352.0051, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4921008689344246, "eval_sts_eval_spearman_cosine": 0.27777011354963704, "step": 292500 }, { "epoch": 1.4483297248667832, "grad_norm": 0.1842302829027176, "learning_rate": 1.2762283145889631e-05, "loss": 0.0406, "step": 293000 }, { "epoch": 1.4483297248667832, "eval_runtime": 348.4005, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4930271508654208, "eval_sts_eval_spearman_cosine": 0.27861003608414076, "step": 293000 }, { "epoch": 1.4508012772982966, "grad_norm": 0.4643767476081848, "learning_rate": 1.2749921503571279e-05, "loss": 0.0401, "step": 293500 }, { "epoch": 1.4508012772982966, "eval_runtime": 345.959, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49575391003716707, "eval_sts_eval_spearman_cosine": 0.28272485009175763, "step": 293500 }, { "epoch": 1.4532728297298099, "grad_norm": 0.35892894864082336, "learning_rate": 1.2737559861252929e-05, "loss": 0.0388, "step": 294000 }, { "epoch": 1.4532728297298099, "eval_runtime": 347.9403, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4914491765844172, "eval_sts_eval_spearman_cosine": 0.2769901712581849, "step": 294000 }, { "epoch": 1.4557443821613232, "grad_norm": 0.5773041844367981, "learning_rate": 1.2725198218934575e-05, "loss": 0.0377, "step": 294500 }, { "epoch": 1.4557443821613232, "eval_runtime": 351.4117, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49147167596406444, "eval_sts_eval_spearman_cosine": 0.2767588371946935, "step": 294500 }, { "epoch": 1.4582159345928365, "grad_norm": 0.3249050974845886, "learning_rate": 1.2712836576616223e-05, "loss": 0.0386, "step": 295000 }, { "epoch": 1.4582159345928365, "eval_runtime": 350.3254, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4920997705297373, "eval_sts_eval_spearman_cosine": 0.2772686321925923, "step": 295000 }, { "epoch": 1.4606874870243498, "grad_norm": 0.3675588369369507, "learning_rate": 1.2700474934297873e-05, "loss": 0.04, "step": 295500 }, { "epoch": 1.4606874870243498, "eval_runtime": 347.3235, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4899952585947426, "eval_sts_eval_spearman_cosine": 0.27826614498117913, "step": 295500 }, { "epoch": 1.4631590394558631, "grad_norm": 0.5125758647918701, "learning_rate": 1.268811329197952e-05, "loss": 0.0402, "step": 296000 }, { "epoch": 1.4631590394558631, "eval_runtime": 351.3637, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4928425385906473, "eval_sts_eval_spearman_cosine": 0.27801339523553165, "step": 296000 }, { "epoch": 1.4656305918873764, "grad_norm": 0.31151118874549866, "learning_rate": 1.2675751649661168e-05, "loss": 0.0401, "step": 296500 }, { "epoch": 1.4656305918873764, "eval_runtime": 348.624, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4959317589885832, "eval_sts_eval_spearman_cosine": 0.2820246502790184, "step": 296500 }, { "epoch": 1.4681021443188895, "grad_norm": 0.34112969040870667, "learning_rate": 1.2663390007342818e-05, "loss": 0.0393, "step": 297000 }, { "epoch": 1.4681021443188895, "eval_runtime": 345.5247, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49204980479992894, "eval_sts_eval_spearman_cosine": 0.27898801697438036, "step": 297000 }, { "epoch": 1.4705736967504028, "grad_norm": 0.3504377603530884, "learning_rate": 1.2651028365024464e-05, "loss": 0.0394, "step": 297500 }, { "epoch": 1.4705736967504028, "eval_runtime": 348.6511, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4908078473781312, "eval_sts_eval_spearman_cosine": 0.2786608065452842, "step": 297500 }, { "epoch": 1.4730452491819161, "grad_norm": 0.11602591723203659, "learning_rate": 1.2638666722706112e-05, "loss": 0.0392, "step": 298000 }, { "epoch": 1.4730452491819161, "eval_runtime": 365.8047, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4884148074068027, "eval_sts_eval_spearman_cosine": 0.27585033418692895, "step": 298000 }, { "epoch": 1.4755168016134295, "grad_norm": 0.27884018421173096, "learning_rate": 1.2626305080387762e-05, "loss": 0.0396, "step": 298500 }, { "epoch": 1.4755168016134295, "eval_runtime": 351.9176, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48963771964091085, "eval_sts_eval_spearman_cosine": 0.27673103251331166, "step": 298500 }, { "epoch": 1.4779883540449428, "grad_norm": 0.5166835188865662, "learning_rate": 1.261394343806941e-05, "loss": 0.0379, "step": 299000 }, { "epoch": 1.4779883540449428, "eval_runtime": 348.2691, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48910450107040054, "eval_sts_eval_spearman_cosine": 0.2751651264593256, "step": 299000 }, { "epoch": 1.4804599064764559, "grad_norm": 0.3240971267223358, "learning_rate": 1.2601581795751056e-05, "loss": 0.039, "step": 299500 }, { "epoch": 1.4804599064764559, "eval_runtime": 346.3187, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4895852724741694, "eval_sts_eval_spearman_cosine": 0.2741732353754362, "step": 299500 }, { "epoch": 1.4829314589079692, "grad_norm": 0.3945676386356354, "learning_rate": 1.2589220153432706e-05, "loss": 0.0383, "step": 300000 }, { "epoch": 1.4829314589079692, "eval_runtime": 348.5196, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4874381192967956, "eval_sts_eval_spearman_cosine": 0.2749659033856894, "step": 300000 }, { "epoch": 1.4854030113394825, "grad_norm": 0.45276471972465515, "learning_rate": 1.2576858511114354e-05, "loss": 0.0398, "step": 300500 }, { "epoch": 1.4854030113394825, "eval_runtime": 351.9465, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48828188372514714, "eval_sts_eval_spearman_cosine": 0.27410478365719804, "step": 300500 }, { "epoch": 1.4878745637709958, "grad_norm": 0.7216395735740662, "learning_rate": 1.2564496868796e-05, "loss": 0.0394, "step": 301000 }, { "epoch": 1.4878745637709958, "eval_runtime": 354.3983, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4888448337597218, "eval_sts_eval_spearman_cosine": 0.27494399140150794, "step": 301000 }, { "epoch": 1.4903461162025091, "grad_norm": 0.33224913477897644, "learning_rate": 1.255213522647765e-05, "loss": 0.0416, "step": 301500 }, { "epoch": 1.4903461162025091, "eval_runtime": 351.3589, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48770412021650117, "eval_sts_eval_spearman_cosine": 0.27283749178473365, "step": 301500 }, { "epoch": 1.4928176686340224, "grad_norm": 0.22364506125450134, "learning_rate": 1.2539773584159299e-05, "loss": 0.0388, "step": 302000 }, { "epoch": 1.4928176686340224, "eval_runtime": 347.0456, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4905288231787451, "eval_sts_eval_spearman_cosine": 0.27510124732689406, "step": 302000 }, { "epoch": 1.4952892210655357, "grad_norm": 0.19880814850330353, "learning_rate": 1.2527411941840945e-05, "loss": 0.041, "step": 302500 }, { "epoch": 1.4952892210655357, "eval_runtime": 344.188, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49079793230857705, "eval_sts_eval_spearman_cosine": 0.27591283549397133, "step": 302500 }, { "epoch": 1.497760773497049, "grad_norm": 0.5940434336662292, "learning_rate": 1.2515050299522595e-05, "loss": 0.0405, "step": 303000 }, { "epoch": 1.497760773497049, "eval_runtime": 344.6215, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49067242876750766, "eval_sts_eval_spearman_cosine": 0.2743601861033651, "step": 303000 }, { "epoch": 1.5002323259285624, "grad_norm": 0.29406917095184326, "learning_rate": 1.2502688657204243e-05, "loss": 0.0397, "step": 303500 }, { "epoch": 1.5002323259285624, "eval_runtime": 349.9848, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49059752191241024, "eval_sts_eval_spearman_cosine": 0.2733739583374886, "step": 303500 }, { "epoch": 1.5027038783600757, "grad_norm": 0.6097140908241272, "learning_rate": 1.249032701488589e-05, "loss": 0.0413, "step": 304000 }, { "epoch": 1.5027038783600757, "eval_runtime": 352.0157, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49227632818378764, "eval_sts_eval_spearman_cosine": 0.27617010374824386, "step": 304000 }, { "epoch": 1.5051754307915888, "grad_norm": 0.45249441266059875, "learning_rate": 1.2477965372567539e-05, "loss": 0.0412, "step": 304500 }, { "epoch": 1.5051754307915888, "eval_runtime": 347.0436, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4943624228245072, "eval_sts_eval_spearman_cosine": 0.2753617385533702, "step": 304500 }, { "epoch": 1.507646983223102, "grad_norm": 0.2287484109401703, "learning_rate": 1.2465603730249187e-05, "loss": 0.0386, "step": 305000 }, { "epoch": 1.507646983223102, "eval_runtime": 356.3136, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4973223771792854, "eval_sts_eval_spearman_cosine": 0.27869960257601406, "step": 305000 }, { "epoch": 1.5101185356546154, "grad_norm": 0.2343246042728424, "learning_rate": 1.2453242087930835e-05, "loss": 0.0377, "step": 305500 }, { "epoch": 1.5101185356546154, "eval_runtime": 345.5105, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4953556627379743, "eval_sts_eval_spearman_cosine": 0.27899519573998793, "step": 305500 }, { "epoch": 1.5125900880861287, "grad_norm": 0.4245617091655731, "learning_rate": 1.2440880445612485e-05, "loss": 0.0395, "step": 306000 }, { "epoch": 1.5125900880861287, "eval_runtime": 345.5354, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4947930634650026, "eval_sts_eval_spearman_cosine": 0.2784002158054793, "step": 306000 }, { "epoch": 1.5150616405176418, "grad_norm": 0.367117315530777, "learning_rate": 1.2428518803294132e-05, "loss": 0.0423, "step": 306500 }, { "epoch": 1.5150616405176418, "eval_runtime": 348.981, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49543412346717103, "eval_sts_eval_spearman_cosine": 0.27971573681576106, "step": 306500 }, { "epoch": 1.5175331929491551, "grad_norm": 0.2573102116584778, "learning_rate": 1.241615716097578e-05, "loss": 0.0396, "step": 307000 }, { "epoch": 1.5175331929491551, "eval_runtime": 345.2236, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49782108830056726, "eval_sts_eval_spearman_cosine": 0.2818947282766668, "step": 307000 }, { "epoch": 1.5200047453806684, "grad_norm": 0.20630061626434326, "learning_rate": 1.240379551865743e-05, "loss": 0.0395, "step": 307500 }, { "epoch": 1.5200047453806684, "eval_runtime": 343.324, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4972034420673519, "eval_sts_eval_spearman_cosine": 0.28101340377841616, "step": 307500 }, { "epoch": 1.5224762978121817, "grad_norm": 0.4545046389102936, "learning_rate": 1.2391433876339076e-05, "loss": 0.04, "step": 308000 }, { "epoch": 1.5224762978121817, "eval_runtime": 354.2555, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5008785187577119, "eval_sts_eval_spearman_cosine": 0.28276697669782114, "step": 308000 }, { "epoch": 1.524947850243695, "grad_norm": 0.6124901175498962, "learning_rate": 1.2379072234020724e-05, "loss": 0.0373, "step": 308500 }, { "epoch": 1.524947850243695, "eval_runtime": 369.2399, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5029473205983418, "eval_sts_eval_spearman_cosine": 0.2839819319590378, "step": 308500 }, { "epoch": 1.5274194026752084, "grad_norm": 0.38717591762542725, "learning_rate": 1.2366710591702374e-05, "loss": 0.0385, "step": 309000 }, { "epoch": 1.5274194026752084, "eval_runtime": 354.1655, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5032776360661416, "eval_sts_eval_spearman_cosine": 0.28728226389394357, "step": 309000 }, { "epoch": 1.5298909551067217, "grad_norm": 0.3978128731250763, "learning_rate": 1.235434894938402e-05, "loss": 0.0401, "step": 309500 }, { "epoch": 1.5298909551067217, "eval_runtime": 369.4216, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5025923588535678, "eval_sts_eval_spearman_cosine": 0.2853586789042708, "step": 309500 }, { "epoch": 1.532362507538235, "grad_norm": 0.47943806648254395, "learning_rate": 1.2341987307065668e-05, "loss": 0.0404, "step": 310000 }, { "epoch": 1.532362507538235, "eval_runtime": 358.9653, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.500009242935575, "eval_sts_eval_spearman_cosine": 0.2850968494617718, "step": 310000 }, { "epoch": 1.5348340599697483, "grad_norm": 0.18671487271785736, "learning_rate": 1.2329625664747316e-05, "loss": 0.0404, "step": 310500 }, { "epoch": 1.5348340599697483, "eval_runtime": 350.7855, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5004970494373437, "eval_sts_eval_spearman_cosine": 0.2849150898176634, "step": 310500 }, { "epoch": 1.5373056124012616, "grad_norm": 0.37264350056648254, "learning_rate": 1.2317264022428965e-05, "loss": 0.0407, "step": 311000 }, { "epoch": 1.5373056124012616, "eval_runtime": 350.5876, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5011897801347391, "eval_sts_eval_spearman_cosine": 0.2840468435322043, "step": 311000 }, { "epoch": 1.5397771648327747, "grad_norm": 0.29517483711242676, "learning_rate": 1.2304902380110613e-05, "loss": 0.0389, "step": 311500 }, { "epoch": 1.5397771648327747, "eval_runtime": 347.2408, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5006240659054946, "eval_sts_eval_spearman_cosine": 0.28552410411822526, "step": 311500 }, { "epoch": 1.542248717264288, "grad_norm": 0.33081158995628357, "learning_rate": 1.229254073779226e-05, "loss": 0.0403, "step": 312000 }, { "epoch": 1.542248717264288, "eval_runtime": 347.4625, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5020307102325439, "eval_sts_eval_spearman_cosine": 0.28545244638378864, "step": 312000 }, { "epoch": 1.5447202696958013, "grad_norm": 0.8986366987228394, "learning_rate": 1.228017909547391e-05, "loss": 0.0395, "step": 312500 }, { "epoch": 1.5447202696958013, "eval_runtime": 346.4861, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.500735687931723, "eval_sts_eval_spearman_cosine": 0.28298109478022804, "step": 312500 }, { "epoch": 1.5471918221273147, "grad_norm": 0.31971216201782227, "learning_rate": 1.2267817453155557e-05, "loss": 0.0419, "step": 313000 }, { "epoch": 1.5471918221273147, "eval_runtime": 356.6093, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49885356728385655, "eval_sts_eval_spearman_cosine": 0.2824046734322385, "step": 313000 }, { "epoch": 1.5496633745588277, "grad_norm": 0.60982346534729, "learning_rate": 1.2255455810837205e-05, "loss": 0.0389, "step": 313500 }, { "epoch": 1.5496633745588277, "eval_runtime": 351.9126, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4983916541721972, "eval_sts_eval_spearman_cosine": 0.28219223927477943, "step": 313500 }, { "epoch": 1.552134926990341, "grad_norm": 0.24289824068546295, "learning_rate": 1.2243094168518855e-05, "loss": 0.0382, "step": 314000 }, { "epoch": 1.552134926990341, "eval_runtime": 353.0998, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5000475007431389, "eval_sts_eval_spearman_cosine": 0.28570039182114015, "step": 314000 }, { "epoch": 1.5546064794218544, "grad_norm": 0.25680819153785706, "learning_rate": 1.2230732526200501e-05, "loss": 0.0383, "step": 314500 }, { "epoch": 1.5546064794218544, "eval_runtime": 345.8553, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5012613050205126, "eval_sts_eval_spearman_cosine": 0.28444598206176663, "step": 314500 }, { "epoch": 1.5570780318533677, "grad_norm": 0.199354887008667, "learning_rate": 1.221837088388215e-05, "loss": 0.0415, "step": 315000 }, { "epoch": 1.5570780318533677, "eval_runtime": 346.2529, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.499341294604607, "eval_sts_eval_spearman_cosine": 0.28186084705372355, "step": 315000 }, { "epoch": 1.559549584284881, "grad_norm": 0.5362756252288818, "learning_rate": 1.22060092415638e-05, "loss": 0.04, "step": 315500 }, { "epoch": 1.559549584284881, "eval_runtime": 346.3994, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4960734640864559, "eval_sts_eval_spearman_cosine": 0.2820067165367542, "step": 315500 }, { "epoch": 1.5620211367163943, "grad_norm": 0.3035709857940674, "learning_rate": 1.2193647599245446e-05, "loss": 0.0395, "step": 316000 }, { "epoch": 1.5620211367163943, "eval_runtime": 351.3904, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49897346498578904, "eval_sts_eval_spearman_cosine": 0.28485629429431414, "step": 316000 }, { "epoch": 1.5644926891479076, "grad_norm": 0.2682557702064514, "learning_rate": 1.2181285956927094e-05, "loss": 0.0392, "step": 316500 }, { "epoch": 1.5644926891479076, "eval_runtime": 355.0169, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49928137227815605, "eval_sts_eval_spearman_cosine": 0.2841388701784585, "step": 316500 }, { "epoch": 1.566964241579421, "grad_norm": 0.36601677536964417, "learning_rate": 1.2168924314608743e-05, "loss": 0.0408, "step": 317000 }, { "epoch": 1.566964241579421, "eval_runtime": 368.786, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49770200176219165, "eval_sts_eval_spearman_cosine": 0.2833867258030393, "step": 317000 }, { "epoch": 1.5694357940109342, "grad_norm": 0.3721981644630432, "learning_rate": 1.215656267229039e-05, "loss": 0.0415, "step": 317500 }, { "epoch": 1.5694357940109342, "eval_runtime": 352.875, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49918043022286773, "eval_sts_eval_spearman_cosine": 0.28158887855173104, "step": 317500 }, { "epoch": 1.5719073464424476, "grad_norm": 0.31036579608917236, "learning_rate": 1.2144201029972038e-05, "loss": 0.0386, "step": 318000 }, { "epoch": 1.5719073464424476, "eval_runtime": 356.6837, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5010618281502602, "eval_sts_eval_spearman_cosine": 0.28317843552014366, "step": 318000 }, { "epoch": 1.5743788988739609, "grad_norm": 0.34494131803512573, "learning_rate": 1.2131839387653688e-05, "loss": 0.039, "step": 318500 }, { "epoch": 1.5743788988739609, "eval_runtime": 353.2956, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4978909767002847, "eval_sts_eval_spearman_cosine": 0.28232619777737994, "step": 318500 }, { "epoch": 1.576850451305474, "grad_norm": 0.5111923217773438, "learning_rate": 1.2119477745335336e-05, "loss": 0.0419, "step": 319000 }, { "epoch": 1.576850451305474, "eval_runtime": 348.4996, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4989783995862921, "eval_sts_eval_spearman_cosine": 0.28356437498403797, "step": 319000 }, { "epoch": 1.5793220037369873, "grad_norm": 0.6602091789245605, "learning_rate": 1.2107116103016982e-05, "loss": 0.0389, "step": 319500 }, { "epoch": 1.5793220037369873, "eval_runtime": 359.4639, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49964715669972265, "eval_sts_eval_spearman_cosine": 0.2845208227312535, "step": 319500 }, { "epoch": 1.5817935561685006, "grad_norm": 0.4062168300151825, "learning_rate": 1.2094754460698632e-05, "loss": 0.0391, "step": 320000 }, { "epoch": 1.5817935561685006, "eval_runtime": 353.4371, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49941126677393927, "eval_sts_eval_spearman_cosine": 0.28525516235586224, "step": 320000 }, { "epoch": 1.5842651086000137, "grad_norm": 0.3327926993370056, "learning_rate": 1.208239281838028e-05, "loss": 0.0381, "step": 320500 }, { "epoch": 1.5842651086000137, "eval_runtime": 362.29, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4989898483749419, "eval_sts_eval_spearman_cosine": 0.2844725828945601, "step": 320500 }, { "epoch": 1.586736661031527, "grad_norm": 0.33344340324401855, "learning_rate": 1.2070031176061927e-05, "loss": 0.0365, "step": 321000 }, { "epoch": 1.586736661031527, "eval_runtime": 352.7769, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4956129655683651, "eval_sts_eval_spearman_cosine": 0.28148485076729046, "step": 321000 }, { "epoch": 1.5892082134630403, "grad_norm": 0.2020253837108612, "learning_rate": 1.2057669533743576e-05, "loss": 0.0416, "step": 321500 }, { "epoch": 1.5892082134630403, "eval_runtime": 354.4049, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5020746755751532, "eval_sts_eval_spearman_cosine": 0.2843011727633603, "step": 321500 }, { "epoch": 1.5916797658945536, "grad_norm": 0.3160761594772339, "learning_rate": 1.2045307891425225e-05, "loss": 0.039, "step": 322000 }, { "epoch": 1.5916797658945536, "eval_runtime": 354.1892, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.503039671405954, "eval_sts_eval_spearman_cosine": 0.2849200464231155, "step": 322000 }, { "epoch": 1.594151318326067, "grad_norm": 0.541857898235321, "learning_rate": 1.2032946249106871e-05, "loss": 0.0419, "step": 322500 }, { "epoch": 1.594151318326067, "eval_runtime": 354.0159, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5008524319444378, "eval_sts_eval_spearman_cosine": 0.28333586219874024, "step": 322500 }, { "epoch": 1.5966228707575802, "grad_norm": 0.2577064633369446, "learning_rate": 1.202058460678852e-05, "loss": 0.0393, "step": 323000 }, { "epoch": 1.5966228707575802, "eval_runtime": 352.91, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5010455174865548, "eval_sts_eval_spearman_cosine": 0.28339854978347173, "step": 323000 }, { "epoch": 1.5990944231890936, "grad_norm": 0.2659420967102051, "learning_rate": 1.2008222964470169e-05, "loss": 0.039, "step": 323500 }, { "epoch": 1.5990944231890936, "eval_runtime": 349.9222, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5017219547537015, "eval_sts_eval_spearman_cosine": 0.2857238234957337, "step": 323500 }, { "epoch": 1.6015659756206069, "grad_norm": 0.26509514451026917, "learning_rate": 1.1995861322151815e-05, "loss": 0.0394, "step": 324000 }, { "epoch": 1.6015659756206069, "eval_runtime": 352.4492, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.498191992252242, "eval_sts_eval_spearman_cosine": 0.2835046658706402, "step": 324000 }, { "epoch": 1.6040375280521202, "grad_norm": 0.3214963376522064, "learning_rate": 1.1983499679833465e-05, "loss": 0.0395, "step": 324500 }, { "epoch": 1.6040375280521202, "eval_runtime": 354.9395, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49659789613996685, "eval_sts_eval_spearman_cosine": 0.2820393173193108, "step": 324500 }, { "epoch": 1.6065090804836335, "grad_norm": 0.18213170766830444, "learning_rate": 1.1971138037515113e-05, "loss": 0.0413, "step": 325000 }, { "epoch": 1.6065090804836335, "eval_runtime": 353.9657, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4965711231855421, "eval_sts_eval_spearman_cosine": 0.2826759896493402, "step": 325000 }, { "epoch": 1.6089806329151468, "grad_norm": 0.6484494209289551, "learning_rate": 1.1958776395196761e-05, "loss": 0.0411, "step": 325500 }, { "epoch": 1.6089806329151468, "eval_runtime": 345.8352, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4973838106986685, "eval_sts_eval_spearman_cosine": 0.28387090142331245, "step": 325500 }, { "epoch": 1.61145218534666, "grad_norm": 0.4134303033351898, "learning_rate": 1.194641475287841e-05, "loss": 0.0387, "step": 326000 }, { "epoch": 1.61145218534666, "eval_runtime": 346.0085, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49863090055230946, "eval_sts_eval_spearman_cosine": 0.28444774969358283, "step": 326000 }, { "epoch": 1.6139237377781732, "grad_norm": 0.183598130941391, "learning_rate": 1.1934053110560058e-05, "loss": 0.0399, "step": 326500 }, { "epoch": 1.6139237377781732, "eval_runtime": 344.849, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5030397280747549, "eval_sts_eval_spearman_cosine": 0.28725158703118236, "step": 326500 }, { "epoch": 1.6163952902096865, "grad_norm": 0.5429816842079163, "learning_rate": 1.1921691468241706e-05, "loss": 0.0401, "step": 327000 }, { "epoch": 1.6163952902096865, "eval_runtime": 350.1751, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.502432584004638, "eval_sts_eval_spearman_cosine": 0.2871387751793362, "step": 327000 }, { "epoch": 1.6188668426411998, "grad_norm": 0.6115456819534302, "learning_rate": 1.1909329825923355e-05, "loss": 0.0413, "step": 327500 }, { "epoch": 1.6188668426411998, "eval_runtime": 350.4861, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5019722605424248, "eval_sts_eval_spearman_cosine": 0.28402214132152714, "step": 327500 }, { "epoch": 1.621338395072713, "grad_norm": 0.2846935987472534, "learning_rate": 1.1896968183605002e-05, "loss": 0.0385, "step": 328000 }, { "epoch": 1.621338395072713, "eval_runtime": 352.1489, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5036217076501257, "eval_sts_eval_spearman_cosine": 0.28458276508594227, "step": 328000 }, { "epoch": 1.6238099475042262, "grad_norm": 0.4393136203289032, "learning_rate": 1.188460654128665e-05, "loss": 0.0401, "step": 328500 }, { "epoch": 1.6238099475042262, "eval_runtime": 358.3931, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.502102359295973, "eval_sts_eval_spearman_cosine": 0.28550020214388894, "step": 328500 }, { "epoch": 1.6262814999357396, "grad_norm": 0.3991912603378296, "learning_rate": 1.18722448989683e-05, "loss": 0.0402, "step": 329000 }, { "epoch": 1.6262814999357396, "eval_runtime": 354.1578, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5004626845952831, "eval_sts_eval_spearman_cosine": 0.28359130808485467, "step": 329000 }, { "epoch": 1.6287530523672529, "grad_norm": 0.35587528347969055, "learning_rate": 1.1859883256649946e-05, "loss": 0.0391, "step": 329500 }, { "epoch": 1.6287530523672529, "eval_runtime": 357.3808, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5005491767372595, "eval_sts_eval_spearman_cosine": 0.28448979689818615, "step": 329500 }, { "epoch": 1.6312246047987662, "grad_norm": 0.3087911605834961, "learning_rate": 1.1847521614331594e-05, "loss": 0.0395, "step": 330000 }, { "epoch": 1.6312246047987662, "eval_runtime": 355.6406, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5004689064289725, "eval_sts_eval_spearman_cosine": 0.2849688205298449, "step": 330000 }, { "epoch": 1.6336961572302795, "grad_norm": 0.1888357251882553, "learning_rate": 1.1835159972013244e-05, "loss": 0.0397, "step": 330500 }, { "epoch": 1.6336961572302795, "eval_runtime": 360.255, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49746203444241616, "eval_sts_eval_spearman_cosine": 0.2847414065812019, "step": 330500 }, { "epoch": 1.6361677096617928, "grad_norm": 0.13858221471309662, "learning_rate": 1.182279832969489e-05, "loss": 0.0387, "step": 331000 }, { "epoch": 1.6361677096617928, "eval_runtime": 359.654, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5045556099993986, "eval_sts_eval_spearman_cosine": 0.2890314273993812, "step": 331000 }, { "epoch": 1.6386392620933061, "grad_norm": 0.45203831791877747, "learning_rate": 1.1810436687376539e-05, "loss": 0.0387, "step": 331500 }, { "epoch": 1.6386392620933061, "eval_runtime": 352.9764, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.501511607493605, "eval_sts_eval_spearman_cosine": 0.28378801637503265, "step": 331500 }, { "epoch": 1.6411108145248194, "grad_norm": 0.4553143084049225, "learning_rate": 1.1798075045058188e-05, "loss": 0.0395, "step": 332000 }, { "epoch": 1.6411108145248194, "eval_runtime": 351.0495, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5007400346375048, "eval_sts_eval_spearman_cosine": 0.28425551518197506, "step": 332000 }, { "epoch": 1.6435823669563328, "grad_norm": 0.28412994742393494, "learning_rate": 1.1785713402739836e-05, "loss": 0.0381, "step": 332500 }, { "epoch": 1.6435823669563328, "eval_runtime": 348.0381, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5040156300753137, "eval_sts_eval_spearman_cosine": 0.2848371601538386, "step": 332500 }, { "epoch": 1.6460539193878458, "grad_norm": 0.1765538603067398, "learning_rate": 1.1773351760421483e-05, "loss": 0.0389, "step": 333000 }, { "epoch": 1.6460539193878458, "eval_runtime": 347.4497, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5033458707180452, "eval_sts_eval_spearman_cosine": 0.2855222792215426, "step": 333000 }, { "epoch": 1.6485254718193592, "grad_norm": 0.5877232551574707, "learning_rate": 1.1760990118103133e-05, "loss": 0.0377, "step": 333500 }, { "epoch": 1.6485254718193592, "eval_runtime": 355.573, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5018205900882207, "eval_sts_eval_spearman_cosine": 0.28417370379005763, "step": 333500 }, { "epoch": 1.6509970242508725, "grad_norm": 0.6105650663375854, "learning_rate": 1.174862847578478e-05, "loss": 0.0385, "step": 334000 }, { "epoch": 1.6509970242508725, "eval_runtime": 357.4619, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.500412956096121, "eval_sts_eval_spearman_cosine": 0.28274052047703147, "step": 334000 }, { "epoch": 1.6534685766823858, "grad_norm": 0.2774316072463989, "learning_rate": 1.1736266833466427e-05, "loss": 0.0408, "step": 334500 }, { "epoch": 1.6534685766823858, "eval_runtime": 357.7982, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5016071843942964, "eval_sts_eval_spearman_cosine": 0.28404067831582264, "step": 334500 }, { "epoch": 1.6559401291138989, "grad_norm": 0.3393745720386505, "learning_rate": 1.1723905191148077e-05, "loss": 0.0396, "step": 335000 }, { "epoch": 1.6559401291138989, "eval_runtime": 347.9661, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5010287550681064, "eval_sts_eval_spearman_cosine": 0.2851606064951529, "step": 335000 }, { "epoch": 1.6584116815454122, "grad_norm": 0.38425537943840027, "learning_rate": 1.1711543548829725e-05, "loss": 0.0395, "step": 335500 }, { "epoch": 1.6584116815454122, "eval_runtime": 350.1674, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5017292663019395, "eval_sts_eval_spearman_cosine": 0.2849742755664728, "step": 335500 }, { "epoch": 1.6608832339769255, "grad_norm": 0.5884998440742493, "learning_rate": 1.1699181906511372e-05, "loss": 0.042, "step": 336000 }, { "epoch": 1.6608832339769255, "eval_runtime": 354.2564, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5009279093683687, "eval_sts_eval_spearman_cosine": 0.28319614651408975, "step": 336000 }, { "epoch": 1.6633547864084388, "grad_norm": 0.40775129199028015, "learning_rate": 1.168682026419302e-05, "loss": 0.0403, "step": 336500 }, { "epoch": 1.6633547864084388, "eval_runtime": 353.7225, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5023868602281306, "eval_sts_eval_spearman_cosine": 0.28548701798221615, "step": 336500 }, { "epoch": 1.6658263388399521, "grad_norm": 0.43349096179008484, "learning_rate": 1.167445862187467e-05, "loss": 0.0386, "step": 337000 }, { "epoch": 1.6658263388399521, "eval_runtime": 349.2829, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5028371954307572, "eval_sts_eval_spearman_cosine": 0.28404036724921183, "step": 337000 }, { "epoch": 1.6682978912714654, "grad_norm": 0.6170112490653992, "learning_rate": 1.1662096979556316e-05, "loss": 0.0409, "step": 337500 }, { "epoch": 1.6682978912714654, "eval_runtime": 347.3779, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49836842766025014, "eval_sts_eval_spearman_cosine": 0.2803817153306754, "step": 337500 }, { "epoch": 1.6707694437029788, "grad_norm": 0.48710036277770996, "learning_rate": 1.1649735337237964e-05, "loss": 0.0412, "step": 338000 }, { "epoch": 1.6707694437029788, "eval_runtime": 351.178, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5002191466727988, "eval_sts_eval_spearman_cosine": 0.2846745191350612, "step": 338000 }, { "epoch": 1.673240996134492, "grad_norm": 0.32358574867248535, "learning_rate": 1.1637373694919614e-05, "loss": 0.0411, "step": 338500 }, { "epoch": 1.673240996134492, "eval_runtime": 351.1383, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4991090976014429, "eval_sts_eval_spearman_cosine": 0.2825888483379456, "step": 338500 }, { "epoch": 1.6757125485660054, "grad_norm": 0.3418899178504944, "learning_rate": 1.1625012052601262e-05, "loss": 0.0405, "step": 339000 }, { "epoch": 1.6757125485660054, "eval_runtime": 349.5427, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4997880695196612, "eval_sts_eval_spearman_cosine": 0.2841454950403114, "step": 339000 }, { "epoch": 1.6781841009975187, "grad_norm": 0.24693816900253296, "learning_rate": 1.1612650410282908e-05, "loss": 0.0393, "step": 339500 }, { "epoch": 1.6781841009975187, "eval_runtime": 347.6072, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49617004417708593, "eval_sts_eval_spearman_cosine": 0.28096539064744386, "step": 339500 }, { "epoch": 1.680655653429032, "grad_norm": 0.3163521885871887, "learning_rate": 1.1600288767964558e-05, "loss": 0.0398, "step": 340000 }, { "epoch": 1.680655653429032, "eval_runtime": 346.4468, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49906282159457904, "eval_sts_eval_spearman_cosine": 0.28405298925543687, "step": 340000 }, { "epoch": 1.683127205860545, "grad_norm": 0.2401595115661621, "learning_rate": 1.1587927125646206e-05, "loss": 0.0392, "step": 340500 }, { "epoch": 1.683127205860545, "eval_runtime": 353.8565, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49884243942720485, "eval_sts_eval_spearman_cosine": 0.284442498155551, "step": 340500 }, { "epoch": 1.6855987582920584, "grad_norm": 0.2379930317401886, "learning_rate": 1.1575565483327853e-05, "loss": 0.0411, "step": 341000 }, { "epoch": 1.6855987582920584, "eval_runtime": 349.6692, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4962181095321, "eval_sts_eval_spearman_cosine": 0.284240556605049, "step": 341000 }, { "epoch": 1.6880703107235717, "grad_norm": 0.42507851123809814, "learning_rate": 1.1563203841009502e-05, "loss": 0.0405, "step": 341500 }, { "epoch": 1.6880703107235717, "eval_runtime": 356.3836, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4970615793701769, "eval_sts_eval_spearman_cosine": 0.28510526253458207, "step": 341500 }, { "epoch": 1.6905418631550848, "grad_norm": 0.5372989177703857, "learning_rate": 1.155084219869115e-05, "loss": 0.041, "step": 342000 }, { "epoch": 1.6905418631550848, "eval_runtime": 349.7737, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49476615828189663, "eval_sts_eval_spearman_cosine": 0.2817008856338205, "step": 342000 }, { "epoch": 1.6930134155865981, "grad_norm": 0.3501194715499878, "learning_rate": 1.1538480556372797e-05, "loss": 0.0388, "step": 342500 }, { "epoch": 1.6930134155865981, "eval_runtime": 352.0103, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4945822679093295, "eval_sts_eval_spearman_cosine": 0.2814825157539903, "step": 342500 }, { "epoch": 1.6954849680181114, "grad_norm": 0.2848515808582306, "learning_rate": 1.1526118914054447e-05, "loss": 0.0413, "step": 343000 }, { "epoch": 1.6954849680181114, "eval_runtime": 355.4422, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49566559377757885, "eval_sts_eval_spearman_cosine": 0.28057540228382644, "step": 343000 }, { "epoch": 1.6979565204496248, "grad_norm": 0.17649713158607483, "learning_rate": 1.1513757271736095e-05, "loss": 0.0388, "step": 343500 }, { "epoch": 1.6979565204496248, "eval_runtime": 354.4614, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4994725568143785, "eval_sts_eval_spearman_cosine": 0.28522459806682565, "step": 343500 }, { "epoch": 1.700428072881138, "grad_norm": 0.2205881029367447, "learning_rate": 1.1501395629417741e-05, "loss": 0.0413, "step": 344000 }, { "epoch": 1.700428072881138, "eval_runtime": 350.5506, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49881064502389566, "eval_sts_eval_spearman_cosine": 0.28350500568836223, "step": 344000 }, { "epoch": 1.7028996253126514, "grad_norm": 0.7470040917396545, "learning_rate": 1.1489033987099391e-05, "loss": 0.0405, "step": 344500 }, { "epoch": 1.7028996253126514, "eval_runtime": 353.998, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49445739553899026, "eval_sts_eval_spearman_cosine": 0.2784238787029442, "step": 344500 }, { "epoch": 1.7053711777441647, "grad_norm": 0.8189780712127686, "learning_rate": 1.1476672344781039e-05, "loss": 0.0396, "step": 345000 }, { "epoch": 1.7053711777441647, "eval_runtime": 350.3265, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4993692635254109, "eval_sts_eval_spearman_cosine": 0.2826682794637109, "step": 345000 }, { "epoch": 1.707842730175678, "grad_norm": 0.6331354975700378, "learning_rate": 1.1464310702462687e-05, "loss": 0.0403, "step": 345500 }, { "epoch": 1.707842730175678, "eval_runtime": 361.7125, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4996952204490753, "eval_sts_eval_spearman_cosine": 0.2828646964238709, "step": 345500 }, { "epoch": 1.7103142826071913, "grad_norm": 0.39935311675071716, "learning_rate": 1.1451949060144335e-05, "loss": 0.0411, "step": 346000 }, { "epoch": 1.7103142826071913, "eval_runtime": 358.4846, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49885377764513794, "eval_sts_eval_spearman_cosine": 0.28225856467434585, "step": 346000 }, { "epoch": 1.7127858350387046, "grad_norm": 1.7464274168014526, "learning_rate": 1.1439587417825983e-05, "loss": 0.043, "step": 346500 }, { "epoch": 1.7127858350387046, "eval_runtime": 367.0595, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49983523621790693, "eval_sts_eval_spearman_cosine": 0.2827469897837356, "step": 346500 }, { "epoch": 1.715257387470218, "grad_norm": 0.3631017804145813, "learning_rate": 1.1427225775507632e-05, "loss": 0.0402, "step": 347000 }, { "epoch": 1.715257387470218, "eval_runtime": 356.2735, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4978962713420164, "eval_sts_eval_spearman_cosine": 0.28203860613072845, "step": 347000 }, { "epoch": 1.717728939901731, "grad_norm": 0.6784701943397522, "learning_rate": 1.1414864133189281e-05, "loss": 0.0412, "step": 347500 }, { "epoch": 1.717728939901731, "eval_runtime": 361.1363, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4979152841578133, "eval_sts_eval_spearman_cosine": 0.28195758594171255, "step": 347500 }, { "epoch": 1.7202004923332443, "grad_norm": 0.3259699046611786, "learning_rate": 1.1402502490870928e-05, "loss": 0.0414, "step": 348000 }, { "epoch": 1.7202004923332443, "eval_runtime": 362.854, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4982056911396974, "eval_sts_eval_spearman_cosine": 0.28038927901262023, "step": 348000 }, { "epoch": 1.7226720447647577, "grad_norm": 0.4612974226474762, "learning_rate": 1.1390140848552576e-05, "loss": 0.0403, "step": 348500 }, { "epoch": 1.7226720447647577, "eval_runtime": 361.233, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49689325204061263, "eval_sts_eval_spearman_cosine": 0.2795620757854089, "step": 348500 }, { "epoch": 1.725143597196271, "grad_norm": 0.2497444450855255, "learning_rate": 1.1377779206234226e-05, "loss": 0.0385, "step": 349000 }, { "epoch": 1.725143597196271, "eval_runtime": 355.6277, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4962112106732689, "eval_sts_eval_spearman_cosine": 0.27994133498772833, "step": 349000 }, { "epoch": 1.727615149627784, "grad_norm": 0.8433038592338562, "learning_rate": 1.1365417563915872e-05, "loss": 0.0393, "step": 349500 }, { "epoch": 1.727615149627784, "eval_runtime": 351.9711, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49769507398784457, "eval_sts_eval_spearman_cosine": 0.2799644853103217, "step": 349500 }, { "epoch": 1.7300867020592974, "grad_norm": 0.4692220091819763, "learning_rate": 1.135305592159752e-05, "loss": 0.0394, "step": 350000 }, { "epoch": 1.7300867020592974, "eval_runtime": 358.0228, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4974575343573005, "eval_sts_eval_spearman_cosine": 0.27898703756431975, "step": 350000 }, { "epoch": 1.7325582544908107, "grad_norm": 0.31494608521461487, "learning_rate": 1.134069427927917e-05, "loss": 0.0424, "step": 350500 }, { "epoch": 1.7325582544908107, "eval_runtime": 366.7092, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4983672749767639, "eval_sts_eval_spearman_cosine": 0.2811804260792864, "step": 350500 }, { "epoch": 1.735029806922324, "grad_norm": 0.6868336200714111, "learning_rate": 1.1328332636960816e-05, "loss": 0.0424, "step": 351000 }, { "epoch": 1.735029806922324, "eval_runtime": 353.7983, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49699996286592474, "eval_sts_eval_spearman_cosine": 0.28315336273568814, "step": 351000 }, { "epoch": 1.7375013593538373, "grad_norm": 0.2705886960029602, "learning_rate": 1.1315970994642465e-05, "loss": 0.0392, "step": 351500 }, { "epoch": 1.7375013593538373, "eval_runtime": 353.794, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5008954497555022, "eval_sts_eval_spearman_cosine": 0.285204794512744, "step": 351500 }, { "epoch": 1.7399729117853506, "grad_norm": 0.18293221294879913, "learning_rate": 1.1303609352324114e-05, "loss": 0.0394, "step": 352000 }, { "epoch": 1.7399729117853506, "eval_runtime": 352.9967, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5019856643801006, "eval_sts_eval_spearman_cosine": 0.2857870295345043, "step": 352000 }, { "epoch": 1.742444464216864, "grad_norm": 0.2045389860868454, "learning_rate": 1.1291247710005762e-05, "loss": 0.04, "step": 352500 }, { "epoch": 1.742444464216864, "eval_runtime": 355.0336, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5007599598512904, "eval_sts_eval_spearman_cosine": 0.28467401908388423, "step": 352500 }, { "epoch": 1.7449160166483773, "grad_norm": 0.6846463680267334, "learning_rate": 1.1278886067687409e-05, "loss": 0.0405, "step": 353000 }, { "epoch": 1.7449160166483773, "eval_runtime": 357.081, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49748090411168977, "eval_sts_eval_spearman_cosine": 0.28302869612602394, "step": 353000 }, { "epoch": 1.7473875690798906, "grad_norm": 0.5713421702384949, "learning_rate": 1.1266524425369059e-05, "loss": 0.0401, "step": 353500 }, { "epoch": 1.7473875690798906, "eval_runtime": 359.2251, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5012986282839429, "eval_sts_eval_spearman_cosine": 0.283536759314627, "step": 353500 }, { "epoch": 1.7498591215114039, "grad_norm": 0.24230144917964935, "learning_rate": 1.1254162783050707e-05, "loss": 0.0379, "step": 354000 }, { "epoch": 1.7498591215114039, "eval_runtime": 355.976, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5007309122236709, "eval_sts_eval_spearman_cosine": 0.2834531079351492, "step": 354000 }, { "epoch": 1.752330673942917, "grad_norm": 0.4855753183364868, "learning_rate": 1.1241801140732353e-05, "loss": 0.0397, "step": 354500 }, { "epoch": 1.752330673942917, "eval_runtime": 350.4307, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5026944725489646, "eval_sts_eval_spearman_cosine": 0.2835929303881043, "step": 354500 }, { "epoch": 1.7548022263744303, "grad_norm": 0.46367064118385315, "learning_rate": 1.1229439498414003e-05, "loss": 0.0395, "step": 355000 }, { "epoch": 1.7548022263744303, "eval_runtime": 346.8086, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5001119745554021, "eval_sts_eval_spearman_cosine": 0.2808127034916765, "step": 355000 }, { "epoch": 1.7572737788059436, "grad_norm": 0.4892440438270569, "learning_rate": 1.1217077856095651e-05, "loss": 0.0399, "step": 355500 }, { "epoch": 1.7572737788059436, "eval_runtime": 351.8303, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5004484757054417, "eval_sts_eval_spearman_cosine": 0.27961738163483285, "step": 355500 }, { "epoch": 1.759745331237457, "grad_norm": 0.25765493512153625, "learning_rate": 1.1204716213777297e-05, "loss": 0.0381, "step": 356000 }, { "epoch": 1.759745331237457, "eval_runtime": 342.0587, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5004214309767523, "eval_sts_eval_spearman_cosine": 0.2816531651220212, "step": 356000 }, { "epoch": 1.76221688366897, "grad_norm": 0.2795691192150116, "learning_rate": 1.1192354571458947e-05, "loss": 0.0394, "step": 356500 }, { "epoch": 1.76221688366897, "eval_runtime": 345.1193, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5014927206136713, "eval_sts_eval_spearman_cosine": 0.28106819996549476, "step": 356500 }, { "epoch": 1.7646884361004833, "grad_norm": 0.44593343138694763, "learning_rate": 1.1179992929140595e-05, "loss": 0.0397, "step": 357000 }, { "epoch": 1.7646884361004833, "eval_runtime": 347.2924, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5041174356366398, "eval_sts_eval_spearman_cosine": 0.2852293033382613, "step": 357000 }, { "epoch": 1.7671599885319966, "grad_norm": 0.4292694628238678, "learning_rate": 1.1167631286822242e-05, "loss": 0.0416, "step": 357500 }, { "epoch": 1.7671599885319966, "eval_runtime": 347.092, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5008375033112749, "eval_sts_eval_spearman_cosine": 0.28396512577884925, "step": 357500 }, { "epoch": 1.76963154096351, "grad_norm": 0.23955097794532776, "learning_rate": 1.1155269644503892e-05, "loss": 0.0393, "step": 358000 }, { "epoch": 1.76963154096351, "eval_runtime": 355.8338, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5011423360025131, "eval_sts_eval_spearman_cosine": 0.2845883298985793, "step": 358000 }, { "epoch": 1.7721030933950233, "grad_norm": 0.5236731171607971, "learning_rate": 1.114290800218554e-05, "loss": 0.0398, "step": 358500 }, { "epoch": 1.7721030933950233, "eval_runtime": 350.715, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.503066020140682, "eval_sts_eval_spearman_cosine": 0.28321014262240746, "step": 358500 }, { "epoch": 1.7745746458265366, "grad_norm": 0.4630036950111389, "learning_rate": 1.1130546359867188e-05, "loss": 0.0411, "step": 359000 }, { "epoch": 1.7745746458265366, "eval_runtime": 346.0468, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5043751852831845, "eval_sts_eval_spearman_cosine": 0.28379306517219666, "step": 359000 }, { "epoch": 1.7770461982580499, "grad_norm": 0.14880913496017456, "learning_rate": 1.1118184717548836e-05, "loss": 0.0394, "step": 359500 }, { "epoch": 1.7770461982580499, "eval_runtime": 349.9606, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5077472283097004, "eval_sts_eval_spearman_cosine": 0.28374844718773173, "step": 359500 }, { "epoch": 1.7795177506895632, "grad_norm": 0.4792141616344452, "learning_rate": 1.1105823075230484e-05, "loss": 0.0398, "step": 360000 }, { "epoch": 1.7795177506895632, "eval_runtime": 344.6141, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5058233659499196, "eval_sts_eval_spearman_cosine": 0.28251794735440394, "step": 360000 }, { "epoch": 1.7819893031210765, "grad_norm": 0.45490801334381104, "learning_rate": 1.1093461432912132e-05, "loss": 0.0413, "step": 360500 }, { "epoch": 1.7819893031210765, "eval_runtime": 356.3434, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5031480121744981, "eval_sts_eval_spearman_cosine": 0.28205999187805236, "step": 360500 }, { "epoch": 1.7844608555525898, "grad_norm": 0.32611629366874695, "learning_rate": 1.1081099790593779e-05, "loss": 0.0382, "step": 361000 }, { "epoch": 1.7844608555525898, "eval_runtime": 354.7848, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5056238863537799, "eval_sts_eval_spearman_cosine": 0.2810847791922077, "step": 361000 }, { "epoch": 1.7869324079841031, "grad_norm": 0.3447839617729187, "learning_rate": 1.1068738148275428e-05, "loss": 0.0399, "step": 361500 }, { "epoch": 1.7869324079841031, "eval_runtime": 347.0222, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5069938876533324, "eval_sts_eval_spearman_cosine": 0.28307357836080255, "step": 361500 }, { "epoch": 1.7894039604156162, "grad_norm": 0.2418614774942398, "learning_rate": 1.1056376505957076e-05, "loss": 0.0414, "step": 362000 }, { "epoch": 1.7894039604156162, "eval_runtime": 351.4599, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.502991687614488, "eval_sts_eval_spearman_cosine": 0.28089744270123745, "step": 362000 }, { "epoch": 1.7918755128471295, "grad_norm": 0.15252584218978882, "learning_rate": 1.1044014863638723e-05, "loss": 0.0391, "step": 362500 }, { "epoch": 1.7918755128471295, "eval_runtime": 347.9325, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5051875005733366, "eval_sts_eval_spearman_cosine": 0.28209174172771057, "step": 362500 }, { "epoch": 1.7943470652786428, "grad_norm": 0.1617336869239807, "learning_rate": 1.1031653221320373e-05, "loss": 0.0392, "step": 363000 }, { "epoch": 1.7943470652786428, "eval_runtime": 345.6832, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5056860446266056, "eval_sts_eval_spearman_cosine": 0.28361095129321995, "step": 363000 }, { "epoch": 1.796818617710156, "grad_norm": 0.2658255696296692, "learning_rate": 1.101929157900202e-05, "loss": 0.0377, "step": 363500 }, { "epoch": 1.796818617710156, "eval_runtime": 353.1607, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5053964182479622, "eval_sts_eval_spearman_cosine": 0.28496661194856304, "step": 363500 }, { "epoch": 1.7992901701416693, "grad_norm": 0.4153803288936615, "learning_rate": 1.1006929936683667e-05, "loss": 0.0376, "step": 364000 }, { "epoch": 1.7992901701416693, "eval_runtime": 346.0752, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5066409093533972, "eval_sts_eval_spearman_cosine": 0.28643167642983974, "step": 364000 }, { "epoch": 1.8017617225731826, "grad_norm": 0.2680214047431946, "learning_rate": 1.0994568294365317e-05, "loss": 0.0391, "step": 364500 }, { "epoch": 1.8017617225731826, "eval_runtime": 351.8192, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5062471428822024, "eval_sts_eval_spearman_cosine": 0.28607462239659415, "step": 364500 }, { "epoch": 1.8042332750046959, "grad_norm": 0.21690736711025238, "learning_rate": 1.0982206652046965e-05, "loss": 0.0403, "step": 365000 }, { "epoch": 1.8042332750046959, "eval_runtime": 352.1716, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5059746328658489, "eval_sts_eval_spearman_cosine": 0.28653748028982806, "step": 365000 }, { "epoch": 1.8067048274362092, "grad_norm": 0.3397861421108246, "learning_rate": 1.0969845009728613e-05, "loss": 0.0398, "step": 365500 }, { "epoch": 1.8067048274362092, "eval_runtime": 346.8116, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5052267663903592, "eval_sts_eval_spearman_cosine": 0.28387860832864115, "step": 365500 }, { "epoch": 1.8091763798677225, "grad_norm": 0.27249330282211304, "learning_rate": 1.0957483367410261e-05, "loss": 0.0404, "step": 366000 }, { "epoch": 1.8091763798677225, "eval_runtime": 357.2169, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5030140283698896, "eval_sts_eval_spearman_cosine": 0.2802601546297855, "step": 366000 }, { "epoch": 1.8116479322992358, "grad_norm": 0.6933519840240479, "learning_rate": 1.094512172509191e-05, "loss": 0.0378, "step": 366500 }, { "epoch": 1.8116479322992358, "eval_runtime": 354.5835, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5061557667969228, "eval_sts_eval_spearman_cosine": 0.2831090364309977, "step": 366500 }, { "epoch": 1.8141194847307491, "grad_norm": 0.303727388381958, "learning_rate": 1.0932760082773557e-05, "loss": 0.0398, "step": 367000 }, { "epoch": 1.8141194847307491, "eval_runtime": 362.6477, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5044186106376244, "eval_sts_eval_spearman_cosine": 0.2830074636967095, "step": 367000 }, { "epoch": 1.8165910371622624, "grad_norm": 0.46795082092285156, "learning_rate": 1.0920398440455207e-05, "loss": 0.0389, "step": 367500 }, { "epoch": 1.8165910371622624, "eval_runtime": 364.216, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.504894445914845, "eval_sts_eval_spearman_cosine": 0.28229179053955594, "step": 367500 }, { "epoch": 1.8190625895937758, "grad_norm": 0.4275902509689331, "learning_rate": 1.0908036798136854e-05, "loss": 0.0401, "step": 368000 }, { "epoch": 1.8190625895937758, "eval_runtime": 355.228, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5019879948784621, "eval_sts_eval_spearman_cosine": 0.2793164960813674, "step": 368000 }, { "epoch": 1.821534142025289, "grad_norm": 0.5004280209541321, "learning_rate": 1.0895675155818502e-05, "loss": 0.0402, "step": 368500 }, { "epoch": 1.821534142025289, "eval_runtime": 357.6312, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5048642870844897, "eval_sts_eval_spearman_cosine": 0.280069879974527, "step": 368500 }, { "epoch": 1.8240056944568022, "grad_norm": 0.3804904520511627, "learning_rate": 1.0883313513500152e-05, "loss": 0.0364, "step": 369000 }, { "epoch": 1.8240056944568022, "eval_runtime": 359.6613, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5029377906205527, "eval_sts_eval_spearman_cosine": 0.27926783512603054, "step": 369000 }, { "epoch": 1.8264772468883155, "grad_norm": 0.39491790533065796, "learning_rate": 1.0870951871181798e-05, "loss": 0.0414, "step": 369500 }, { "epoch": 1.8264772468883155, "eval_runtime": 362.8532, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.502786697235029, "eval_sts_eval_spearman_cosine": 0.27892440078076786, "step": 369500 }, { "epoch": 1.8289487993198288, "grad_norm": 0.4120674729347229, "learning_rate": 1.0858590228863446e-05, "loss": 0.0386, "step": 370000 }, { "epoch": 1.8289487993198288, "eval_runtime": 347.7582, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5020509618776692, "eval_sts_eval_spearman_cosine": 0.2777216105306344, "step": 370000 }, { "epoch": 1.831420351751342, "grad_norm": 0.6057425141334534, "learning_rate": 1.0846228586545096e-05, "loss": 0.041, "step": 370500 }, { "epoch": 1.831420351751342, "eval_runtime": 367.5718, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5037165769137603, "eval_sts_eval_spearman_cosine": 0.2812410989573556, "step": 370500 }, { "epoch": 1.8338919041828552, "grad_norm": 0.3199741542339325, "learning_rate": 1.0833866944226742e-05, "loss": 0.0406, "step": 371000 }, { "epoch": 1.8338919041828552, "eval_runtime": 359.4924, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5034004571455687, "eval_sts_eval_spearman_cosine": 0.28059759131720025, "step": 371000 }, { "epoch": 1.8363634566143685, "grad_norm": 0.42963504791259766, "learning_rate": 1.082150530190839e-05, "loss": 0.0411, "step": 371500 }, { "epoch": 1.8363634566143685, "eval_runtime": 360.5467, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5044376903440957, "eval_sts_eval_spearman_cosine": 0.2797906832404012, "step": 371500 }, { "epoch": 1.8388350090458818, "grad_norm": 0.13402794301509857, "learning_rate": 1.080914365959004e-05, "loss": 0.0405, "step": 372000 }, { "epoch": 1.8388350090458818, "eval_runtime": 356.0849, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5040240412266525, "eval_sts_eval_spearman_cosine": 0.2799572143187114, "step": 372000 }, { "epoch": 1.8413065614773951, "grad_norm": 0.19751979410648346, "learning_rate": 1.0796782017271687e-05, "loss": 0.0393, "step": 372500 }, { "epoch": 1.8413065614773951, "eval_runtime": 354.0833, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5034112831671536, "eval_sts_eval_spearman_cosine": 0.2813591519946641, "step": 372500 }, { "epoch": 1.8437781139089084, "grad_norm": 0.3063000440597534, "learning_rate": 1.0784420374953335e-05, "loss": 0.0398, "step": 373000 }, { "epoch": 1.8437781139089084, "eval_runtime": 352.115, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5043014221625841, "eval_sts_eval_spearman_cosine": 0.2823251742253166, "step": 373000 }, { "epoch": 1.8462496663404218, "grad_norm": 0.32358071208000183, "learning_rate": 1.0772058732634985e-05, "loss": 0.0411, "step": 373500 }, { "epoch": 1.8462496663404218, "eval_runtime": 350.2347, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.503431054593773, "eval_sts_eval_spearman_cosine": 0.2817084802439809, "step": 373500 }, { "epoch": 1.848721218771935, "grad_norm": 0.37496307492256165, "learning_rate": 1.0759697090316633e-05, "loss": 0.0398, "step": 374000 }, { "epoch": 1.848721218771935, "eval_runtime": 355.2384, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5048985311872014, "eval_sts_eval_spearman_cosine": 0.2843024617425873, "step": 374000 }, { "epoch": 1.8511927712034484, "grad_norm": 9.167440414428711, "learning_rate": 1.0747335447998279e-05, "loss": 0.041, "step": 374500 }, { "epoch": 1.8511927712034484, "eval_runtime": 356.6799, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5033278921320284, "eval_sts_eval_spearman_cosine": 0.2839698576062903, "step": 374500 }, { "epoch": 1.8536643236349617, "grad_norm": 0.4144994914531708, "learning_rate": 1.0734973805679929e-05, "loss": 0.0375, "step": 375000 }, { "epoch": 1.8536643236349617, "eval_runtime": 352.0256, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5051477838599681, "eval_sts_eval_spearman_cosine": 0.28323703466347105, "step": 375000 }, { "epoch": 1.856135876066475, "grad_norm": 0.20148572325706482, "learning_rate": 1.0722612163361577e-05, "loss": 0.0397, "step": 375500 }, { "epoch": 1.856135876066475, "eval_runtime": 352.4517, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5057186658674916, "eval_sts_eval_spearman_cosine": 0.28262108126048463, "step": 375500 }, { "epoch": 1.858607428497988, "grad_norm": 0.2789447009563446, "learning_rate": 1.0710250521043223e-05, "loss": 0.0417, "step": 376000 }, { "epoch": 1.858607428497988, "eval_runtime": 351.4202, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5039089176298372, "eval_sts_eval_spearman_cosine": 0.2826334039712476, "step": 376000 }, { "epoch": 1.8610789809295014, "grad_norm": 0.6984876990318298, "learning_rate": 1.0697888878724873e-05, "loss": 0.0396, "step": 376500 }, { "epoch": 1.8610789809295014, "eval_runtime": 357.5451, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5031669004826369, "eval_sts_eval_spearman_cosine": 0.28124233346511945, "step": 376500 }, { "epoch": 1.8635505333610147, "grad_norm": 0.4724852442741394, "learning_rate": 1.0685527236406521e-05, "loss": 0.0386, "step": 377000 }, { "epoch": 1.8635505333610147, "eval_runtime": 356.3532, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5030328587056563, "eval_sts_eval_spearman_cosine": 0.28270958168511595, "step": 377000 }, { "epoch": 1.866022085792528, "grad_norm": 0.29831716418266296, "learning_rate": 1.0673165594088168e-05, "loss": 0.0403, "step": 377500 }, { "epoch": 1.866022085792528, "eval_runtime": 352.6943, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5050222781479681, "eval_sts_eval_spearman_cosine": 0.28310398428753425, "step": 377500 }, { "epoch": 1.8684936382240411, "grad_norm": 0.32063421607017517, "learning_rate": 1.0660803951769818e-05, "loss": 0.04, "step": 378000 }, { "epoch": 1.8684936382240411, "eval_runtime": 349.4167, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5025037686087466, "eval_sts_eval_spearman_cosine": 0.28309320767486706, "step": 378000 }, { "epoch": 1.8709651906555544, "grad_norm": 0.4701303243637085, "learning_rate": 1.0648442309451466e-05, "loss": 0.0403, "step": 378500 }, { "epoch": 1.8709651906555544, "eval_runtime": 356.0023, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49968348325065204, "eval_sts_eval_spearman_cosine": 0.2799513928080307, "step": 378500 }, { "epoch": 1.8734367430870678, "grad_norm": 0.10622674971818924, "learning_rate": 1.0636080667133114e-05, "loss": 0.0404, "step": 379000 }, { "epoch": 1.8734367430870678, "eval_runtime": 352.5198, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5004537030757017, "eval_sts_eval_spearman_cosine": 0.2816654682427829, "step": 379000 }, { "epoch": 1.875908295518581, "grad_norm": 0.3678479790687561, "learning_rate": 1.0623719024814762e-05, "loss": 0.0401, "step": 379500 }, { "epoch": 1.875908295518581, "eval_runtime": 361.7565, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4994967342142534, "eval_sts_eval_spearman_cosine": 0.28250029288731104, "step": 379500 }, { "epoch": 1.8783798479500944, "grad_norm": 0.5664474368095398, "learning_rate": 1.061135738249641e-05, "loss": 0.0407, "step": 380000 }, { "epoch": 1.8783798479500944, "eval_runtime": 364.8996, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.499137273543498, "eval_sts_eval_spearman_cosine": 0.2817642982594369, "step": 380000 }, { "epoch": 1.8808514003816077, "grad_norm": 0.2945299744606018, "learning_rate": 1.0598995740178058e-05, "loss": 0.0391, "step": 380500 }, { "epoch": 1.8808514003816077, "eval_runtime": 346.4155, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4979121320444093, "eval_sts_eval_spearman_cosine": 0.280650304524464, "step": 380500 }, { "epoch": 1.883322952813121, "grad_norm": 0.3008582592010498, "learning_rate": 1.0586634097859708e-05, "loss": 0.0412, "step": 381000 }, { "epoch": 1.883322952813121, "eval_runtime": 360.583, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5013607270537419, "eval_sts_eval_spearman_cosine": 0.2813152100147876, "step": 381000 }, { "epoch": 1.8857945052446343, "grad_norm": 0.4441869556903839, "learning_rate": 1.0574272455541354e-05, "loss": 0.0396, "step": 381500 }, { "epoch": 1.8857945052446343, "eval_runtime": 360.525, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5005631087505971, "eval_sts_eval_spearman_cosine": 0.2823892918561334, "step": 381500 }, { "epoch": 1.8882660576761476, "grad_norm": 0.6789828538894653, "learning_rate": 1.0561910813223002e-05, "loss": 0.0389, "step": 382000 }, { "epoch": 1.8882660576761476, "eval_runtime": 345.2407, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5011127160918896, "eval_sts_eval_spearman_cosine": 0.2801022724293729, "step": 382000 }, { "epoch": 1.890737610107661, "grad_norm": 0.3972764313220978, "learning_rate": 1.0549549170904652e-05, "loss": 0.0402, "step": 382500 }, { "epoch": 1.890737610107661, "eval_runtime": 347.4852, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5054127291474315, "eval_sts_eval_spearman_cosine": 0.28011441910876356, "step": 382500 }, { "epoch": 1.8932091625391743, "grad_norm": 0.22549353539943695, "learning_rate": 1.0537187528586299e-05, "loss": 0.041, "step": 383000 }, { "epoch": 1.8932091625391743, "eval_runtime": 352.3941, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5051271867899216, "eval_sts_eval_spearman_cosine": 0.2832314542654261, "step": 383000 }, { "epoch": 1.8956807149706874, "grad_norm": 0.48502984642982483, "learning_rate": 1.0524825886267947e-05, "loss": 0.0418, "step": 383500 }, { "epoch": 1.8956807149706874, "eval_runtime": 364.0644, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5048715053032145, "eval_sts_eval_spearman_cosine": 0.2829814553819483, "step": 383500 }, { "epoch": 1.8981522674022007, "grad_norm": 0.3387640714645386, "learning_rate": 1.0512464243949596e-05, "loss": 0.0389, "step": 384000 }, { "epoch": 1.8981522674022007, "eval_runtime": 358.4654, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5019090674385516, "eval_sts_eval_spearman_cosine": 0.281778276429457, "step": 384000 }, { "epoch": 1.900623819833714, "grad_norm": 0.4986640214920044, "learning_rate": 1.0500102601631243e-05, "loss": 0.0377, "step": 384500 }, { "epoch": 1.900623819833714, "eval_runtime": 355.0108, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5014665730409461, "eval_sts_eval_spearman_cosine": 0.27990345462822275, "step": 384500 }, { "epoch": 1.903095372265227, "grad_norm": 0.1650361567735672, "learning_rate": 1.0487740959312891e-05, "loss": 0.0377, "step": 385000 }, { "epoch": 1.903095372265227, "eval_runtime": 350.8101, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5018088612455893, "eval_sts_eval_spearman_cosine": 0.2798007566247036, "step": 385000 }, { "epoch": 1.9055669246967404, "grad_norm": 0.43185004591941833, "learning_rate": 1.047537931699454e-05, "loss": 0.0359, "step": 385500 }, { "epoch": 1.9055669246967404, "eval_runtime": 366.0712, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5007235198199087, "eval_sts_eval_spearman_cosine": 0.27780966073658553, "step": 385500 }, { "epoch": 1.9080384771282537, "grad_norm": 0.43539100885391235, "learning_rate": 1.0463017674676187e-05, "loss": 0.0411, "step": 386000 }, { "epoch": 1.9080384771282537, "eval_runtime": 352.4069, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.501096575736897, "eval_sts_eval_spearman_cosine": 0.2803138455791093, "step": 386000 }, { "epoch": 1.910510029559767, "grad_norm": 0.17171159386634827, "learning_rate": 1.0450656032357835e-05, "loss": 0.041, "step": 386500 }, { "epoch": 1.910510029559767, "eval_runtime": 363.881, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.501264545793906, "eval_sts_eval_spearman_cosine": 0.278529803297502, "step": 386500 }, { "epoch": 1.9129815819912803, "grad_norm": 0.2499004602432251, "learning_rate": 1.0438294390039483e-05, "loss": 0.04, "step": 387000 }, { "epoch": 1.9129815819912803, "eval_runtime": 362.0023, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49944920468736564, "eval_sts_eval_spearman_cosine": 0.2754203383219981, "step": 387000 }, { "epoch": 1.9154531344227936, "grad_norm": 0.508918285369873, "learning_rate": 1.0425932747721133e-05, "loss": 0.0396, "step": 387500 }, { "epoch": 1.9154531344227936, "eval_runtime": 356.4111, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4999477569500457, "eval_sts_eval_spearman_cosine": 0.2764988606542568, "step": 387500 }, { "epoch": 1.917924686854307, "grad_norm": 0.524566113948822, "learning_rate": 1.041357110540278e-05, "loss": 0.0406, "step": 388000 }, { "epoch": 1.917924686854307, "eval_runtime": 350.464, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.501311341869227, "eval_sts_eval_spearman_cosine": 0.27810732460451243, "step": 388000 }, { "epoch": 1.9203962392858203, "grad_norm": 0.46800872683525085, "learning_rate": 1.0401209463084428e-05, "loss": 0.0411, "step": 388500 }, { "epoch": 1.9203962392858203, "eval_runtime": 351.0842, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5013420831199541, "eval_sts_eval_spearman_cosine": 0.2779052662476712, "step": 388500 }, { "epoch": 1.9228677917173336, "grad_norm": 0.30701854825019836, "learning_rate": 1.0388847820766078e-05, "loss": 0.0424, "step": 389000 }, { "epoch": 1.9228677917173336, "eval_runtime": 352.5086, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5019474572329166, "eval_sts_eval_spearman_cosine": 0.2770020974657542, "step": 389000 }, { "epoch": 1.9253393441488469, "grad_norm": 0.29084116220474243, "learning_rate": 1.0376486178447724e-05, "loss": 0.0393, "step": 389500 }, { "epoch": 1.9253393441488469, "eval_runtime": 348.4092, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5034958495312762, "eval_sts_eval_spearman_cosine": 0.27873327684860744, "step": 389500 }, { "epoch": 1.9278108965803602, "grad_norm": 0.5017964839935303, "learning_rate": 1.0364124536129372e-05, "loss": 0.0407, "step": 390000 }, { "epoch": 1.9278108965803602, "eval_runtime": 346.3648, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5040652125056468, "eval_sts_eval_spearman_cosine": 0.27892266892949735, "step": 390000 }, { "epoch": 1.9302824490118733, "grad_norm": 0.4299938976764679, "learning_rate": 1.0351762893811022e-05, "loss": 0.0386, "step": 390500 }, { "epoch": 1.9302824490118733, "eval_runtime": 343.5374, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5043429978392042, "eval_sts_eval_spearman_cosine": 0.2796664577130321, "step": 390500 }, { "epoch": 1.9327540014433866, "grad_norm": 0.4316963851451874, "learning_rate": 1.0339401251492668e-05, "loss": 0.0399, "step": 391000 }, { "epoch": 1.9327540014433866, "eval_runtime": 351.9515, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5048471773521618, "eval_sts_eval_spearman_cosine": 0.279455464071647, "step": 391000 }, { "epoch": 1.9352255538749, "grad_norm": 0.37627100944519043, "learning_rate": 1.0327039609174316e-05, "loss": 0.0398, "step": 391500 }, { "epoch": 1.9352255538749, "eval_runtime": 358.2152, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5038077115240397, "eval_sts_eval_spearman_cosine": 0.2784759244581664, "step": 391500 }, { "epoch": 1.9376971063064132, "grad_norm": 0.42107605934143066, "learning_rate": 1.0314677966855966e-05, "loss": 0.0411, "step": 392000 }, { "epoch": 1.9376971063064132, "eval_runtime": 358.8944, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5039995565512785, "eval_sts_eval_spearman_cosine": 0.2790944366084578, "step": 392000 }, { "epoch": 1.9401686587379263, "grad_norm": 0.389085590839386, "learning_rate": 1.0302316324537613e-05, "loss": 0.0397, "step": 392500 }, { "epoch": 1.9401686587379263, "eval_runtime": 359.5213, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5049900540816398, "eval_sts_eval_spearman_cosine": 0.28116390008541975, "step": 392500 }, { "epoch": 1.9426402111694396, "grad_norm": 0.3784082531929016, "learning_rate": 1.028995468221926e-05, "loss": 0.0393, "step": 393000 }, { "epoch": 1.9426402111694396, "eval_runtime": 351.1119, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5040328433225587, "eval_sts_eval_spearman_cosine": 0.27984293723247583, "step": 393000 }, { "epoch": 1.945111763600953, "grad_norm": 0.2723065912723541, "learning_rate": 1.027759303990091e-05, "loss": 0.0407, "step": 393500 }, { "epoch": 1.945111763600953, "eval_runtime": 354.4056, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5074952712711176, "eval_sts_eval_spearman_cosine": 0.28306360001903397, "step": 393500 }, { "epoch": 1.9475833160324663, "grad_norm": 0.28645461797714233, "learning_rate": 1.0265231397582559e-05, "loss": 0.0394, "step": 394000 }, { "epoch": 1.9475833160324663, "eval_runtime": 354.2514, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5064428910179342, "eval_sts_eval_spearman_cosine": 0.2806018340957793, "step": 394000 }, { "epoch": 1.9500548684639796, "grad_norm": 0.5229191780090332, "learning_rate": 1.0252869755264205e-05, "loss": 0.0414, "step": 394500 }, { "epoch": 1.9500548684639796, "eval_runtime": 353.5445, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5067063290048164, "eval_sts_eval_spearman_cosine": 0.28243464722178885, "step": 394500 }, { "epoch": 1.9525264208954929, "grad_norm": 1.2651978731155396, "learning_rate": 1.0240508112945855e-05, "loss": 0.0391, "step": 395000 }, { "epoch": 1.9525264208954929, "eval_runtime": 348.5231, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5041964868598627, "eval_sts_eval_spearman_cosine": 0.2825256622568816, "step": 395000 }, { "epoch": 1.9549979733270062, "grad_norm": 0.3464127480983734, "learning_rate": 1.0228146470627503e-05, "loss": 0.0396, "step": 395500 }, { "epoch": 1.9549979733270062, "eval_runtime": 359.5174, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.504389034033949, "eval_sts_eval_spearman_cosine": 0.2837257630291786, "step": 395500 }, { "epoch": 1.9574695257585195, "grad_norm": 0.32541897892951965, "learning_rate": 1.021578482830915e-05, "loss": 0.0396, "step": 396000 }, { "epoch": 1.9574695257585195, "eval_runtime": 353.2261, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5033644329034943, "eval_sts_eval_spearman_cosine": 0.28451502740804124, "step": 396000 }, { "epoch": 1.9599410781900328, "grad_norm": 0.5674552917480469, "learning_rate": 1.0203423185990799e-05, "loss": 0.0418, "step": 396500 }, { "epoch": 1.9599410781900328, "eval_runtime": 352.5822, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.506724597893459, "eval_sts_eval_spearman_cosine": 0.2859561990452627, "step": 396500 }, { "epoch": 1.9624126306215461, "grad_norm": 4.273538589477539, "learning_rate": 1.0191061543672447e-05, "loss": 0.0389, "step": 397000 }, { "epoch": 1.9624126306215461, "eval_runtime": 358.8443, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5059627319246257, "eval_sts_eval_spearman_cosine": 0.2855538915443563, "step": 397000 }, { "epoch": 1.9648841830530595, "grad_norm": 0.3477705121040344, "learning_rate": 1.0178699901354094e-05, "loss": 0.0402, "step": 397500 }, { "epoch": 1.9648841830530595, "eval_runtime": 356.3383, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.505153684618177, "eval_sts_eval_spearman_cosine": 0.2847029235285278, "step": 397500 }, { "epoch": 1.9673557354845725, "grad_norm": 0.27185893058776855, "learning_rate": 1.0166338259035743e-05, "loss": 0.0396, "step": 398000 }, { "epoch": 1.9673557354845725, "eval_runtime": 359.7356, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5060974938231484, "eval_sts_eval_spearman_cosine": 0.28390612985034497, "step": 398000 }, { "epoch": 1.9698272879160859, "grad_norm": 0.4494769871234894, "learning_rate": 1.0153976616717392e-05, "loss": 0.0399, "step": 398500 }, { "epoch": 1.9698272879160859, "eval_runtime": 356.6991, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5059969969871523, "eval_sts_eval_spearman_cosine": 0.28295124826607804, "step": 398500 }, { "epoch": 1.9722988403475992, "grad_norm": 0.44827690720558167, "learning_rate": 1.014161497439904e-05, "loss": 0.0406, "step": 399000 }, { "epoch": 1.9722988403475992, "eval_runtime": 355.844, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5057985403023183, "eval_sts_eval_spearman_cosine": 0.2828928128696115, "step": 399000 }, { "epoch": 1.9747703927791123, "grad_norm": 0.45029017329216003, "learning_rate": 1.0129253332080688e-05, "loss": 0.0414, "step": 399500 }, { "epoch": 1.9747703927791123, "eval_runtime": 357.716, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5062879311198427, "eval_sts_eval_spearman_cosine": 0.2835940401730238, "step": 399500 }, { "epoch": 1.9772419452106256, "grad_norm": 0.4414938986301422, "learning_rate": 1.0116891689762336e-05, "loss": 0.0397, "step": 400000 }, { "epoch": 1.9772419452106256, "eval_runtime": 355.1201, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5042169751755124, "eval_sts_eval_spearman_cosine": 0.2825612135027779, "step": 400000 }, { "epoch": 1.9797134976421389, "grad_norm": 0.22063905000686646, "learning_rate": 1.0104530047443984e-05, "loss": 0.0389, "step": 400500 }, { "epoch": 1.9797134976421389, "eval_runtime": 351.3428, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5056257816840796, "eval_sts_eval_spearman_cosine": 0.2832843566218516, "step": 400500 }, { "epoch": 1.9821850500736522, "grad_norm": 0.35029396414756775, "learning_rate": 1.0092168405125634e-05, "loss": 0.0418, "step": 401000 }, { "epoch": 1.9821850500736522, "eval_runtime": 350.9949, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5045538034567716, "eval_sts_eval_spearman_cosine": 0.2824634969738804, "step": 401000 }, { "epoch": 3.0402386757735003, "grad_norm": 0.45807015895843506, "learning_rate": 4.801001526532499e-06, "loss": 0.0289, "step": 401500 }, { "epoch": 3.0402386757735003, "eval_runtime": 274.7512, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5056408332946107, "eval_sts_eval_spearman_cosine": 0.28239994215594555, "step": 401500 }, { "epoch": 3.0440247762414625, "grad_norm": 0.3172495663166046, "learning_rate": 4.782061917370274e-06, "loss": 0.0329, "step": 402000 }, { "epoch": 3.0440247762414625, "eval_runtime": 268.5434, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5027718807405134, "eval_sts_eval_spearman_cosine": 0.28047582372681956, "step": 402000 }, { "epoch": 3.0478108767094243, "grad_norm": 0.2599460780620575, "learning_rate": 4.763122308208048e-06, "loss": 0.0312, "step": 402500 }, { "epoch": 3.0478108767094243, "eval_runtime": 273.2459, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5033761373235706, "eval_sts_eval_spearman_cosine": 0.2813499819300964, "step": 402500 }, { "epoch": 3.0515969771773865, "grad_norm": 0.431499183177948, "learning_rate": 4.744182699045823e-06, "loss": 0.0304, "step": 403000 }, { "epoch": 3.0515969771773865, "eval_runtime": 277.8402, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5031954259593823, "eval_sts_eval_spearman_cosine": 0.28095771653381163, "step": 403000 }, { "epoch": 3.0553830776453483, "grad_norm": 0.19053049385547638, "learning_rate": 4.725243089883598e-06, "loss": 0.0319, "step": 403500 }, { "epoch": 3.0553830776453483, "eval_runtime": 273.7286, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5022452429235089, "eval_sts_eval_spearman_cosine": 0.2802584760113631, "step": 403500 }, { "epoch": 3.0591691781133106, "grad_norm": 0.18396756052970886, "learning_rate": 4.706303480721372e-06, "loss": 0.0305, "step": 404000 }, { "epoch": 3.0591691781133106, "eval_runtime": 276.5275, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5032751798225717, "eval_sts_eval_spearman_cosine": 0.28193815996850985, "step": 404000 }, { "epoch": 3.0629552785812724, "grad_norm": 0.3593258559703827, "learning_rate": 4.687363871559147e-06, "loss": 0.0308, "step": 404500 }, { "epoch": 3.0629552785812724, "eval_runtime": 292.2071, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5017387789529999, "eval_sts_eval_spearman_cosine": 0.2812866182147052, "step": 404500 }, { "epoch": 3.0667413790492346, "grad_norm": 0.4578187167644501, "learning_rate": 4.6684242623969215e-06, "loss": 0.0304, "step": 405000 }, { "epoch": 3.0667413790492346, "eval_runtime": 281.6167, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.500058342438436, "eval_sts_eval_spearman_cosine": 0.28014774049134766, "step": 405000 }, { "epoch": 3.0705274795171964, "grad_norm": 0.13191601634025574, "learning_rate": 4.649484653234696e-06, "loss": 0.0311, "step": 405500 }, { "epoch": 3.0705274795171964, "eval_runtime": 290.4136, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49961097189111137, "eval_sts_eval_spearman_cosine": 0.27929868814794073, "step": 405500 }, { "epoch": 3.0743135799851586, "grad_norm": 0.41584134101867676, "learning_rate": 4.630545044072471e-06, "loss": 0.03, "step": 406000 }, { "epoch": 3.0743135799851586, "eval_runtime": 279.2395, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.5006558432968886, "eval_sts_eval_spearman_cosine": 0.2797398247935739, "step": 406000 }, { "epoch": 3.0780996804531204, "grad_norm": 0.5560721158981323, "learning_rate": 4.611605434910245e-06, "loss": 0.0303, "step": 406500 }, { "epoch": 3.0780996804531204, "eval_runtime": 286.5746, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4988594130851841, "eval_sts_eval_spearman_cosine": 0.2788054880197329, "step": 406500 }, { "epoch": 3.0818857809210827, "grad_norm": 0.3058489263057709, "learning_rate": 4.59266582574802e-06, "loss": 0.0313, "step": 407000 }, { "epoch": 3.0818857809210827, "eval_runtime": 277.1605, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4981631367798321, "eval_sts_eval_spearman_cosine": 0.27935552275462144, "step": 407000 }, { "epoch": 3.0856718813890445, "grad_norm": 0.17935439944267273, "learning_rate": 4.573726216585795e-06, "loss": 0.0314, "step": 407500 }, { "epoch": 3.0856718813890445, "eval_runtime": 295.3491, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4975545487398711, "eval_sts_eval_spearman_cosine": 0.2792780505218252, "step": 407500 }, { "epoch": 3.0894579818570067, "grad_norm": 0.4599781930446625, "learning_rate": 4.55478660742357e-06, "loss": 0.0305, "step": 408000 }, { "epoch": 3.0894579818570067, "eval_runtime": 278.3557, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49778457081638944, "eval_sts_eval_spearman_cosine": 0.2803039408384961, "step": 408000 }, { "epoch": 3.0932440823249685, "grad_norm": 0.28084006905555725, "learning_rate": 4.535846998261344e-06, "loss": 0.0307, "step": 408500 }, { "epoch": 3.0932440823249685, "eval_runtime": 279.9833, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49780187556554756, "eval_sts_eval_spearman_cosine": 0.27951412096327866, "step": 408500 }, { "epoch": 3.0970301827929307, "grad_norm": 0.2811315059661865, "learning_rate": 4.5169073890991184e-06, "loss": 0.0298, "step": 409000 }, { "epoch": 3.0970301827929307, "eval_runtime": 275.724, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49807579637794575, "eval_sts_eval_spearman_cosine": 0.2789146614095119, "step": 409000 }, { "epoch": 3.1008162832608925, "grad_norm": 0.3177525997161865, "learning_rate": 4.497967779936894e-06, "loss": 0.0303, "step": 409500 }, { "epoch": 3.1008162832608925, "eval_runtime": 277.2656, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4989713441519466, "eval_sts_eval_spearman_cosine": 0.27897149532284665, "step": 409500 }, { "epoch": 3.1046023837288548, "grad_norm": 0.3919098377227783, "learning_rate": 4.479028170774668e-06, "loss": 0.0318, "step": 410000 }, { "epoch": 3.1046023837288548, "eval_runtime": 286.5466, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4970569384291356, "eval_sts_eval_spearman_cosine": 0.2786656196119102, "step": 410000 }, { "epoch": 3.1083884841968166, "grad_norm": 0.3543815016746521, "learning_rate": 4.460088561612443e-06, "loss": 0.0311, "step": 410500 }, { "epoch": 3.1083884841968166, "eval_runtime": 289.7722, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49687754760593905, "eval_sts_eval_spearman_cosine": 0.27876514431807164, "step": 410500 }, { "epoch": 3.112174584664779, "grad_norm": 0.438551664352417, "learning_rate": 4.441148952450218e-06, "loss": 0.0331, "step": 411000 }, { "epoch": 3.112174584664779, "eval_runtime": 291.9951, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4967124570935114, "eval_sts_eval_spearman_cosine": 0.2791895727583476, "step": 411000 }, { "epoch": 3.1159606851327406, "grad_norm": 0.9105826020240784, "learning_rate": 4.422209343287992e-06, "loss": 0.0287, "step": 411500 }, { "epoch": 3.1159606851327406, "eval_runtime": 278.6446, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49755692510216837, "eval_sts_eval_spearman_cosine": 0.2791380405191573, "step": 411500 }, { "epoch": 3.119746785600703, "grad_norm": 0.5962665677070618, "learning_rate": 4.403269734125767e-06, "loss": 0.0311, "step": 412000 }, { "epoch": 3.119746785600703, "eval_runtime": 286.7082, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49662786853117924, "eval_sts_eval_spearman_cosine": 0.27789145050798214, "step": 412000 }, { "epoch": 3.1235328860686646, "grad_norm": 0.4517219364643097, "learning_rate": 4.384330124963542e-06, "loss": 0.0304, "step": 412500 }, { "epoch": 3.1235328860686646, "eval_runtime": 273.5482, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49773277864113885, "eval_sts_eval_spearman_cosine": 0.278285833274398, "step": 412500 }, { "epoch": 3.127318986536627, "grad_norm": 0.23721475899219513, "learning_rate": 4.365390515801316e-06, "loss": 0.0288, "step": 413000 }, { "epoch": 3.127318986536627, "eval_runtime": 276.5937, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4980266565152194, "eval_sts_eval_spearman_cosine": 0.27801285265033204, "step": 413000 }, { "epoch": 3.1311050870045887, "grad_norm": 0.3685942590236664, "learning_rate": 4.346450906639091e-06, "loss": 0.0315, "step": 413500 }, { "epoch": 3.1311050870045887, "eval_runtime": 270.4497, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4975907585419326, "eval_sts_eval_spearman_cosine": 0.2784311300813932, "step": 413500 }, { "epoch": 3.134891187472551, "grad_norm": 0.43754178285598755, "learning_rate": 4.3275112974768654e-06, "loss": 0.0295, "step": 414000 }, { "epoch": 3.134891187472551, "eval_runtime": 270.0999, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4977399020312614, "eval_sts_eval_spearman_cosine": 0.2789603146813176, "step": 414000 }, { "epoch": 3.1386772879405127, "grad_norm": 0.32077351212501526, "learning_rate": 4.308571688314641e-06, "loss": 0.0304, "step": 414500 }, { "epoch": 3.1386772879405127, "eval_runtime": 271.1799, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.497107734363144, "eval_sts_eval_spearman_cosine": 0.2785067747337473, "step": 414500 }, { "epoch": 3.142463388408475, "grad_norm": 0.3348529040813446, "learning_rate": 4.289632079152415e-06, "loss": 0.0287, "step": 415000 }, { "epoch": 3.142463388408475, "eval_runtime": 269.19, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49728031566265873, "eval_sts_eval_spearman_cosine": 0.27953889486113515, "step": 415000 }, { "epoch": 3.1462494888764367, "grad_norm": 0.45239362120628357, "learning_rate": 4.27069246999019e-06, "loss": 0.0321, "step": 415500 }, { "epoch": 3.1462494888764367, "eval_runtime": 264.8191, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49493356867995303, "eval_sts_eval_spearman_cosine": 0.2770841323800019, "step": 415500 }, { "epoch": 3.150035589344399, "grad_norm": 0.8851208686828613, "learning_rate": 4.251752860827965e-06, "loss": 0.0306, "step": 416000 }, { "epoch": 3.150035589344399, "eval_runtime": 278.0053, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4927401619960271, "eval_sts_eval_spearman_cosine": 0.2764708401460336, "step": 416000 }, { "epoch": 3.1538216898123608, "grad_norm": 0.6778058409690857, "learning_rate": 4.2328132516657385e-06, "loss": 0.0295, "step": 416500 }, { "epoch": 3.1538216898123608, "eval_runtime": 277.361, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49380401902834975, "eval_sts_eval_spearman_cosine": 0.27704303165911964, "step": 416500 }, { "epoch": 3.157607790280323, "grad_norm": 0.26853764057159424, "learning_rate": 4.213873642503514e-06, "loss": 0.0308, "step": 417000 }, { "epoch": 3.157607790280323, "eval_runtime": 286.665, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4944504246208412, "eval_sts_eval_spearman_cosine": 0.27671640612806647, "step": 417000 }, { "epoch": 3.161393890748285, "grad_norm": 0.29733970761299133, "learning_rate": 4.194934033341289e-06, "loss": 0.031, "step": 417500 }, { "epoch": 3.161393890748285, "eval_runtime": 271.1053, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.494058632791706, "eval_sts_eval_spearman_cosine": 0.2772132047779926, "step": 417500 }, { "epoch": 3.165179991216247, "grad_norm": 0.2960602343082428, "learning_rate": 4.175994424179063e-06, "loss": 0.0283, "step": 418000 }, { "epoch": 3.165179991216247, "eval_runtime": 280.773, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4953284652754595, "eval_sts_eval_spearman_cosine": 0.2775324417277563, "step": 418000 }, { "epoch": 3.168966091684209, "grad_norm": 0.40748828649520874, "learning_rate": 4.157054815016838e-06, "loss": 0.0311, "step": 418500 }, { "epoch": 3.168966091684209, "eval_runtime": 276.5369, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49338725707520975, "eval_sts_eval_spearman_cosine": 0.2774680999909406, "step": 418500 }, { "epoch": 3.172752192152171, "grad_norm": 0.48526930809020996, "learning_rate": 4.1381152058546124e-06, "loss": 0.0315, "step": 419000 }, { "epoch": 3.172752192152171, "eval_runtime": 271.6674, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49319614155180896, "eval_sts_eval_spearman_cosine": 0.2769541284999535, "step": 419000 }, { "epoch": 3.176538292620133, "grad_norm": 0.510814368724823, "learning_rate": 4.119175596692387e-06, "loss": 0.03, "step": 419500 }, { "epoch": 3.176538292620133, "eval_runtime": 269.5049, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49392489415183266, "eval_sts_eval_spearman_cosine": 0.2764216377849875, "step": 419500 }, { "epoch": 3.180324393088095, "grad_norm": 0.2455747276544571, "learning_rate": 4.100235987530162e-06, "loss": 0.0287, "step": 420000 }, { "epoch": 3.180324393088095, "eval_runtime": 272.9825, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4937995571527226, "eval_sts_eval_spearman_cosine": 0.2763688972455515, "step": 420000 }, { "epoch": 3.184110493556057, "grad_norm": 0.2568112909793854, "learning_rate": 4.081296378367936e-06, "loss": 0.0321, "step": 420500 }, { "epoch": 3.184110493556057, "eval_runtime": 277.1112, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4950366303638496, "eval_sts_eval_spearman_cosine": 0.27746903820232827, "step": 420500 }, { "epoch": 3.187896594024019, "grad_norm": 0.2515662908554077, "learning_rate": 4.062356769205711e-06, "loss": 0.0314, "step": 421000 }, { "epoch": 3.187896594024019, "eval_runtime": 284.5335, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4949993620619968, "eval_sts_eval_spearman_cosine": 0.27661910332921247, "step": 421000 }, { "epoch": 3.191682694491981, "grad_norm": 0.26276156306266785, "learning_rate": 4.0434171600434855e-06, "loss": 0.0297, "step": 421500 }, { "epoch": 3.191682694491981, "eval_runtime": 286.2902, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4952251400758804, "eval_sts_eval_spearman_cosine": 0.27639397177210395, "step": 421500 }, { "epoch": 3.195468794959943, "grad_norm": 0.3874708414077759, "learning_rate": 4.024477550881261e-06, "loss": 0.0298, "step": 422000 }, { "epoch": 3.195468794959943, "eval_runtime": 273.8285, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4964734258875357, "eval_sts_eval_spearman_cosine": 0.27736885192846067, "step": 422000 }, { "epoch": 3.199254895427905, "grad_norm": 0.4607601463794708, "learning_rate": 4.005537941719035e-06, "loss": 0.03, "step": 422500 }, { "epoch": 3.199254895427905, "eval_runtime": 281.8705, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49402532532226234, "eval_sts_eval_spearman_cosine": 0.27668951294874133, "step": 422500 }, { "epoch": 3.203040995895867, "grad_norm": 0.14798296988010406, "learning_rate": 3.986598332556809e-06, "loss": 0.0294, "step": 423000 }, { "epoch": 3.203040995895867, "eval_runtime": 283.6638, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4941320812720347, "eval_sts_eval_spearman_cosine": 0.27665150665170835, "step": 423000 }, { "epoch": 3.206827096363829, "grad_norm": 0.5435062050819397, "learning_rate": 3.967658723394585e-06, "loss": 0.0305, "step": 423500 }, { "epoch": 3.206827096363829, "eval_runtime": 279.5502, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4952230890715966, "eval_sts_eval_spearman_cosine": 0.27670384854211916, "step": 423500 }, { "epoch": 3.2106131968317912, "grad_norm": 0.22127704322338104, "learning_rate": 3.948719114232359e-06, "loss": 0.0295, "step": 424000 }, { "epoch": 3.2106131968317912, "eval_runtime": 281.59, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49408917566766763, "eval_sts_eval_spearman_cosine": 0.27600804610303853, "step": 424000 }, { "epoch": 3.214399297299753, "grad_norm": 0.26587721705436707, "learning_rate": 3.929779505070134e-06, "loss": 0.0294, "step": 424500 }, { "epoch": 3.214399297299753, "eval_runtime": 286.512, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49356405813873727, "eval_sts_eval_spearman_cosine": 0.27617054730944274, "step": 424500 }, { "epoch": 3.2181853977677153, "grad_norm": 0.3076368272304535, "learning_rate": 3.910839895907909e-06, "loss": 0.0291, "step": 425000 }, { "epoch": 3.2181853977677153, "eval_runtime": 286.5886, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4939299979000261, "eval_sts_eval_spearman_cosine": 0.2765363786025798, "step": 425000 }, { "epoch": 3.221971498235677, "grad_norm": 0.3268946409225464, "learning_rate": 3.891900286745683e-06, "loss": 0.0309, "step": 425500 }, { "epoch": 3.221971498235677, "eval_runtime": 279.6619, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49315759516482915, "eval_sts_eval_spearman_cosine": 0.27657475197363696, "step": 425500 }, { "epoch": 3.2257575987036393, "grad_norm": 0.3162047564983368, "learning_rate": 3.872960677583458e-06, "loss": 0.0293, "step": 426000 }, { "epoch": 3.2257575987036393, "eval_runtime": 274.0366, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49373818275668163, "eval_sts_eval_spearman_cosine": 0.27699428792443975, "step": 426000 }, { "epoch": 3.229543699171601, "grad_norm": 0.4687923192977905, "learning_rate": 3.8540210684212325e-06, "loss": 0.0298, "step": 426500 }, { "epoch": 3.229543699171601, "eval_runtime": 275.0655, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49342796119139537, "eval_sts_eval_spearman_cosine": 0.27683885783330875, "step": 426500 }, { "epoch": 3.2333297996395634, "grad_norm": 0.2351580262184143, "learning_rate": 3.835081459259007e-06, "loss": 0.0301, "step": 427000 }, { "epoch": 3.2333297996395634, "eval_runtime": 274.9054, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49333619051694244, "eval_sts_eval_spearman_cosine": 0.2778013435344037, "step": 427000 }, { "epoch": 3.237115900107525, "grad_norm": 0.2907162308692932, "learning_rate": 3.816141850096782e-06, "loss": 0.0301, "step": 427500 }, { "epoch": 3.237115900107525, "eval_runtime": 276.6693, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4923301309050141, "eval_sts_eval_spearman_cosine": 0.27758919061243675, "step": 427500 }, { "epoch": 3.2409020005754874, "grad_norm": 0.5261486768722534, "learning_rate": 3.7972022409345568e-06, "loss": 0.03, "step": 428000 }, { "epoch": 3.2409020005754874, "eval_runtime": 282.1304, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49388846952045945, "eval_sts_eval_spearman_cosine": 0.27733621443360806, "step": 428000 }, { "epoch": 3.244688101043449, "grad_norm": 0.40634870529174805, "learning_rate": 3.778262631772331e-06, "loss": 0.0296, "step": 428500 }, { "epoch": 3.244688101043449, "eval_runtime": 275.5071, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49172946552347535, "eval_sts_eval_spearman_cosine": 0.27660618141124227, "step": 428500 }, { "epoch": 3.2484742015114114, "grad_norm": 0.7143800854682922, "learning_rate": 3.7593230226101056e-06, "loss": 0.03, "step": 429000 }, { "epoch": 3.2484742015114114, "eval_runtime": 292.9373, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49135474289252157, "eval_sts_eval_spearman_cosine": 0.2764871967538757, "step": 429000 }, { "epoch": 3.252260301979373, "grad_norm": 0.7426878213882446, "learning_rate": 3.7403834134478806e-06, "loss": 0.0298, "step": 429500 }, { "epoch": 3.252260301979373, "eval_runtime": 294.2572, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49182955850379195, "eval_sts_eval_spearman_cosine": 0.2771660599763999, "step": 429500 }, { "epoch": 3.2560464024473355, "grad_norm": 0.27399373054504395, "learning_rate": 3.721443804285655e-06, "loss": 0.0285, "step": 430000 }, { "epoch": 3.2560464024473355, "eval_runtime": 277.6622, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.491469058062009, "eval_sts_eval_spearman_cosine": 0.2767797148979861, "step": 430000 }, { "epoch": 3.2598325029152972, "grad_norm": 0.4587053060531616, "learning_rate": 3.70250419512343e-06, "loss": 0.0303, "step": 430500 }, { "epoch": 3.2598325029152972, "eval_runtime": 281.3653, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4943649616758182, "eval_sts_eval_spearman_cosine": 0.27838407714331537, "step": 430500 }, { "epoch": 3.2636186033832595, "grad_norm": 0.22718018293380737, "learning_rate": 3.6835645859612045e-06, "loss": 0.0298, "step": 431000 }, { "epoch": 3.2636186033832595, "eval_runtime": 277.4385, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4938683635069082, "eval_sts_eval_spearman_cosine": 0.2791064207357117, "step": 431000 }, { "epoch": 3.2674047038512213, "grad_norm": 0.46360906958580017, "learning_rate": 3.664624976798979e-06, "loss": 0.0296, "step": 431500 }, { "epoch": 3.2674047038512213, "eval_runtime": 282.4328, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4936595934734704, "eval_sts_eval_spearman_cosine": 0.2784841623558409, "step": 431500 }, { "epoch": 3.2711908043191835, "grad_norm": 0.2813345193862915, "learning_rate": 3.6456853676367537e-06, "loss": 0.0293, "step": 432000 }, { "epoch": 3.2711908043191835, "eval_runtime": 285.9475, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4928469616558864, "eval_sts_eval_spearman_cosine": 0.278274619806869, "step": 432000 }, { "epoch": 3.2749769047871453, "grad_norm": 0.35014140605926514, "learning_rate": 3.6267457584745287e-06, "loss": 0.0301, "step": 432500 }, { "epoch": 3.2749769047871453, "eval_runtime": 272.9257, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49284687301580865, "eval_sts_eval_spearman_cosine": 0.2785800037637741, "step": 432500 }, { "epoch": 3.2787630052551076, "grad_norm": 0.28068235516548157, "learning_rate": 3.607806149312303e-06, "loss": 0.028, "step": 433000 }, { "epoch": 3.2787630052551076, "eval_runtime": 271.4042, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4921722266828291, "eval_sts_eval_spearman_cosine": 0.2783815684103249, "step": 433000 }, { "epoch": 3.2825491057230693, "grad_norm": 0.1615879237651825, "learning_rate": 3.5888665401500775e-06, "loss": 0.0315, "step": 433500 }, { "epoch": 3.2825491057230693, "eval_runtime": 271.6356, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49228464569008334, "eval_sts_eval_spearman_cosine": 0.27757544659325145, "step": 433500 }, { "epoch": 3.2863352061910316, "grad_norm": 0.46993377804756165, "learning_rate": 3.5699269309878526e-06, "loss": 0.03, "step": 434000 }, { "epoch": 3.2863352061910316, "eval_runtime": 274.6611, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49119846614405216, "eval_sts_eval_spearman_cosine": 0.27787548692666114, "step": 434000 }, { "epoch": 3.2901213066589934, "grad_norm": 0.5775092840194702, "learning_rate": 3.5509873218256268e-06, "loss": 0.0305, "step": 434500 }, { "epoch": 3.2901213066589934, "eval_runtime": 275.5284, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4912339399728231, "eval_sts_eval_spearman_cosine": 0.277913727466195, "step": 434500 }, { "epoch": 3.2939074071269556, "grad_norm": 0.2491697520017624, "learning_rate": 3.532047712663402e-06, "loss": 0.028, "step": 435000 }, { "epoch": 3.2939074071269556, "eval_runtime": 282.3329, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4915244927592133, "eval_sts_eval_spearman_cosine": 0.2782264555555567, "step": 435000 }, { "epoch": 3.2976935075949174, "grad_norm": 0.4685298502445221, "learning_rate": 3.5131081035011764e-06, "loss": 0.0284, "step": 435500 }, { "epoch": 3.2976935075949174, "eval_runtime": 277.7329, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4918078217224702, "eval_sts_eval_spearman_cosine": 0.27774404747285664, "step": 435500 }, { "epoch": 3.3014796080628797, "grad_norm": 0.2602149546146393, "learning_rate": 3.494168494338951e-06, "loss": 0.0298, "step": 436000 }, { "epoch": 3.3014796080628797, "eval_runtime": 280.746, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4905151856683916, "eval_sts_eval_spearman_cosine": 0.27738086136637213, "step": 436000 }, { "epoch": 3.3052657085308415, "grad_norm": 0.46414852142333984, "learning_rate": 3.4752288851767256e-06, "loss": 0.0295, "step": 436500 }, { "epoch": 3.3052657085308415, "eval_runtime": 276.911, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4920793890686841, "eval_sts_eval_spearman_cosine": 0.2773782342730416, "step": 436500 }, { "epoch": 3.3090518089988037, "grad_norm": 0.5243902802467346, "learning_rate": 3.4562892760145007e-06, "loss": 0.0314, "step": 437000 }, { "epoch": 3.3090518089988037, "eval_runtime": 279.8698, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49196342656364356, "eval_sts_eval_spearman_cosine": 0.27681969661963096, "step": 437000 }, { "epoch": 3.3128379094667655, "grad_norm": 0.5049580335617065, "learning_rate": 3.437349666852275e-06, "loss": 0.0281, "step": 437500 }, { "epoch": 3.3128379094667655, "eval_runtime": 276.7183, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.491469580963889, "eval_sts_eval_spearman_cosine": 0.27616835653096417, "step": 437500 }, { "epoch": 3.3166240099347277, "grad_norm": 0.38252729177474976, "learning_rate": 3.41841005769005e-06, "loss": 0.0307, "step": 438000 }, { "epoch": 3.3166240099347277, "eval_runtime": 283.0642, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49052550936238243, "eval_sts_eval_spearman_cosine": 0.27561995846418325, "step": 438000 }, { "epoch": 3.3204101104026895, "grad_norm": 0.31457623839378357, "learning_rate": 3.3994704485278245e-06, "loss": 0.0298, "step": 438500 }, { "epoch": 3.3204101104026895, "eval_runtime": 275.573, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4897117789367129, "eval_sts_eval_spearman_cosine": 0.2749989234817337, "step": 438500 }, { "epoch": 3.3241962108706518, "grad_norm": 0.2357383370399475, "learning_rate": 3.3805308393655987e-06, "loss": 0.0297, "step": 439000 }, { "epoch": 3.3241962108706518, "eval_runtime": 280.6751, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4895254525035864, "eval_sts_eval_spearman_cosine": 0.2750722222899222, "step": 439000 }, { "epoch": 3.3279823113386136, "grad_norm": 0.929696798324585, "learning_rate": 3.3615912302033738e-06, "loss": 0.0277, "step": 439500 }, { "epoch": 3.3279823113386136, "eval_runtime": 279.1403, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4899919132441356, "eval_sts_eval_spearman_cosine": 0.2754127801390089, "step": 439500 }, { "epoch": 3.331768411806576, "grad_norm": 0.6649629473686218, "learning_rate": 3.342651621041149e-06, "loss": 0.0305, "step": 440000 }, { "epoch": 3.331768411806576, "eval_runtime": 284.839, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48968118864483456, "eval_sts_eval_spearman_cosine": 0.2755663195348624, "step": 440000 }, { "epoch": 3.3355545122745376, "grad_norm": 0.608157217502594, "learning_rate": 3.323712011878923e-06, "loss": 0.0305, "step": 440500 }, { "epoch": 3.3355545122745376, "eval_runtime": 292.3199, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49018476533789734, "eval_sts_eval_spearman_cosine": 0.27533464760617105, "step": 440500 }, { "epoch": 3.3393406127425, "grad_norm": 0.2316209077835083, "learning_rate": 3.3047724027166976e-06, "loss": 0.0306, "step": 441000 }, { "epoch": 3.3393406127425, "eval_runtime": 287.2023, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4902532814290391, "eval_sts_eval_spearman_cosine": 0.2757436693810152, "step": 441000 }, { "epoch": 3.3431267132104616, "grad_norm": 0.2788620889186859, "learning_rate": 3.2858327935544726e-06, "loss": 0.0288, "step": 441500 }, { "epoch": 3.3431267132104616, "eval_runtime": 282.3182, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4913158419277508, "eval_sts_eval_spearman_cosine": 0.27658362285418614, "step": 441500 }, { "epoch": 3.346912813678424, "grad_norm": 0.5513033270835876, "learning_rate": 3.266893184392247e-06, "loss": 0.0315, "step": 442000 }, { "epoch": 3.346912813678424, "eval_runtime": 287.4978, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49038167067555705, "eval_sts_eval_spearman_cosine": 0.27675376687471903, "step": 442000 }, { "epoch": 3.3506989141463857, "grad_norm": 0.3271780014038086, "learning_rate": 3.247953575230022e-06, "loss": 0.0302, "step": 442500 }, { "epoch": 3.3506989141463857, "eval_runtime": 280.8351, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49032012052092355, "eval_sts_eval_spearman_cosine": 0.2768687891026702, "step": 442500 }, { "epoch": 3.354485014614348, "grad_norm": 0.4828197658061981, "learning_rate": 3.2290139660677965e-06, "loss": 0.0286, "step": 443000 }, { "epoch": 3.354485014614348, "eval_runtime": 297.6391, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.490284625957467, "eval_sts_eval_spearman_cosine": 0.2765695915661472, "step": 443000 }, { "epoch": 3.3582711150823097, "grad_norm": 0.4666995704174042, "learning_rate": 3.210074356905571e-06, "loss": 0.0284, "step": 443500 }, { "epoch": 3.3582711150823097, "eval_runtime": 288.9614, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49039531324604485, "eval_sts_eval_spearman_cosine": 0.27606556373694335, "step": 443500 }, { "epoch": 3.362057215550272, "grad_norm": 0.5178301334381104, "learning_rate": 3.1911347477433457e-06, "loss": 0.031, "step": 444000 }, { "epoch": 3.362057215550272, "eval_runtime": 276.0169, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4909237083017741, "eval_sts_eval_spearman_cosine": 0.276458419264094, "step": 444000 }, { "epoch": 3.3658433160182337, "grad_norm": 0.4852887988090515, "learning_rate": 3.1721951385811208e-06, "loss": 0.0284, "step": 444500 }, { "epoch": 3.3658433160182337, "eval_runtime": 284.4837, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.490540370327482, "eval_sts_eval_spearman_cosine": 0.27639372849091803, "step": 444500 }, { "epoch": 3.369629416486196, "grad_norm": 0.5273555517196655, "learning_rate": 3.153255529418895e-06, "loss": 0.0296, "step": 445000 }, { "epoch": 3.369629416486196, "eval_runtime": 272.4693, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4912747950630656, "eval_sts_eval_spearman_cosine": 0.2767487191428973, "step": 445000 }, { "epoch": 3.3734155169541578, "grad_norm": 0.27640289068222046, "learning_rate": 3.1343159202566696e-06, "loss": 0.03, "step": 445500 }, { "epoch": 3.3734155169541578, "eval_runtime": 276.2977, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49039971081323946, "eval_sts_eval_spearman_cosine": 0.27697657290100486, "step": 445500 }, { "epoch": 3.37720161742212, "grad_norm": 0.5537356734275818, "learning_rate": 3.1153763110944446e-06, "loss": 0.0288, "step": 446000 }, { "epoch": 3.37720161742212, "eval_runtime": 279.5555, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49241518298423376, "eval_sts_eval_spearman_cosine": 0.27766110164372154, "step": 446000 }, { "epoch": 3.380987717890082, "grad_norm": 0.3248932361602783, "learning_rate": 3.0964367019322196e-06, "loss": 0.0302, "step": 446500 }, { "epoch": 3.380987717890082, "eval_runtime": 271.1418, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4914332852221286, "eval_sts_eval_spearman_cosine": 0.27654192837843466, "step": 446500 }, { "epoch": 3.384773818358044, "grad_norm": 0.250980943441391, "learning_rate": 3.077497092769994e-06, "loss": 0.0302, "step": 447000 }, { "epoch": 3.384773818358044, "eval_runtime": 275.3066, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49075403446220145, "eval_sts_eval_spearman_cosine": 0.2758493780432146, "step": 447000 }, { "epoch": 3.388559918826006, "grad_norm": 0.2963213324546814, "learning_rate": 3.0585574836077684e-06, "loss": 0.029, "step": 447500 }, { "epoch": 3.388559918826006, "eval_runtime": 276.2956, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49129224084650797, "eval_sts_eval_spearman_cosine": 0.27679787457277116, "step": 447500 }, { "epoch": 3.392346019293968, "grad_norm": 0.17186139523983002, "learning_rate": 3.0396178744455435e-06, "loss": 0.032, "step": 448000 }, { "epoch": 3.392346019293968, "eval_runtime": 271.6254, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49032259966030955, "eval_sts_eval_spearman_cosine": 0.27640461865710464, "step": 448000 }, { "epoch": 1.968365707865563, "grad_norm": 0.6854721903800964, "learning_rate": 1.016093735252348e-05, "loss": 0.0314, "step": 448500 }, { "epoch": 1.968365707865563, "eval_runtime": 279.9062, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48570667163388376, "eval_sts_eval_spearman_cosine": 0.27398106249071585, "step": 448500 }, { "epoch": 1.9705600954997498, "grad_norm": 0.9208744764328003, "learning_rate": 1.0149962355761104e-05, "loss": 0.0299, "step": 449000 }, { "epoch": 1.9705600954997498, "eval_runtime": 276.1623, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48587417158696555, "eval_sts_eval_spearman_cosine": 0.2743754775559581, "step": 449000 }, { "epoch": 1.9727544831339365, "grad_norm": 0.5107960104942322, "learning_rate": 1.0138987358998731e-05, "loss": 0.0322, "step": 449500 }, { "epoch": 1.9727544831339365, "eval_runtime": 275.0605, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48707692673556435, "eval_sts_eval_spearman_cosine": 0.2764446660075839, "step": 449500 }, { "epoch": 1.9749488707681233, "grad_norm": 0.5531497597694397, "learning_rate": 1.0128012362236355e-05, "loss": 0.0305, "step": 450000 }, { "epoch": 1.9749488707681233, "eval_runtime": 273.0952, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48226039585548086, "eval_sts_eval_spearman_cosine": 0.271778415750715, "step": 450000 }, { "epoch": 1.97714325840231, "grad_norm": 0.5528868436813354, "learning_rate": 1.0117037365473979e-05, "loss": 0.0312, "step": 450500 }, { "epoch": 1.97714325840231, "eval_runtime": 278.0133, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48716318514981494, "eval_sts_eval_spearman_cosine": 0.27356108124389333, "step": 450500 }, { "epoch": 1.979337646036497, "grad_norm": 0.5085938572883606, "learning_rate": 1.0106062368711603e-05, "loss": 0.0297, "step": 451000 }, { "epoch": 1.979337646036497, "eval_runtime": 272.7064, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48663560575949627, "eval_sts_eval_spearman_cosine": 0.27230202283594623, "step": 451000 }, { "epoch": 1.9815320336706839, "grad_norm": 0.5648781061172485, "learning_rate": 1.0095087371949227e-05, "loss": 0.0295, "step": 451500 }, { "epoch": 1.9815320336706839, "eval_runtime": 275.4748, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4847650907103844, "eval_sts_eval_spearman_cosine": 0.27169361136828013, "step": 451500 }, { "epoch": 1.9837264213048706, "grad_norm": 0.28920289874076843, "learning_rate": 1.0084112375186849e-05, "loss": 0.0311, "step": 452000 }, { "epoch": 1.9837264213048706, "eval_runtime": 272.5729, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4825865244022404, "eval_sts_eval_spearman_cosine": 0.2709929854869051, "step": 452000 }, { "epoch": 1.9859208089390574, "grad_norm": 0.6947532296180725, "learning_rate": 1.0073137378424473e-05, "loss": 0.0312, "step": 452500 }, { "epoch": 1.9859208089390574, "eval_runtime": 276.4548, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4824502601655969, "eval_sts_eval_spearman_cosine": 0.27034537448243423, "step": 452500 }, { "epoch": 1.9881151965732444, "grad_norm": 0.13813982903957367, "learning_rate": 1.0062162381662097e-05, "loss": 0.0305, "step": 453000 }, { "epoch": 1.9881151965732444, "eval_runtime": 273.6575, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48619305806246244, "eval_sts_eval_spearman_cosine": 0.2728148557213322, "step": 453000 }, { "epoch": 1.9903095842074312, "grad_norm": 0.7231931686401367, "learning_rate": 1.0051187384899724e-05, "loss": 0.0317, "step": 453500 }, { "epoch": 1.9903095842074312, "eval_runtime": 273.3315, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48385774024547223, "eval_sts_eval_spearman_cosine": 0.2711253383507257, "step": 453500 }, { "epoch": 1.992503971841618, "grad_norm": 0.19298043847084045, "learning_rate": 1.0040212388137348e-05, "loss": 0.0341, "step": 454000 }, { "epoch": 1.992503971841618, "eval_runtime": 275.1592, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4825742719692311, "eval_sts_eval_spearman_cosine": 0.2726899953124195, "step": 454000 }, { "epoch": 1.9946983594758048, "grad_norm": 0.4895106554031372, "learning_rate": 1.0029237391374972e-05, "loss": 0.0318, "step": 454500 }, { "epoch": 1.9946983594758048, "eval_runtime": 288.9752, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4846662495412214, "eval_sts_eval_spearman_cosine": 0.2736081919936308, "step": 454500 }, { "epoch": 1.9968927471099915, "grad_norm": 0.34270262718200684, "learning_rate": 1.0018262394612594e-05, "loss": 0.0305, "step": 455000 }, { "epoch": 1.9968927471099915, "eval_runtime": 271.3312, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48581621041359124, "eval_sts_eval_spearman_cosine": 0.27370246813864646, "step": 455000 }, { "epoch": 1.9990871347441783, "grad_norm": 0.4689360558986664, "learning_rate": 1.0007287397850218e-05, "loss": 0.0302, "step": 455500 }, { "epoch": 1.9990871347441783, "eval_runtime": 271.916, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4854206523382445, "eval_sts_eval_spearman_cosine": 0.27398434877491934, "step": 455500 }, { "epoch": 2.001281522378365, "grad_norm": 1.0201528072357178, "learning_rate": 9.996312401087841e-06, "loss": 0.0303, "step": 456000 }, { "epoch": 2.001281522378365, "eval_runtime": 269.6825, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48321618760709617, "eval_sts_eval_spearman_cosine": 0.2721572213417982, "step": 456000 }, { "epoch": 2.003475910012552, "grad_norm": 0.8021215796470642, "learning_rate": 9.985337404325467e-06, "loss": 0.0301, "step": 456500 }, { "epoch": 2.003475910012552, "eval_runtime": 272.7011, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4797245158587642, "eval_sts_eval_spearman_cosine": 0.27230574652781064, "step": 456500 }, { "epoch": 2.0056702976467387, "grad_norm": 0.505477249622345, "learning_rate": 9.97436240756309e-06, "loss": 0.0293, "step": 457000 }, { "epoch": 2.0056702976467387, "eval_runtime": 275.9566, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4813487084192963, "eval_sts_eval_spearman_cosine": 0.27120983856477116, "step": 457000 }, { "epoch": 2.0078646852809254, "grad_norm": 0.5915279388427734, "learning_rate": 9.963387410800715e-06, "loss": 0.0301, "step": 457500 }, { "epoch": 2.0078646852809254, "eval_runtime": 273.9162, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48826681779494313, "eval_sts_eval_spearman_cosine": 0.27465250991946255, "step": 457500 }, { "epoch": 2.010059072915112, "grad_norm": 0.7840089201927185, "learning_rate": 9.952412414038339e-06, "loss": 0.0304, "step": 458000 }, { "epoch": 2.010059072915112, "eval_runtime": 274.556, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48564335283982785, "eval_sts_eval_spearman_cosine": 0.27341703188661587, "step": 458000 }, { "epoch": 2.012253460549299, "grad_norm": 0.6319242119789124, "learning_rate": 9.941437417275962e-06, "loss": 0.0321, "step": 458500 }, { "epoch": 2.012253460549299, "eval_runtime": 279.5655, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.47938210356992894, "eval_sts_eval_spearman_cosine": 0.27080942334479063, "step": 458500 }, { "epoch": 2.0144478481834858, "grad_norm": 0.29991018772125244, "learning_rate": 9.930462420513586e-06, "loss": 0.0298, "step": 459000 }, { "epoch": 2.0144478481834858, "eval_runtime": 278.5914, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48553230444366097, "eval_sts_eval_spearman_cosine": 0.27272217427277556, "step": 459000 }, { "epoch": 2.0166422358176725, "grad_norm": 0.2979397177696228, "learning_rate": 9.91948742375121e-06, "loss": 0.0312, "step": 459500 }, { "epoch": 2.0166422358176725, "eval_runtime": 280.2688, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48448889631771774, "eval_sts_eval_spearman_cosine": 0.2725522203606181, "step": 459500 }, { "epoch": 2.0188366234518593, "grad_norm": 0.26645538210868835, "learning_rate": 9.908512426988834e-06, "loss": 0.0292, "step": 460000 }, { "epoch": 2.0188366234518593, "eval_runtime": 296.1666, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48678008003471895, "eval_sts_eval_spearman_cosine": 0.27514373593869934, "step": 460000 }, { "epoch": 2.0210310110860465, "grad_norm": 0.3452828526496887, "learning_rate": 9.89753743022646e-06, "loss": 0.0306, "step": 460500 }, { "epoch": 2.0210310110860465, "eval_runtime": 278.6545, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48533030321513565, "eval_sts_eval_spearman_cosine": 0.27047159163543755, "step": 460500 }, { "epoch": 2.0232253987202333, "grad_norm": 0.3127541244029999, "learning_rate": 9.886562433464083e-06, "loss": 0.029, "step": 461000 }, { "epoch": 2.0232253987202333, "eval_runtime": 292.1244, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48667973151300603, "eval_sts_eval_spearman_cosine": 0.27331528826031637, "step": 461000 }, { "epoch": 2.02541978635442, "grad_norm": 0.5699549913406372, "learning_rate": 9.875587436701707e-06, "loss": 0.0318, "step": 461500 }, { "epoch": 2.02541978635442, "eval_runtime": 286.3527, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4875926460415635, "eval_sts_eval_spearman_cosine": 0.2747655193916346, "step": 461500 }, { "epoch": 2.027614173988607, "grad_norm": 3.598555564880371, "learning_rate": 9.864612439939331e-06, "loss": 0.0313, "step": 462000 }, { "epoch": 2.027614173988607, "eval_runtime": 292.0032, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4867004070531941, "eval_sts_eval_spearman_cosine": 0.2752103108919955, "step": 462000 }, { "epoch": 2.0298085616227937, "grad_norm": 0.5557475090026855, "learning_rate": 9.853637443176955e-06, "loss": 0.0312, "step": 462500 }, { "epoch": 2.0298085616227937, "eval_runtime": 280.4012, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48879836814032795, "eval_sts_eval_spearman_cosine": 0.27558353945792846, "step": 462500 }, { "epoch": 2.0320029492569804, "grad_norm": 0.5585613250732422, "learning_rate": 9.842662446414579e-06, "loss": 0.0322, "step": 463000 }, { "epoch": 2.0320029492569804, "eval_runtime": 276.686, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48490512156212845, "eval_sts_eval_spearman_cosine": 0.27266791347778024, "step": 463000 }, { "epoch": 2.034197336891167, "grad_norm": 0.6445340514183044, "learning_rate": 9.831687449652203e-06, "loss": 0.0314, "step": 463500 }, { "epoch": 2.034197336891167, "eval_runtime": 275.6326, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48605962889504273, "eval_sts_eval_spearman_cosine": 0.2723846799450635, "step": 463500 }, { "epoch": 2.036391724525354, "grad_norm": 0.5144784450531006, "learning_rate": 9.820712452889826e-06, "loss": 0.0308, "step": 464000 }, { "epoch": 2.036391724525354, "eval_runtime": 281.0859, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48433441496375923, "eval_sts_eval_spearman_cosine": 0.2744787993075318, "step": 464000 }, { "epoch": 2.0385861121595408, "grad_norm": 0.36596694588661194, "learning_rate": 9.809737456127452e-06, "loss": 0.0306, "step": 464500 }, { "epoch": 2.0385861121595408, "eval_runtime": 274.5928, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4842258231881297, "eval_sts_eval_spearman_cosine": 0.27325532017742327, "step": 464500 }, { "epoch": 2.0407804997937276, "grad_norm": 0.43784552812576294, "learning_rate": 9.798762459365076e-06, "loss": 0.0315, "step": 465000 }, { "epoch": 2.0407804997937276, "eval_runtime": 270.3373, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4810207954289139, "eval_sts_eval_spearman_cosine": 0.2702731950575328, "step": 465000 }, { "epoch": 2.0429748874279143, "grad_norm": 0.32237371802330017, "learning_rate": 9.7877874626027e-06, "loss": 0.0312, "step": 465500 }, { "epoch": 2.0429748874279143, "eval_runtime": 269.9192, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4817236534049424, "eval_sts_eval_spearman_cosine": 0.27083431396508195, "step": 465500 }, { "epoch": 2.045169275062101, "grad_norm": 0.389809250831604, "learning_rate": 9.776812465840324e-06, "loss": 0.0314, "step": 466000 }, { "epoch": 2.045169275062101, "eval_runtime": 272.2578, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4851028420177055, "eval_sts_eval_spearman_cosine": 0.27371543263344905, "step": 466000 }, { "epoch": 2.047363662696288, "grad_norm": 0.234055295586586, "learning_rate": 9.765837469077947e-06, "loss": 0.0309, "step": 466500 }, { "epoch": 2.047363662696288, "eval_runtime": 275.4032, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48184320464068864, "eval_sts_eval_spearman_cosine": 0.27091454447248714, "step": 466500 }, { "epoch": 2.0495580503304747, "grad_norm": 0.3646621108055115, "learning_rate": 9.754862472315571e-06, "loss": 0.0325, "step": 467000 }, { "epoch": 2.0495580503304747, "eval_runtime": 281.2256, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48410927644580704, "eval_sts_eval_spearman_cosine": 0.27020631171701354, "step": 467000 }, { "epoch": 2.0517524379646614, "grad_norm": 0.43471914529800415, "learning_rate": 9.743887475553195e-06, "loss": 0.0302, "step": 467500 }, { "epoch": 2.0517524379646614, "eval_runtime": 288.3675, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4813819845151686, "eval_sts_eval_spearman_cosine": 0.2711274764124179, "step": 467500 }, { "epoch": 2.0539468255988482, "grad_norm": 0.26582807302474976, "learning_rate": 9.732912478790819e-06, "loss": 0.0315, "step": 468000 }, { "epoch": 2.0539468255988482, "eval_runtime": 278.355, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48514072987565693, "eval_sts_eval_spearman_cosine": 0.2718692978599723, "step": 468000 }, { "epoch": 2.056141213233035, "grad_norm": 0.41335317492485046, "learning_rate": 9.721937482028444e-06, "loss": 0.0296, "step": 468500 }, { "epoch": 2.056141213233035, "eval_runtime": 279.1624, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4842389431308345, "eval_sts_eval_spearman_cosine": 0.27067474353520304, "step": 468500 }, { "epoch": 2.058335600867222, "grad_norm": 0.473097562789917, "learning_rate": 9.710962485266068e-06, "loss": 0.0314, "step": 469000 }, { "epoch": 2.058335600867222, "eval_runtime": 277.9691, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48249917724405145, "eval_sts_eval_spearman_cosine": 0.26883753576626984, "step": 469000 }, { "epoch": 2.060529988501409, "grad_norm": 0.6933205723762512, "learning_rate": 9.699987488503692e-06, "loss": 0.0306, "step": 469500 }, { "epoch": 2.060529988501409, "eval_runtime": 280.2791, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48385156363852294, "eval_sts_eval_spearman_cosine": 0.27069084453619985, "step": 469500 }, { "epoch": 2.062724376135596, "grad_norm": 0.26274266839027405, "learning_rate": 9.689012491741316e-06, "loss": 0.0333, "step": 470000 }, { "epoch": 2.062724376135596, "eval_runtime": 272.7132, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48576648186041205, "eval_sts_eval_spearman_cosine": 0.27209638748803194, "step": 470000 }, { "epoch": 2.0649187637697826, "grad_norm": 0.43697401881217957, "learning_rate": 9.67803749497894e-06, "loss": 0.0312, "step": 470500 }, { "epoch": 2.0649187637697826, "eval_runtime": 280.2171, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48338444141481984, "eval_sts_eval_spearman_cosine": 0.27199053995147865, "step": 470500 }, { "epoch": 2.0671131514039693, "grad_norm": 0.3471299111843109, "learning_rate": 9.667062498216564e-06, "loss": 0.0311, "step": 471000 }, { "epoch": 2.0671131514039693, "eval_runtime": 289.6734, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48354101993119125, "eval_sts_eval_spearman_cosine": 0.2717744245177292, "step": 471000 }, { "epoch": 2.069307539038156, "grad_norm": 0.8253263831138611, "learning_rate": 9.656087501454188e-06, "loss": 0.0314, "step": 471500 }, { "epoch": 2.069307539038156, "eval_runtime": 273.4977, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48301248546652686, "eval_sts_eval_spearman_cosine": 0.2733435636477071, "step": 471500 }, { "epoch": 2.071501926672343, "grad_norm": 0.5620436668395996, "learning_rate": 9.645112504691811e-06, "loss": 0.0308, "step": 472000 }, { "epoch": 2.071501926672343, "eval_runtime": 283.0274, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48116865129624237, "eval_sts_eval_spearman_cosine": 0.27014617756010095, "step": 472000 }, { "epoch": 2.0736963143065297, "grad_norm": 0.28995805978775024, "learning_rate": 9.634137507929437e-06, "loss": 0.0321, "step": 472500 }, { "epoch": 2.0736963143065297, "eval_runtime": 274.4346, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4818448972026859, "eval_sts_eval_spearman_cosine": 0.27010326928182365, "step": 472500 }, { "epoch": 2.0758907019407165, "grad_norm": 0.420296847820282, "learning_rate": 9.62316251116706e-06, "loss": 0.0315, "step": 473000 }, { "epoch": 2.0758907019407165, "eval_runtime": 273.0247, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4847610862126027, "eval_sts_eval_spearman_cosine": 0.2722318843392375, "step": 473000 }, { "epoch": 2.0780850895749032, "grad_norm": 0.946178138256073, "learning_rate": 9.612187514404685e-06, "loss": 0.0332, "step": 473500 }, { "epoch": 2.0780850895749032, "eval_runtime": 278.1383, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48139526192366167, "eval_sts_eval_spearman_cosine": 0.2709390363831908, "step": 473500 }, { "epoch": 2.08027947720909, "grad_norm": 0.46156835556030273, "learning_rate": 9.601212517642307e-06, "loss": 0.0306, "step": 474000 }, { "epoch": 2.08027947720909, "eval_runtime": 277.2055, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48310433733827146, "eval_sts_eval_spearman_cosine": 0.27184533736930444, "step": 474000 }, { "epoch": 2.082473864843277, "grad_norm": 0.38917461037635803, "learning_rate": 9.590237520879932e-06, "loss": 0.0324, "step": 474500 }, { "epoch": 2.082473864843277, "eval_runtime": 268.803, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48169614805574446, "eval_sts_eval_spearman_cosine": 0.2733549404095916, "step": 474500 }, { "epoch": 2.0846682524774636, "grad_norm": 0.4978792369365692, "learning_rate": 9.579262524117556e-06, "loss": 0.0325, "step": 475000 }, { "epoch": 2.0846682524774636, "eval_runtime": 273.1263, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4794597484763434, "eval_sts_eval_spearman_cosine": 0.27061932707820785, "step": 475000 }, { "epoch": 2.0868626401116503, "grad_norm": 0.5920610427856445, "learning_rate": 9.56828752735518e-06, "loss": 0.0301, "step": 475500 }, { "epoch": 2.0868626401116503, "eval_runtime": 280.7038, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4799190080274288, "eval_sts_eval_spearman_cosine": 0.2733251355612862, "step": 475500 }, { "epoch": 2.089057027745837, "grad_norm": 0.6962510943412781, "learning_rate": 9.557312530592804e-06, "loss": 0.0329, "step": 476000 }, { "epoch": 2.089057027745837, "eval_runtime": 275.6023, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.484014855746675, "eval_sts_eval_spearman_cosine": 0.276484560070883, "step": 476000 }, { "epoch": 2.091251415380024, "grad_norm": 0.39987069368362427, "learning_rate": 9.54633753383043e-06, "loss": 0.0299, "step": 476500 }, { "epoch": 2.091251415380024, "eval_runtime": 286.3034, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48510366620923706, "eval_sts_eval_spearman_cosine": 0.2775920546319452, "step": 476500 }, { "epoch": 2.0934458030142107, "grad_norm": 0.50237637758255, "learning_rate": 9.535362537068052e-06, "loss": 0.0332, "step": 477000 }, { "epoch": 2.0934458030142107, "eval_runtime": 281.7454, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4819851634152233, "eval_sts_eval_spearman_cosine": 0.27375295518023585, "step": 477000 }, { "epoch": 2.0956401906483975, "grad_norm": 0.3672468364238739, "learning_rate": 9.524387540305675e-06, "loss": 0.0336, "step": 477500 }, { "epoch": 2.0956401906483975, "eval_runtime": 301.0324, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.483054048132192, "eval_sts_eval_spearman_cosine": 0.27415471521464463, "step": 477500 }, { "epoch": 2.0978345782825842, "grad_norm": 0.5945357084274292, "learning_rate": 9.5134125435433e-06, "loss": 0.0319, "step": 478000 }, { "epoch": 2.0978345782825842, "eval_runtime": 279.9674, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.483156654130004, "eval_sts_eval_spearman_cosine": 0.2763444083550354, "step": 478000 }, { "epoch": 2.1000289659167715, "grad_norm": 0.3925120234489441, "learning_rate": 9.502437546780925e-06, "loss": 0.0306, "step": 478500 }, { "epoch": 2.1000289659167715, "eval_runtime": 271.8492, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48355593540325364, "eval_sts_eval_spearman_cosine": 0.27794408883607474, "step": 478500 }, { "epoch": 2.1022233535509582, "grad_norm": 0.44720402359962463, "learning_rate": 9.491462550018549e-06, "loss": 0.0319, "step": 479000 }, { "epoch": 2.1022233535509582, "eval_runtime": 279.047, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48429644209173306, "eval_sts_eval_spearman_cosine": 0.27703912994370916, "step": 479000 }, { "epoch": 2.104417741185145, "grad_norm": 0.25189492106437683, "learning_rate": 9.480487553256173e-06, "loss": 0.0314, "step": 479500 }, { "epoch": 2.104417741185145, "eval_runtime": 277.5769, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4834016755519595, "eval_sts_eval_spearman_cosine": 0.27575533238139177, "step": 479500 }, { "epoch": 2.106612128819332, "grad_norm": 0.8924570083618164, "learning_rate": 9.469512556493796e-06, "loss": 0.0308, "step": 480000 }, { "epoch": 2.106612128819332, "eval_runtime": 279.8384, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48413875449305266, "eval_sts_eval_spearman_cosine": 0.2743493180455154, "step": 480000 }, { "epoch": 2.1088065164535186, "grad_norm": 0.8322932124137878, "learning_rate": 9.45853755973142e-06, "loss": 0.0315, "step": 480500 }, { "epoch": 2.1088065164535186, "eval_runtime": 290.9695, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4839037719637483, "eval_sts_eval_spearman_cosine": 0.27440146465992066, "step": 480500 }, { "epoch": 2.1110009040877054, "grad_norm": 0.7654933929443359, "learning_rate": 9.447562562969044e-06, "loss": 0.03, "step": 481000 }, { "epoch": 2.1110009040877054, "eval_runtime": 291.0687, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4840720443866221, "eval_sts_eval_spearman_cosine": 0.27362415040979793, "step": 481000 }, { "epoch": 2.113195291721892, "grad_norm": 0.21563412249088287, "learning_rate": 9.436587566206668e-06, "loss": 0.0319, "step": 481500 }, { "epoch": 2.113195291721892, "eval_runtime": 290.2344, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4870034806759867, "eval_sts_eval_spearman_cosine": 0.27598230558027276, "step": 481500 }, { "epoch": 2.115389679356079, "grad_norm": 0.6363007426261902, "learning_rate": 9.425612569444292e-06, "loss": 0.0316, "step": 482000 }, { "epoch": 2.115389679356079, "eval_runtime": 280.5993, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4840018409316894, "eval_sts_eval_spearman_cosine": 0.2737054150208289, "step": 482000 }, { "epoch": 2.1175840669902657, "grad_norm": 0.7537102103233337, "learning_rate": 9.414637572681917e-06, "loss": 0.0309, "step": 482500 }, { "epoch": 2.1175840669902657, "eval_runtime": 281.1975, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4851280340954971, "eval_sts_eval_spearman_cosine": 0.2739545115847158, "step": 482500 }, { "epoch": 2.1197784546244525, "grad_norm": 0.5879955887794495, "learning_rate": 9.403662575919541e-06, "loss": 0.0311, "step": 483000 }, { "epoch": 2.1197784546244525, "eval_runtime": 276.3715, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48439610527616794, "eval_sts_eval_spearman_cosine": 0.2742358313008438, "step": 483000 }, { "epoch": 2.1219728422586392, "grad_norm": 0.8316090106964111, "learning_rate": 9.392687579157165e-06, "loss": 0.0316, "step": 483500 }, { "epoch": 2.1219728422586392, "eval_runtime": 274.3034, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48410948158806477, "eval_sts_eval_spearman_cosine": 0.27386530901272776, "step": 483500 }, { "epoch": 2.124167229892826, "grad_norm": 0.7133249640464783, "learning_rate": 9.381712582394789e-06, "loss": 0.0325, "step": 484000 }, { "epoch": 2.124167229892826, "eval_runtime": 273.4933, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.485492728389777, "eval_sts_eval_spearman_cosine": 0.27325733784908884, "step": 484000 }, { "epoch": 2.126361617527013, "grad_norm": 0.3341532051563263, "learning_rate": 9.370737585632413e-06, "loss": 0.0325, "step": 484500 }, { "epoch": 2.126361617527013, "eval_runtime": 276.6263, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48579780083331064, "eval_sts_eval_spearman_cosine": 0.2732274245884173, "step": 484500 }, { "epoch": 2.1285560051611996, "grad_norm": 0.32612451910972595, "learning_rate": 9.359762588870037e-06, "loss": 0.0318, "step": 485000 }, { "epoch": 2.1285560051611996, "eval_runtime": 269.4102, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48691106140544266, "eval_sts_eval_spearman_cosine": 0.2735583749058291, "step": 485000 }, { "epoch": 2.1307503927953864, "grad_norm": 0.6193019151687622, "learning_rate": 9.34878759210766e-06, "loss": 0.0318, "step": 485500 }, { "epoch": 2.1307503927953864, "eval_runtime": 274.4835, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48426549492385274, "eval_sts_eval_spearman_cosine": 0.2715907465090971, "step": 485500 }, { "epoch": 2.132944780429573, "grad_norm": 0.5056445598602295, "learning_rate": 9.337812595345284e-06, "loss": 0.0309, "step": 486000 }, { "epoch": 2.132944780429573, "eval_runtime": 285.4679, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48453489971297325, "eval_sts_eval_spearman_cosine": 0.27343107124466487, "step": 486000 }, { "epoch": 2.13513916806376, "grad_norm": 0.5322176814079285, "learning_rate": 9.32683759858291e-06, "loss": 0.0316, "step": 486500 }, { "epoch": 2.13513916806376, "eval_runtime": 272.4208, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4846251895489141, "eval_sts_eval_spearman_cosine": 0.2733981194766248, "step": 486500 }, { "epoch": 2.137333555697947, "grad_norm": 0.47497379779815674, "learning_rate": 9.315862601820534e-06, "loss": 0.0316, "step": 487000 }, { "epoch": 2.137333555697947, "eval_runtime": 272.3449, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48709999096634365, "eval_sts_eval_spearman_cosine": 0.27434814082744247, "step": 487000 }, { "epoch": 2.139527943332134, "grad_norm": 0.5311364531517029, "learning_rate": 9.304887605058158e-06, "loss": 0.0327, "step": 487500 }, { "epoch": 2.139527943332134, "eval_runtime": 296.3361, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4870956840923766, "eval_sts_eval_spearman_cosine": 0.2746451478058307, "step": 487500 }, { "epoch": 2.1417223309663207, "grad_norm": 0.5383901596069336, "learning_rate": 9.293912608295781e-06, "loss": 0.0335, "step": 488000 }, { "epoch": 2.1417223309663207, "eval_runtime": 287.1231, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4823380530502626, "eval_sts_eval_spearman_cosine": 0.27341603340009335, "step": 488000 }, { "epoch": 2.1439167186005075, "grad_norm": 0.37382999062538147, "learning_rate": 9.282937611533405e-06, "loss": 0.0312, "step": 488500 }, { "epoch": 2.1439167186005075, "eval_runtime": 271.541, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4870242577736378, "eval_sts_eval_spearman_cosine": 0.2758811621751825, "step": 488500 }, { "epoch": 2.1461111062346943, "grad_norm": 0.4015238285064697, "learning_rate": 9.271962614771029e-06, "loss": 0.0297, "step": 489000 }, { "epoch": 2.1461111062346943, "eval_runtime": 278.9375, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48913952069395505, "eval_sts_eval_spearman_cosine": 0.2775463996237878, "step": 489000 }, { "epoch": 2.148305493868881, "grad_norm": 0.36439549922943115, "learning_rate": 9.260987618008653e-06, "loss": 0.0335, "step": 489500 }, { "epoch": 2.148305493868881, "eval_runtime": 278.9802, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48919440737530123, "eval_sts_eval_spearman_cosine": 0.2786797437344567, "step": 489500 }, { "epoch": 2.150499881503068, "grad_norm": 0.4108155071735382, "learning_rate": 9.250012621246277e-06, "loss": 0.0322, "step": 490000 }, { "epoch": 2.150499881503068, "eval_runtime": 277.2228, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4873170788386295, "eval_sts_eval_spearman_cosine": 0.27675700511341034, "step": 490000 }, { "epoch": 2.1526942691372546, "grad_norm": 0.3319602906703949, "learning_rate": 9.239037624483902e-06, "loss": 0.031, "step": 490500 }, { "epoch": 2.1526942691372546, "eval_runtime": 277.6502, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4877439117392157, "eval_sts_eval_spearman_cosine": 0.2767140785959224, "step": 490500 }, { "epoch": 2.1548886567714414, "grad_norm": 0.21603497862815857, "learning_rate": 9.228062627721526e-06, "loss": 0.0326, "step": 491000 }, { "epoch": 2.1548886567714414, "eval_runtime": 266.9116, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48375861064888626, "eval_sts_eval_spearman_cosine": 0.27437063662883515, "step": 491000 }, { "epoch": 2.157083044405628, "grad_norm": 0.5237635374069214, "learning_rate": 9.21708763095915e-06, "loss": 0.0323, "step": 491500 }, { "epoch": 2.157083044405628, "eval_runtime": 275.2041, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48429719410026995, "eval_sts_eval_spearman_cosine": 0.2738313918858065, "step": 491500 }, { "epoch": 2.159277432039815, "grad_norm": 0.6182759404182434, "learning_rate": 9.206112634196774e-06, "loss": 0.0327, "step": 492000 }, { "epoch": 2.159277432039815, "eval_runtime": 276.2796, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48539748218230183, "eval_sts_eval_spearman_cosine": 0.2748688176325032, "step": 492000 }, { "epoch": 2.1614718196740017, "grad_norm": 0.3698105216026306, "learning_rate": 9.195137637434398e-06, "loss": 0.0312, "step": 492500 }, { "epoch": 2.1614718196740017, "eval_runtime": 266.8634, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4867754791029543, "eval_sts_eval_spearman_cosine": 0.27548832864920725, "step": 492500 }, { "epoch": 2.1636662073081885, "grad_norm": 0.6992293000221252, "learning_rate": 9.184162640672022e-06, "loss": 0.0326, "step": 493000 }, { "epoch": 2.1636662073081885, "eval_runtime": 272.2993, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4841196450497299, "eval_sts_eval_spearman_cosine": 0.27384572317033673, "step": 493000 }, { "epoch": 2.1658605949423753, "grad_norm": 0.46266791224479675, "learning_rate": 9.173187643909645e-06, "loss": 0.032, "step": 493500 }, { "epoch": 2.1658605949423753, "eval_runtime": 272.2528, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48386422677401864, "eval_sts_eval_spearman_cosine": 0.2760168405410989, "step": 493500 }, { "epoch": 2.168054982576562, "grad_norm": 1.2753549814224243, "learning_rate": 9.16221264714727e-06, "loss": 0.0321, "step": 494000 }, { "epoch": 2.168054982576562, "eval_runtime": 271.2183, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48651894871838675, "eval_sts_eval_spearman_cosine": 0.27895168100562, "step": 494000 }, { "epoch": 2.170249370210749, "grad_norm": 0.30980709195137024, "learning_rate": 9.151237650384895e-06, "loss": 0.0318, "step": 494500 }, { "epoch": 2.170249370210749, "eval_runtime": 272.892, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48586469617831474, "eval_sts_eval_spearman_cosine": 0.27668206534877043, "step": 494500 }, { "epoch": 2.1724437578449356, "grad_norm": 0.23210254311561584, "learning_rate": 9.140262653622519e-06, "loss": 0.033, "step": 495000 }, { "epoch": 2.1724437578449356, "eval_runtime": 274.2146, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.484497984072394, "eval_sts_eval_spearman_cosine": 0.27429101474590567, "step": 495000 }, { "epoch": 2.174638145479123, "grad_norm": 0.5025335550308228, "learning_rate": 9.129287656860143e-06, "loss": 0.0311, "step": 495500 }, { "epoch": 2.174638145479123, "eval_runtime": 267.1047, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48344405779043825, "eval_sts_eval_spearman_cosine": 0.27427774632543855, "step": 495500 }, { "epoch": 2.1768325331133096, "grad_norm": 0.5484679341316223, "learning_rate": 9.118312660097766e-06, "loss": 0.0326, "step": 496000 }, { "epoch": 2.1768325331133096, "eval_runtime": 275.0349, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4812186047953524, "eval_sts_eval_spearman_cosine": 0.2741292787058922, "step": 496000 }, { "epoch": 2.1790269207474964, "grad_norm": 0.5846481323242188, "learning_rate": 9.10733766333539e-06, "loss": 0.0302, "step": 496500 }, { "epoch": 2.1790269207474964, "eval_runtime": 269.2641, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48143359919681356, "eval_sts_eval_spearman_cosine": 0.27734407251659066, "step": 496500 }, { "epoch": 2.181221308381683, "grad_norm": 0.45709228515625, "learning_rate": 9.096362666573014e-06, "loss": 0.0321, "step": 497000 }, { "epoch": 2.181221308381683, "eval_runtime": 268.5447, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4813401216933416, "eval_sts_eval_spearman_cosine": 0.2770925926145257, "step": 497000 }, { "epoch": 2.18341569601587, "grad_norm": 0.2830984890460968, "learning_rate": 9.085387669810638e-06, "loss": 0.0316, "step": 497500 }, { "epoch": 2.18341569601587, "eval_runtime": 268.717, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48029362539268083, "eval_sts_eval_spearman_cosine": 0.2763845370946907, "step": 497500 }, { "epoch": 2.1856100836500567, "grad_norm": 0.35515064001083374, "learning_rate": 9.074412673048262e-06, "loss": 0.0321, "step": 498000 }, { "epoch": 2.1856100836500567, "eval_runtime": 274.3586, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48237716802633374, "eval_sts_eval_spearman_cosine": 0.2793441625279181, "step": 498000 }, { "epoch": 2.1878044712842435, "grad_norm": 0.6214091777801514, "learning_rate": 9.063437676285887e-06, "loss": 0.0323, "step": 498500 }, { "epoch": 2.1878044712842435, "eval_runtime": 265.6801, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48104500811478956, "eval_sts_eval_spearman_cosine": 0.2794568827297349, "step": 498500 }, { "epoch": 2.1899988589184303, "grad_norm": 0.6414427757263184, "learning_rate": 9.052462679523511e-06, "loss": 0.0337, "step": 499000 }, { "epoch": 2.1899988589184303, "eval_runtime": 268.509, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4826551001010678, "eval_sts_eval_spearman_cosine": 0.2791802706637717, "step": 499000 }, { "epoch": 2.192193246552617, "grad_norm": 0.2338150143623352, "learning_rate": 9.041487682761133e-06, "loss": 0.0317, "step": 499500 }, { "epoch": 2.192193246552617, "eval_runtime": 270.5181, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4843510621717454, "eval_sts_eval_spearman_cosine": 0.2779191265348618, "step": 499500 }, { "epoch": 2.194387634186804, "grad_norm": 1.3216770887374878, "learning_rate": 9.030512685998757e-06, "loss": 0.0325, "step": 500000 }, { "epoch": 2.194387634186804, "eval_runtime": 279.0448, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48334234085451044, "eval_sts_eval_spearman_cosine": 0.27678510802232, "step": 500000 }, { "epoch": 2.1965820218209906, "grad_norm": 0.13995175063610077, "learning_rate": 9.019537689236383e-06, "loss": 0.0309, "step": 500500 }, { "epoch": 2.1965820218209906, "eval_runtime": 282.1637, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4855530611856902, "eval_sts_eval_spearman_cosine": 0.2773743189564634, "step": 500500 }, { "epoch": 2.1987764094551774, "grad_norm": 0.27378275990486145, "learning_rate": 9.008562692474007e-06, "loss": 0.0325, "step": 501000 }, { "epoch": 2.1987764094551774, "eval_runtime": 294.7524, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4847243510649253, "eval_sts_eval_spearman_cosine": 0.27564806397109637, "step": 501000 }, { "epoch": 2.200970797089364, "grad_norm": 0.32091689109802246, "learning_rate": 8.99758769571163e-06, "loss": 0.0328, "step": 501500 }, { "epoch": 2.200970797089364, "eval_runtime": 276.2218, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4873097963830841, "eval_sts_eval_spearman_cosine": 0.279760056094505, "step": 501500 }, { "epoch": 2.203165184723551, "grad_norm": 0.5437402725219727, "learning_rate": 8.986612698949254e-06, "loss": 0.0331, "step": 502000 }, { "epoch": 2.203165184723551, "eval_runtime": 273.5107, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4873794474087513, "eval_sts_eval_spearman_cosine": 0.2807100950433238, "step": 502000 }, { "epoch": 2.2053595723577377, "grad_norm": 0.4115486741065979, "learning_rate": 8.975637702186878e-06, "loss": 0.0317, "step": 502500 }, { "epoch": 2.2053595723577377, "eval_runtime": 278.7736, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4870904088386173, "eval_sts_eval_spearman_cosine": 0.2811994925856234, "step": 502500 }, { "epoch": 2.2075539599919245, "grad_norm": 0.7699851393699646, "learning_rate": 8.964662705424502e-06, "loss": 0.031, "step": 503000 }, { "epoch": 2.2075539599919245, "eval_runtime": 281.8423, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4921118528951735, "eval_sts_eval_spearman_cosine": 0.28487997818063493, "step": 503000 }, { "epoch": 2.2097483476261113, "grad_norm": 0.3292633593082428, "learning_rate": 8.953687708662126e-06, "loss": 0.0337, "step": 503500 }, { "epoch": 2.2097483476261113, "eval_runtime": 279.1657, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4879321124994108, "eval_sts_eval_spearman_cosine": 0.2792022458442946, "step": 503500 }, { "epoch": 2.211942735260298, "grad_norm": 0.3308636546134949, "learning_rate": 8.94271271189975e-06, "loss": 0.0307, "step": 504000 }, { "epoch": 2.211942735260298, "eval_runtime": 279.0506, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48776060145246225, "eval_sts_eval_spearman_cosine": 0.2783313655080519, "step": 504000 }, { "epoch": 2.214137122894485, "grad_norm": 0.3502589166164398, "learning_rate": 8.931737715137375e-06, "loss": 0.032, "step": 504500 }, { "epoch": 2.214137122894485, "eval_runtime": 279.2158, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48839318583338637, "eval_sts_eval_spearman_cosine": 0.27917116245837037, "step": 504500 }, { "epoch": 2.216331510528672, "grad_norm": 0.3871518671512604, "learning_rate": 8.920762718374999e-06, "loss": 0.0339, "step": 505000 }, { "epoch": 2.216331510528672, "eval_runtime": 292.8192, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4878896099088264, "eval_sts_eval_spearman_cosine": 0.2796582600205246, "step": 505000 }, { "epoch": 2.218525898162859, "grad_norm": 0.5358190536499023, "learning_rate": 8.909787721612623e-06, "loss": 0.0327, "step": 505500 }, { "epoch": 2.218525898162859, "eval_runtime": 273.7757, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4854378226004872, "eval_sts_eval_spearman_cosine": 0.2792044899524335, "step": 505500 }, { "epoch": 2.2207202857970456, "grad_norm": 0.37383949756622314, "learning_rate": 8.898812724850247e-06, "loss": 0.0337, "step": 506000 }, { "epoch": 2.2207202857970456, "eval_runtime": 280.5189, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4838849758277515, "eval_sts_eval_spearman_cosine": 0.2781396267473465, "step": 506000 }, { "epoch": 2.2229146734312324, "grad_norm": 0.47542986273765564, "learning_rate": 8.88783772808787e-06, "loss": 0.0332, "step": 506500 }, { "epoch": 2.2229146734312324, "eval_runtime": 283.5322, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48455527256285214, "eval_sts_eval_spearman_cosine": 0.2774969310623904, "step": 506500 }, { "epoch": 2.225109061065419, "grad_norm": 0.508712112903595, "learning_rate": 8.876862731325495e-06, "loss": 0.0313, "step": 507000 }, { "epoch": 2.225109061065419, "eval_runtime": 287.7059, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4878546823383392, "eval_sts_eval_spearman_cosine": 0.2796680976331336, "step": 507000 }, { "epoch": 2.227303448699606, "grad_norm": 0.8161287903785706, "learning_rate": 8.865887734563118e-06, "loss": 0.0324, "step": 507500 }, { "epoch": 2.227303448699606, "eval_runtime": 277.8646, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4856090025460087, "eval_sts_eval_spearman_cosine": 0.2787404570242158, "step": 507500 }, { "epoch": 2.2294978363337927, "grad_norm": 0.8459435105323792, "learning_rate": 8.854912737800742e-06, "loss": 0.0326, "step": 508000 }, { "epoch": 2.2294978363337927, "eval_runtime": 285.3943, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4838258160457709, "eval_sts_eval_spearman_cosine": 0.27831082931711054, "step": 508000 }, { "epoch": 2.2316922239679795, "grad_norm": 0.4694104790687561, "learning_rate": 8.843937741038368e-06, "loss": 0.0325, "step": 508500 }, { "epoch": 2.2316922239679795, "eval_runtime": 288.2391, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48624183160330997, "eval_sts_eval_spearman_cosine": 0.27740409278497813, "step": 508500 }, { "epoch": 2.2338866116021663, "grad_norm": 0.36110883951187134, "learning_rate": 8.832962744275992e-06, "loss": 0.0329, "step": 509000 }, { "epoch": 2.2338866116021663, "eval_runtime": 277.7221, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4869907686365154, "eval_sts_eval_spearman_cosine": 0.2776276636236514, "step": 509000 }, { "epoch": 2.236080999236353, "grad_norm": 0.3222784996032715, "learning_rate": 8.821987747513615e-06, "loss": 0.0333, "step": 509500 }, { "epoch": 2.236080999236353, "eval_runtime": 277.8459, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4880135646022029, "eval_sts_eval_spearman_cosine": 0.27976881751188676, "step": 509500 }, { "epoch": 2.23827538687054, "grad_norm": 0.6076580286026001, "learning_rate": 8.81101275075124e-06, "loss": 0.0337, "step": 510000 }, { "epoch": 2.23827538687054, "eval_runtime": 275.0682, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48616450973559944, "eval_sts_eval_spearman_cosine": 0.2799426157349678, "step": 510000 }, { "epoch": 2.2404697745047266, "grad_norm": 0.591866672039032, "learning_rate": 8.800037753988863e-06, "loss": 0.0325, "step": 510500 }, { "epoch": 2.2404697745047266, "eval_runtime": 271.9004, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48594430282783063, "eval_sts_eval_spearman_cosine": 0.2778383736466962, "step": 510500 }, { "epoch": 2.2426641621389134, "grad_norm": 0.5710476636886597, "learning_rate": 8.789062757226487e-06, "loss": 0.0327, "step": 511000 }, { "epoch": 2.2426641621389134, "eval_runtime": 276.1793, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48624021361695763, "eval_sts_eval_spearman_cosine": 0.2785812196955208, "step": 511000 }, { "epoch": 2.2448585497731, "grad_norm": 0.5695497989654541, "learning_rate": 8.778087760464111e-06, "loss": 0.0318, "step": 511500 }, { "epoch": 2.2448585497731, "eval_runtime": 285.751, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4900788839077016, "eval_sts_eval_spearman_cosine": 0.2806911869789952, "step": 511500 }, { "epoch": 2.247052937407287, "grad_norm": 0.17345888912677765, "learning_rate": 8.767112763701735e-06, "loss": 0.0323, "step": 512000 }, { "epoch": 2.247052937407287, "eval_runtime": 277.7302, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4883550570138586, "eval_sts_eval_spearman_cosine": 0.28003904562977777, "step": 512000 }, { "epoch": 2.2492473250414737, "grad_norm": 0.5531647801399231, "learning_rate": 8.75613776693936e-06, "loss": 0.0342, "step": 512500 }, { "epoch": 2.2492473250414737, "eval_runtime": 282.5503, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48722911234711674, "eval_sts_eval_spearman_cosine": 0.2773475334479729, "step": 512500 }, { "epoch": 2.2514417126756605, "grad_norm": 0.22518423199653625, "learning_rate": 8.745162770176984e-06, "loss": 0.0323, "step": 513000 }, { "epoch": 2.2514417126756605, "eval_runtime": 290.3005, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4876968295725368, "eval_sts_eval_spearman_cosine": 0.2795685308866473, "step": 513000 }, { "epoch": 2.2536361003098477, "grad_norm": 0.2076551467180252, "learning_rate": 8.734187773414608e-06, "loss": 0.0321, "step": 513500 }, { "epoch": 2.2536361003098477, "eval_runtime": 270.2126, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48847096151750485, "eval_sts_eval_spearman_cosine": 0.27827216687638673, "step": 513500 }, { "epoch": 2.2558304879440345, "grad_norm": 0.3197595775127411, "learning_rate": 8.723212776652232e-06, "loss": 0.0314, "step": 514000 }, { "epoch": 2.2558304879440345, "eval_runtime": 282.7622, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4876968922171221, "eval_sts_eval_spearman_cosine": 0.27656038523674753, "step": 514000 }, { "epoch": 2.2580248755782213, "grad_norm": 0.9642879962921143, "learning_rate": 8.712237779889856e-06, "loss": 0.0308, "step": 514500 }, { "epoch": 2.2580248755782213, "eval_runtime": 280.631, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4884252342394622, "eval_sts_eval_spearman_cosine": 0.27692169081711454, "step": 514500 }, { "epoch": 2.260219263212408, "grad_norm": 0.5418426990509033, "learning_rate": 8.70126278312748e-06, "loss": 0.0308, "step": 515000 }, { "epoch": 2.260219263212408, "eval_runtime": 276.6442, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4902259088259302, "eval_sts_eval_spearman_cosine": 0.27899512726714665, "step": 515000 }, { "epoch": 2.262413650846595, "grad_norm": 0.8897181749343872, "learning_rate": 8.690287786365103e-06, "loss": 0.0342, "step": 515500 }, { "epoch": 2.262413650846595, "eval_runtime": 274.4025, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4856193777039118, "eval_sts_eval_spearman_cosine": 0.2774476945725097, "step": 515500 }, { "epoch": 2.2646080384807816, "grad_norm": 0.8008809685707092, "learning_rate": 8.679312789602727e-06, "loss": 0.033, "step": 516000 }, { "epoch": 2.2646080384807816, "eval_runtime": 285.0675, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4888727761662844, "eval_sts_eval_spearman_cosine": 0.2797088469793345, "step": 516000 }, { "epoch": 2.2668024261149684, "grad_norm": 0.33235999941825867, "learning_rate": 8.668337792840353e-06, "loss": 0.0331, "step": 516500 }, { "epoch": 2.2668024261149684, "eval_runtime": 271.2191, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48752780658077505, "eval_sts_eval_spearman_cosine": 0.28008857449116364, "step": 516500 }, { "epoch": 2.268996813749155, "grad_norm": 0.8617491722106934, "learning_rate": 8.657362796077977e-06, "loss": 0.0324, "step": 517000 }, { "epoch": 2.268996813749155, "eval_runtime": 280.7917, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4855966943063644, "eval_sts_eval_spearman_cosine": 0.2777227411543707, "step": 517000 }, { "epoch": 2.271191201383342, "grad_norm": 0.22787345945835114, "learning_rate": 8.6463877993156e-06, "loss": 0.0322, "step": 517500 }, { "epoch": 2.271191201383342, "eval_runtime": 274.7096, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4877756599114529, "eval_sts_eval_spearman_cosine": 0.27695248195068245, "step": 517500 }, { "epoch": 2.2733855890175287, "grad_norm": 0.6945652365684509, "learning_rate": 8.635412802553224e-06, "loss": 0.0326, "step": 518000 }, { "epoch": 2.2733855890175287, "eval_runtime": 276.8532, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4871583346250973, "eval_sts_eval_spearman_cosine": 0.27895110956042146, "step": 518000 }, { "epoch": 2.2755799766517155, "grad_norm": 0.5269704461097717, "learning_rate": 8.624437805790848e-06, "loss": 0.034, "step": 518500 }, { "epoch": 2.2755799766517155, "eval_runtime": 281.6775, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4835933493949941, "eval_sts_eval_spearman_cosine": 0.27750115790065205, "step": 518500 }, { "epoch": 2.2777743642859023, "grad_norm": 0.6691951751708984, "learning_rate": 8.613462809028472e-06, "loss": 0.0354, "step": 519000 }, { "epoch": 2.2777743642859023, "eval_runtime": 280.4134, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.484944235564679, "eval_sts_eval_spearman_cosine": 0.27646047774856275, "step": 519000 }, { "epoch": 2.279968751920089, "grad_norm": 0.49006837606430054, "learning_rate": 8.602487812266096e-06, "loss": 0.0327, "step": 519500 }, { "epoch": 2.279968751920089, "eval_runtime": 281.6089, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48611450091838104, "eval_sts_eval_spearman_cosine": 0.2768606333193347, "step": 519500 }, { "epoch": 2.282163139554276, "grad_norm": 0.3447847366333008, "learning_rate": 8.59151281550372e-06, "loss": 0.0312, "step": 520000 }, { "epoch": 2.282163139554276, "eval_runtime": 307.2422, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4818881358926411, "eval_sts_eval_spearman_cosine": 0.2753263461403532, "step": 520000 }, { "epoch": 2.2843575271884626, "grad_norm": 0.169752299785614, "learning_rate": 8.580537818741345e-06, "loss": 0.0327, "step": 520500 }, { "epoch": 2.2843575271884626, "eval_runtime": 276.4205, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4867625968341313, "eval_sts_eval_spearman_cosine": 0.2775164789779355, "step": 520500 }, { "epoch": 2.2865519148226494, "grad_norm": 0.4267443120479584, "learning_rate": 8.569562821978969e-06, "loss": 0.0321, "step": 521000 }, { "epoch": 2.2865519148226494, "eval_runtime": 271.7747, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48717512598387946, "eval_sts_eval_spearman_cosine": 0.27793243804051965, "step": 521000 }, { "epoch": 2.288746302456836, "grad_norm": 0.36836570501327515, "learning_rate": 8.558587825216593e-06, "loss": 0.032, "step": 521500 }, { "epoch": 2.288746302456836, "eval_runtime": 271.205, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48734570956635914, "eval_sts_eval_spearman_cosine": 0.2768958070915122, "step": 521500 }, { "epoch": 2.2909406900910234, "grad_norm": 0.27792519330978394, "learning_rate": 8.547612828454215e-06, "loss": 0.0318, "step": 522000 }, { "epoch": 2.2909406900910234, "eval_runtime": 280.273, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4851793774392884, "eval_sts_eval_spearman_cosine": 0.27578900909347687, "step": 522000 }, { "epoch": 2.29313507772521, "grad_norm": 0.19990012049674988, "learning_rate": 8.53663783169184e-06, "loss": 0.0323, "step": 522500 }, { "epoch": 2.29313507772521, "eval_runtime": 276.139, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4846087032445344, "eval_sts_eval_spearman_cosine": 0.2748462573886328, "step": 522500 }, { "epoch": 2.295329465359397, "grad_norm": 0.6889265775680542, "learning_rate": 8.525662834929465e-06, "loss": 0.0335, "step": 523000 }, { "epoch": 2.295329465359397, "eval_runtime": 288.1188, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48393896546996445, "eval_sts_eval_spearman_cosine": 0.2768094065478243, "step": 523000 }, { "epoch": 2.2975238529935837, "grad_norm": 0.28565675020217896, "learning_rate": 8.514687838167088e-06, "loss": 0.0322, "step": 523500 }, { "epoch": 2.2975238529935837, "eval_runtime": 268.0059, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48435580722205607, "eval_sts_eval_spearman_cosine": 0.27655064569146326, "step": 523500 }, { "epoch": 2.2997182406277705, "grad_norm": 0.2662995159626007, "learning_rate": 8.503712841404712e-06, "loss": 0.0321, "step": 524000 }, { "epoch": 2.2997182406277705, "eval_runtime": 269.8937, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48317408685973373, "eval_sts_eval_spearman_cosine": 0.2756874928903644, "step": 524000 }, { "epoch": 2.3019126282619573, "grad_norm": 1.2408359050750732, "learning_rate": 8.492737844642338e-06, "loss": 0.0317, "step": 524500 }, { "epoch": 2.3019126282619573, "eval_runtime": 278.666, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4826539771005184, "eval_sts_eval_spearman_cosine": 0.27460262314002176, "step": 524500 }, { "epoch": 2.304107015896144, "grad_norm": 0.5811828970909119, "learning_rate": 8.48176284787996e-06, "loss": 0.0334, "step": 525000 }, { "epoch": 2.304107015896144, "eval_runtime": 272.9968, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4841662899674697, "eval_sts_eval_spearman_cosine": 0.27367995936089506, "step": 525000 }, { "epoch": 2.306301403530331, "grad_norm": 0.7675876021385193, "learning_rate": 8.470787851117584e-06, "loss": 0.033, "step": 525500 }, { "epoch": 2.306301403530331, "eval_runtime": 274.0179, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48122713294917996, "eval_sts_eval_spearman_cosine": 0.2732567421346916, "step": 525500 }, { "epoch": 2.3084957911645176, "grad_norm": 0.3600727319717407, "learning_rate": 8.459812854355208e-06, "loss": 0.0319, "step": 526000 }, { "epoch": 2.3084957911645176, "eval_runtime": 275.3284, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48504690497753705, "eval_sts_eval_spearman_cosine": 0.2754060441821131, "step": 526000 }, { "epoch": 2.3106901787987044, "grad_norm": 0.48648613691329956, "learning_rate": 8.448837857592833e-06, "loss": 0.0331, "step": 526500 }, { "epoch": 2.3106901787987044, "eval_runtime": 270.5252, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4871114973245598, "eval_sts_eval_spearman_cosine": 0.2753928236741208, "step": 526500 }, { "epoch": 2.312884566432891, "grad_norm": 1.093798041343689, "learning_rate": 8.437862860830457e-06, "loss": 0.0326, "step": 527000 }, { "epoch": 2.312884566432891, "eval_runtime": 272.6398, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48626711305820935, "eval_sts_eval_spearman_cosine": 0.27577867472718237, "step": 527000 }, { "epoch": 2.315078954067078, "grad_norm": 0.48203474283218384, "learning_rate": 8.426887864068081e-06, "loss": 0.0322, "step": 527500 }, { "epoch": 2.315078954067078, "eval_runtime": 275.6878, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4861823045033326, "eval_sts_eval_spearman_cosine": 0.2753968605960232, "step": 527500 }, { "epoch": 2.3172733417012648, "grad_norm": 0.23388640582561493, "learning_rate": 8.415912867305705e-06, "loss": 0.034, "step": 528000 }, { "epoch": 2.3172733417012648, "eval_runtime": 272.8015, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48697782227127673, "eval_sts_eval_spearman_cosine": 0.27601231886364697, "step": 528000 }, { "epoch": 2.3194677293354515, "grad_norm": 3.7953531742095947, "learning_rate": 8.404937870543329e-06, "loss": 0.0318, "step": 528500 }, { "epoch": 2.3194677293354515, "eval_runtime": 274.5041, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4856071795698106, "eval_sts_eval_spearman_cosine": 0.2759439670538989, "step": 528500 }, { "epoch": 2.3216621169696383, "grad_norm": 0.5172401070594788, "learning_rate": 8.393962873780952e-06, "loss": 0.0329, "step": 529000 }, { "epoch": 2.3216621169696383, "eval_runtime": 272.2239, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4821601061455978, "eval_sts_eval_spearman_cosine": 0.27277772375903314, "step": 529000 }, { "epoch": 2.323856504603825, "grad_norm": 0.8210118412971497, "learning_rate": 8.382987877018576e-06, "loss": 0.0327, "step": 529500 }, { "epoch": 2.323856504603825, "eval_runtime": 273.8259, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48205424470137287, "eval_sts_eval_spearman_cosine": 0.27316002377188375, "step": 529500 }, { "epoch": 2.326050892238012, "grad_norm": 0.5263721346855164, "learning_rate": 8.3720128802562e-06, "loss": 0.0302, "step": 530000 }, { "epoch": 2.326050892238012, "eval_runtime": 273.6591, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.486521971568523, "eval_sts_eval_spearman_cosine": 0.2738359572269107, "step": 530000 }, { "epoch": 2.328245279872199, "grad_norm": 0.3393363654613495, "learning_rate": 8.361037883493826e-06, "loss": 0.0339, "step": 530500 }, { "epoch": 2.328245279872199, "eval_runtime": 290.3246, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.488373024906134, "eval_sts_eval_spearman_cosine": 0.2761209751623738, "step": 530500 }, { "epoch": 2.3304396675063854, "grad_norm": 0.34577080607414246, "learning_rate": 8.35006288673145e-06, "loss": 0.0314, "step": 531000 }, { "epoch": 2.3304396675063854, "eval_runtime": 272.4227, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4883875882193376, "eval_sts_eval_spearman_cosine": 0.2765013680192416, "step": 531000 }, { "epoch": 2.3326340551405726, "grad_norm": 0.4187750220298767, "learning_rate": 8.339087889969073e-06, "loss": 0.0318, "step": 531500 }, { "epoch": 2.3326340551405726, "eval_runtime": 276.2592, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4895415670343033, "eval_sts_eval_spearman_cosine": 0.27634056592396844, "step": 531500 }, { "epoch": 2.3348284427747594, "grad_norm": 0.3287450075149536, "learning_rate": 8.328112893206697e-06, "loss": 0.0311, "step": 532000 }, { "epoch": 2.3348284427747594, "eval_runtime": 283.4122, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48824840908647194, "eval_sts_eval_spearman_cosine": 0.2756561035807671, "step": 532000 }, { "epoch": 2.337022830408946, "grad_norm": 0.5893502831459045, "learning_rate": 8.317137896444321e-06, "loss": 0.0356, "step": 532500 }, { "epoch": 2.337022830408946, "eval_runtime": 276.2787, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48383542408207925, "eval_sts_eval_spearman_cosine": 0.2739965722928541, "step": 532500 }, { "epoch": 2.339217218043133, "grad_norm": 0.5555914640426636, "learning_rate": 8.306162899681945e-06, "loss": 0.0316, "step": 533000 }, { "epoch": 2.339217218043133, "eval_runtime": 277.2984, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4836390981959745, "eval_sts_eval_spearman_cosine": 0.2724680071582222, "step": 533000 }, { "epoch": 2.3414116056773198, "grad_norm": 0.21390046179294586, "learning_rate": 8.295187902919569e-06, "loss": 0.0332, "step": 533500 }, { "epoch": 2.3414116056773198, "eval_runtime": 270.406, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4851601745039128, "eval_sts_eval_spearman_cosine": 0.2737928639274214, "step": 533500 }, { "epoch": 2.3436059933115065, "grad_norm": 0.4467225670814514, "learning_rate": 8.284212906157193e-06, "loss": 0.0324, "step": 534000 }, { "epoch": 2.3436059933115065, "eval_runtime": 279.4515, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48386214244996734, "eval_sts_eval_spearman_cosine": 0.27220845020133666, "step": 534000 }, { "epoch": 2.3458003809456933, "grad_norm": 0.35450097918510437, "learning_rate": 8.273237909394818e-06, "loss": 0.0325, "step": 534500 }, { "epoch": 2.3458003809456933, "eval_runtime": 270.726, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48523891217009874, "eval_sts_eval_spearman_cosine": 0.27297953868470953, "step": 534500 }, { "epoch": 2.34799476857988, "grad_norm": 0.3956543803215027, "learning_rate": 8.262262912632442e-06, "loss": 0.0327, "step": 535000 }, { "epoch": 2.34799476857988, "eval_runtime": 271.7441, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4865863833347719, "eval_sts_eval_spearman_cosine": 0.2740731808075969, "step": 535000 }, { "epoch": 2.350189156214067, "grad_norm": 0.36683428287506104, "learning_rate": 8.251287915870066e-06, "loss": 0.0325, "step": 535500 }, { "epoch": 2.350189156214067, "eval_runtime": 275.1332, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4848532452535923, "eval_sts_eval_spearman_cosine": 0.27481721469365933, "step": 535500 }, { "epoch": 2.3523835438482537, "grad_norm": 0.5218644142150879, "learning_rate": 8.24031291910769e-06, "loss": 0.0331, "step": 536000 }, { "epoch": 2.3523835438482537, "eval_runtime": 268.8058, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48654005454782795, "eval_sts_eval_spearman_cosine": 0.2760930943006477, "step": 536000 }, { "epoch": 2.3545779314824404, "grad_norm": 0.6684563755989075, "learning_rate": 8.229337922345314e-06, "loss": 0.0324, "step": 536500 }, { "epoch": 2.3545779314824404, "eval_runtime": 272.9407, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48727898864577546, "eval_sts_eval_spearman_cosine": 0.27788308351429786, "step": 536500 }, { "epoch": 2.356772319116627, "grad_norm": 0.4649601876735687, "learning_rate": 8.218362925582937e-06, "loss": 0.0328, "step": 537000 }, { "epoch": 2.356772319116627, "eval_runtime": 271.08, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48907974694176737, "eval_sts_eval_spearman_cosine": 0.28037161590120896, "step": 537000 }, { "epoch": 2.358966706750814, "grad_norm": 0.5362588167190552, "learning_rate": 8.207387928820561e-06, "loss": 0.0317, "step": 537500 }, { "epoch": 2.358966706750814, "eval_runtime": 272.6422, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49046217479481974, "eval_sts_eval_spearman_cosine": 0.28204785268499316, "step": 537500 }, { "epoch": 2.3611610943850008, "grad_norm": 0.5027867555618286, "learning_rate": 8.196412932058185e-06, "loss": 0.0338, "step": 538000 }, { "epoch": 2.3611610943850008, "eval_runtime": 277.9197, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4878932925179861, "eval_sts_eval_spearman_cosine": 0.2815281079079686, "step": 538000 }, { "epoch": 2.3633554820191875, "grad_norm": 0.25220638513565063, "learning_rate": 8.18543793529581e-06, "loss": 0.0327, "step": 538500 }, { "epoch": 2.3633554820191875, "eval_runtime": 279.2164, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4895580934798617, "eval_sts_eval_spearman_cosine": 0.28157005032087595, "step": 538500 }, { "epoch": 2.3655498696533748, "grad_norm": 0.4412989318370819, "learning_rate": 8.174462938533435e-06, "loss": 0.0327, "step": 539000 }, { "epoch": 2.3655498696533748, "eval_runtime": 273.8694, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4913022219478501, "eval_sts_eval_spearman_cosine": 0.2794438652300906, "step": 539000 }, { "epoch": 2.367744257287561, "grad_norm": 0.6307740807533264, "learning_rate": 8.163487941771058e-06, "loss": 0.0328, "step": 539500 }, { "epoch": 2.367744257287561, "eval_runtime": 289.3235, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4888660229058391, "eval_sts_eval_spearman_cosine": 0.2793938274966692, "step": 539500 }, { "epoch": 2.3699386449217483, "grad_norm": 0.5284574031829834, "learning_rate": 8.152512945008682e-06, "loss": 0.0341, "step": 540000 }, { "epoch": 2.3699386449217483, "eval_runtime": 280.6447, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4879808214475472, "eval_sts_eval_spearman_cosine": 0.27726932247629865, "step": 540000 }, { "epoch": 2.372133032555935, "grad_norm": 0.30384206771850586, "learning_rate": 8.141537948246306e-06, "loss": 0.0334, "step": 540500 }, { "epoch": 2.372133032555935, "eval_runtime": 273.8612, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48685901973927614, "eval_sts_eval_spearman_cosine": 0.2759826670966392, "step": 540500 }, { "epoch": 2.374327420190122, "grad_norm": 0.763680636882782, "learning_rate": 8.13056295148393e-06, "loss": 0.0342, "step": 541000 }, { "epoch": 2.374327420190122, "eval_runtime": 274.9282, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48807108384756037, "eval_sts_eval_spearman_cosine": 0.2772844306872341, "step": 541000 }, { "epoch": 2.3765218078243087, "grad_norm": 0.4052700102329254, "learning_rate": 8.119587954721554e-06, "loss": 0.0334, "step": 541500 }, { "epoch": 2.3765218078243087, "eval_runtime": 289.1984, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48884381009261724, "eval_sts_eval_spearman_cosine": 0.2758446888012692, "step": 541500 }, { "epoch": 2.3787161954584954, "grad_norm": 0.4991072416305542, "learning_rate": 8.108612957959178e-06, "loss": 0.031, "step": 542000 }, { "epoch": 2.3787161954584954, "eval_runtime": 281.0637, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4892366570872009, "eval_sts_eval_spearman_cosine": 0.27759345158036614, "step": 542000 }, { "epoch": 2.380910583092682, "grad_norm": 0.31707268953323364, "learning_rate": 8.097637961196803e-06, "loss": 0.0322, "step": 542500 }, { "epoch": 2.380910583092682, "eval_runtime": 282.7687, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48974226881517025, "eval_sts_eval_spearman_cosine": 0.2783066473468517, "step": 542500 }, { "epoch": 2.383104970726869, "grad_norm": 0.3281314969062805, "learning_rate": 8.086662964434427e-06, "loss": 0.0332, "step": 543000 }, { "epoch": 2.383104970726869, "eval_runtime": 300.3003, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49019319178963494, "eval_sts_eval_spearman_cosine": 0.2786827435701343, "step": 543000 }, { "epoch": 2.3852993583610558, "grad_norm": 0.8090448379516602, "learning_rate": 8.075687967672051e-06, "loss": 0.0333, "step": 543500 }, { "epoch": 2.3852993583610558, "eval_runtime": 289.682, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48742604395481803, "eval_sts_eval_spearman_cosine": 0.2796599381863064, "step": 543500 }, { "epoch": 2.3874937459952426, "grad_norm": 0.5264600515365601, "learning_rate": 8.064712970909673e-06, "loss": 0.0308, "step": 544000 }, { "epoch": 2.3874937459952426, "eval_runtime": 293.0651, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49058512051198566, "eval_sts_eval_spearman_cosine": 0.28043090313403846, "step": 544000 }, { "epoch": 2.3896881336294293, "grad_norm": 0.5166017413139343, "learning_rate": 8.053737974147299e-06, "loss": 0.0322, "step": 544500 }, { "epoch": 2.3896881336294293, "eval_runtime": 276.2942, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4900882156518248, "eval_sts_eval_spearman_cosine": 0.2798124918685908, "step": 544500 }, { "epoch": 2.391882521263616, "grad_norm": 0.49041637778282166, "learning_rate": 8.042762977384922e-06, "loss": 0.0339, "step": 545000 }, { "epoch": 2.391882521263616, "eval_runtime": 276.8294, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49163656909584796, "eval_sts_eval_spearman_cosine": 0.27876330029763013, "step": 545000 }, { "epoch": 2.394076908897803, "grad_norm": 0.25002068281173706, "learning_rate": 8.031787980622546e-06, "loss": 0.0325, "step": 545500 }, { "epoch": 2.394076908897803, "eval_runtime": 277.1352, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49132694634235863, "eval_sts_eval_spearman_cosine": 0.28111486089354026, "step": 545500 }, { "epoch": 2.3962712965319897, "grad_norm": 0.37802058458328247, "learning_rate": 8.02081298386017e-06, "loss": 0.0314, "step": 546000 }, { "epoch": 2.3962712965319897, "eval_runtime": 273.738, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4893462727598874, "eval_sts_eval_spearman_cosine": 0.27865150846820064, "step": 546000 }, { "epoch": 2.3984656841661764, "grad_norm": 0.30431437492370605, "learning_rate": 8.009837987097796e-06, "loss": 0.0315, "step": 546500 }, { "epoch": 2.3984656841661764, "eval_runtime": 273.6578, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4898816909863183, "eval_sts_eval_spearman_cosine": 0.2784357605825152, "step": 546500 }, { "epoch": 2.4006600718003632, "grad_norm": 0.41540971398353577, "learning_rate": 7.99886299033542e-06, "loss": 0.0343, "step": 547000 }, { "epoch": 2.4006600718003632, "eval_runtime": 272.0678, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49186219213196936, "eval_sts_eval_spearman_cosine": 0.2803612127388827, "step": 547000 }, { "epoch": 2.40285445943455, "grad_norm": 0.9685839414596558, "learning_rate": 7.987887993573042e-06, "loss": 0.0332, "step": 547500 }, { "epoch": 2.40285445943455, "eval_runtime": 284.6266, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.489277535506184, "eval_sts_eval_spearman_cosine": 0.27826528823596086, "step": 547500 }, { "epoch": 2.405048847068737, "grad_norm": 0.44717487692832947, "learning_rate": 7.976912996810666e-06, "loss": 0.0336, "step": 548000 }, { "epoch": 2.405048847068737, "eval_runtime": 270.6748, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4879284743873119, "eval_sts_eval_spearman_cosine": 0.27773082086287093, "step": 548000 }, { "epoch": 2.407243234702924, "grad_norm": 0.5063113570213318, "learning_rate": 7.965938000048291e-06, "loss": 0.0324, "step": 548500 }, { "epoch": 2.407243234702924, "eval_runtime": 274.1005, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48936737815927933, "eval_sts_eval_spearman_cosine": 0.279854092954327, "step": 548500 }, { "epoch": 2.409437622337111, "grad_norm": 0.20647910237312317, "learning_rate": 7.954963003285915e-06, "loss": 0.0326, "step": 549000 }, { "epoch": 2.409437622337111, "eval_runtime": 282.1646, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49058682705868906, "eval_sts_eval_spearman_cosine": 0.2802693994971986, "step": 549000 }, { "epoch": 2.4116320099712976, "grad_norm": 0.5879849195480347, "learning_rate": 7.943988006523539e-06, "loss": 0.0326, "step": 549500 }, { "epoch": 2.4116320099712976, "eval_runtime": 274.6662, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49038702111395494, "eval_sts_eval_spearman_cosine": 0.27968928374412333, "step": 549500 }, { "epoch": 2.4138263976054843, "grad_norm": 0.5800304412841797, "learning_rate": 7.933013009761163e-06, "loss": 0.0322, "step": 550000 }, { "epoch": 2.4138263976054843, "eval_runtime": 273.3669, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4917521131194061, "eval_sts_eval_spearman_cosine": 0.2819964377989639, "step": 550000 }, { "epoch": 2.416020785239671, "grad_norm": 0.41533589363098145, "learning_rate": 7.922038012998786e-06, "loss": 0.0324, "step": 550500 }, { "epoch": 2.416020785239671, "eval_runtime": 284.0531, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4905870195505375, "eval_sts_eval_spearman_cosine": 0.2793417072566041, "step": 550500 }, { "epoch": 2.418215172873858, "grad_norm": 0.5675000548362732, "learning_rate": 7.91106301623641e-06, "loss": 0.0336, "step": 551000 }, { "epoch": 2.418215172873858, "eval_runtime": 283.0473, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4911571885469155, "eval_sts_eval_spearman_cosine": 0.27951117985393503, "step": 551000 }, { "epoch": 2.4204095605080447, "grad_norm": 0.7169193625450134, "learning_rate": 7.900088019474034e-06, "loss": 0.0319, "step": 551500 }, { "epoch": 2.4204095605080447, "eval_runtime": 273.8289, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4909675793310931, "eval_sts_eval_spearman_cosine": 0.280584868930096, "step": 551500 }, { "epoch": 2.4226039481422315, "grad_norm": 0.6127610206604004, "learning_rate": 7.889113022711658e-06, "loss": 0.0337, "step": 552000 }, { "epoch": 2.4226039481422315, "eval_runtime": 281.5578, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4921452102819913, "eval_sts_eval_spearman_cosine": 0.2802567817784175, "step": 552000 }, { "epoch": 2.4247983357764182, "grad_norm": 0.19785891473293304, "learning_rate": 7.878138025949284e-06, "loss": 0.0322, "step": 552500 }, { "epoch": 2.4247983357764182, "eval_runtime": 277.2822, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4897828107875901, "eval_sts_eval_spearman_cosine": 0.2781637575376749, "step": 552500 }, { "epoch": 2.426992723410605, "grad_norm": 0.3056625723838806, "learning_rate": 7.867163029186907e-06, "loss": 0.0334, "step": 553000 }, { "epoch": 2.426992723410605, "eval_runtime": 271.6446, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4893686417040204, "eval_sts_eval_spearman_cosine": 0.2767842491386061, "step": 553000 }, { "epoch": 2.429187111044792, "grad_norm": 0.5086891055107117, "learning_rate": 7.856188032424531e-06, "loss": 0.0332, "step": 553500 }, { "epoch": 2.429187111044792, "eval_runtime": 272.8591, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4914227885461877, "eval_sts_eval_spearman_cosine": 0.2787573550644257, "step": 553500 }, { "epoch": 2.4313814986789786, "grad_norm": 0.31685593724250793, "learning_rate": 7.845213035662155e-06, "loss": 0.0336, "step": 554000 }, { "epoch": 2.4313814986789786, "eval_runtime": 271.8547, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4945299866023826, "eval_sts_eval_spearman_cosine": 0.2816838362239199, "step": 554000 }, { "epoch": 2.4335758863131653, "grad_norm": 4.642474174499512, "learning_rate": 7.834238038899779e-06, "loss": 0.0325, "step": 554500 }, { "epoch": 2.4335758863131653, "eval_runtime": 274.7965, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4940459610240569, "eval_sts_eval_spearman_cosine": 0.2819096780379009, "step": 554500 }, { "epoch": 2.435770273947352, "grad_norm": 0.6000452041625977, "learning_rate": 7.823263042137403e-06, "loss": 0.0339, "step": 555000 }, { "epoch": 2.435770273947352, "eval_runtime": 270.2225, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49380685405534397, "eval_sts_eval_spearman_cosine": 0.2814274060102668, "step": 555000 }, { "epoch": 2.437964661581539, "grad_norm": 0.43584832549095154, "learning_rate": 7.812288045375027e-06, "loss": 0.0316, "step": 555500 }, { "epoch": 2.437964661581539, "eval_runtime": 271.6254, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4924882811714412, "eval_sts_eval_spearman_cosine": 0.2807779456686822, "step": 555500 }, { "epoch": 2.4401590492157257, "grad_norm": 0.9773544073104858, "learning_rate": 7.80131304861265e-06, "loss": 0.0331, "step": 556000 }, { "epoch": 2.4401590492157257, "eval_runtime": 278.9877, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49369166025021227, "eval_sts_eval_spearman_cosine": 0.2809124132901642, "step": 556000 }, { "epoch": 2.4423534368499125, "grad_norm": 0.4828943610191345, "learning_rate": 7.790338051850276e-06, "loss": 0.0329, "step": 556500 }, { "epoch": 2.4423534368499125, "eval_runtime": 274.5022, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49323736310198896, "eval_sts_eval_spearman_cosine": 0.2811431445832913, "step": 556500 }, { "epoch": 2.4445478244840997, "grad_norm": 0.29626718163490295, "learning_rate": 7.7793630550879e-06, "loss": 0.0331, "step": 557000 }, { "epoch": 2.4445478244840997, "eval_runtime": 276.3377, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4949542220603101, "eval_sts_eval_spearman_cosine": 0.2814700392563734, "step": 557000 }, { "epoch": 2.4467422121182865, "grad_norm": 0.39470940828323364, "learning_rate": 7.768388058325524e-06, "loss": 0.0336, "step": 557500 }, { "epoch": 2.4467422121182865, "eval_runtime": 279.0933, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49540302998159297, "eval_sts_eval_spearman_cosine": 0.2798981876213776, "step": 557500 }, { "epoch": 2.4489365997524732, "grad_norm": 0.5402525067329407, "learning_rate": 7.757413061563148e-06, "loss": 0.0327, "step": 558000 }, { "epoch": 2.4489365997524732, "eval_runtime": 282.736, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.493770194928924, "eval_sts_eval_spearman_cosine": 0.27870110504727097, "step": 558000 }, { "epoch": 2.45113098738666, "grad_norm": 0.3147571384906769, "learning_rate": 7.746438064800771e-06, "loss": 0.0307, "step": 558500 }, { "epoch": 2.45113098738666, "eval_runtime": 279.6841, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49168060413850573, "eval_sts_eval_spearman_cosine": 0.2783973532893021, "step": 558500 }, { "epoch": 2.453325375020847, "grad_norm": 1.4631388187408447, "learning_rate": 7.735463068038395e-06, "loss": 0.0337, "step": 559000 }, { "epoch": 2.453325375020847, "eval_runtime": 279.0051, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4932319682697207, "eval_sts_eval_spearman_cosine": 0.278605144181002, "step": 559000 }, { "epoch": 2.4555197626550336, "grad_norm": 0.36047571897506714, "learning_rate": 7.72448807127602e-06, "loss": 0.0325, "step": 559500 }, { "epoch": 2.4555197626550336, "eval_runtime": 273.0043, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4923703601208188, "eval_sts_eval_spearman_cosine": 0.27819936119187916, "step": 559500 }, { "epoch": 2.4577141502892204, "grad_norm": 0.1922254115343094, "learning_rate": 7.713513074513643e-06, "loss": 0.0335, "step": 560000 }, { "epoch": 2.4577141502892204, "eval_runtime": 272.318, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49357035388045567, "eval_sts_eval_spearman_cosine": 0.2787810200550545, "step": 560000 }, { "epoch": 2.459908537923407, "grad_norm": 0.3929598927497864, "learning_rate": 7.702538077751269e-06, "loss": 0.0337, "step": 560500 }, { "epoch": 2.459908537923407, "eval_runtime": 277.4984, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49295995058931813, "eval_sts_eval_spearman_cosine": 0.2768236941618784, "step": 560500 }, { "epoch": 2.462102925557594, "grad_norm": 0.5584201216697693, "learning_rate": 7.691563080988892e-06, "loss": 0.0334, "step": 561000 }, { "epoch": 2.462102925557594, "eval_runtime": 270.8739, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.491918619533481, "eval_sts_eval_spearman_cosine": 0.276305959199826, "step": 561000 }, { "epoch": 2.4642973131917807, "grad_norm": 0.9398521184921265, "learning_rate": 7.680588084226516e-06, "loss": 0.0312, "step": 561500 }, { "epoch": 2.4642973131917807, "eval_runtime": 276.0325, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4921969705062077, "eval_sts_eval_spearman_cosine": 0.27720617562755745, "step": 561500 }, { "epoch": 2.4664917008259675, "grad_norm": 0.4188985526561737, "learning_rate": 7.66961308746414e-06, "loss": 0.034, "step": 562000 }, { "epoch": 2.4664917008259675, "eval_runtime": 274.3155, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49207652504065136, "eval_sts_eval_spearman_cosine": 0.276350007153382, "step": 562000 }, { "epoch": 2.4686860884601542, "grad_norm": 0.3782042860984802, "learning_rate": 7.658638090701764e-06, "loss": 0.0309, "step": 562500 }, { "epoch": 2.4686860884601542, "eval_runtime": 277.6353, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4922117775180388, "eval_sts_eval_spearman_cosine": 0.276580512209653, "step": 562500 }, { "epoch": 2.470880476094341, "grad_norm": 0.9259991645812988, "learning_rate": 7.647663093939388e-06, "loss": 0.0328, "step": 563000 }, { "epoch": 2.470880476094341, "eval_runtime": 270.3387, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49384184881552784, "eval_sts_eval_spearman_cosine": 0.27708843456626825, "step": 563000 }, { "epoch": 2.473074863728528, "grad_norm": 0.2502581775188446, "learning_rate": 7.636688097177012e-06, "loss": 0.0326, "step": 563500 }, { "epoch": 2.473074863728528, "eval_runtime": 267.9068, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4923778671613257, "eval_sts_eval_spearman_cosine": 0.2772481883869902, "step": 563500 }, { "epoch": 2.4752692513627146, "grad_norm": 0.3580871522426605, "learning_rate": 7.6257131004146355e-06, "loss": 0.032, "step": 564000 }, { "epoch": 2.4752692513627146, "eval_runtime": 272.6051, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4939237950433638, "eval_sts_eval_spearman_cosine": 0.27888810000284714, "step": 564000 }, { "epoch": 2.4774636389969014, "grad_norm": 0.18687523901462555, "learning_rate": 7.61473810365226e-06, "loss": 0.0315, "step": 564500 }, { "epoch": 2.4774636389969014, "eval_runtime": 270.9423, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4943426853908175, "eval_sts_eval_spearman_cosine": 0.2786326302358679, "step": 564500 }, { "epoch": 2.479658026631088, "grad_norm": 0.35259324312210083, "learning_rate": 7.603763106889884e-06, "loss": 0.0318, "step": 565000 }, { "epoch": 2.479658026631088, "eval_runtime": 277.6741, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49505443914234487, "eval_sts_eval_spearman_cosine": 0.27999982598702955, "step": 565000 }, { "epoch": 2.4818524142652754, "grad_norm": 0.5720382928848267, "learning_rate": 7.592788110127508e-06, "loss": 0.0336, "step": 565500 }, { "epoch": 2.4818524142652754, "eval_runtime": 275.9125, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.493672638777009, "eval_sts_eval_spearman_cosine": 0.2793288287898068, "step": 565500 }, { "epoch": 2.4840468018994617, "grad_norm": 0.5382133722305298, "learning_rate": 7.581813113365132e-06, "loss": 0.0336, "step": 566000 }, { "epoch": 2.4840468018994617, "eval_runtime": 272.2449, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49319503709463497, "eval_sts_eval_spearman_cosine": 0.27817523844322023, "step": 566000 }, { "epoch": 2.486241189533649, "grad_norm": 0.592334508895874, "learning_rate": 7.5708381166027565e-06, "loss": 0.0325, "step": 566500 }, { "epoch": 2.486241189533649, "eval_runtime": 275.1329, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49137771609438086, "eval_sts_eval_spearman_cosine": 0.2769251901809927, "step": 566500 }, { "epoch": 2.4884355771678357, "grad_norm": 0.5396108627319336, "learning_rate": 7.55986311984038e-06, "loss": 0.0332, "step": 567000 }, { "epoch": 2.4884355771678357, "eval_runtime": 278.7901, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49327952231719985, "eval_sts_eval_spearman_cosine": 0.2769083956039529, "step": 567000 }, { "epoch": 2.4906299648020225, "grad_norm": 0.2205306738615036, "learning_rate": 7.548888123078004e-06, "loss": 0.0318, "step": 567500 }, { "epoch": 2.4906299648020225, "eval_runtime": 277.6841, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4917109014979658, "eval_sts_eval_spearman_cosine": 0.27594911778504566, "step": 567500 }, { "epoch": 2.4928243524362093, "grad_norm": 0.36870065331459045, "learning_rate": 7.537913126315628e-06, "loss": 0.0329, "step": 568000 }, { "epoch": 2.4928243524362093, "eval_runtime": 272.038, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49178583405597165, "eval_sts_eval_spearman_cosine": 0.2760372188171885, "step": 568000 }, { "epoch": 2.495018740070396, "grad_norm": 0.34071874618530273, "learning_rate": 7.526938129553253e-06, "loss": 0.0343, "step": 568500 }, { "epoch": 2.495018740070396, "eval_runtime": 272.5147, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49001816179498414, "eval_sts_eval_spearman_cosine": 0.27518467336253605, "step": 568500 }, { "epoch": 2.497213127704583, "grad_norm": 0.4744373857975006, "learning_rate": 7.5159631327908766e-06, "loss": 0.0327, "step": 569000 }, { "epoch": 2.497213127704583, "eval_runtime": 272.2304, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4915061524220683, "eval_sts_eval_spearman_cosine": 0.2770634825327235, "step": 569000 }, { "epoch": 2.4994075153387696, "grad_norm": 0.49118947982788086, "learning_rate": 7.5049881360285e-06, "loss": 0.0305, "step": 569500 }, { "epoch": 2.4994075153387696, "eval_runtime": 278.3588, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4909494080869416, "eval_sts_eval_spearman_cosine": 0.2769174070105109, "step": 569500 }, { "epoch": 2.5016019029729564, "grad_norm": 0.474680632352829, "learning_rate": 7.494013139266124e-06, "loss": 0.0315, "step": 570000 }, { "epoch": 2.5016019029729564, "eval_runtime": 284.1907, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4910312290897524, "eval_sts_eval_spearman_cosine": 0.27602158522904874, "step": 570000 }, { "epoch": 2.503796290607143, "grad_norm": 0.22620315849781036, "learning_rate": 7.483038142503749e-06, "loss": 0.0313, "step": 570500 }, { "epoch": 2.503796290607143, "eval_runtime": 288.6135, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49275332000108746, "eval_sts_eval_spearman_cosine": 0.27621403803862116, "step": 570500 }, { "epoch": 2.50599067824133, "grad_norm": 0.5564931631088257, "learning_rate": 7.472063145741373e-06, "loss": 0.0333, "step": 571000 }, { "epoch": 2.50599067824133, "eval_runtime": 278.1733, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.48982363013862484, "eval_sts_eval_spearman_cosine": 0.27564656923383074, "step": 571000 }, { "epoch": 2.5081850658755167, "grad_norm": 0.20235556364059448, "learning_rate": 7.461088148978997e-06, "loss": 0.0344, "step": 571500 }, { "epoch": 2.5081850658755167, "eval_runtime": 279.1481, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4927550503377177, "eval_sts_eval_spearman_cosine": 0.27526001424954993, "step": 571500 }, { "epoch": 2.5103794535097035, "grad_norm": 0.41539472341537476, "learning_rate": 7.4501131522166205e-06, "loss": 0.032, "step": 572000 }, { "epoch": 2.5103794535097035, "eval_runtime": 282.242, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4915744077915814, "eval_sts_eval_spearman_cosine": 0.2761403182567429, "step": 572000 }, { "epoch": 2.5125738411438903, "grad_norm": 0.16850659251213074, "learning_rate": 7.439138155454245e-06, "loss": 0.0323, "step": 572500 }, { "epoch": 2.5125738411438903, "eval_runtime": 279.9918, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4945571278040258, "eval_sts_eval_spearman_cosine": 0.27951470298299175, "step": 572500 }, { "epoch": 2.514768228778077, "grad_norm": 0.6116344332695007, "learning_rate": 7.428163158691869e-06, "loss": 0.0318, "step": 573000 }, { "epoch": 2.514768228778077, "eval_runtime": 272.5532, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4951866929871237, "eval_sts_eval_spearman_cosine": 0.279256192616472, "step": 573000 }, { "epoch": 2.516962616412264, "grad_norm": 0.6372465491294861, "learning_rate": 7.417188161929493e-06, "loss": 0.032, "step": 573500 }, { "epoch": 2.516962616412264, "eval_runtime": 276.5618, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49458682202692905, "eval_sts_eval_spearman_cosine": 0.2779290640843108, "step": 573500 }, { "epoch": 2.519157004046451, "grad_norm": 0.3278270661830902, "learning_rate": 7.406213165167117e-06, "loss": 0.0332, "step": 574000 }, { "epoch": 2.519157004046451, "eval_runtime": 277.1408, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49576420935775767, "eval_sts_eval_spearman_cosine": 0.2799767618210817, "step": 574000 }, { "epoch": 2.5213513916806374, "grad_norm": 0.19693461060523987, "learning_rate": 7.3952381684047415e-06, "loss": 0.0323, "step": 574500 }, { "epoch": 2.5213513916806374, "eval_runtime": 277.1092, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49598125520747777, "eval_sts_eval_spearman_cosine": 0.2807793665887702, "step": 574500 }, { "epoch": 2.5235457793148246, "grad_norm": 0.41302502155303955, "learning_rate": 7.384263171642365e-06, "loss": 0.0331, "step": 575000 }, { "epoch": 2.5235457793148246, "eval_runtime": 278.8201, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49243391628436184, "eval_sts_eval_spearman_cosine": 0.2782394507643612, "step": 575000 }, { "epoch": 2.525740166949011, "grad_norm": 0.6140841841697693, "learning_rate": 7.373288174879989e-06, "loss": 0.0347, "step": 575500 }, { "epoch": 2.525740166949011, "eval_runtime": 278.2739, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4946390288777164, "eval_sts_eval_spearman_cosine": 0.28016063584814616, "step": 575500 }, { "epoch": 2.527934554583198, "grad_norm": 0.2746317684650421, "learning_rate": 7.362313178117613e-06, "loss": 0.035, "step": 576000 }, { "epoch": 2.527934554583198, "eval_runtime": 286.1657, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4962476183572652, "eval_sts_eval_spearman_cosine": 0.28049713065045573, "step": 576000 }, { "epoch": 2.530128942217385, "grad_norm": 0.6531857848167419, "learning_rate": 7.351338181355238e-06, "loss": 0.0335, "step": 576500 }, { "epoch": 2.530128942217385, "eval_runtime": 272.7601, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4977344745594149, "eval_sts_eval_spearman_cosine": 0.28143267620490453, "step": 576500 }, { "epoch": 2.5323233298515717, "grad_norm": 0.3533378839492798, "learning_rate": 7.3403631845928616e-06, "loss": 0.0329, "step": 577000 }, { "epoch": 2.5323233298515717, "eval_runtime": 276.4626, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49928873724702905, "eval_sts_eval_spearman_cosine": 0.28342700758553324, "step": 577000 }, { "epoch": 2.5345177174857585, "grad_norm": 0.36988791823387146, "learning_rate": 7.329388187830485e-06, "loss": 0.0342, "step": 577500 }, { "epoch": 2.5345177174857585, "eval_runtime": 275.6767, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4979147470449705, "eval_sts_eval_spearman_cosine": 0.2817965291908963, "step": 577500 }, { "epoch": 2.5367121051199453, "grad_norm": 0.5572932362556458, "learning_rate": 7.318413191068109e-06, "loss": 0.0321, "step": 578000 }, { "epoch": 2.5367121051199453, "eval_runtime": 277.3293, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49603132660582644, "eval_sts_eval_spearman_cosine": 0.2802365642003251, "step": 578000 }, { "epoch": 2.538906492754132, "grad_norm": 0.29167744517326355, "learning_rate": 7.307438194305734e-06, "loss": 0.0336, "step": 578500 }, { "epoch": 2.538906492754132, "eval_runtime": 283.4148, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4940290573321783, "eval_sts_eval_spearman_cosine": 0.28056892142458345, "step": 578500 }, { "epoch": 2.541100880388319, "grad_norm": 0.2761438190937042, "learning_rate": 7.296463197543358e-06, "loss": 0.0319, "step": 579000 }, { "epoch": 2.541100880388319, "eval_runtime": 274.4484, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49626140041848354, "eval_sts_eval_spearman_cosine": 0.2817095444760468, "step": 579000 }, { "epoch": 2.5432952680225056, "grad_norm": 0.2667361795902252, "learning_rate": 7.285488200780982e-06, "loss": 0.0326, "step": 579500 }, { "epoch": 2.5432952680225056, "eval_runtime": 278.9465, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.497607140425555, "eval_sts_eval_spearman_cosine": 0.28024390410877953, "step": 579500 }, { "epoch": 2.5454896556566924, "grad_norm": 0.15998145937919617, "learning_rate": 7.274513204018605e-06, "loss": 0.0329, "step": 580000 }, { "epoch": 2.5454896556566924, "eval_runtime": 281.3661, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4954897811500124, "eval_sts_eval_spearman_cosine": 0.2799621400866962, "step": 580000 }, { "epoch": 2.547684043290879, "grad_norm": 0.384271502494812, "learning_rate": 7.26353820725623e-06, "loss": 0.0328, "step": 580500 }, { "epoch": 2.547684043290879, "eval_runtime": 289.8359, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.495067204935313, "eval_sts_eval_spearman_cosine": 0.2800445370881178, "step": 580500 }, { "epoch": 2.549878430925066, "grad_norm": 1.1004527807235718, "learning_rate": 7.252563210493854e-06, "loss": 0.0324, "step": 581000 }, { "epoch": 2.549878430925066, "eval_runtime": 278.0917, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4927019428129376, "eval_sts_eval_spearman_cosine": 0.2775081750554214, "step": 581000 }, { "epoch": 2.5520728185592527, "grad_norm": 0.26630428433418274, "learning_rate": 7.241588213731477e-06, "loss": 0.0316, "step": 581500 }, { "epoch": 2.5520728185592527, "eval_runtime": 280.5202, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.49399568572836194, "eval_sts_eval_spearman_cosine": 0.27808144367141296, "step": 581500 }, { "epoch": 2.5542672061934395, "grad_norm": 0.2401636689901352, "learning_rate": 7.230613216969101e-06, "loss": 0.0329, "step": 582000 }, { "epoch": 2.5542672061934395, "eval_runtime": 292.2841, "eval_samples_per_second": 0.0, "eval_steps_per_second": 0.0, "eval_sts_eval_pearson_cosine": 0.4956166096932603, "eval_sts_eval_spearman_cosine": 0.2796044670188721, "step": 582000 } ], "logging_steps": 500, "max_steps": 911416, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 1000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 0.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }