diff --git "a/checkpoint-25830/trainer_state.json" "b/checkpoint-25830/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-25830/trainer_state.json" @@ -0,0 +1,6935 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 5.0, + "eval_steps": 1292, + "global_step": 25830, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.012582268679829655, + "grad_norm": 4.108283996582031, + "learning_rate": 3.7166085946573755e-07, + "loss": 0.4577, + "step": 65 + }, + { + "epoch": 0.02516453735965931, + "grad_norm": 0.5773646831512451, + "learning_rate": 7.491289198606272e-07, + "loss": 0.4707, + "step": 130 + }, + { + "epoch": 0.03774680603948897, + "grad_norm": 0.6127182841300964, + "learning_rate": 1.1265969802555168e-06, + "loss": 0.5259, + "step": 195 + }, + { + "epoch": 0.05032907471931862, + "grad_norm": 1.513021469116211, + "learning_rate": 1.5040650406504067e-06, + "loss": 0.5501, + "step": 260 + }, + { + "epoch": 0.06291134339914828, + "grad_norm": 11.750137329101562, + "learning_rate": 1.8815331010452962e-06, + "loss": 0.5089, + "step": 325 + }, + { + "epoch": 0.07549361207897794, + "grad_norm": 4.16257905960083, + "learning_rate": 2.259001161440186e-06, + "loss": 0.4816, + "step": 390 + }, + { + "epoch": 0.08807588075880758, + "grad_norm": 16.527780532836914, + "learning_rate": 2.6306620209059233e-06, + "loss": 0.5822, + "step": 455 + }, + { + "epoch": 0.10065814943863724, + "grad_norm": 0.1845797300338745, + "learning_rate": 3.0081300813008134e-06, + "loss": 0.5686, + "step": 520 + }, + { + "epoch": 0.1132404181184669, + "grad_norm": 12.172422409057617, + "learning_rate": 3.3855981416957026e-06, + "loss": 0.5686, + "step": 585 + }, + { + "epoch": 0.12582268679829656, + "grad_norm": 0.6270273923873901, + "learning_rate": 3.7630662020905923e-06, + "loss": 0.517, + "step": 650 + }, + { + "epoch": 0.1384049554781262, + "grad_norm": 3.6368539333343506, + "learning_rate": 4.140534262485482e-06, + "loss": 0.3615, + "step": 715 + }, + { + "epoch": 0.15098722415795587, + "grad_norm": 9.541145324707031, + "learning_rate": 4.518002322880372e-06, + "loss": 0.5978, + "step": 780 + }, + { + "epoch": 0.16356949283778552, + "grad_norm": 9.86439323425293, + "learning_rate": 4.895470383275261e-06, + "loss": 0.5153, + "step": 845 + }, + { + "epoch": 0.17615176151761516, + "grad_norm": 4.669048309326172, + "learning_rate": 5.272938443670151e-06, + "loss": 0.5059, + "step": 910 + }, + { + "epoch": 0.18873403019744484, + "grad_norm": 9.666926383972168, + "learning_rate": 5.650406504065041e-06, + "loss": 0.5624, + "step": 975 + }, + { + "epoch": 0.20131629887727448, + "grad_norm": 6.078874588012695, + "learning_rate": 6.02787456445993e-06, + "loss": 0.5201, + "step": 1040 + }, + { + "epoch": 0.21389856755710415, + "grad_norm": 1.1067451238632202, + "learning_rate": 6.4053426248548205e-06, + "loss": 0.6127, + "step": 1105 + }, + { + "epoch": 0.2264808362369338, + "grad_norm": 1.1589373350143433, + "learning_rate": 6.78281068524971e-06, + "loss": 0.5333, + "step": 1170 + }, + { + "epoch": 0.23906310491676344, + "grad_norm": 1.977501630783081, + "learning_rate": 7.1602787456446e-06, + "loss": 0.494, + "step": 1235 + }, + { + "epoch": 0.2500967866821525, + "eval_StS-test_pearson_cosine": 0.8821101738384596, + "eval_StS-test_pearson_dot": 0.8032893366124795, + "eval_StS-test_pearson_euclidean": 0.8697205121607111, + "eval_StS-test_pearson_manhattan": 0.8704995590187196, + "eval_StS-test_pearson_max": 0.8821101738384596, + "eval_StS-test_spearman_cosine": 0.8943047751560564, + "eval_StS-test_spearman_dot": 0.8087424893555902, + "eval_StS-test_spearman_euclidean": 0.871583089708652, + "eval_StS-test_spearman_manhattan": 0.8737012027236009, + "eval_StS-test_spearman_max": 0.8943047751560564, + "eval_Vitaminc-test_cosine_accuracy": 0.5684210526315789, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7028586268424988, + "eval_Vitaminc-test_cosine_ap": 0.5651043866206488, + "eval_Vitaminc-test_cosine_f1": 0.6755218216318786, + "eval_Vitaminc-test_cosine_f1_threshold": 0.5077509880065918, + "eval_Vitaminc-test_cosine_precision": 0.52046783625731, + "eval_Vitaminc-test_cosine_recall": 0.9621621621621622, + "eval_Vitaminc-test_dot_accuracy": 0.5684210526315789, + "eval_Vitaminc-test_dot_accuracy_threshold": 19.693286895751953, + "eval_Vitaminc-test_dot_ap": 0.5463931769790206, + "eval_Vitaminc-test_dot_f1": 0.6691449814126395, + "eval_Vitaminc-test_dot_f1_threshold": 13.839346885681152, + "eval_Vitaminc-test_dot_precision": 0.509915014164306, + "eval_Vitaminc-test_dot_recall": 0.972972972972973, + "eval_Vitaminc-test_euclidean_accuracy": 0.5894736842105263, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 4.252468585968018, + "eval_Vitaminc-test_euclidean_ap": 0.5569049511912931, + "eval_Vitaminc-test_euclidean_f1": 0.6666666666666666, + "eval_Vitaminc-test_euclidean_f1_threshold": 6.922356128692627, + "eval_Vitaminc-test_euclidean_precision": 0.5041551246537396, + "eval_Vitaminc-test_euclidean_recall": 0.9837837837837838, + "eval_Vitaminc-test_manhattan_accuracy": 0.5815789473684211, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 87.21337890625, + "eval_Vitaminc-test_manhattan_ap": 0.5572154085134091, + "eval_Vitaminc-test_manhattan_f1": 0.6666666666666667, + "eval_Vitaminc-test_manhattan_f1_threshold": 141.26380920410156, + "eval_Vitaminc-test_manhattan_precision": 0.505586592178771, + "eval_Vitaminc-test_manhattan_recall": 0.9783783783783784, + "eval_Vitaminc-test_max_accuracy": 0.5894736842105263, + "eval_Vitaminc-test_max_accuracy_threshold": 87.21337890625, + "eval_Vitaminc-test_max_ap": 0.5651043866206488, + "eval_Vitaminc-test_max_f1": 0.6755218216318786, + "eval_Vitaminc-test_max_f1_threshold": 141.26380920410156, + "eval_Vitaminc-test_max_precision": 0.52046783625731, + "eval_Vitaminc-test_max_recall": 0.9837837837837838, + "eval_mrpc-test_cosine_accuracy": 0.7473684210526316, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7145693302154541, + "eval_mrpc-test_cosine_ap": 0.8563235829800693, + "eval_mrpc-test_cosine_f1": 0.8327645051194539, + "eval_mrpc-test_cosine_f1_threshold": 0.6522408723831177, + "eval_mrpc-test_cosine_precision": 0.7218934911242604, + "eval_mrpc-test_cosine_recall": 0.9838709677419355, + "eval_mrpc-test_dot_accuracy": 0.7026315789473684, + "eval_mrpc-test_dot_accuracy_threshold": 14.454626083374023, + "eval_mrpc-test_dot_ap": 0.796363256728503, + "eval_mrpc-test_dot_f1": 0.8054607508532423, + "eval_mrpc-test_dot_f1_threshold": 13.752894401550293, + "eval_mrpc-test_dot_precision": 0.6982248520710059, + "eval_mrpc-test_dot_recall": 0.9516129032258065, + "eval_mrpc-test_euclidean_accuracy": 0.7315789473684211, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.890326499938965, + "eval_mrpc-test_euclidean_ap": 0.8252367395643119, + "eval_mrpc-test_euclidean_f1": 0.8165467625899281, + "eval_mrpc-test_euclidean_f1_threshold": 3.890326499938965, + "eval_mrpc-test_euclidean_precision": 0.737012987012987, + "eval_mrpc-test_euclidean_recall": 0.9153225806451613, + "eval_mrpc-test_manhattan_accuracy": 0.7289473684210527, + "eval_mrpc-test_manhattan_accuracy_threshold": 77.57926177978516, + "eval_mrpc-test_manhattan_ap": 0.8208816982117964, + "eval_mrpc-test_manhattan_f1": 0.815742397137746, + "eval_mrpc-test_manhattan_f1_threshold": 79.14703369140625, + "eval_mrpc-test_manhattan_precision": 0.7331189710610932, + "eval_mrpc-test_manhattan_recall": 0.9193548387096774, + "eval_mrpc-test_max_accuracy": 0.7473684210526316, + "eval_mrpc-test_max_accuracy_threshold": 77.57926177978516, + "eval_mrpc-test_max_ap": 0.8563235829800693, + "eval_mrpc-test_max_f1": 0.8327645051194539, + "eval_mrpc-test_max_f1_threshold": 79.14703369140625, + "eval_mrpc-test_max_precision": 0.737012987012987, + "eval_mrpc-test_max_recall": 0.9838709677419355, + "eval_nli-pairs_loss": 0.8093397617340088, + "eval_nli-pairs_runtime": 3.0363, + "eval_nli-pairs_samples_per_second": 52.696, + "eval_nli-pairs_steps_per_second": 1.647, + "eval_sequential_score": 0.5651043866206488, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_vitaminc-pairs_loss": 5.769770622253418, + "eval_vitaminc-pairs_runtime": 1.5488, + "eval_vitaminc-pairs_samples_per_second": 85.875, + "eval_vitaminc-pairs_steps_per_second": 3.228, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_qnli-contrastive_loss": 0.12109158933162689, + "eval_qnli-contrastive_runtime": 0.5097, + "eval_qnli-contrastive_samples_per_second": 313.889, + "eval_qnli-contrastive_steps_per_second": 9.809, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_scitail-pairs-qa_loss": 0.07553695887327194, + "eval_scitail-pairs-qa_runtime": 1.2071, + "eval_scitail-pairs-qa_samples_per_second": 132.548, + "eval_scitail-pairs-qa_steps_per_second": 4.142, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_scitail-pairs-pos_loss": 0.3979075253009796, + "eval_scitail-pairs-pos_runtime": 2.3649, + "eval_scitail-pairs-pos_samples_per_second": 67.656, + "eval_scitail-pairs-pos_steps_per_second": 2.114, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_xsum-pairs_loss": 0.313429057598114, + "eval_xsum-pairs_runtime": 1.4107, + "eval_xsum-pairs_samples_per_second": 113.419, + "eval_xsum-pairs_steps_per_second": 3.544, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_compression-pairs_loss": 0.08316509425640106, + "eval_compression-pairs_runtime": 0.3958, + "eval_compression-pairs_samples_per_second": 404.289, + "eval_compression-pairs_steps_per_second": 12.634, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_sciq_pairs_loss": 0.2692818343639374, + "eval_sciq_pairs_runtime": 7.8991, + "eval_sciq_pairs_samples_per_second": 20.255, + "eval_sciq_pairs_steps_per_second": 0.633, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_qasc_pairs_loss": 0.19870159029960632, + "eval_qasc_pairs_runtime": 1.4336, + "eval_qasc_pairs_samples_per_second": 111.608, + "eval_qasc_pairs_steps_per_second": 3.488, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_qasc_facts_sym_loss": 0.16445104777812958, + "eval_qasc_facts_sym_runtime": 0.3196, + "eval_qasc_facts_sym_samples_per_second": 500.598, + "eval_qasc_facts_sym_steps_per_second": 15.644, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_openbookqa_pairs_loss": 1.7182375192642212, + "eval_openbookqa_pairs_runtime": 1.2252, + "eval_openbookqa_pairs_samples_per_second": 130.592, + "eval_openbookqa_pairs_steps_per_second": 4.081, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_msmarco_pairs_loss": 0.4961338937282562, + "eval_msmarco_pairs_runtime": 3.1144, + "eval_msmarco_pairs_samples_per_second": 51.374, + "eval_msmarco_pairs_steps_per_second": 1.605, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_nq_pairs_loss": 0.4005078673362732, + "eval_nq_pairs_runtime": 7.7074, + "eval_nq_pairs_samples_per_second": 20.759, + "eval_nq_pairs_steps_per_second": 0.649, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_trivia_pairs_loss": 0.654505729675293, + "eval_trivia_pairs_runtime": 10.1383, + "eval_trivia_pairs_samples_per_second": 15.782, + "eval_trivia_pairs_steps_per_second": 0.493, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_quora_pairs_loss": 0.22833283245563507, + "eval_quora_pairs_runtime": 3.894, + "eval_quora_pairs_samples_per_second": 173.342, + "eval_quora_pairs_steps_per_second": 5.65, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_gooaq_pairs_loss": 0.4996432662010193, + "eval_gooaq_pairs_runtime": 2.2697, + "eval_gooaq_pairs_samples_per_second": 70.494, + "eval_gooaq_pairs_steps_per_second": 2.203, + "step": 1292 + }, + { + "epoch": 0.2500967866821525, + "eval_mrpc_pairs_loss": 0.04613902047276497, + "eval_mrpc_pairs_runtime": 0.3743, + "eval_mrpc_pairs_samples_per_second": 427.427, + "eval_mrpc_pairs_steps_per_second": 13.357, + "step": 1292 + }, + { + "epoch": 0.2516453735965931, + "grad_norm": 12.50217342376709, + "learning_rate": 7.53774680603949e-06, + "loss": 0.6236, + "step": 1300 + }, + { + "epoch": 0.26422764227642276, + "grad_norm": 1.8397300243377686, + "learning_rate": 7.91521486643438e-06, + "loss": 0.4947, + "step": 1365 + }, + { + "epoch": 0.2768099109562524, + "grad_norm": 5.885033130645752, + "learning_rate": 8.292682926829268e-06, + "loss": 0.5595, + "step": 1430 + }, + { + "epoch": 0.28939217963608205, + "grad_norm": 1.7783002853393555, + "learning_rate": 8.670150987224158e-06, + "loss": 0.641, + "step": 1495 + }, + { + "epoch": 0.30197444831591175, + "grad_norm": 5.305712699890137, + "learning_rate": 9.047619047619047e-06, + "loss": 0.5188, + "step": 1560 + }, + { + "epoch": 0.3145567169957414, + "grad_norm": 8.24319839477539, + "learning_rate": 9.425087108013936e-06, + "loss": 0.4927, + "step": 1625 + }, + { + "epoch": 0.32713898567557104, + "grad_norm": 11.07426929473877, + "learning_rate": 9.802555168408827e-06, + "loss": 0.657, + "step": 1690 + }, + { + "epoch": 0.3397212543554007, + "grad_norm": 9.04263687133789, + "learning_rate": 1.0180023228803716e-05, + "loss": 0.4665, + "step": 1755 + }, + { + "epoch": 0.3523035230352303, + "grad_norm": 1.4980370998382568, + "learning_rate": 1.0557491289198606e-05, + "loss": 0.4645, + "step": 1820 + }, + { + "epoch": 0.36488579171506, + "grad_norm": 16.511180877685547, + "learning_rate": 1.0934959349593495e-05, + "loss": 0.5887, + "step": 1885 + }, + { + "epoch": 0.37746806039488967, + "grad_norm": 5.706000804901123, + "learning_rate": 1.1312427409988386e-05, + "loss": 0.5308, + "step": 1950 + }, + { + "epoch": 0.3900503290747193, + "grad_norm": 1.0923340320587158, + "learning_rate": 1.1689895470383277e-05, + "loss": 0.536, + "step": 2015 + }, + { + "epoch": 0.40263259775454896, + "grad_norm": 6.521665573120117, + "learning_rate": 1.2067363530778166e-05, + "loss": 0.4841, + "step": 2080 + }, + { + "epoch": 0.4152148664343786, + "grad_norm": 7.254842758178711, + "learning_rate": 1.2444831591173055e-05, + "loss": 0.6499, + "step": 2145 + }, + { + "epoch": 0.4277971351142083, + "grad_norm": 14.938628196716309, + "learning_rate": 1.2822299651567945e-05, + "loss": 0.5982, + "step": 2210 + }, + { + "epoch": 0.44037940379403795, + "grad_norm": 10.192171096801758, + "learning_rate": 1.3199767711962834e-05, + "loss": 0.5281, + "step": 2275 + }, + { + "epoch": 0.4529616724738676, + "grad_norm": 8.02379035949707, + "learning_rate": 1.3577235772357725e-05, + "loss": 0.6657, + "step": 2340 + }, + { + "epoch": 0.46554394115369724, + "grad_norm": 2.842752695083618, + "learning_rate": 1.3954703832752614e-05, + "loss": 0.5746, + "step": 2405 + }, + { + "epoch": 0.4781262098335269, + "grad_norm": 0.4373825788497925, + "learning_rate": 1.432636469221835e-05, + "loss": 0.5853, + "step": 2470 + }, + { + "epoch": 0.4907084785133566, + "grad_norm": 16.475561141967773, + "learning_rate": 1.4703832752613242e-05, + "loss": 0.5828, + "step": 2535 + }, + { + "epoch": 0.500193573364305, + "eval_StS-test_pearson_cosine": 0.8850112014664042, + "eval_StS-test_pearson_dot": 0.8059948281019231, + "eval_StS-test_pearson_euclidean": 0.8736480607586303, + "eval_StS-test_pearson_manhattan": 0.8746434402751778, + "eval_StS-test_pearson_max": 0.8850112014664042, + "eval_StS-test_spearman_cosine": 0.8964908683344393, + "eval_StS-test_spearman_dot": 0.8112940990649002, + "eval_StS-test_spearman_euclidean": 0.8728524906348676, + "eval_StS-test_spearman_manhattan": 0.8747700566942432, + "eval_StS-test_spearman_max": 0.8964908683344393, + "eval_Vitaminc-test_cosine_accuracy": 0.5842105263157895, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.6876633763313293, + "eval_Vitaminc-test_cosine_ap": 0.5620310838989798, + "eval_Vitaminc-test_cosine_f1": 0.6755725190839694, + "eval_Vitaminc-test_cosine_f1_threshold": 0.5227451324462891, + "eval_Vitaminc-test_cosine_precision": 0.5221238938053098, + "eval_Vitaminc-test_cosine_recall": 0.9567567567567568, + "eval_Vitaminc-test_dot_accuracy": 0.5552631578947368, + "eval_Vitaminc-test_dot_accuracy_threshold": 19.32275390625, + "eval_Vitaminc-test_dot_ap": 0.5459852333889619, + "eval_Vitaminc-test_dot_f1": 0.6715596330275228, + "eval_Vitaminc-test_dot_f1_threshold": 12.946126937866211, + "eval_Vitaminc-test_dot_precision": 0.5083333333333333, + "eval_Vitaminc-test_dot_recall": 0.9891891891891892, + "eval_Vitaminc-test_euclidean_accuracy": 0.5736842105263158, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.9492130279541016, + "eval_Vitaminc-test_euclidean_ap": 0.5569069466677845, + "eval_Vitaminc-test_euclidean_f1": 0.6654343807763402, + "eval_Vitaminc-test_euclidean_f1_threshold": 6.641269207000732, + "eval_Vitaminc-test_euclidean_precision": 0.5056179775280899, + "eval_Vitaminc-test_euclidean_recall": 0.972972972972973, + "eval_Vitaminc-test_manhattan_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 80.14714050292969, + "eval_Vitaminc-test_manhattan_ap": 0.556199495550533, + "eval_Vitaminc-test_manhattan_f1": 0.6691312384473197, + "eval_Vitaminc-test_manhattan_f1_threshold": 138.50221252441406, + "eval_Vitaminc-test_manhattan_precision": 0.5084269662921348, + "eval_Vitaminc-test_manhattan_recall": 0.9783783783783784, + "eval_Vitaminc-test_max_accuracy": 0.5842105263157895, + "eval_Vitaminc-test_max_accuracy_threshold": 80.14714050292969, + "eval_Vitaminc-test_max_ap": 0.5620310838989798, + "eval_Vitaminc-test_max_f1": 0.6755725190839694, + "eval_Vitaminc-test_max_f1_threshold": 138.50221252441406, + "eval_Vitaminc-test_max_precision": 0.5221238938053098, + "eval_Vitaminc-test_max_recall": 0.9891891891891892, + "eval_mrpc-test_cosine_accuracy": 0.7368421052631579, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7595815658569336, + "eval_mrpc-test_cosine_ap": 0.8489522026016108, + "eval_mrpc-test_cosine_f1": 0.8235294117647058, + "eval_mrpc-test_cosine_f1_threshold": 0.6731016635894775, + "eval_mrpc-test_cosine_precision": 0.7212121212121212, + "eval_mrpc-test_cosine_recall": 0.9596774193548387, + "eval_mrpc-test_dot_accuracy": 0.6894736842105263, + "eval_mrpc-test_dot_accuracy_threshold": 15.645252227783203, + "eval_mrpc-test_dot_ap": 0.7828717059557433, + "eval_mrpc-test_dot_f1": 0.8071895424836601, + "eval_mrpc-test_dot_f1_threshold": 11.141586303710938, + "eval_mrpc-test_dot_precision": 0.6785714285714286, + "eval_mrpc-test_dot_recall": 0.9959677419354839, + "eval_mrpc-test_euclidean_accuracy": 0.7368421052631579, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.7453079223632812, + "eval_mrpc-test_euclidean_ap": 0.8190549606804299, + "eval_mrpc-test_euclidean_f1": 0.8172043010752688, + "eval_mrpc-test_euclidean_f1_threshold": 3.905801773071289, + "eval_mrpc-test_euclidean_precision": 0.7354838709677419, + "eval_mrpc-test_euclidean_recall": 0.9193548387096774, + "eval_mrpc-test_manhattan_accuracy": 0.7315789473684211, + "eval_mrpc-test_manhattan_accuracy_threshold": 74.24961853027344, + "eval_mrpc-test_manhattan_ap": 0.815561648496177, + "eval_mrpc-test_manhattan_f1": 0.81508078994614, + "eval_mrpc-test_manhattan_f1_threshold": 78.01387023925781, + "eval_mrpc-test_manhattan_precision": 0.7346278317152104, + "eval_mrpc-test_manhattan_recall": 0.9153225806451613, + "eval_mrpc-test_max_accuracy": 0.7368421052631579, + "eval_mrpc-test_max_accuracy_threshold": 74.24961853027344, + "eval_mrpc-test_max_ap": 0.8489522026016108, + "eval_mrpc-test_max_f1": 0.8235294117647058, + "eval_mrpc-test_max_f1_threshold": 78.01387023925781, + "eval_mrpc-test_max_precision": 0.7354838709677419, + "eval_mrpc-test_max_recall": 0.9959677419354839, + "eval_nli-pairs_loss": 0.805642306804657, + "eval_nli-pairs_runtime": 3.1328, + "eval_nli-pairs_samples_per_second": 51.072, + "eval_nli-pairs_steps_per_second": 1.596, + "eval_sequential_score": 0.5620310838989798, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_vitaminc-pairs_loss": 5.803024768829346, + "eval_vitaminc-pairs_runtime": 1.6236, + "eval_vitaminc-pairs_samples_per_second": 81.915, + "eval_vitaminc-pairs_steps_per_second": 3.08, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_qnli-contrastive_loss": 0.1560516655445099, + "eval_qnli-contrastive_runtime": 0.5455, + "eval_qnli-contrastive_samples_per_second": 293.295, + "eval_qnli-contrastive_steps_per_second": 9.165, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_scitail-pairs-qa_loss": 0.07059809565544128, + "eval_scitail-pairs-qa_runtime": 1.3304, + "eval_scitail-pairs-qa_samples_per_second": 120.266, + "eval_scitail-pairs-qa_steps_per_second": 3.758, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_scitail-pairs-pos_loss": 0.40096405148506165, + "eval_scitail-pairs-pos_runtime": 2.6358, + "eval_scitail-pairs-pos_samples_per_second": 60.703, + "eval_scitail-pairs-pos_steps_per_second": 1.897, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_xsum-pairs_loss": 0.3021017014980316, + "eval_xsum-pairs_runtime": 1.474, + "eval_xsum-pairs_samples_per_second": 108.547, + "eval_xsum-pairs_steps_per_second": 3.392, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_compression-pairs_loss": 0.08080567419528961, + "eval_compression-pairs_runtime": 0.4395, + "eval_compression-pairs_samples_per_second": 364.011, + "eval_compression-pairs_steps_per_second": 11.375, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_sciq_pairs_loss": 0.2838394045829773, + "eval_sciq_pairs_runtime": 8.1466, + "eval_sciq_pairs_samples_per_second": 19.64, + "eval_sciq_pairs_steps_per_second": 0.614, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_qasc_pairs_loss": 0.19900637865066528, + "eval_qasc_pairs_runtime": 1.4833, + "eval_qasc_pairs_samples_per_second": 107.864, + "eval_qasc_pairs_steps_per_second": 3.371, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_qasc_facts_sym_loss": 0.15245158970355988, + "eval_qasc_facts_sym_runtime": 0.3292, + "eval_qasc_facts_sym_samples_per_second": 485.978, + "eval_qasc_facts_sym_steps_per_second": 15.187, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_openbookqa_pairs_loss": 1.7069475650787354, + "eval_openbookqa_pairs_runtime": 1.4335, + "eval_openbookqa_pairs_samples_per_second": 111.612, + "eval_openbookqa_pairs_steps_per_second": 3.488, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_msmarco_pairs_loss": 0.5149500966072083, + "eval_msmarco_pairs_runtime": 3.2287, + "eval_msmarco_pairs_samples_per_second": 49.555, + "eval_msmarco_pairs_steps_per_second": 1.549, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_nq_pairs_loss": 0.4162893295288086, + "eval_nq_pairs_runtime": 7.8277, + "eval_nq_pairs_samples_per_second": 20.44, + "eval_nq_pairs_steps_per_second": 0.639, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_trivia_pairs_loss": 0.623663067817688, + "eval_trivia_pairs_runtime": 10.277, + "eval_trivia_pairs_samples_per_second": 15.569, + "eval_trivia_pairs_steps_per_second": 0.487, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_quora_pairs_loss": 0.22309938073158264, + "eval_quora_pairs_runtime": 4.3318, + "eval_quora_pairs_samples_per_second": 155.824, + "eval_quora_pairs_steps_per_second": 5.079, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_gooaq_pairs_loss": 0.49019113183021545, + "eval_gooaq_pairs_runtime": 2.2716, + "eval_gooaq_pairs_samples_per_second": 70.434, + "eval_gooaq_pairs_steps_per_second": 2.201, + "step": 2584 + }, + { + "epoch": 0.500193573364305, + "eval_mrpc_pairs_loss": 0.04494442045688629, + "eval_mrpc_pairs_runtime": 0.3756, + "eval_mrpc_pairs_samples_per_second": 425.958, + "eval_mrpc_pairs_steps_per_second": 13.311, + "step": 2584 + }, + { + "epoch": 0.5032907471931862, + "grad_norm": 0.5926259756088257, + "learning_rate": 1.5081300813008131e-05, + "loss": 0.4889, + "step": 2600 + }, + { + "epoch": 0.5158730158730159, + "grad_norm": 12.89693832397461, + "learning_rate": 1.545876887340302e-05, + "loss": 0.517, + "step": 2665 + }, + { + "epoch": 0.5284552845528455, + "grad_norm": 0.3751634359359741, + "learning_rate": 1.583623693379791e-05, + "loss": 0.5479, + "step": 2730 + }, + { + "epoch": 0.5410375532326752, + "grad_norm": 10.321626663208008, + "learning_rate": 1.62137049941928e-05, + "loss": 0.6409, + "step": 2795 + }, + { + "epoch": 0.5536198219125048, + "grad_norm": 0.40425410866737366, + "learning_rate": 1.659117305458769e-05, + "loss": 0.6137, + "step": 2860 + }, + { + "epoch": 0.5662020905923345, + "grad_norm": 0.26469847559928894, + "learning_rate": 1.696864111498258e-05, + "loss": 0.5947, + "step": 2925 + }, + { + "epoch": 0.5787843592721641, + "grad_norm": 0.3376736640930176, + "learning_rate": 1.7346109175377468e-05, + "loss": 0.514, + "step": 2990 + }, + { + "epoch": 0.5913666279519938, + "grad_norm": 11.042977333068848, + "learning_rate": 1.7723577235772357e-05, + "loss": 0.5256, + "step": 3055 + }, + { + "epoch": 0.6039488966318235, + "grad_norm": 28.415281295776367, + "learning_rate": 1.810104529616725e-05, + "loss": 0.5574, + "step": 3120 + }, + { + "epoch": 0.6165311653116531, + "grad_norm": 1.138208270072937, + "learning_rate": 1.8478513356562136e-05, + "loss": 0.5669, + "step": 3185 + }, + { + "epoch": 0.6291134339914828, + "grad_norm": 0.38507771492004395, + "learning_rate": 1.885598141695703e-05, + "loss": 0.5185, + "step": 3250 + }, + { + "epoch": 0.6416957026713124, + "grad_norm": 0.4718106687068939, + "learning_rate": 1.9233449477351914e-05, + "loss": 0.4875, + "step": 3315 + }, + { + "epoch": 0.6542779713511421, + "grad_norm": 0.3544551432132721, + "learning_rate": 1.9610917537746807e-05, + "loss": 0.5773, + "step": 3380 + }, + { + "epoch": 0.6668602400309718, + "grad_norm": 15.193903923034668, + "learning_rate": 1.9988385598141696e-05, + "loss": 0.6102, + "step": 3445 + }, + { + "epoch": 0.6794425087108014, + "grad_norm": 14.930959701538086, + "learning_rate": 2.0365853658536586e-05, + "loss": 0.5882, + "step": 3510 + }, + { + "epoch": 0.6920247773906311, + "grad_norm": 0.7249612212181091, + "learning_rate": 2.0743321718931475e-05, + "loss": 0.5888, + "step": 3575 + }, + { + "epoch": 0.7046070460704607, + "grad_norm": 0.21552956104278564, + "learning_rate": 2.1120789779326364e-05, + "loss": 0.6088, + "step": 3640 + }, + { + "epoch": 0.7171893147502904, + "grad_norm": 3.176356554031372, + "learning_rate": 2.1498257839721253e-05, + "loss": 0.688, + "step": 3705 + }, + { + "epoch": 0.72977158343012, + "grad_norm": 4.0542378425598145, + "learning_rate": 2.1875725900116146e-05, + "loss": 0.6541, + "step": 3770 + }, + { + "epoch": 0.7423538521099496, + "grad_norm": 0.5486993193626404, + "learning_rate": 2.2253193960511032e-05, + "loss": 0.4526, + "step": 3835 + }, + { + "epoch": 0.7502903600464577, + "eval_StS-test_pearson_cosine": 0.8754429584344908, + "eval_StS-test_pearson_dot": 0.7812438977199179, + "eval_StS-test_pearson_euclidean": 0.863532080020405, + "eval_StS-test_pearson_manhattan": 0.8642206125042041, + "eval_StS-test_pearson_max": 0.8754429584344908, + "eval_StS-test_spearman_cosine": 0.8899322603927042, + "eval_StS-test_spearman_dot": 0.7897822634826679, + "eval_StS-test_spearman_euclidean": 0.8661349339052001, + "eval_StS-test_spearman_manhattan": 0.8677664578276145, + "eval_StS-test_spearman_max": 0.8899322603927042, + "eval_Vitaminc-test_cosine_accuracy": 0.5815789473684211, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7150823473930359, + "eval_Vitaminc-test_cosine_ap": 0.5695623413042502, + "eval_Vitaminc-test_cosine_f1": 0.6690909090909091, + "eval_Vitaminc-test_cosine_f1_threshold": 0.4144631028175354, + "eval_Vitaminc-test_cosine_precision": 0.5041095890410959, + "eval_Vitaminc-test_cosine_recall": 0.9945945945945946, + "eval_Vitaminc-test_dot_accuracy": 0.5631578947368421, + "eval_Vitaminc-test_dot_accuracy_threshold": 25.451148986816406, + "eval_Vitaminc-test_dot_ap": 0.5500277455332443, + "eval_Vitaminc-test_dot_f1": 0.6691449814126395, + "eval_Vitaminc-test_dot_f1_threshold": 14.098217964172363, + "eval_Vitaminc-test_dot_precision": 0.509915014164306, + "eval_Vitaminc-test_dot_recall": 0.972972972972973, + "eval_Vitaminc-test_euclidean_accuracy": 0.5736842105263158, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.945739269256592, + "eval_Vitaminc-test_euclidean_ap": 0.5590599048028368, + "eval_Vitaminc-test_euclidean_f1": 0.6654478976234005, + "eval_Vitaminc-test_euclidean_f1_threshold": 6.931303024291992, + "eval_Vitaminc-test_euclidean_precision": 0.5027624309392266, + "eval_Vitaminc-test_euclidean_recall": 0.9837837837837838, + "eval_Vitaminc-test_manhattan_accuracy": 0.5736842105263158, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 84.99573516845703, + "eval_Vitaminc-test_manhattan_ap": 0.559747734383099, + "eval_Vitaminc-test_manhattan_f1": 0.6679035250463822, + "eval_Vitaminc-test_manhattan_f1_threshold": 139.30535888671875, + "eval_Vitaminc-test_manhattan_precision": 0.5084745762711864, + "eval_Vitaminc-test_manhattan_recall": 0.972972972972973, + "eval_Vitaminc-test_max_accuracy": 0.5815789473684211, + "eval_Vitaminc-test_max_accuracy_threshold": 84.99573516845703, + "eval_Vitaminc-test_max_ap": 0.5695623413042502, + "eval_Vitaminc-test_max_f1": 0.6691449814126395, + "eval_Vitaminc-test_max_f1_threshold": 139.30535888671875, + "eval_Vitaminc-test_max_precision": 0.509915014164306, + "eval_Vitaminc-test_max_recall": 0.9945945945945946, + "eval_mrpc-test_cosine_accuracy": 0.7447368421052631, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7351576089859009, + "eval_mrpc-test_cosine_ap": 0.8492225227540071, + "eval_mrpc-test_cosine_f1": 0.8274647887323944, + "eval_mrpc-test_cosine_f1_threshold": 0.7097899913787842, + "eval_mrpc-test_cosine_precision": 0.734375, + "eval_mrpc-test_cosine_recall": 0.9475806451612904, + "eval_mrpc-test_dot_accuracy": 0.6947368421052632, + "eval_mrpc-test_dot_accuracy_threshold": 13.770221710205078, + "eval_mrpc-test_dot_ap": 0.7664430695917603, + "eval_mrpc-test_dot_f1": 0.8060200668896322, + "eval_mrpc-test_dot_f1_threshold": 13.664058685302734, + "eval_mrpc-test_dot_precision": 0.6885714285714286, + "eval_mrpc-test_dot_recall": 0.9717741935483871, + "eval_mrpc-test_euclidean_accuracy": 0.7421052631578947, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.573061466217041, + "eval_mrpc-test_euclidean_ap": 0.8229315066081502, + "eval_mrpc-test_euclidean_f1": 0.8216216216216217, + "eval_mrpc-test_euclidean_f1_threshold": 3.7641119956970215, + "eval_mrpc-test_euclidean_precision": 0.742671009771987, + "eval_mrpc-test_euclidean_recall": 0.9193548387096774, + "eval_mrpc-test_manhattan_accuracy": 0.7394736842105263, + "eval_mrpc-test_manhattan_accuracy_threshold": 75.05320739746094, + "eval_mrpc-test_manhattan_ap": 0.8195074272882318, + "eval_mrpc-test_manhattan_f1": 0.8228980322003578, + "eval_mrpc-test_manhattan_f1_threshold": 76.21920013427734, + "eval_mrpc-test_manhattan_precision": 0.7395498392282959, + "eval_mrpc-test_manhattan_recall": 0.9274193548387096, + "eval_mrpc-test_max_accuracy": 0.7447368421052631, + "eval_mrpc-test_max_accuracy_threshold": 75.05320739746094, + "eval_mrpc-test_max_ap": 0.8492225227540071, + "eval_mrpc-test_max_f1": 0.8274647887323944, + "eval_mrpc-test_max_f1_threshold": 76.21920013427734, + "eval_mrpc-test_max_precision": 0.742671009771987, + "eval_mrpc-test_max_recall": 0.9717741935483871, + "eval_nli-pairs_loss": 0.8814637064933777, + "eval_nli-pairs_runtime": 2.9865, + "eval_nli-pairs_samples_per_second": 53.575, + "eval_nli-pairs_steps_per_second": 1.674, + "eval_sequential_score": 0.5695623413042502, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_vitaminc-pairs_loss": 5.706007957458496, + "eval_vitaminc-pairs_runtime": 1.5554, + "eval_vitaminc-pairs_samples_per_second": 85.507, + "eval_vitaminc-pairs_steps_per_second": 3.215, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_qnli-contrastive_loss": 0.14242754876613617, + "eval_qnli-contrastive_runtime": 0.5128, + "eval_qnli-contrastive_samples_per_second": 311.993, + "eval_qnli-contrastive_steps_per_second": 9.75, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_scitail-pairs-qa_loss": 0.0674210786819458, + "eval_scitail-pairs-qa_runtime": 1.2622, + "eval_scitail-pairs-qa_samples_per_second": 126.759, + "eval_scitail-pairs-qa_steps_per_second": 3.961, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_scitail-pairs-pos_loss": 0.36303356289863586, + "eval_scitail-pairs-pos_runtime": 2.3597, + "eval_scitail-pairs-pos_samples_per_second": 67.806, + "eval_scitail-pairs-pos_steps_per_second": 2.119, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_xsum-pairs_loss": 0.32611021399497986, + "eval_xsum-pairs_runtime": 1.4024, + "eval_xsum-pairs_samples_per_second": 114.091, + "eval_xsum-pairs_steps_per_second": 3.565, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_compression-pairs_loss": 0.08624111860990524, + "eval_compression-pairs_runtime": 0.3948, + "eval_compression-pairs_samples_per_second": 405.263, + "eval_compression-pairs_steps_per_second": 12.664, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_sciq_pairs_loss": 0.2929747402667999, + "eval_sciq_pairs_runtime": 7.9352, + "eval_sciq_pairs_samples_per_second": 20.163, + "eval_sciq_pairs_steps_per_second": 0.63, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_qasc_pairs_loss": 0.20694346725940704, + "eval_qasc_pairs_runtime": 1.4441, + "eval_qasc_pairs_samples_per_second": 110.798, + "eval_qasc_pairs_steps_per_second": 3.462, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_qasc_facts_sym_loss": 0.14476129412651062, + "eval_qasc_facts_sym_runtime": 0.3295, + "eval_qasc_facts_sym_samples_per_second": 485.621, + "eval_qasc_facts_sym_steps_per_second": 15.176, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_openbookqa_pairs_loss": 1.7119003534317017, + "eval_openbookqa_pairs_runtime": 1.2433, + "eval_openbookqa_pairs_samples_per_second": 128.687, + "eval_openbookqa_pairs_steps_per_second": 4.021, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_msmarco_pairs_loss": 0.4974798262119293, + "eval_msmarco_pairs_runtime": 3.1059, + "eval_msmarco_pairs_samples_per_second": 51.516, + "eval_msmarco_pairs_steps_per_second": 1.61, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_nq_pairs_loss": 0.46937060356140137, + "eval_nq_pairs_runtime": 7.6944, + "eval_nq_pairs_samples_per_second": 20.794, + "eval_nq_pairs_steps_per_second": 0.65, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_trivia_pairs_loss": 0.6990238428115845, + "eval_trivia_pairs_runtime": 10.1876, + "eval_trivia_pairs_samples_per_second": 15.705, + "eval_trivia_pairs_steps_per_second": 0.491, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_quora_pairs_loss": 0.21933521330356598, + "eval_quora_pairs_runtime": 3.8953, + "eval_quora_pairs_samples_per_second": 173.287, + "eval_quora_pairs_steps_per_second": 5.648, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_gooaq_pairs_loss": 0.5000605583190918, + "eval_gooaq_pairs_runtime": 2.2758, + "eval_gooaq_pairs_samples_per_second": 70.306, + "eval_gooaq_pairs_steps_per_second": 2.197, + "step": 3876 + }, + { + "epoch": 0.7502903600464577, + "eval_mrpc_pairs_loss": 0.044196613132953644, + "eval_mrpc_pairs_runtime": 0.3742, + "eval_mrpc_pairs_samples_per_second": 427.588, + "eval_mrpc_pairs_steps_per_second": 13.362, + "step": 3876 + }, + { + "epoch": 0.7549361207897793, + "grad_norm": 1.587725281715393, + "learning_rate": 2.2630662020905924e-05, + "loss": 0.4466, + "step": 3900 + }, + { + "epoch": 0.7675183894696089, + "grad_norm": 9.404407501220703, + "learning_rate": 2.3008130081300814e-05, + "loss": 0.6293, + "step": 3965 + }, + { + "epoch": 0.7801006581494386, + "grad_norm": 3.672903537750244, + "learning_rate": 2.3385598141695703e-05, + "loss": 0.5591, + "step": 4030 + }, + { + "epoch": 0.7926829268292683, + "grad_norm": 1.3529752492904663, + "learning_rate": 2.3763066202090596e-05, + "loss": 0.5702, + "step": 4095 + }, + { + "epoch": 0.8052651955090979, + "grad_norm": 21.519527435302734, + "learning_rate": 2.414053426248548e-05, + "loss": 0.4645, + "step": 4160 + }, + { + "epoch": 0.8178474641889276, + "grad_norm": 5.5179362297058105, + "learning_rate": 2.4518002322880374e-05, + "loss": 0.5175, + "step": 4225 + }, + { + "epoch": 0.8304297328687572, + "grad_norm": 10.127123832702637, + "learning_rate": 2.489547038327526e-05, + "loss": 0.5188, + "step": 4290 + }, + { + "epoch": 0.8430120015485869, + "grad_norm": 5.518251895904541, + "learning_rate": 2.5272938443670153e-05, + "loss": 0.4908, + "step": 4355 + }, + { + "epoch": 0.8555942702284166, + "grad_norm": 0.7371014356613159, + "learning_rate": 2.5650406504065042e-05, + "loss": 0.5075, + "step": 4420 + }, + { + "epoch": 0.8681765389082462, + "grad_norm": 22.15941619873047, + "learning_rate": 2.602206736353078e-05, + "loss": 0.5625, + "step": 4485 + }, + { + "epoch": 0.8807588075880759, + "grad_norm": 1.0873440504074097, + "learning_rate": 2.6399535423925668e-05, + "loss": 0.4952, + "step": 4550 + }, + { + "epoch": 0.8933410762679055, + "grad_norm": 19.877098083496094, + "learning_rate": 2.6777003484320557e-05, + "loss": 0.5818, + "step": 4615 + }, + { + "epoch": 0.9059233449477352, + "grad_norm": 9.559219360351562, + "learning_rate": 2.715447154471545e-05, + "loss": 0.6346, + "step": 4680 + }, + { + "epoch": 0.9185056136275649, + "grad_norm": 2.896815299987793, + "learning_rate": 2.7531939605110336e-05, + "loss": 0.4971, + "step": 4745 + }, + { + "epoch": 0.9310878823073945, + "grad_norm": 4.5045318603515625, + "learning_rate": 2.7909407665505228e-05, + "loss": 0.4834, + "step": 4810 + }, + { + "epoch": 0.9436701509872242, + "grad_norm": 1.2259025573730469, + "learning_rate": 2.8286875725900114e-05, + "loss": 0.5992, + "step": 4875 + }, + { + "epoch": 0.9562524196670538, + "grad_norm": 9.3356294631958, + "learning_rate": 2.8664343786295007e-05, + "loss": 0.5244, + "step": 4940 + }, + { + "epoch": 0.9688346883468835, + "grad_norm": 2.221039056777954, + "learning_rate": 2.9041811846689896e-05, + "loss": 0.6051, + "step": 5005 + }, + { + "epoch": 0.9814169570267132, + "grad_norm": 1.3200041055679321, + "learning_rate": 2.9419279907084785e-05, + "loss": 0.4323, + "step": 5070 + }, + { + "epoch": 0.9939992257065428, + "grad_norm": 8.689266204833984, + "learning_rate": 2.9796747967479674e-05, + "loss": 0.5347, + "step": 5135 + }, + { + "epoch": 1.00038714672861, + "eval_StS-test_pearson_cosine": 0.8763915227055069, + "eval_StS-test_pearson_dot": 0.8008666276678159, + "eval_StS-test_pearson_euclidean": 0.862640534705167, + "eval_StS-test_pearson_manhattan": 0.8624528259321993, + "eval_StS-test_pearson_max": 0.8763915227055069, + "eval_StS-test_spearman_cosine": 0.8905227354354782, + "eval_StS-test_spearman_dot": 0.8046369691471742, + "eval_StS-test_spearman_euclidean": 0.8643543133924523, + "eval_StS-test_spearman_manhattan": 0.8653407373098487, + "eval_StS-test_spearman_max": 0.8905227354354782, + "eval_Vitaminc-test_cosine_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.8158951997756958, + "eval_Vitaminc-test_cosine_ap": 0.5603755036121337, + "eval_Vitaminc-test_cosine_f1": 0.6778398510242085, + "eval_Vitaminc-test_cosine_f1_threshold": 0.5370069146156311, + "eval_Vitaminc-test_cosine_precision": 0.5170454545454546, + "eval_Vitaminc-test_cosine_recall": 0.9837837837837838, + "eval_Vitaminc-test_dot_accuracy": 0.5578947368421052, + "eval_Vitaminc-test_dot_accuracy_threshold": 25.046009063720703, + "eval_Vitaminc-test_dot_ap": 0.5384851861555453, + "eval_Vitaminc-test_dot_f1": 0.6703499079189686, + "eval_Vitaminc-test_dot_f1_threshold": 16.19637680053711, + "eval_Vitaminc-test_dot_precision": 0.5083798882681564, + "eval_Vitaminc-test_dot_recall": 0.9837837837837838, + "eval_Vitaminc-test_euclidean_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 4.248380661010742, + "eval_Vitaminc-test_euclidean_ap": 0.5580666089311308, + "eval_Vitaminc-test_euclidean_f1": 0.6703499079189686, + "eval_Vitaminc-test_euclidean_f1_threshold": 6.6477460861206055, + "eval_Vitaminc-test_euclidean_precision": 0.5083798882681564, + "eval_Vitaminc-test_euclidean_recall": 0.9837837837837838, + "eval_Vitaminc-test_manhattan_accuracy": 0.5736842105263158, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 89.6431884765625, + "eval_Vitaminc-test_manhattan_ap": 0.5550489368095819, + "eval_Vitaminc-test_manhattan_f1": 0.6703499079189686, + "eval_Vitaminc-test_manhattan_f1_threshold": 138.67050170898438, + "eval_Vitaminc-test_manhattan_precision": 0.5083798882681564, + "eval_Vitaminc-test_manhattan_recall": 0.9837837837837838, + "eval_Vitaminc-test_max_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_max_accuracy_threshold": 89.6431884765625, + "eval_Vitaminc-test_max_ap": 0.5603755036121337, + "eval_Vitaminc-test_max_f1": 0.6778398510242085, + "eval_Vitaminc-test_max_f1_threshold": 138.67050170898438, + "eval_Vitaminc-test_max_precision": 0.5170454545454546, + "eval_Vitaminc-test_max_recall": 0.9837837837837838, + "eval_mrpc-test_cosine_accuracy": 0.7526315789473684, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7351824045181274, + "eval_mrpc-test_cosine_ap": 0.8519630000115989, + "eval_mrpc-test_cosine_f1": 0.8324514991181658, + "eval_mrpc-test_cosine_f1_threshold": 0.7194495797157288, + "eval_mrpc-test_cosine_precision": 0.7398119122257053, + "eval_mrpc-test_cosine_recall": 0.9516129032258065, + "eval_mrpc-test_dot_accuracy": 0.6868421052631579, + "eval_mrpc-test_dot_accuracy_threshold": 17.337886810302734, + "eval_mrpc-test_dot_ap": 0.7667271250415733, + "eval_mrpc-test_dot_f1": 0.8032786885245902, + "eval_mrpc-test_dot_f1_threshold": 12.53590202331543, + "eval_mrpc-test_dot_precision": 0.6767955801104972, + "eval_mrpc-test_dot_recall": 0.9879032258064516, + "eval_mrpc-test_euclidean_accuracy": 0.7473684210526316, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.7708821296691895, + "eval_mrpc-test_euclidean_ap": 0.8286726870063765, + "eval_mrpc-test_euclidean_f1": 0.827338129496403, + "eval_mrpc-test_euclidean_f1_threshold": 3.813426971435547, + "eval_mrpc-test_euclidean_precision": 0.7467532467532467, + "eval_mrpc-test_euclidean_recall": 0.9274193548387096, + "eval_mrpc-test_manhattan_accuracy": 0.7421052631578947, + "eval_mrpc-test_manhattan_accuracy_threshold": 74.25254821777344, + "eval_mrpc-test_manhattan_ap": 0.824711624780047, + "eval_mrpc-test_manhattan_f1": 0.8243727598566308, + "eval_mrpc-test_manhattan_f1_threshold": 76.98123931884766, + "eval_mrpc-test_manhattan_precision": 0.7419354838709677, + "eval_mrpc-test_manhattan_recall": 0.9274193548387096, + "eval_mrpc-test_max_accuracy": 0.7526315789473684, + "eval_mrpc-test_max_accuracy_threshold": 74.25254821777344, + "eval_mrpc-test_max_ap": 0.8519630000115989, + "eval_mrpc-test_max_f1": 0.8324514991181658, + "eval_mrpc-test_max_f1_threshold": 76.98123931884766, + "eval_mrpc-test_max_precision": 0.7467532467532467, + "eval_mrpc-test_max_recall": 0.9879032258064516, + "eval_nli-pairs_loss": 0.8806689381599426, + "eval_nli-pairs_runtime": 3.1388, + "eval_nli-pairs_samples_per_second": 50.974, + "eval_nli-pairs_steps_per_second": 1.593, + "eval_sequential_score": 0.5603755036121337, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_vitaminc-pairs_loss": 5.74862813949585, + "eval_vitaminc-pairs_runtime": 1.6555, + "eval_vitaminc-pairs_samples_per_second": 80.34, + "eval_vitaminc-pairs_steps_per_second": 3.02, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_qnli-contrastive_loss": 0.15019993484020233, + "eval_qnli-contrastive_runtime": 0.5355, + "eval_qnli-contrastive_samples_per_second": 298.759, + "eval_qnli-contrastive_steps_per_second": 9.336, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_scitail-pairs-qa_loss": 0.06865032762289047, + "eval_scitail-pairs-qa_runtime": 1.2902, + "eval_scitail-pairs-qa_samples_per_second": 124.008, + "eval_scitail-pairs-qa_steps_per_second": 3.875, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_scitail-pairs-pos_loss": 0.35982105135917664, + "eval_scitail-pairs-pos_runtime": 2.4147, + "eval_scitail-pairs-pos_samples_per_second": 66.262, + "eval_scitail-pairs-pos_steps_per_second": 2.071, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_xsum-pairs_loss": 0.2985193133354187, + "eval_xsum-pairs_runtime": 1.3999, + "eval_xsum-pairs_samples_per_second": 114.294, + "eval_xsum-pairs_steps_per_second": 3.572, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_compression-pairs_loss": 0.0826263278722763, + "eval_compression-pairs_runtime": 0.4065, + "eval_compression-pairs_samples_per_second": 393.63, + "eval_compression-pairs_steps_per_second": 12.301, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_sciq_pairs_loss": 0.23171819746494293, + "eval_sciq_pairs_runtime": 8.3356, + "eval_sciq_pairs_samples_per_second": 19.195, + "eval_sciq_pairs_steps_per_second": 0.6, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_qasc_pairs_loss": 0.22728173434734344, + "eval_qasc_pairs_runtime": 1.4954, + "eval_qasc_pairs_samples_per_second": 106.994, + "eval_qasc_pairs_steps_per_second": 3.344, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_qasc_facts_sym_loss": 0.14946210384368896, + "eval_qasc_facts_sym_runtime": 0.3454, + "eval_qasc_facts_sym_samples_per_second": 463.224, + "eval_qasc_facts_sym_steps_per_second": 14.476, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_openbookqa_pairs_loss": 1.7004419565200806, + "eval_openbookqa_pairs_runtime": 1.4576, + "eval_openbookqa_pairs_samples_per_second": 109.771, + "eval_openbookqa_pairs_steps_per_second": 3.43, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_msmarco_pairs_loss": 0.5557130575180054, + "eval_msmarco_pairs_runtime": 3.3251, + "eval_msmarco_pairs_samples_per_second": 48.119, + "eval_msmarco_pairs_steps_per_second": 1.504, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_nq_pairs_loss": 0.44789552688598633, + "eval_nq_pairs_runtime": 7.8474, + "eval_nq_pairs_samples_per_second": 20.389, + "eval_nq_pairs_steps_per_second": 0.637, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_trivia_pairs_loss": 0.7468730807304382, + "eval_trivia_pairs_runtime": 10.2776, + "eval_trivia_pairs_samples_per_second": 15.568, + "eval_trivia_pairs_steps_per_second": 0.486, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_quora_pairs_loss": 0.2115297168493271, + "eval_quora_pairs_runtime": 4.0407, + "eval_quora_pairs_samples_per_second": 167.05, + "eval_quora_pairs_steps_per_second": 5.445, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_gooaq_pairs_loss": 0.53202223777771, + "eval_gooaq_pairs_runtime": 2.2862, + "eval_gooaq_pairs_samples_per_second": 69.986, + "eval_gooaq_pairs_steps_per_second": 2.187, + "step": 5168 + }, + { + "epoch": 1.00038714672861, + "eval_mrpc_pairs_loss": 0.04773856699466705, + "eval_mrpc_pairs_runtime": 0.3785, + "eval_mrpc_pairs_samples_per_second": 422.744, + "eval_mrpc_pairs_steps_per_second": 13.211, + "step": 5168 + }, + { + "epoch": 1.0065814943863725, + "grad_norm": 1.1308379173278809, + "learning_rate": 2.99985958598248e-05, + "loss": 0.4946, + "step": 5200 + }, + { + "epoch": 1.019163763066202, + "grad_norm": 0.7707048058509827, + "learning_rate": 2.9986216360213095e-05, + "loss": 0.4134, + "step": 5265 + }, + { + "epoch": 1.0317460317460316, + "grad_norm": 19.599868774414062, + "learning_rate": 2.9960574370109496e-05, + "loss": 0.4702, + "step": 5330 + }, + { + "epoch": 1.0443283004258614, + "grad_norm": 0.9096149206161499, + "learning_rate": 2.992178447249302e-05, + "loss": 0.5392, + "step": 5395 + }, + { + "epoch": 1.056910569105691, + "grad_norm": 19.951955795288086, + "learning_rate": 2.986988075736407e-05, + "loss": 0.4845, + "step": 5460 + }, + { + "epoch": 1.0694928377855206, + "grad_norm": 13.416616439819336, + "learning_rate": 2.980490883963215e-05, + "loss": 0.4601, + "step": 5525 + }, + { + "epoch": 1.0820751064653504, + "grad_norm": 1.105792760848999, + "learning_rate": 2.9726925819027805e-05, + "loss": 0.5683, + "step": 5590 + }, + { + "epoch": 1.09465737514518, + "grad_norm": 6.264986991882324, + "learning_rate": 2.9636000229921248e-05, + "loss": 0.4742, + "step": 5655 + }, + { + "epoch": 1.1072396438250096, + "grad_norm": 0.4068054258823395, + "learning_rate": 2.9532211981091813e-05, + "loss": 0.5187, + "step": 5720 + }, + { + "epoch": 1.1198219125048394, + "grad_norm": 4.136714935302734, + "learning_rate": 2.941565228550117e-05, + "loss": 0.4888, + "step": 5785 + }, + { + "epoch": 1.132404181184669, + "grad_norm": 1.7270488739013672, + "learning_rate": 2.9286423580131986e-05, + "loss": 0.3486, + "step": 5850 + }, + { + "epoch": 1.1449864498644986, + "grad_norm": 3.33518123626709, + "learning_rate": 2.9144639435962487e-05, + "loss": 0.4714, + "step": 5915 + }, + { + "epoch": 1.1575687185443284, + "grad_norm": 3.2314696311950684, + "learning_rate": 2.8990424458156108e-05, + "loss": 0.5638, + "step": 5980 + }, + { + "epoch": 1.170150987224158, + "grad_norm": 28.55368423461914, + "learning_rate": 2.882391417655382e-05, + "loss": 0.457, + "step": 6045 + }, + { + "epoch": 1.1827332559039876, + "grad_norm": 17.361839294433594, + "learning_rate": 2.8645254926565508e-05, + "loss": 0.5419, + "step": 6110 + }, + { + "epoch": 1.1953155245838172, + "grad_norm": 14.851773262023926, + "learning_rate": 2.8454603720564953e-05, + "loss": 0.4478, + "step": 6175 + }, + { + "epoch": 1.207897793263647, + "grad_norm": 18.820308685302734, + "learning_rate": 2.825212810990158e-05, + "loss": 0.5012, + "step": 6240 + }, + { + "epoch": 1.2204800619434766, + "grad_norm": 0.15901756286621094, + "learning_rate": 2.803800603765008e-05, + "loss": 0.4505, + "step": 6305 + }, + { + "epoch": 1.2330623306233062, + "grad_norm": 8.319548606872559, + "learning_rate": 2.7812425682227454e-05, + "loss": 0.4677, + "step": 6370 + }, + { + "epoch": 1.245644599303136, + "grad_norm": 6.264440536499023, + "learning_rate": 2.757558529201485e-05, + "loss": 0.478, + "step": 6435 + }, + { + "epoch": 1.2504839334107627, + "eval_StS-test_pearson_cosine": 0.8749809466696259, + "eval_StS-test_pearson_dot": 0.7726333541295806, + "eval_StS-test_pearson_euclidean": 0.8635304000211516, + "eval_StS-test_pearson_manhattan": 0.8629148539097312, + "eval_StS-test_pearson_max": 0.8749809466696259, + "eval_StS-test_spearman_cosine": 0.8911637850925943, + "eval_StS-test_spearman_dot": 0.7791087210103667, + "eval_StS-test_spearman_euclidean": 0.8717319673612609, + "eval_StS-test_spearman_manhattan": 0.873430814633936, + "eval_StS-test_spearman_max": 0.8911637850925943, + "eval_Vitaminc-test_cosine_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7059692144393921, + "eval_Vitaminc-test_cosine_ap": 0.5681853466607133, + "eval_Vitaminc-test_cosine_f1": 0.6740740740740742, + "eval_Vitaminc-test_cosine_f1_threshold": 0.47524070739746094, + "eval_Vitaminc-test_cosine_precision": 0.5126760563380282, + "eval_Vitaminc-test_cosine_recall": 0.9837837837837838, + "eval_Vitaminc-test_dot_accuracy": 0.5605263157894737, + "eval_Vitaminc-test_dot_accuracy_threshold": 18.60018539428711, + "eval_Vitaminc-test_dot_ap": 0.542223191158599, + "eval_Vitaminc-test_dot_f1": 0.6678832116788321, + "eval_Vitaminc-test_dot_f1_threshold": 11.097679138183594, + "eval_Vitaminc-test_dot_precision": 0.5041322314049587, + "eval_Vitaminc-test_dot_recall": 0.9891891891891892, + "eval_Vitaminc-test_euclidean_accuracy": 0.5605263157894737, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.57100772857666, + "eval_Vitaminc-test_euclidean_ap": 0.5599668277103405, + "eval_Vitaminc-test_euclidean_f1": 0.6691729323308271, + "eval_Vitaminc-test_euclidean_f1_threshold": 6.093693733215332, + "eval_Vitaminc-test_euclidean_precision": 0.5129682997118156, + "eval_Vitaminc-test_euclidean_recall": 0.9621621621621622, + "eval_Vitaminc-test_manhattan_accuracy": 0.5710526315789474, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 77.09724426269531, + "eval_Vitaminc-test_manhattan_ap": 0.5609220991144057, + "eval_Vitaminc-test_manhattan_f1": 0.6666666666666666, + "eval_Vitaminc-test_manhattan_f1_threshold": 129.85740661621094, + "eval_Vitaminc-test_manhattan_precision": 0.5070422535211268, + "eval_Vitaminc-test_manhattan_recall": 0.972972972972973, + "eval_Vitaminc-test_max_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_max_accuracy_threshold": 77.09724426269531, + "eval_Vitaminc-test_max_ap": 0.5681853466607133, + "eval_Vitaminc-test_max_f1": 0.6740740740740742, + "eval_Vitaminc-test_max_f1_threshold": 129.85740661621094, + "eval_Vitaminc-test_max_precision": 0.5129682997118156, + "eval_Vitaminc-test_max_recall": 0.9891891891891892, + "eval_mrpc-test_cosine_accuracy": 0.7421052631578947, + "eval_mrpc-test_cosine_accuracy_threshold": 0.6662319898605347, + "eval_mrpc-test_cosine_ap": 0.847874875651623, + "eval_mrpc-test_cosine_f1": 0.831615120274914, + "eval_mrpc-test_cosine_f1_threshold": 0.6662319898605347, + "eval_mrpc-test_cosine_precision": 0.7245508982035929, + "eval_mrpc-test_cosine_recall": 0.9758064516129032, + "eval_mrpc-test_dot_accuracy": 0.6973684210526315, + "eval_mrpc-test_dot_accuracy_threshold": 13.194486618041992, + "eval_mrpc-test_dot_ap": 0.7550727858117499, + "eval_mrpc-test_dot_f1": 0.803305785123967, + "eval_mrpc-test_dot_f1_threshold": 11.023738861083984, + "eval_mrpc-test_dot_precision": 0.680672268907563, + "eval_mrpc-test_dot_recall": 0.9798387096774194, + "eval_mrpc-test_euclidean_accuracy": 0.7526315789473684, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.5304112434387207, + "eval_mrpc-test_euclidean_ap": 0.8291889523682024, + "eval_mrpc-test_euclidean_f1": 0.829090909090909, + "eval_mrpc-test_euclidean_f1_threshold": 3.5304112434387207, + "eval_mrpc-test_euclidean_precision": 0.7549668874172185, + "eval_mrpc-test_euclidean_recall": 0.9193548387096774, + "eval_mrpc-test_manhattan_accuracy": 0.7473684210526316, + "eval_mrpc-test_manhattan_accuracy_threshold": 70.23236846923828, + "eval_mrpc-test_manhattan_ap": 0.827542993951462, + "eval_mrpc-test_manhattan_f1": 0.8241758241758242, + "eval_mrpc-test_manhattan_f1_threshold": 70.23236846923828, + "eval_mrpc-test_manhattan_precision": 0.7550335570469798, + "eval_mrpc-test_manhattan_recall": 0.907258064516129, + "eval_mrpc-test_max_accuracy": 0.7526315789473684, + "eval_mrpc-test_max_accuracy_threshold": 70.23236846923828, + "eval_mrpc-test_max_ap": 0.847874875651623, + "eval_mrpc-test_max_f1": 0.831615120274914, + "eval_mrpc-test_max_f1_threshold": 70.23236846923828, + "eval_mrpc-test_max_precision": 0.7550335570469798, + "eval_mrpc-test_max_recall": 0.9798387096774194, + "eval_nli-pairs_loss": 0.8716467022895813, + "eval_nli-pairs_runtime": 3.0359, + "eval_nli-pairs_samples_per_second": 52.702, + "eval_nli-pairs_steps_per_second": 1.647, + "eval_sequential_score": 0.5681853466607133, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_vitaminc-pairs_loss": 5.813481330871582, + "eval_vitaminc-pairs_runtime": 1.6243, + "eval_vitaminc-pairs_samples_per_second": 81.88, + "eval_vitaminc-pairs_steps_per_second": 3.078, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_qnli-contrastive_loss": 0.09026918560266495, + "eval_qnli-contrastive_runtime": 0.5258, + "eval_qnli-contrastive_samples_per_second": 304.322, + "eval_qnli-contrastive_steps_per_second": 9.51, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_scitail-pairs-qa_loss": 0.06381334364414215, + "eval_scitail-pairs-qa_runtime": 1.2167, + "eval_scitail-pairs-qa_samples_per_second": 131.505, + "eval_scitail-pairs-qa_steps_per_second": 4.11, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_scitail-pairs-pos_loss": 0.3648618757724762, + "eval_scitail-pairs-pos_runtime": 2.3843, + "eval_scitail-pairs-pos_samples_per_second": 67.105, + "eval_scitail-pairs-pos_steps_per_second": 2.097, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_xsum-pairs_loss": 0.317168653011322, + "eval_xsum-pairs_runtime": 1.408, + "eval_xsum-pairs_samples_per_second": 113.634, + "eval_xsum-pairs_steps_per_second": 3.551, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_compression-pairs_loss": 0.08144789934158325, + "eval_compression-pairs_runtime": 0.406, + "eval_compression-pairs_samples_per_second": 394.049, + "eval_compression-pairs_steps_per_second": 12.314, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_sciq_pairs_loss": 0.25741466879844666, + "eval_sciq_pairs_runtime": 8.0381, + "eval_sciq_pairs_samples_per_second": 19.905, + "eval_sciq_pairs_steps_per_second": 0.622, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_qasc_pairs_loss": 0.19735541939735413, + "eval_qasc_pairs_runtime": 1.4554, + "eval_qasc_pairs_samples_per_second": 109.935, + "eval_qasc_pairs_steps_per_second": 3.435, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_qasc_facts_sym_loss": 0.14057289063930511, + "eval_qasc_facts_sym_runtime": 0.3302, + "eval_qasc_facts_sym_samples_per_second": 484.627, + "eval_qasc_facts_sym_steps_per_second": 15.145, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_openbookqa_pairs_loss": 1.6864427328109741, + "eval_openbookqa_pairs_runtime": 1.272, + "eval_openbookqa_pairs_samples_per_second": 125.791, + "eval_openbookqa_pairs_steps_per_second": 3.931, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_msmarco_pairs_loss": 0.5884890556335449, + "eval_msmarco_pairs_runtime": 3.1363, + "eval_msmarco_pairs_samples_per_second": 51.016, + "eval_msmarco_pairs_steps_per_second": 1.594, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_nq_pairs_loss": 0.4531589448451996, + "eval_nq_pairs_runtime": 7.7451, + "eval_nq_pairs_samples_per_second": 20.658, + "eval_nq_pairs_steps_per_second": 0.646, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_trivia_pairs_loss": 0.8866144418716431, + "eval_trivia_pairs_runtime": 10.2442, + "eval_trivia_pairs_samples_per_second": 15.619, + "eval_trivia_pairs_steps_per_second": 0.488, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_quora_pairs_loss": 0.2143346667289734, + "eval_quora_pairs_runtime": 3.9106, + "eval_quora_pairs_samples_per_second": 172.607, + "eval_quora_pairs_steps_per_second": 5.626, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_gooaq_pairs_loss": 0.535525381565094, + "eval_gooaq_pairs_runtime": 2.2827, + "eval_gooaq_pairs_samples_per_second": 70.094, + "eval_gooaq_pairs_steps_per_second": 2.19, + "step": 6460 + }, + { + "epoch": 1.2504839334107627, + "eval_mrpc_pairs_loss": 0.04031027480959892, + "eval_mrpc_pairs_runtime": 0.3763, + "eval_mrpc_pairs_samples_per_second": 425.243, + "eval_mrpc_pairs_steps_per_second": 13.289, + "step": 6460 + }, + { + "epoch": 1.2582268679829656, + "grad_norm": 3.637460947036743, + "learning_rate": 2.7327693011129508e-05, + "loss": 0.5004, + "step": 6500 + }, + { + "epoch": 1.2708091366627952, + "grad_norm": 11.11975383758545, + "learning_rate": 2.7068966696500025e-05, + "loss": 0.5453, + "step": 6565 + }, + { + "epoch": 1.2833914053426247, + "grad_norm": 0.5408668518066406, + "learning_rate": 2.6799633726405593e-05, + "loss": 0.529, + "step": 6630 + }, + { + "epoch": 1.2959736740224546, + "grad_norm": 1.3864606618881226, + "learning_rate": 2.6519930800647514e-05, + "loss": 0.4343, + "step": 6695 + }, + { + "epoch": 1.3085559427022841, + "grad_norm": 10.643692970275879, + "learning_rate": 2.623010373252868e-05, + "loss": 0.4235, + "step": 6760 + }, + { + "epoch": 1.321138211382114, + "grad_norm": 15.424538612365723, + "learning_rate": 2.59304072328237e-05, + "loss": 0.4493, + "step": 6825 + }, + { + "epoch": 1.3337204800619435, + "grad_norm": 6.388729095458984, + "learning_rate": 2.5621104685929617e-05, + "loss": 0.4349, + "step": 6890 + }, + { + "epoch": 1.3463027487417731, + "grad_norm": 17.623123168945312, + "learning_rate": 2.5302467918393965e-05, + "loss": 0.4016, + "step": 6955 + }, + { + "epoch": 1.3588850174216027, + "grad_norm": 0.2991964817047119, + "learning_rate": 2.49747769600235e-05, + "loss": 0.39, + "step": 7020 + }, + { + "epoch": 1.3714672861014323, + "grad_norm": 2.3669910430908203, + "learning_rate": 2.4638319797783612e-05, + "loss": 0.5162, + "step": 7085 + }, + { + "epoch": 1.3840495547812621, + "grad_norm": 0.24493175745010376, + "learning_rate": 2.4293392122704728e-05, + "loss": 0.4186, + "step": 7150 + }, + { + "epoch": 1.3966318234610917, + "grad_norm": 1.4514191150665283, + "learning_rate": 2.3940297070018048e-05, + "loss": 0.3867, + "step": 7215 + }, + { + "epoch": 1.4092140921409215, + "grad_norm": 12.809524536132812, + "learning_rate": 2.3579344952749066e-05, + "loss": 0.5076, + "step": 7280 + }, + { + "epoch": 1.4217963608207511, + "grad_norm": 6.879631996154785, + "learning_rate": 2.3210852989002988e-05, + "loss": 0.4872, + "step": 7345 + }, + { + "epoch": 1.4343786295005807, + "grad_norm": 2.3940792083740234, + "learning_rate": 2.283514502318168e-05, + "loss": 0.4508, + "step": 7410 + }, + { + "epoch": 1.4469608981804103, + "grad_norm": 3.3337996006011963, + "learning_rate": 2.2452551241377226e-05, + "loss": 0.4261, + "step": 7475 + }, + { + "epoch": 1.45954316686024, + "grad_norm": 18.75865364074707, + "learning_rate": 2.2063407881192124e-05, + "loss": 0.4444, + "step": 7540 + }, + { + "epoch": 1.4721254355400697, + "grad_norm": 3.5605080127716064, + "learning_rate": 2.1668056936241207e-05, + "loss": 0.4043, + "step": 7605 + }, + { + "epoch": 1.4847077042198993, + "grad_norm": 0.22081464529037476, + "learning_rate": 2.1266845855595004e-05, + "loss": 0.4451, + "step": 7670 + }, + { + "epoch": 1.497289972899729, + "grad_norm": 0.37092337012290955, + "learning_rate": 2.086012723842858e-05, + "loss": 0.4312, + "step": 7735 + }, + { + "epoch": 1.5005807200929153, + "eval_StS-test_pearson_cosine": 0.8818926010663196, + "eval_StS-test_pearson_dot": 0.7948939773009118, + "eval_StS-test_pearson_euclidean": 0.8683502643949064, + "eval_StS-test_pearson_manhattan": 0.8693690003312309, + "eval_StS-test_pearson_max": 0.8818926010663196, + "eval_StS-test_spearman_cosine": 0.8954799233919072, + "eval_StS-test_spearman_dot": 0.8002703657023814, + "eval_StS-test_spearman_euclidean": 0.8705533161218193, + "eval_StS-test_spearman_manhattan": 0.8724718674009548, + "eval_StS-test_spearman_max": 0.8954799233919072, + "eval_Vitaminc-test_cosine_accuracy": 0.5736842105263158, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7514327764511108, + "eval_Vitaminc-test_cosine_ap": 0.5635780272169032, + "eval_Vitaminc-test_cosine_f1": 0.6728280961182994, + "eval_Vitaminc-test_cosine_f1_threshold": 0.4688807725906372, + "eval_Vitaminc-test_cosine_precision": 0.5112359550561798, + "eval_Vitaminc-test_cosine_recall": 0.9837837837837838, + "eval_Vitaminc-test_dot_accuracy": 0.5578947368421052, + "eval_Vitaminc-test_dot_accuracy_threshold": 18.673751831054688, + "eval_Vitaminc-test_dot_ap": 0.5505510423866934, + "eval_Vitaminc-test_dot_f1": 0.6679389312977099, + "eval_Vitaminc-test_dot_f1_threshold": 13.694689750671387, + "eval_Vitaminc-test_dot_precision": 0.5162241887905604, + "eval_Vitaminc-test_dot_recall": 0.9459459459459459, + "eval_Vitaminc-test_euclidean_accuracy": 0.5763157894736842, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.70269513130188, + "eval_Vitaminc-test_euclidean_ap": 0.5537568551698975, + "eval_Vitaminc-test_euclidean_f1": 0.6654611211573237, + "eval_Vitaminc-test_euclidean_f1_threshold": 6.518295764923096, + "eval_Vitaminc-test_euclidean_precision": 0.5, + "eval_Vitaminc-test_euclidean_recall": 0.9945945945945946, + "eval_Vitaminc-test_manhattan_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 74.57982635498047, + "eval_Vitaminc-test_manhattan_ap": 0.5568365932864207, + "eval_Vitaminc-test_manhattan_f1": 0.6654676258992805, + "eval_Vitaminc-test_manhattan_f1_threshold": 139.71929931640625, + "eval_Vitaminc-test_manhattan_precision": 0.49865229110512127, + "eval_Vitaminc-test_manhattan_recall": 1.0, + "eval_Vitaminc-test_max_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_max_accuracy_threshold": 74.57982635498047, + "eval_Vitaminc-test_max_ap": 0.5635780272169032, + "eval_Vitaminc-test_max_f1": 0.6728280961182994, + "eval_Vitaminc-test_max_f1_threshold": 139.71929931640625, + "eval_Vitaminc-test_max_precision": 0.5162241887905604, + "eval_Vitaminc-test_max_recall": 1.0, + "eval_mrpc-test_cosine_accuracy": 0.7315789473684211, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7411860227584839, + "eval_mrpc-test_cosine_ap": 0.8441517199589379, + "eval_mrpc-test_cosine_f1": 0.8186528497409327, + "eval_mrpc-test_cosine_f1_threshold": 0.672976553440094, + "eval_mrpc-test_cosine_precision": 0.716012084592145, + "eval_mrpc-test_cosine_recall": 0.9556451612903226, + "eval_mrpc-test_dot_accuracy": 0.6921052631578948, + "eval_mrpc-test_dot_accuracy_threshold": 13.14952278137207, + "eval_mrpc-test_dot_ap": 0.761780840207299, + "eval_mrpc-test_dot_f1": 0.8045602605863192, + "eval_mrpc-test_dot_f1_threshold": 8.986488342285156, + "eval_mrpc-test_dot_precision": 0.674863387978142, + "eval_mrpc-test_dot_recall": 0.9959677419354839, + "eval_mrpc-test_euclidean_accuracy": 0.7421052631578947, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.5243382453918457, + "eval_mrpc-test_euclidean_ap": 0.8217039781172437, + "eval_mrpc-test_euclidean_f1": 0.8231046931407942, + "eval_mrpc-test_euclidean_f1_threshold": 3.5243382453918457, + "eval_mrpc-test_euclidean_precision": 0.7450980392156863, + "eval_mrpc-test_euclidean_recall": 0.9193548387096774, + "eval_mrpc-test_manhattan_accuracy": 0.7368421052631579, + "eval_mrpc-test_manhattan_accuracy_threshold": 70.5282211303711, + "eval_mrpc-test_manhattan_ap": 0.8193721480476763, + "eval_mrpc-test_manhattan_f1": 0.8201438848920863, + "eval_mrpc-test_manhattan_f1_threshold": 70.67774963378906, + "eval_mrpc-test_manhattan_precision": 0.7402597402597403, + "eval_mrpc-test_manhattan_recall": 0.9193548387096774, + "eval_mrpc-test_max_accuracy": 0.7421052631578947, + "eval_mrpc-test_max_accuracy_threshold": 70.5282211303711, + "eval_mrpc-test_max_ap": 0.8441517199589379, + "eval_mrpc-test_max_f1": 0.8231046931407942, + "eval_mrpc-test_max_f1_threshold": 70.67774963378906, + "eval_mrpc-test_max_precision": 0.7450980392156863, + "eval_mrpc-test_max_recall": 0.9959677419354839, + "eval_nli-pairs_loss": 0.7820265293121338, + "eval_nli-pairs_runtime": 3.1498, + "eval_nli-pairs_samples_per_second": 50.797, + "eval_nli-pairs_steps_per_second": 1.587, + "eval_sequential_score": 0.5635780272169032, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_vitaminc-pairs_loss": 5.762389183044434, + "eval_vitaminc-pairs_runtime": 1.611, + "eval_vitaminc-pairs_samples_per_second": 82.56, + "eval_vitaminc-pairs_steps_per_second": 3.104, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_qnli-contrastive_loss": 0.16094566881656647, + "eval_qnli-contrastive_runtime": 0.5292, + "eval_qnli-contrastive_samples_per_second": 302.371, + "eval_qnli-contrastive_steps_per_second": 9.449, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_scitail-pairs-qa_loss": 0.05526223033666611, + "eval_scitail-pairs-qa_runtime": 1.3843, + "eval_scitail-pairs-qa_samples_per_second": 115.583, + "eval_scitail-pairs-qa_steps_per_second": 3.612, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_scitail-pairs-pos_loss": 0.3169253170490265, + "eval_scitail-pairs-pos_runtime": 2.5617, + "eval_scitail-pairs-pos_samples_per_second": 62.46, + "eval_scitail-pairs-pos_steps_per_second": 1.952, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_xsum-pairs_loss": 0.27240970730781555, + "eval_xsum-pairs_runtime": 1.421, + "eval_xsum-pairs_samples_per_second": 112.599, + "eval_xsum-pairs_steps_per_second": 3.519, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_compression-pairs_loss": 0.0707278698682785, + "eval_compression-pairs_runtime": 0.4155, + "eval_compression-pairs_samples_per_second": 385.033, + "eval_compression-pairs_steps_per_second": 12.032, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_sciq_pairs_loss": 0.25230515003204346, + "eval_sciq_pairs_runtime": 8.1954, + "eval_sciq_pairs_samples_per_second": 19.523, + "eval_sciq_pairs_steps_per_second": 0.61, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_qasc_pairs_loss": 0.17636016011238098, + "eval_qasc_pairs_runtime": 1.4827, + "eval_qasc_pairs_samples_per_second": 107.91, + "eval_qasc_pairs_steps_per_second": 3.372, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_qasc_facts_sym_loss": 0.12885577976703644, + "eval_qasc_facts_sym_runtime": 0.3403, + "eval_qasc_facts_sym_samples_per_second": 470.225, + "eval_qasc_facts_sym_steps_per_second": 14.695, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_openbookqa_pairs_loss": 1.6650470495224, + "eval_openbookqa_pairs_runtime": 1.344, + "eval_openbookqa_pairs_samples_per_second": 119.045, + "eval_openbookqa_pairs_steps_per_second": 3.72, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_msmarco_pairs_loss": 0.49048948287963867, + "eval_msmarco_pairs_runtime": 3.29, + "eval_msmarco_pairs_samples_per_second": 48.633, + "eval_msmarco_pairs_steps_per_second": 1.52, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_nq_pairs_loss": 0.4190645217895508, + "eval_nq_pairs_runtime": 7.8722, + "eval_nq_pairs_samples_per_second": 20.325, + "eval_nq_pairs_steps_per_second": 0.635, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_trivia_pairs_loss": 0.6449421644210815, + "eval_trivia_pairs_runtime": 10.3733, + "eval_trivia_pairs_samples_per_second": 15.424, + "eval_trivia_pairs_steps_per_second": 0.482, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_quora_pairs_loss": 0.18587273359298706, + "eval_quora_pairs_runtime": 4.2871, + "eval_quora_pairs_samples_per_second": 157.45, + "eval_quora_pairs_steps_per_second": 5.132, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_gooaq_pairs_loss": 0.4795718193054199, + "eval_gooaq_pairs_runtime": 2.2929, + "eval_gooaq_pairs_samples_per_second": 69.782, + "eval_gooaq_pairs_steps_per_second": 2.181, + "step": 7752 + }, + { + "epoch": 1.5005807200929153, + "eval_mrpc_pairs_loss": 0.03764195367693901, + "eval_mrpc_pairs_runtime": 0.3793, + "eval_mrpc_pairs_samples_per_second": 421.87, + "eval_mrpc_pairs_steps_per_second": 13.183, + "step": 7752 + }, + { + "epoch": 1.5098722415795587, + "grad_norm": 0.42758312821388245, + "learning_rate": 2.0448258524144327e-05, + "loss": 0.3881, + "step": 7800 + }, + { + "epoch": 1.5224545102593883, + "grad_norm": 0.5778383612632751, + "learning_rate": 2.0031601678240954e-05, + "loss": 0.3704, + "step": 7865 + }, + { + "epoch": 1.5350367789392179, + "grad_norm": 19.273122787475586, + "learning_rate": 1.9610522874204793e-05, + "loss": 0.451, + "step": 7930 + }, + { + "epoch": 1.5476190476190477, + "grad_norm": 5.756499290466309, + "learning_rate": 1.918539217170294e-05, + "loss": 0.4553, + "step": 7995 + }, + { + "epoch": 1.5602013162988773, + "grad_norm": 5.21534538269043, + "learning_rate": 1.8756583191361127e-05, + "loss": 0.411, + "step": 8060 + }, + { + "epoch": 1.572783584978707, + "grad_norm": 5.580538272857666, + "learning_rate": 1.8324472786412036e-05, + "loss": 0.446, + "step": 8125 + }, + { + "epoch": 1.5853658536585367, + "grad_norm": 16.46262550354004, + "learning_rate": 1.7889440711502724e-05, + "loss": 0.2831, + "step": 8190 + }, + { + "epoch": 1.5979481223383663, + "grad_norm": 0.19373361766338348, + "learning_rate": 1.7451869288952144e-05, + "loss": 0.3546, + "step": 8255 + }, + { + "epoch": 1.6105303910181958, + "grad_norm": 1.8671798706054688, + "learning_rate": 1.701214307275211e-05, + "loss": 0.3713, + "step": 8320 + }, + { + "epoch": 1.6231126596980254, + "grad_norm": 13.042320251464844, + "learning_rate": 1.657064851060702e-05, + "loss": 0.4221, + "step": 8385 + }, + { + "epoch": 1.6356949283778552, + "grad_norm": 0.6017350554466248, + "learning_rate": 1.612777360430923e-05, + "loss": 0.3154, + "step": 8450 + }, + { + "epoch": 1.6482771970576848, + "grad_norm": 4.0184407234191895, + "learning_rate": 1.5683907568748753e-05, + "loss": 0.3379, + "step": 8515 + }, + { + "epoch": 1.6608594657375146, + "grad_norm": 2.0760459899902344, + "learning_rate": 1.5239440489856724e-05, + "loss": 0.388, + "step": 8580 + }, + { + "epoch": 1.6734417344173442, + "grad_norm": 0.24721883237361908, + "learning_rate": 1.4794762981783464e-05, + "loss": 0.3323, + "step": 8645 + }, + { + "epoch": 1.6860240030971738, + "grad_norm": 0.26158830523490906, + "learning_rate": 1.435026584361225e-05, + "loss": 0.4388, + "step": 8710 + }, + { + "epoch": 1.6986062717770034, + "grad_norm": 8.231218338012695, + "learning_rate": 1.3906339715910606e-05, + "loss": 0.3223, + "step": 8775 + }, + { + "epoch": 1.711188540456833, + "grad_norm": 21.496234893798828, + "learning_rate": 1.346337473742089e-05, + "loss": 0.3775, + "step": 8840 + }, + { + "epoch": 1.7237708091366628, + "grad_norm": 1.0337002277374268, + "learning_rate": 1.3021760202191877e-05, + "loss": 0.4348, + "step": 8905 + }, + { + "epoch": 1.7363530778164924, + "grad_norm": 0.8793113231658936, + "learning_rate": 1.2581884217452726e-05, + "loss": 0.3758, + "step": 8970 + }, + { + "epoch": 1.7489353464963222, + "grad_norm": 9.16724967956543, + "learning_rate": 1.214413336252998e-05, + "loss": 0.2605, + "step": 9035 + }, + { + "epoch": 1.7506775067750677, + "eval_StS-test_pearson_cosine": 0.8777802459074545, + "eval_StS-test_pearson_dot": 0.7844466254188373, + "eval_StS-test_pearson_euclidean": 0.8614201032938403, + "eval_StS-test_pearson_manhattan": 0.8614174583863513, + "eval_StS-test_pearson_max": 0.8777802459074545, + "eval_StS-test_spearman_cosine": 0.892518681200198, + "eval_StS-test_spearman_dot": 0.7893342074296325, + "eval_StS-test_spearman_euclidean": 0.8654693632229482, + "eval_StS-test_spearman_manhattan": 0.8680543609352334, + "eval_StS-test_spearman_max": 0.892518681200198, + "eval_Vitaminc-test_cosine_accuracy": 0.5815789473684211, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7277665138244629, + "eval_Vitaminc-test_cosine_ap": 0.5656681584201257, + "eval_Vitaminc-test_cosine_f1": 0.6703499079189686, + "eval_Vitaminc-test_cosine_f1_threshold": 0.4397270381450653, + "eval_Vitaminc-test_cosine_precision": 0.5083798882681564, + "eval_Vitaminc-test_cosine_recall": 0.9837837837837838, + "eval_Vitaminc-test_dot_accuracy": 0.5657894736842105, + "eval_Vitaminc-test_dot_accuracy_threshold": 20.005393981933594, + "eval_Vitaminc-test_dot_ap": 0.5480790470255866, + "eval_Vitaminc-test_dot_f1": 0.6666666666666667, + "eval_Vitaminc-test_dot_f1_threshold": 12.140420913696289, + "eval_Vitaminc-test_dot_precision": 0.5100286532951289, + "eval_Vitaminc-test_dot_recall": 0.9621621621621622, + "eval_Vitaminc-test_euclidean_accuracy": 0.5657894736842105, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.4636647701263428, + "eval_Vitaminc-test_euclidean_ap": 0.5552496987243565, + "eval_Vitaminc-test_euclidean_f1": 0.663023679417122, + "eval_Vitaminc-test_euclidean_f1_threshold": 6.34542989730835, + "eval_Vitaminc-test_euclidean_precision": 0.5, + "eval_Vitaminc-test_euclidean_recall": 0.9837837837837838, + "eval_Vitaminc-test_manhattan_accuracy": 0.5684210526315789, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 75.46658325195312, + "eval_Vitaminc-test_manhattan_ap": 0.5568456847059792, + "eval_Vitaminc-test_manhattan_f1": 0.6618962432915921, + "eval_Vitaminc-test_manhattan_f1_threshold": 144.27662658691406, + "eval_Vitaminc-test_manhattan_precision": 0.4946524064171123, + "eval_Vitaminc-test_manhattan_recall": 1.0, + "eval_Vitaminc-test_max_accuracy": 0.5815789473684211, + "eval_Vitaminc-test_max_accuracy_threshold": 75.46658325195312, + "eval_Vitaminc-test_max_ap": 0.5656681584201257, + "eval_Vitaminc-test_max_f1": 0.6703499079189686, + "eval_Vitaminc-test_max_f1_threshold": 144.27662658691406, + "eval_Vitaminc-test_max_precision": 0.5100286532951289, + "eval_Vitaminc-test_max_recall": 1.0, + "eval_mrpc-test_cosine_accuracy": 0.7447368421052631, + "eval_mrpc-test_cosine_accuracy_threshold": 0.719977617263794, + "eval_mrpc-test_cosine_ap": 0.850964159121461, + "eval_mrpc-test_cosine_f1": 0.8295254833040421, + "eval_mrpc-test_cosine_f1_threshold": 0.700160562992096, + "eval_mrpc-test_cosine_precision": 0.735202492211838, + "eval_mrpc-test_cosine_recall": 0.9516129032258065, + "eval_mrpc-test_dot_accuracy": 0.7026315789473684, + "eval_mrpc-test_dot_accuracy_threshold": 12.873937606811523, + "eval_mrpc-test_dot_ap": 0.7712669296028235, + "eval_mrpc-test_dot_f1": 0.8045977011494253, + "eval_mrpc-test_dot_f1_threshold": 9.337194442749023, + "eval_mrpc-test_dot_precision": 0.6786703601108033, + "eval_mrpc-test_dot_recall": 0.9879032258064516, + "eval_mrpc-test_euclidean_accuracy": 0.7368421052631579, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.320279836654663, + "eval_mrpc-test_euclidean_ap": 0.8234369135440149, + "eval_mrpc-test_euclidean_f1": 0.8226950354609929, + "eval_mrpc-test_euclidean_f1_threshold": 3.4916229248046875, + "eval_mrpc-test_euclidean_precision": 0.7341772151898734, + "eval_mrpc-test_euclidean_recall": 0.9354838709677419, + "eval_mrpc-test_manhattan_accuracy": 0.7394736842105263, + "eval_mrpc-test_manhattan_accuracy_threshold": 67.87750244140625, + "eval_mrpc-test_manhattan_ap": 0.8215377503250268, + "eval_mrpc-test_manhattan_f1": 0.8235294117647058, + "eval_mrpc-test_manhattan_f1_threshold": 69.14122009277344, + "eval_mrpc-test_manhattan_precision": 0.7380191693290735, + "eval_mrpc-test_manhattan_recall": 0.9314516129032258, + "eval_mrpc-test_max_accuracy": 0.7447368421052631, + "eval_mrpc-test_max_accuracy_threshold": 67.87750244140625, + "eval_mrpc-test_max_ap": 0.850964159121461, + "eval_mrpc-test_max_f1": 0.8295254833040421, + "eval_mrpc-test_max_f1_threshold": 69.14122009277344, + "eval_mrpc-test_max_precision": 0.7380191693290735, + "eval_mrpc-test_max_recall": 0.9879032258064516, + "eval_nli-pairs_loss": 0.7929299473762512, + "eval_nli-pairs_runtime": 3.0029, + "eval_nli-pairs_samples_per_second": 53.282, + "eval_nli-pairs_steps_per_second": 1.665, + "eval_sequential_score": 0.5656681584201257, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_vitaminc-pairs_loss": 5.716288089752197, + "eval_vitaminc-pairs_runtime": 1.5553, + "eval_vitaminc-pairs_samples_per_second": 85.514, + "eval_vitaminc-pairs_steps_per_second": 3.215, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_qnli-contrastive_loss": 0.138463094830513, + "eval_qnli-contrastive_runtime": 0.5207, + "eval_qnli-contrastive_samples_per_second": 307.265, + "eval_qnli-contrastive_steps_per_second": 9.602, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_scitail-pairs-qa_loss": 0.0497589036822319, + "eval_scitail-pairs-qa_runtime": 1.2114, + "eval_scitail-pairs-qa_samples_per_second": 132.076, + "eval_scitail-pairs-qa_steps_per_second": 4.127, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_scitail-pairs-pos_loss": 0.28213563561439514, + "eval_scitail-pairs-pos_runtime": 2.5132, + "eval_scitail-pairs-pos_samples_per_second": 63.665, + "eval_scitail-pairs-pos_steps_per_second": 1.99, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_xsum-pairs_loss": 0.2819857597351074, + "eval_xsum-pairs_runtime": 1.404, + "eval_xsum-pairs_samples_per_second": 113.962, + "eval_xsum-pairs_steps_per_second": 3.561, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_compression-pairs_loss": 0.06524467468261719, + "eval_compression-pairs_runtime": 0.4016, + "eval_compression-pairs_samples_per_second": 398.384, + "eval_compression-pairs_steps_per_second": 12.449, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_sciq_pairs_loss": 0.25192952156066895, + "eval_sciq_pairs_runtime": 7.9683, + "eval_sciq_pairs_samples_per_second": 20.08, + "eval_sciq_pairs_steps_per_second": 0.627, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_qasc_pairs_loss": 0.15719862282276154, + "eval_qasc_pairs_runtime": 1.4347, + "eval_qasc_pairs_samples_per_second": 111.525, + "eval_qasc_pairs_steps_per_second": 3.485, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_qasc_facts_sym_loss": 0.11949674040079117, + "eval_qasc_facts_sym_runtime": 0.3223, + "eval_qasc_facts_sym_samples_per_second": 496.415, + "eval_qasc_facts_sym_steps_per_second": 15.513, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_openbookqa_pairs_loss": 1.6055030822753906, + "eval_openbookqa_pairs_runtime": 1.2319, + "eval_openbookqa_pairs_samples_per_second": 129.884, + "eval_openbookqa_pairs_steps_per_second": 4.059, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_msmarco_pairs_loss": 0.46120986342430115, + "eval_msmarco_pairs_runtime": 3.1151, + "eval_msmarco_pairs_samples_per_second": 51.364, + "eval_msmarco_pairs_steps_per_second": 1.605, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_nq_pairs_loss": 0.38706856966018677, + "eval_nq_pairs_runtime": 7.6899, + "eval_nq_pairs_samples_per_second": 20.807, + "eval_nq_pairs_steps_per_second": 0.65, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_trivia_pairs_loss": 0.6729233860969543, + "eval_trivia_pairs_runtime": 10.192, + "eval_trivia_pairs_samples_per_second": 15.699, + "eval_trivia_pairs_steps_per_second": 0.491, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_quora_pairs_loss": 0.18244178593158722, + "eval_quora_pairs_runtime": 3.9178, + "eval_quora_pairs_samples_per_second": 172.291, + "eval_quora_pairs_steps_per_second": 5.615, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_gooaq_pairs_loss": 0.4615425169467926, + "eval_gooaq_pairs_runtime": 2.2929, + "eval_gooaq_pairs_samples_per_second": 69.779, + "eval_gooaq_pairs_steps_per_second": 2.181, + "step": 9044 + }, + { + "epoch": 1.7506775067750677, + "eval_mrpc_pairs_loss": 0.03375188261270523, + "eval_mrpc_pairs_runtime": 0.3787, + "eval_mrpc_pairs_samples_per_second": 422.446, + "eval_mrpc_pairs_steps_per_second": 13.201, + "step": 9044 + }, + { + "epoch": 1.7615176151761518, + "grad_norm": 17.31819725036621, + "learning_rate": 1.1708892349107267e-05, + "loss": 0.325, + "step": 9100 + }, + { + "epoch": 1.7740998838559814, + "grad_norm": 1.4742263555526733, + "learning_rate": 1.1276543683126467e-05, + "loss": 0.2905, + "step": 9165 + }, + { + "epoch": 1.786682152535811, + "grad_norm": 17.93797492980957, + "learning_rate": 1.0847467328627261e-05, + "loss": 0.3873, + "step": 9230 + }, + { + "epoch": 1.7992644212156406, + "grad_norm": 10.806224822998047, + "learning_rate": 1.042204037382065e-05, + "loss": 0.2658, + "step": 9295 + }, + { + "epoch": 1.8118466898954704, + "grad_norm": 11.624892234802246, + "learning_rate": 1.0000636699689883e-05, + "loss": 0.3357, + "step": 9360 + }, + { + "epoch": 1.8244289585753002, + "grad_norm": 0.8121838569641113, + "learning_rate": 9.583626651409943e-06, + "loss": 0.2736, + "step": 9425 + }, + { + "epoch": 1.8370112272551298, + "grad_norm": 4.140751361846924, + "learning_rate": 9.171376712874502e-06, + "loss": 0.2758, + "step": 9490 + }, + { + "epoch": 1.8495934959349594, + "grad_norm": 0.2148134708404541, + "learning_rate": 8.764249184616278e-06, + "loss": 0.2423, + "step": 9555 + }, + { + "epoch": 1.862175764614789, + "grad_norm": 15.606979370117188, + "learning_rate": 8.362601865403897e-06, + "loss": 0.2992, + "step": 9620 + }, + { + "epoch": 1.8747580332946185, + "grad_norm": 0.496015340089798, + "learning_rate": 7.966787737795051e-06, + "loss": 0.2551, + "step": 9685 + }, + { + "epoch": 1.8873403019744484, + "grad_norm": 8.67402458190918, + "learning_rate": 7.577154657922359e-06, + "loss": 0.3565, + "step": 9750 + }, + { + "epoch": 1.899922570654278, + "grad_norm": 1.7114161252975464, + "learning_rate": 7.199887940715319e-06, + "loss": 0.2705, + "step": 9815 + }, + { + "epoch": 1.9125048393341078, + "grad_norm": 13.371696472167969, + "learning_rate": 6.823530434028392e-06, + "loss": 0.3363, + "step": 9880 + }, + { + "epoch": 1.9250871080139373, + "grad_norm": 1.002062201499939, + "learning_rate": 6.4543587119895626e-06, + "loss": 0.2787, + "step": 9945 + }, + { + "epoch": 1.937669376693767, + "grad_norm": 0.24811969697475433, + "learning_rate": 6.092697216397478e-06, + "loss": 0.273, + "step": 10010 + }, + { + "epoch": 1.9502516453735965, + "grad_norm": 6.3729352951049805, + "learning_rate": 5.7388637887853255e-06, + "loss": 0.264, + "step": 10075 + }, + { + "epoch": 1.962833914053426, + "grad_norm": 2.160889148712158, + "learning_rate": 5.393169391089864e-06, + "loss": 0.3532, + "step": 10140 + }, + { + "epoch": 1.975416182733256, + "grad_norm": 0.7218418121337891, + "learning_rate": 5.055917832366561e-06, + "loss": 0.221, + "step": 10205 + }, + { + "epoch": 1.9879984514130855, + "grad_norm": 0.2302940934896469, + "learning_rate": 4.727405501790925e-06, + "loss": 0.2625, + "step": 10270 + }, + { + "epoch": 2.0005807200929153, + "grad_norm": 8.200091361999512, + "learning_rate": 4.407921108180699e-06, + "loss": 0.2808, + "step": 10335 + }, + { + "epoch": 2.00077429345722, + "eval_StS-test_pearson_cosine": 0.8760335462570302, + "eval_StS-test_pearson_dot": 0.7858533850659265, + "eval_StS-test_pearson_euclidean": 0.8558007686663589, + "eval_StS-test_pearson_manhattan": 0.8557533448194315, + "eval_StS-test_pearson_max": 0.8760335462570302, + "eval_StS-test_spearman_cosine": 0.8899595181393952, + "eval_StS-test_spearman_dot": 0.7894333862187969, + "eval_StS-test_spearman_euclidean": 0.8594150783294261, + "eval_StS-test_spearman_manhattan": 0.8616158403353453, + "eval_StS-test_spearman_max": 0.8899595181393952, + "eval_Vitaminc-test_cosine_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7607381343841553, + "eval_Vitaminc-test_cosine_ap": 0.562686091779982, + "eval_Vitaminc-test_cosine_f1": 0.673076923076923, + "eval_Vitaminc-test_cosine_f1_threshold": 0.5096132159233093, + "eval_Vitaminc-test_cosine_precision": 0.5223880597014925, + "eval_Vitaminc-test_cosine_recall": 0.9459459459459459, + "eval_Vitaminc-test_dot_accuracy": 0.5710526315789474, + "eval_Vitaminc-test_dot_accuracy_threshold": 14.656951904296875, + "eval_Vitaminc-test_dot_ap": 0.5483043888699137, + "eval_Vitaminc-test_dot_f1": 0.6691176470588236, + "eval_Vitaminc-test_dot_f1_threshold": 9.500975608825684, + "eval_Vitaminc-test_dot_precision": 0.5069637883008357, + "eval_Vitaminc-test_dot_recall": 0.9837837837837838, + "eval_Vitaminc-test_euclidean_accuracy": 0.5710526315789474, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.3688926696777344, + "eval_Vitaminc-test_euclidean_ap": 0.552131519622326, + "eval_Vitaminc-test_euclidean_f1": 0.6666666666666666, + "eval_Vitaminc-test_euclidean_f1_threshold": 5.8364458084106445, + "eval_Vitaminc-test_euclidean_precision": 0.5013623978201635, + "eval_Vitaminc-test_euclidean_recall": 0.9945945945945946, + "eval_Vitaminc-test_manhattan_accuracy": 0.5657894736842105, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 67.55563354492188, + "eval_Vitaminc-test_manhattan_ap": 0.5549339934603554, + "eval_Vitaminc-test_manhattan_f1": 0.6642066420664207, + "eval_Vitaminc-test_manhattan_f1_threshold": 115.51136779785156, + "eval_Vitaminc-test_manhattan_precision": 0.5042016806722689, + "eval_Vitaminc-test_manhattan_recall": 0.972972972972973, + "eval_Vitaminc-test_max_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_max_accuracy_threshold": 67.55563354492188, + "eval_Vitaminc-test_max_ap": 0.562686091779982, + "eval_Vitaminc-test_max_f1": 0.673076923076923, + "eval_Vitaminc-test_max_f1_threshold": 115.51136779785156, + "eval_Vitaminc-test_max_precision": 0.5223880597014925, + "eval_Vitaminc-test_max_recall": 0.9945945945945946, + "eval_mrpc-test_cosine_accuracy": 0.7394736842105263, + "eval_mrpc-test_cosine_accuracy_threshold": 0.6881712079048157, + "eval_mrpc-test_cosine_ap": 0.8489212567353354, + "eval_mrpc-test_cosine_f1": 0.8275862068965517, + "eval_mrpc-test_cosine_f1_threshold": 0.6637799143791199, + "eval_mrpc-test_cosine_precision": 0.7228915662650602, + "eval_mrpc-test_cosine_recall": 0.967741935483871, + "eval_mrpc-test_dot_accuracy": 0.6894736842105263, + "eval_mrpc-test_dot_accuracy_threshold": 11.309194564819336, + "eval_mrpc-test_dot_ap": 0.7692877575360217, + "eval_mrpc-test_dot_f1": 0.8065573770491804, + "eval_mrpc-test_dot_f1_threshold": 7.328646659851074, + "eval_mrpc-test_dot_precision": 0.6795580110497238, + "eval_mrpc-test_dot_recall": 0.9919354838709677, + "eval_mrpc-test_euclidean_accuracy": 0.7421052631578947, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.1678833961486816, + "eval_mrpc-test_euclidean_ap": 0.8238273284368561, + "eval_mrpc-test_euclidean_f1": 0.8237410071942447, + "eval_mrpc-test_euclidean_f1_threshold": 3.2550790309906006, + "eval_mrpc-test_euclidean_precision": 0.7435064935064936, + "eval_mrpc-test_euclidean_recall": 0.9233870967741935, + "eval_mrpc-test_manhattan_accuracy": 0.7421052631578947, + "eval_mrpc-test_manhattan_accuracy_threshold": 64.18893432617188, + "eval_mrpc-test_manhattan_ap": 0.8221281490148798, + "eval_mrpc-test_manhattan_f1": 0.8243727598566308, + "eval_mrpc-test_manhattan_f1_threshold": 64.66268157958984, + "eval_mrpc-test_manhattan_precision": 0.7419354838709677, + "eval_mrpc-test_manhattan_recall": 0.9274193548387096, + "eval_mrpc-test_max_accuracy": 0.7421052631578947, + "eval_mrpc-test_max_accuracy_threshold": 64.18893432617188, + "eval_mrpc-test_max_ap": 0.8489212567353354, + "eval_mrpc-test_max_f1": 0.8275862068965517, + "eval_mrpc-test_max_f1_threshold": 64.66268157958984, + "eval_mrpc-test_max_precision": 0.7435064935064936, + "eval_mrpc-test_max_recall": 0.9919354838709677, + "eval_nli-pairs_loss": 0.74864262342453, + "eval_nli-pairs_runtime": 3.0791, + "eval_nli-pairs_samples_per_second": 51.964, + "eval_nli-pairs_steps_per_second": 1.624, + "eval_sequential_score": 0.562686091779982, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_vitaminc-pairs_loss": 5.761841773986816, + "eval_vitaminc-pairs_runtime": 1.6287, + "eval_vitaminc-pairs_samples_per_second": 81.658, + "eval_vitaminc-pairs_steps_per_second": 3.07, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_qnli-contrastive_loss": 0.11774346977472305, + "eval_qnli-contrastive_runtime": 0.5414, + "eval_qnli-contrastive_samples_per_second": 295.503, + "eval_qnli-contrastive_steps_per_second": 9.234, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_scitail-pairs-qa_loss": 0.044219307601451874, + "eval_scitail-pairs-qa_runtime": 1.272, + "eval_scitail-pairs-qa_samples_per_second": 125.783, + "eval_scitail-pairs-qa_steps_per_second": 3.931, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_scitail-pairs-pos_loss": 0.30131369829177856, + "eval_scitail-pairs-pos_runtime": 2.7269, + "eval_scitail-pairs-pos_samples_per_second": 58.675, + "eval_scitail-pairs-pos_steps_per_second": 1.834, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_xsum-pairs_loss": 0.2668481469154358, + "eval_xsum-pairs_runtime": 1.4505, + "eval_xsum-pairs_samples_per_second": 110.31, + "eval_xsum-pairs_steps_per_second": 3.447, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_compression-pairs_loss": 0.05984542891383171, + "eval_compression-pairs_runtime": 0.4143, + "eval_compression-pairs_samples_per_second": 386.167, + "eval_compression-pairs_steps_per_second": 12.068, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_sciq_pairs_loss": 0.21106968820095062, + "eval_sciq_pairs_runtime": 8.1848, + "eval_sciq_pairs_samples_per_second": 19.548, + "eval_sciq_pairs_steps_per_second": 0.611, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_qasc_pairs_loss": 0.14702823758125305, + "eval_qasc_pairs_runtime": 1.4839, + "eval_qasc_pairs_samples_per_second": 107.825, + "eval_qasc_pairs_steps_per_second": 3.37, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_qasc_facts_sym_loss": 0.11709441989660263, + "eval_qasc_facts_sym_runtime": 0.3298, + "eval_qasc_facts_sym_samples_per_second": 485.098, + "eval_qasc_facts_sym_steps_per_second": 15.159, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_openbookqa_pairs_loss": 1.611570119857788, + "eval_openbookqa_pairs_runtime": 1.3171, + "eval_openbookqa_pairs_samples_per_second": 121.475, + "eval_openbookqa_pairs_steps_per_second": 3.796, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_msmarco_pairs_loss": 0.45828428864479065, + "eval_msmarco_pairs_runtime": 3.2279, + "eval_msmarco_pairs_samples_per_second": 49.567, + "eval_msmarco_pairs_steps_per_second": 1.549, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_nq_pairs_loss": 0.361095130443573, + "eval_nq_pairs_runtime": 7.8042, + "eval_nq_pairs_samples_per_second": 20.502, + "eval_nq_pairs_steps_per_second": 0.641, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_trivia_pairs_loss": 0.5992080569267273, + "eval_trivia_pairs_runtime": 10.3232, + "eval_trivia_pairs_samples_per_second": 15.499, + "eval_trivia_pairs_steps_per_second": 0.484, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_quora_pairs_loss": 0.1803085058927536, + "eval_quora_pairs_runtime": 4.3744, + "eval_quora_pairs_samples_per_second": 154.308, + "eval_quora_pairs_steps_per_second": 5.029, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_gooaq_pairs_loss": 0.44401103258132935, + "eval_gooaq_pairs_runtime": 2.2868, + "eval_gooaq_pairs_samples_per_second": 69.968, + "eval_gooaq_pairs_steps_per_second": 2.187, + "step": 10336 + }, + { + "epoch": 2.00077429345722, + "eval_mrpc_pairs_loss": 0.03005302883684635, + "eval_mrpc_pairs_runtime": 0.3764, + "eval_mrpc_pairs_samples_per_second": 425.052, + "eval_mrpc_pairs_steps_per_second": 13.283, + "step": 10336 + }, + { + "epoch": 2.013162988772745, + "grad_norm": 0.748429536819458, + "learning_rate": 4.097745426267871e-06, + "loss": 0.2003, + "step": 10400 + }, + { + "epoch": 2.0257452574525745, + "grad_norm": 0.1598404049873352, + "learning_rate": 3.797151049943393e-06, + "loss": 0.216, + "step": 10465 + }, + { + "epoch": 2.038327526132404, + "grad_norm": 1.4139065742492676, + "learning_rate": 3.5064021526915962e-06, + "loss": 0.2704, + "step": 10530 + }, + { + "epoch": 2.0509097948122337, + "grad_norm": 10.644986152648926, + "learning_rate": 3.225754255424692e-06, + "loss": 0.2788, + "step": 10595 + }, + { + "epoch": 2.0634920634920633, + "grad_norm": 0.1797364354133606, + "learning_rate": 2.955454001921588e-06, + "loss": 0.264, + "step": 10660 + }, + { + "epoch": 2.0760743321718933, + "grad_norm": 8.84312629699707, + "learning_rate": 2.6957389420681128e-06, + "loss": 0.2625, + "step": 10725 + }, + { + "epoch": 2.088656600851723, + "grad_norm": 2.1348655223846436, + "learning_rate": 2.446837323089423e-06, + "loss": 0.3213, + "step": 10790 + }, + { + "epoch": 2.1012388695315525, + "grad_norm": 0.9930692315101624, + "learning_rate": 2.208967888957853e-06, + "loss": 0.2799, + "step": 10855 + }, + { + "epoch": 2.113821138211382, + "grad_norm": 3.232008934020996, + "learning_rate": 1.982339688152608e-06, + "loss": 0.2719, + "step": 10920 + }, + { + "epoch": 2.1264034068912117, + "grad_norm": 1.367380142211914, + "learning_rate": 1.7671518899402124e-06, + "loss": 0.2086, + "step": 10985 + }, + { + "epoch": 2.1389856755710412, + "grad_norm": 2.2108681201934814, + "learning_rate": 1.5635936093371479e-06, + "loss": 0.208, + "step": 11050 + }, + { + "epoch": 2.1515679442508713, + "grad_norm": 0.841842770576477, + "learning_rate": 1.3718437409086144e-06, + "loss": 0.3249, + "step": 11115 + }, + { + "epoch": 2.164150212930701, + "grad_norm": 0.215606689453125, + "learning_rate": 1.1920708015492988e-06, + "loss": 0.2545, + "step": 11180 + }, + { + "epoch": 2.1767324816105305, + "grad_norm": 0.4588742256164551, + "learning_rate": 1.0244327823845117e-06, + "loss": 0.274, + "step": 11245 + }, + { + "epoch": 2.18931475029036, + "grad_norm": 7.016474723815918, + "learning_rate": 8.690770099217205e-07, + "loss": 0.216, + "step": 11310 + }, + { + "epoch": 2.2018970189701896, + "grad_norm": 2.9576404094696045, + "learning_rate": 7.261400165745497e-07, + "loss": 0.2467, + "step": 11375 + }, + { + "epoch": 2.2144792876500192, + "grad_norm": 5.329019546508789, + "learning_rate": 5.957474206730256e-07, + "loss": 0.2578, + "step": 11440 + }, + { + "epoch": 2.227061556329849, + "grad_norm": 0.16189192235469818, + "learning_rate": 4.780138160655507e-07, + "loss": 0.2608, + "step": 11505 + }, + { + "epoch": 2.239643825009679, + "grad_norm": 8.813294410705566, + "learning_rate": 3.7304267140955304e-07, + "loss": 0.2476, + "step": 11570 + }, + { + "epoch": 2.250871080139373, + "eval_StS-test_pearson_cosine": 0.8778872221291931, + "eval_StS-test_pearson_dot": 0.7893324466927049, + "eval_StS-test_pearson_euclidean": 0.8590444533278028, + "eval_StS-test_pearson_manhattan": 0.8591331390817218, + "eval_StS-test_pearson_max": 0.8778872221291931, + "eval_StS-test_spearman_cosine": 0.8919884140316408, + "eval_StS-test_spearman_dot": 0.7902921694428535, + "eval_StS-test_spearman_euclidean": 0.8625352693090679, + "eval_StS-test_spearman_manhattan": 0.8649523417867188, + "eval_StS-test_spearman_max": 0.8919884140316408, + "eval_Vitaminc-test_cosine_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.6914482712745667, + "eval_Vitaminc-test_cosine_ap": 0.5637651212229153, + "eval_Vitaminc-test_cosine_f1": 0.6741573033707865, + "eval_Vitaminc-test_cosine_f1_threshold": 0.4676659405231476, + "eval_Vitaminc-test_cosine_precision": 0.5157593123209169, + "eval_Vitaminc-test_cosine_recall": 0.972972972972973, + "eval_Vitaminc-test_dot_accuracy": 0.5710526315789474, + "eval_Vitaminc-test_dot_accuracy_threshold": 14.829851150512695, + "eval_Vitaminc-test_dot_ap": 0.5497462011082384, + "eval_Vitaminc-test_dot_f1": 0.667953667953668, + "eval_Vitaminc-test_dot_f1_threshold": 11.509256362915039, + "eval_Vitaminc-test_dot_precision": 0.5195195195195195, + "eval_Vitaminc-test_dot_recall": 0.9351351351351351, + "eval_Vitaminc-test_euclidean_accuracy": 0.5710526315789474, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.4082818031311035, + "eval_Vitaminc-test_euclidean_ap": 0.5529767634436812, + "eval_Vitaminc-test_euclidean_f1": 0.6654611211573237, + "eval_Vitaminc-test_euclidean_f1_threshold": 6.036683082580566, + "eval_Vitaminc-test_euclidean_precision": 0.5, + "eval_Vitaminc-test_euclidean_recall": 0.9945945945945946, + "eval_Vitaminc-test_manhattan_accuracy": 0.5710526315789474, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 68.36351013183594, + "eval_Vitaminc-test_manhattan_ap": 0.5551197831302017, + "eval_Vitaminc-test_manhattan_f1": 0.6642728904847397, + "eval_Vitaminc-test_manhattan_f1_threshold": 127.92561340332031, + "eval_Vitaminc-test_manhattan_precision": 0.49731182795698925, + "eval_Vitaminc-test_manhattan_recall": 1.0, + "eval_Vitaminc-test_max_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_max_accuracy_threshold": 68.36351013183594, + "eval_Vitaminc-test_max_ap": 0.5637651212229153, + "eval_Vitaminc-test_max_f1": 0.6741573033707865, + "eval_Vitaminc-test_max_f1_threshold": 127.92561340332031, + "eval_Vitaminc-test_max_precision": 0.5195195195195195, + "eval_Vitaminc-test_max_recall": 1.0, + "eval_mrpc-test_cosine_accuracy": 0.7394736842105263, + "eval_mrpc-test_cosine_accuracy_threshold": 0.694667637348175, + "eval_mrpc-test_cosine_ap": 0.8501212805448715, + "eval_mrpc-test_cosine_f1": 0.8253968253968255, + "eval_mrpc-test_cosine_f1_threshold": 0.694667637348175, + "eval_mrpc-test_cosine_precision": 0.7335423197492164, + "eval_mrpc-test_cosine_recall": 0.9435483870967742, + "eval_mrpc-test_dot_accuracy": 0.6894736842105263, + "eval_mrpc-test_dot_accuracy_threshold": 11.033209800720215, + "eval_mrpc-test_dot_ap": 0.7670949385599481, + "eval_mrpc-test_dot_f1": 0.8045977011494253, + "eval_mrpc-test_dot_f1_threshold": 8.026628494262695, + "eval_mrpc-test_dot_precision": 0.6786703601108033, + "eval_mrpc-test_dot_recall": 0.9879032258064516, + "eval_mrpc-test_euclidean_accuracy": 0.7421052631578947, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.2092814445495605, + "eval_mrpc-test_euclidean_ap": 0.8242245566410124, + "eval_mrpc-test_euclidean_f1": 0.8224637681159419, + "eval_mrpc-test_euclidean_f1_threshold": 3.2241902351379395, + "eval_mrpc-test_euclidean_precision": 0.7467105263157895, + "eval_mrpc-test_euclidean_recall": 0.9153225806451613, + "eval_mrpc-test_manhattan_accuracy": 0.7447368421052631, + "eval_mrpc-test_manhattan_accuracy_threshold": 65.11294555664062, + "eval_mrpc-test_manhattan_ap": 0.8224581711224794, + "eval_mrpc-test_manhattan_f1": 0.8258527827648114, + "eval_mrpc-test_manhattan_f1_threshold": 65.11294555664062, + "eval_mrpc-test_manhattan_precision": 0.7443365695792881, + "eval_mrpc-test_manhattan_recall": 0.9274193548387096, + "eval_mrpc-test_max_accuracy": 0.7447368421052631, + "eval_mrpc-test_max_accuracy_threshold": 65.11294555664062, + "eval_mrpc-test_max_ap": 0.8501212805448715, + "eval_mrpc-test_max_f1": 0.8258527827648114, + "eval_mrpc-test_max_f1_threshold": 65.11294555664062, + "eval_mrpc-test_max_precision": 0.7467105263157895, + "eval_mrpc-test_max_recall": 0.9879032258064516, + "eval_nli-pairs_loss": 0.7365724444389343, + "eval_nli-pairs_runtime": 3.0211, + "eval_nli-pairs_samples_per_second": 52.96, + "eval_nli-pairs_steps_per_second": 1.655, + "eval_sequential_score": 0.5637651212229153, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_vitaminc-pairs_loss": 5.7284016609191895, + "eval_vitaminc-pairs_runtime": 1.5892, + "eval_vitaminc-pairs_samples_per_second": 83.689, + "eval_vitaminc-pairs_steps_per_second": 3.146, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_qnli-contrastive_loss": 0.1260131299495697, + "eval_qnli-contrastive_runtime": 0.5536, + "eval_qnli-contrastive_samples_per_second": 289.007, + "eval_qnli-contrastive_steps_per_second": 9.031, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_scitail-pairs-qa_loss": 0.04436067119240761, + "eval_scitail-pairs-qa_runtime": 1.2776, + "eval_scitail-pairs-qa_samples_per_second": 125.233, + "eval_scitail-pairs-qa_steps_per_second": 3.914, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_scitail-pairs-pos_loss": 0.28413012623786926, + "eval_scitail-pairs-pos_runtime": 2.4346, + "eval_scitail-pairs-pos_samples_per_second": 65.72, + "eval_scitail-pairs-pos_steps_per_second": 2.054, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_xsum-pairs_loss": 0.2606610655784607, + "eval_xsum-pairs_runtime": 1.4129, + "eval_xsum-pairs_samples_per_second": 113.244, + "eval_xsum-pairs_steps_per_second": 3.539, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_compression-pairs_loss": 0.0600699707865715, + "eval_compression-pairs_runtime": 0.4001, + "eval_compression-pairs_samples_per_second": 399.872, + "eval_compression-pairs_steps_per_second": 12.496, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_sciq_pairs_loss": 0.2134503871202469, + "eval_sciq_pairs_runtime": 7.955, + "eval_sciq_pairs_samples_per_second": 20.113, + "eval_sciq_pairs_steps_per_second": 0.629, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_qasc_pairs_loss": 0.1396595984697342, + "eval_qasc_pairs_runtime": 1.4536, + "eval_qasc_pairs_samples_per_second": 110.075, + "eval_qasc_pairs_steps_per_second": 3.44, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_qasc_facts_sym_loss": 0.11638667434453964, + "eval_qasc_facts_sym_runtime": 0.3257, + "eval_qasc_facts_sym_samples_per_second": 491.178, + "eval_qasc_facts_sym_steps_per_second": 15.349, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_openbookqa_pairs_loss": 1.5970699787139893, + "eval_openbookqa_pairs_runtime": 1.2308, + "eval_openbookqa_pairs_samples_per_second": 129.994, + "eval_openbookqa_pairs_steps_per_second": 4.062, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_msmarco_pairs_loss": 0.45277172327041626, + "eval_msmarco_pairs_runtime": 3.1126, + "eval_msmarco_pairs_samples_per_second": 51.403, + "eval_msmarco_pairs_steps_per_second": 1.606, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_nq_pairs_loss": 0.3480191230773926, + "eval_nq_pairs_runtime": 7.6989, + "eval_nq_pairs_samples_per_second": 20.782, + "eval_nq_pairs_steps_per_second": 0.649, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_trivia_pairs_loss": 0.5698245167732239, + "eval_trivia_pairs_runtime": 10.2135, + "eval_trivia_pairs_samples_per_second": 15.666, + "eval_trivia_pairs_steps_per_second": 0.49, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_quora_pairs_loss": 0.19794899225234985, + "eval_quora_pairs_runtime": 3.9215, + "eval_quora_pairs_samples_per_second": 172.129, + "eval_quora_pairs_steps_per_second": 5.61, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_gooaq_pairs_loss": 0.429452508687973, + "eval_gooaq_pairs_runtime": 2.2884, + "eval_gooaq_pairs_samples_per_second": 69.918, + "eval_gooaq_pairs_steps_per_second": 2.185, + "step": 11628 + }, + { + "epoch": 2.250871080139373, + "eval_mrpc_pairs_loss": 0.029982861131429672, + "eval_mrpc_pairs_runtime": 0.3839, + "eval_mrpc_pairs_samples_per_second": 416.749, + "eval_mrpc_pairs_steps_per_second": 13.023, + "step": 11628 + }, + { + "epoch": 2.2522260936895084, + "grad_norm": 1.217717170715332, + "learning_rate": 2.809262392394196e-07, + "loss": 0.3296, + "step": 11635 + }, + { + "epoch": 2.264808362369338, + "grad_norm": 0.60965496301651, + "learning_rate": 2.0174547489152985e-07, + "loss": 0.2261, + "step": 11700 + }, + { + "epoch": 2.2773906310491676, + "grad_norm": 0.12040360271930695, + "learning_rate": 1.3556996535771416e-07, + "loss": 0.2999, + "step": 11765 + }, + { + "epoch": 2.289972899728997, + "grad_norm": 12.428990364074707, + "learning_rate": 8.245786812961842e-08, + "loss": 0.2888, + "step": 11830 + }, + { + "epoch": 2.302555168408827, + "grad_norm": 4.584768295288086, + "learning_rate": 4.245586008774671e-08, + "loss": 0.2253, + "step": 11895 + }, + { + "epoch": 2.315137437088657, + "grad_norm": 5.902719974517822, + "learning_rate": 1.5599096480093388e-08, + "loss": 0.2343, + "step": 11960 + }, + { + "epoch": 2.3277197057684864, + "grad_norm": 9.810800552368164, + "learning_rate": 1.9111800263971192e-09, + "loss": 0.331, + "step": 12025 + }, + { + "epoch": 2.340301974448316, + "grad_norm": 0.725075364112854, + "learning_rate": 2.99985958598248e-05, + "loss": 0.2101, + "step": 12090 + }, + { + "epoch": 2.3528842431281456, + "grad_norm": 3.3202953338623047, + "learning_rate": 2.9986216360213095e-05, + "loss": 0.2155, + "step": 12155 + }, + { + "epoch": 2.365466511807975, + "grad_norm": 0.6567480564117432, + "learning_rate": 2.9960574370109496e-05, + "loss": 0.2996, + "step": 12220 + }, + { + "epoch": 2.3780487804878048, + "grad_norm": 5.063121795654297, + "learning_rate": 2.992178447249302e-05, + "loss": 0.2964, + "step": 12285 + }, + { + "epoch": 2.3906310491676344, + "grad_norm": 12.657327651977539, + "learning_rate": 2.986988075736407e-05, + "loss": 0.2825, + "step": 12350 + }, + { + "epoch": 2.403213317847464, + "grad_norm": 4.071556568145752, + "learning_rate": 2.980490883963215e-05, + "loss": 0.2259, + "step": 12415 + }, + { + "epoch": 2.415795586527294, + "grad_norm": 21.96937370300293, + "learning_rate": 2.9728223774548642e-05, + "loss": 0.373, + "step": 12480 + }, + { + "epoch": 2.4283778552071236, + "grad_norm": 8.618860244750977, + "learning_rate": 2.963749672572836e-05, + "loss": 0.2906, + "step": 12545 + }, + { + "epoch": 2.440960123886953, + "grad_norm": 3.505587100982666, + "learning_rate": 2.9533905702009196e-05, + "loss": 0.295, + "step": 12610 + }, + { + "epoch": 2.4535423925667827, + "grad_norm": 2.3120741844177246, + "learning_rate": 2.9417541743024074e-05, + "loss": 0.3156, + "step": 12675 + }, + { + "epoch": 2.4661246612466123, + "grad_norm": 7.947314262390137, + "learning_rate": 2.9288507113735076e-05, + "loss": 0.3068, + "step": 12740 + }, + { + "epoch": 2.4787069299264424, + "grad_norm": 4.989195346832275, + "learning_rate": 2.9146915214559173e-05, + "loss": 0.3137, + "step": 12805 + }, + { + "epoch": 2.491289198606272, + "grad_norm": 26.050216674804688, + "learning_rate": 2.899289048170777e-05, + "loss": 0.2985, + "step": 12870 + }, + { + "epoch": 2.5009678668215254, + "eval_StS-test_pearson_cosine": 0.8795237562868311, + "eval_StS-test_pearson_dot": 0.7709910161255622, + "eval_StS-test_pearson_euclidean": 0.862412698225094, + "eval_StS-test_pearson_manhattan": 0.8626090591539304, + "eval_StS-test_pearson_max": 0.8795237562868311, + "eval_StS-test_spearman_cosine": 0.8945959623295415, + "eval_StS-test_spearman_dot": 0.7730415187911034, + "eval_StS-test_spearman_euclidean": 0.8705805738685102, + "eval_StS-test_spearman_manhattan": 0.8717799147229099, + "eval_StS-test_spearman_max": 0.8945959623295415, + "eval_Vitaminc-test_cosine_accuracy": 0.5763157894736842, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7639559507369995, + "eval_Vitaminc-test_cosine_ap": 0.5674566161432741, + "eval_Vitaminc-test_cosine_f1": 0.6679316888045541, + "eval_Vitaminc-test_cosine_f1_threshold": 0.4714379906654358, + "eval_Vitaminc-test_cosine_precision": 0.5146198830409356, + "eval_Vitaminc-test_cosine_recall": 0.9513513513513514, + "eval_Vitaminc-test_dot_accuracy": 0.5578947368421052, + "eval_Vitaminc-test_dot_accuracy_threshold": 16.963916778564453, + "eval_Vitaminc-test_dot_ap": 0.5484232300840398, + "eval_Vitaminc-test_dot_f1": 0.6654545454545455, + "eval_Vitaminc-test_dot_f1_threshold": 7.844925880432129, + "eval_Vitaminc-test_dot_precision": 0.5013698630136987, + "eval_Vitaminc-test_dot_recall": 0.9891891891891892, + "eval_Vitaminc-test_euclidean_accuracy": 0.5631578947368421, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.618887424468994, + "eval_Vitaminc-test_euclidean_ap": 0.5559673243165555, + "eval_Vitaminc-test_euclidean_f1": 0.6666666666666666, + "eval_Vitaminc-test_euclidean_f1_threshold": 6.2993974685668945, + "eval_Vitaminc-test_euclidean_precision": 0.5, + "eval_Vitaminc-test_euclidean_recall": 1.0, + "eval_Vitaminc-test_manhattan_accuracy": 0.5605263157894737, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 72.00088500976562, + "eval_Vitaminc-test_manhattan_ap": 0.556701937854166, + "eval_Vitaminc-test_manhattan_f1": 0.6642728904847397, + "eval_Vitaminc-test_manhattan_f1_threshold": 135.06130981445312, + "eval_Vitaminc-test_manhattan_precision": 0.49731182795698925, + "eval_Vitaminc-test_manhattan_recall": 1.0, + "eval_Vitaminc-test_max_accuracy": 0.5763157894736842, + "eval_Vitaminc-test_max_accuracy_threshold": 72.00088500976562, + "eval_Vitaminc-test_max_ap": 0.5674566161432741, + "eval_Vitaminc-test_max_f1": 0.6679316888045541, + "eval_Vitaminc-test_max_f1_threshold": 135.06130981445312, + "eval_Vitaminc-test_max_precision": 0.5146198830409356, + "eval_Vitaminc-test_max_recall": 1.0, + "eval_mrpc-test_cosine_accuracy": 0.7289473684210527, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7299143671989441, + "eval_mrpc-test_cosine_ap": 0.8489308274880782, + "eval_mrpc-test_cosine_f1": 0.8159722222222223, + "eval_mrpc-test_cosine_f1_threshold": 0.6859562397003174, + "eval_mrpc-test_cosine_precision": 0.7164634146341463, + "eval_mrpc-test_cosine_recall": 0.9475806451612904, + "eval_mrpc-test_dot_accuracy": 0.6894736842105263, + "eval_mrpc-test_dot_accuracy_threshold": 8.769765853881836, + "eval_mrpc-test_dot_ap": 0.7662559072996028, + "eval_mrpc-test_dot_f1": 0.8052373158756136, + "eval_mrpc-test_dot_f1_threshold": 8.27578353881836, + "eval_mrpc-test_dot_precision": 0.6776859504132231, + "eval_mrpc-test_dot_recall": 0.9919354838709677, + "eval_mrpc-test_euclidean_accuracy": 0.7394736842105263, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.322539806365967, + "eval_mrpc-test_euclidean_ap": 0.8234854308664945, + "eval_mrpc-test_euclidean_f1": 0.8216216216216217, + "eval_mrpc-test_euclidean_f1_threshold": 3.322539806365967, + "eval_mrpc-test_euclidean_precision": 0.742671009771987, + "eval_mrpc-test_euclidean_recall": 0.9193548387096774, + "eval_mrpc-test_manhattan_accuracy": 0.7315789473684211, + "eval_mrpc-test_manhattan_accuracy_threshold": 65.09208679199219, + "eval_mrpc-test_manhattan_ap": 0.8216181552643164, + "eval_mrpc-test_manhattan_f1": 0.8180242634315426, + "eval_mrpc-test_manhattan_f1_threshold": 70.48411560058594, + "eval_mrpc-test_manhattan_precision": 0.7173252279635258, + "eval_mrpc-test_manhattan_recall": 0.9516129032258065, + "eval_mrpc-test_max_accuracy": 0.7394736842105263, + "eval_mrpc-test_max_accuracy_threshold": 65.09208679199219, + "eval_mrpc-test_max_ap": 0.8489308274880782, + "eval_mrpc-test_max_f1": 0.8216216216216217, + "eval_mrpc-test_max_f1_threshold": 70.48411560058594, + "eval_mrpc-test_max_precision": 0.742671009771987, + "eval_mrpc-test_max_recall": 0.9919354838709677, + "eval_nli-pairs_loss": 0.844605028629303, + "eval_nli-pairs_runtime": 3.1133, + "eval_nli-pairs_samples_per_second": 51.392, + "eval_nli-pairs_steps_per_second": 1.606, + "eval_sequential_score": 0.5674566161432741, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_vitaminc-pairs_loss": 6.087428092956543, + "eval_vitaminc-pairs_runtime": 1.7072, + "eval_vitaminc-pairs_samples_per_second": 77.905, + "eval_vitaminc-pairs_steps_per_second": 2.929, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_qnli-contrastive_loss": 0.17987525463104248, + "eval_qnli-contrastive_runtime": 0.5327, + "eval_qnli-contrastive_samples_per_second": 300.332, + "eval_qnli-contrastive_steps_per_second": 9.385, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_scitail-pairs-qa_loss": 0.04853365197777748, + "eval_scitail-pairs-qa_runtime": 1.3141, + "eval_scitail-pairs-qa_samples_per_second": 121.755, + "eval_scitail-pairs-qa_steps_per_second": 3.805, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_scitail-pairs-pos_loss": 0.311461478471756, + "eval_scitail-pairs-pos_runtime": 2.6161, + "eval_scitail-pairs-pos_samples_per_second": 61.161, + "eval_scitail-pairs-pos_steps_per_second": 1.911, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_xsum-pairs_loss": 0.27614933252334595, + "eval_xsum-pairs_runtime": 1.4525, + "eval_xsum-pairs_samples_per_second": 110.151, + "eval_xsum-pairs_steps_per_second": 3.442, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_compression-pairs_loss": 0.06246453523635864, + "eval_compression-pairs_runtime": 0.4412, + "eval_compression-pairs_samples_per_second": 362.657, + "eval_compression-pairs_steps_per_second": 11.333, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_sciq_pairs_loss": 0.25065740942955017, + "eval_sciq_pairs_runtime": 8.0752, + "eval_sciq_pairs_samples_per_second": 19.814, + "eval_sciq_pairs_steps_per_second": 0.619, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_qasc_pairs_loss": 0.147993803024292, + "eval_qasc_pairs_runtime": 1.5149, + "eval_qasc_pairs_samples_per_second": 105.617, + "eval_qasc_pairs_steps_per_second": 3.301, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_qasc_facts_sym_loss": 0.12534400820732117, + "eval_qasc_facts_sym_runtime": 0.3356, + "eval_qasc_facts_sym_samples_per_second": 476.803, + "eval_qasc_facts_sym_steps_per_second": 14.9, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_openbookqa_pairs_loss": 1.685699224472046, + "eval_openbookqa_pairs_runtime": 1.3649, + "eval_openbookqa_pairs_samples_per_second": 117.226, + "eval_openbookqa_pairs_steps_per_second": 3.663, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_msmarco_pairs_loss": 0.5117918848991394, + "eval_msmarco_pairs_runtime": 3.2325, + "eval_msmarco_pairs_samples_per_second": 49.497, + "eval_msmarco_pairs_steps_per_second": 1.547, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_nq_pairs_loss": 0.40486058592796326, + "eval_nq_pairs_runtime": 7.8919, + "eval_nq_pairs_samples_per_second": 20.274, + "eval_nq_pairs_steps_per_second": 0.634, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_trivia_pairs_loss": 0.6596993803977966, + "eval_trivia_pairs_runtime": 10.3415, + "eval_trivia_pairs_samples_per_second": 15.472, + "eval_trivia_pairs_steps_per_second": 0.483, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_quora_pairs_loss": 0.16216738522052765, + "eval_quora_pairs_runtime": 4.0361, + "eval_quora_pairs_samples_per_second": 167.241, + "eval_quora_pairs_steps_per_second": 5.451, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_gooaq_pairs_loss": 0.46012258529663086, + "eval_gooaq_pairs_runtime": 2.2861, + "eval_gooaq_pairs_samples_per_second": 69.989, + "eval_gooaq_pairs_steps_per_second": 2.187, + "step": 12920 + }, + { + "epoch": 2.5009678668215254, + "eval_mrpc_pairs_loss": 0.03315501660108566, + "eval_mrpc_pairs_runtime": 0.3835, + "eval_mrpc_pairs_samples_per_second": 417.178, + "eval_mrpc_pairs_steps_per_second": 13.037, + "step": 12920 + }, + { + "epoch": 2.5038714672861015, + "grad_norm": 0.28498581051826477, + "learning_rate": 2.8826568277827527e-05, + "loss": 0.2487, + "step": 12935 + }, + { + "epoch": 2.516453735965931, + "grad_norm": 1.0256837606430054, + "learning_rate": 2.8648094773038627e-05, + "loss": 0.2624, + "step": 13000 + }, + { + "epoch": 2.5290360046457607, + "grad_norm": 4.241197109222412, + "learning_rate": 2.845762681647511e-05, + "loss": 0.2677, + "step": 13065 + }, + { + "epoch": 2.5416182733255903, + "grad_norm": 0.36626845598220825, + "learning_rate": 2.8255331798439983e-05, + "loss": 0.3675, + "step": 13130 + }, + { + "epoch": 2.55420054200542, + "grad_norm": 8.30843734741211, + "learning_rate": 2.8041387503296447e-05, + "loss": 0.2948, + "step": 13195 + }, + { + "epoch": 2.5667828106852495, + "grad_norm": 2.066960573196411, + "learning_rate": 2.7815981953224384e-05, + "loss": 0.2964, + "step": 13260 + }, + { + "epoch": 2.5793650793650795, + "grad_norm": 8.503743171691895, + "learning_rate": 2.757931324297952e-05, + "loss": 0.2434, + "step": 13325 + }, + { + "epoch": 2.591947348044909, + "grad_norm": 0.9377574920654297, + "learning_rate": 2.7331589365800378e-05, + "loss": 0.2722, + "step": 13390 + }, + { + "epoch": 2.6045296167247387, + "grad_norm": 0.3242267370223999, + "learning_rate": 2.707302803061613e-05, + "loss": 0.3233, + "step": 13455 + }, + { + "epoch": 2.6171118854045683, + "grad_norm": 9.376032829284668, + "learning_rate": 2.68038564707159e-05, + "loss": 0.2968, + "step": 13520 + }, + { + "epoch": 2.629694154084398, + "grad_norm": 17.351438522338867, + "learning_rate": 2.6524311244047752e-05, + "loss": 0.3172, + "step": 13585 + }, + { + "epoch": 2.642276422764228, + "grad_norm": 9.639175415039062, + "learning_rate": 2.6234638025322753e-05, + "loss": 0.2398, + "step": 13650 + }, + { + "epoch": 2.6548586914440575, + "grad_norm": 2.589085578918457, + "learning_rate": 2.593509139010695e-05, + "loss": 0.3085, + "step": 13715 + }, + { + "epoch": 2.667440960123887, + "grad_norm": 1.8706110715866089, + "learning_rate": 2.5625934591090952e-05, + "loss": 0.2809, + "step": 13780 + }, + { + "epoch": 2.6800232288037167, + "grad_norm": 3.431440591812134, + "learning_rate": 2.530743932673362e-05, + "loss": 0.3496, + "step": 13845 + }, + { + "epoch": 2.6926054974835463, + "grad_norm": 2.126661777496338, + "learning_rate": 2.497988550248348e-05, + "loss": 0.2281, + "step": 13910 + }, + { + "epoch": 2.705187766163376, + "grad_norm": 0.4347963333129883, + "learning_rate": 2.464356098478738e-05, + "loss": 0.3075, + "step": 13975 + }, + { + "epoch": 2.7177700348432055, + "grad_norm": 4.2169623374938965, + "learning_rate": 2.4298761348102788e-05, + "loss": 0.3335, + "step": 14040 + }, + { + "epoch": 2.730352303523035, + "grad_norm": 0.31253141164779663, + "learning_rate": 2.394578961513602e-05, + "loss": 0.3423, + "step": 14105 + }, + { + "epoch": 2.7429345722028646, + "grad_norm": 9.057765007019043, + "learning_rate": 2.3584955990534625e-05, + "loss": 0.2579, + "step": 14170 + }, + { + "epoch": 2.751064653503678, + "eval_StS-test_pearson_cosine": 0.8786355725766506, + "eval_StS-test_pearson_dot": 0.7543362401661523, + "eval_StS-test_pearson_euclidean": 0.8562858498968491, + "eval_StS-test_pearson_manhattan": 0.8534764973498815, + "eval_StS-test_pearson_max": 0.8786355725766506, + "eval_StS-test_spearman_cosine": 0.8947025850013365, + "eval_StS-test_spearman_dot": 0.7682338653004566, + "eval_StS-test_spearman_euclidean": 0.8617967923645825, + "eval_StS-test_spearman_manhattan": 0.8596798835071182, + "eval_StS-test_spearman_max": 0.8947025850013365, + "eval_Vitaminc-test_cosine_accuracy": 0.5736842105263158, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.6747879385948181, + "eval_Vitaminc-test_cosine_ap": 0.563396078554423, + "eval_Vitaminc-test_cosine_f1": 0.6703096539162112, + "eval_Vitaminc-test_cosine_f1_threshold": 0.38167619705200195, + "eval_Vitaminc-test_cosine_precision": 0.5054945054945055, + "eval_Vitaminc-test_cosine_recall": 0.9945945945945946, + "eval_Vitaminc-test_dot_accuracy": 0.5657894736842105, + "eval_Vitaminc-test_dot_accuracy_threshold": 17.101760864257812, + "eval_Vitaminc-test_dot_ap": 0.543179161382298, + "eval_Vitaminc-test_dot_f1": 0.6691176470588236, + "eval_Vitaminc-test_dot_f1_threshold": 9.36573600769043, + "eval_Vitaminc-test_dot_precision": 0.5069637883008357, + "eval_Vitaminc-test_dot_recall": 0.9837837837837838, + "eval_Vitaminc-test_euclidean_accuracy": 0.5684210526315789, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.356025457382202, + "eval_Vitaminc-test_euclidean_ap": 0.549990518332784, + "eval_Vitaminc-test_euclidean_f1": 0.6618962432915921, + "eval_Vitaminc-test_euclidean_f1_threshold": 6.548671722412109, + "eval_Vitaminc-test_euclidean_precision": 0.4946524064171123, + "eval_Vitaminc-test_euclidean_recall": 1.0, + "eval_Vitaminc-test_manhattan_accuracy": 0.5684210526315789, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 65.38542175292969, + "eval_Vitaminc-test_manhattan_ap": 0.549871250700025, + "eval_Vitaminc-test_manhattan_f1": 0.6630824372759856, + "eval_Vitaminc-test_manhattan_f1_threshold": 133.0389404296875, + "eval_Vitaminc-test_manhattan_precision": 0.4959785522788204, + "eval_Vitaminc-test_manhattan_recall": 1.0, + "eval_Vitaminc-test_max_accuracy": 0.5736842105263158, + "eval_Vitaminc-test_max_accuracy_threshold": 65.38542175292969, + "eval_Vitaminc-test_max_ap": 0.563396078554423, + "eval_Vitaminc-test_max_f1": 0.6703096539162112, + "eval_Vitaminc-test_max_f1_threshold": 133.0389404296875, + "eval_Vitaminc-test_max_precision": 0.5069637883008357, + "eval_Vitaminc-test_max_recall": 1.0, + "eval_mrpc-test_cosine_accuracy": 0.7368421052631579, + "eval_mrpc-test_cosine_accuracy_threshold": 0.6817317605018616, + "eval_mrpc-test_cosine_ap": 0.8447794771234458, + "eval_mrpc-test_cosine_f1": 0.8257839721254355, + "eval_mrpc-test_cosine_f1_threshold": 0.6817317605018616, + "eval_mrpc-test_cosine_precision": 0.7269938650306749, + "eval_mrpc-test_cosine_recall": 0.9556451612903226, + "eval_mrpc-test_dot_accuracy": 0.6868421052631579, + "eval_mrpc-test_dot_accuracy_threshold": 7.941440582275391, + "eval_mrpc-test_dot_ap": 0.7665690166165218, + "eval_mrpc-test_dot_f1": 0.8052373158756136, + "eval_mrpc-test_dot_f1_threshold": 7.941440582275391, + "eval_mrpc-test_dot_precision": 0.6776859504132231, + "eval_mrpc-test_dot_recall": 0.9919354838709677, + "eval_mrpc-test_euclidean_accuracy": 0.7394736842105263, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.0255215167999268, + "eval_mrpc-test_euclidean_ap": 0.8199592936110021, + "eval_mrpc-test_euclidean_f1": 0.8170055452865065, + "eval_mrpc-test_euclidean_f1_threshold": 3.059394359588623, + "eval_mrpc-test_euclidean_precision": 0.7542662116040956, + "eval_mrpc-test_euclidean_recall": 0.8911290322580645, + "eval_mrpc-test_manhattan_accuracy": 0.7394736842105263, + "eval_mrpc-test_manhattan_accuracy_threshold": 62.93813705444336, + "eval_mrpc-test_manhattan_ap": 0.8196183938787889, + "eval_mrpc-test_manhattan_f1": 0.8218694885361553, + "eval_mrpc-test_manhattan_f1_threshold": 64.45368957519531, + "eval_mrpc-test_manhattan_precision": 0.7304075235109718, + "eval_mrpc-test_manhattan_recall": 0.9395161290322581, + "eval_mrpc-test_max_accuracy": 0.7394736842105263, + "eval_mrpc-test_max_accuracy_threshold": 62.93813705444336, + "eval_mrpc-test_max_ap": 0.8447794771234458, + "eval_mrpc-test_max_f1": 0.8257839721254355, + "eval_mrpc-test_max_f1_threshold": 64.45368957519531, + "eval_mrpc-test_max_precision": 0.7542662116040956, + "eval_mrpc-test_max_recall": 0.9919354838709677, + "eval_nli-pairs_loss": 0.8535138964653015, + "eval_nli-pairs_runtime": 3.0393, + "eval_nli-pairs_samples_per_second": 52.643, + "eval_nli-pairs_steps_per_second": 1.645, + "eval_sequential_score": 0.563396078554423, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_vitaminc-pairs_loss": 5.795119285583496, + "eval_vitaminc-pairs_runtime": 1.5715, + "eval_vitaminc-pairs_samples_per_second": 84.631, + "eval_vitaminc-pairs_steps_per_second": 3.182, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_qnli-contrastive_loss": 0.15990221500396729, + "eval_qnli-contrastive_runtime": 0.5245, + "eval_qnli-contrastive_samples_per_second": 305.076, + "eval_qnli-contrastive_steps_per_second": 9.534, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_scitail-pairs-qa_loss": 0.044501952826976776, + "eval_scitail-pairs-qa_runtime": 1.214, + "eval_scitail-pairs-qa_samples_per_second": 131.796, + "eval_scitail-pairs-qa_steps_per_second": 4.119, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_scitail-pairs-pos_loss": 0.2757861316204071, + "eval_scitail-pairs-pos_runtime": 2.4956, + "eval_scitail-pairs-pos_samples_per_second": 64.112, + "eval_scitail-pairs-pos_steps_per_second": 2.003, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_xsum-pairs_loss": 0.27693209052085876, + "eval_xsum-pairs_runtime": 1.424, + "eval_xsum-pairs_samples_per_second": 112.363, + "eval_xsum-pairs_steps_per_second": 3.511, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_compression-pairs_loss": 0.060699742287397385, + "eval_compression-pairs_runtime": 0.3966, + "eval_compression-pairs_samples_per_second": 403.385, + "eval_compression-pairs_steps_per_second": 12.606, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_sciq_pairs_loss": 0.24653755128383636, + "eval_sciq_pairs_runtime": 7.9551, + "eval_sciq_pairs_samples_per_second": 20.113, + "eval_sciq_pairs_steps_per_second": 0.629, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_qasc_pairs_loss": 0.1548977792263031, + "eval_qasc_pairs_runtime": 1.4497, + "eval_qasc_pairs_samples_per_second": 110.365, + "eval_qasc_pairs_steps_per_second": 3.449, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_qasc_facts_sym_loss": 0.11749088764190674, + "eval_qasc_facts_sym_runtime": 0.3316, + "eval_qasc_facts_sym_samples_per_second": 482.562, + "eval_qasc_facts_sym_steps_per_second": 15.08, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_openbookqa_pairs_loss": 1.6478569507598877, + "eval_openbookqa_pairs_runtime": 1.2421, + "eval_openbookqa_pairs_samples_per_second": 128.816, + "eval_openbookqa_pairs_steps_per_second": 4.025, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_msmarco_pairs_loss": 0.45152410864830017, + "eval_msmarco_pairs_runtime": 3.128, + "eval_msmarco_pairs_samples_per_second": 51.151, + "eval_msmarco_pairs_steps_per_second": 1.598, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_nq_pairs_loss": 0.41965824365615845, + "eval_nq_pairs_runtime": 7.6797, + "eval_nq_pairs_samples_per_second": 20.834, + "eval_nq_pairs_steps_per_second": 0.651, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_trivia_pairs_loss": 0.7053402066230774, + "eval_trivia_pairs_runtime": 10.1801, + "eval_trivia_pairs_samples_per_second": 15.717, + "eval_trivia_pairs_steps_per_second": 0.491, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_quora_pairs_loss": 0.19425351917743683, + "eval_quora_pairs_runtime": 3.9249, + "eval_quora_pairs_samples_per_second": 171.98, + "eval_quora_pairs_steps_per_second": 5.605, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_gooaq_pairs_loss": 0.4620817303657532, + "eval_gooaq_pairs_runtime": 2.2762, + "eval_gooaq_pairs_samples_per_second": 70.292, + "eval_gooaq_pairs_steps_per_second": 2.197, + "step": 14212 + }, + { + "epoch": 2.751064653503678, + "eval_mrpc_pairs_loss": 0.02996860072016716, + "eval_mrpc_pairs_runtime": 0.3789, + "eval_mrpc_pairs_samples_per_second": 422.261, + "eval_mrpc_pairs_steps_per_second": 13.196, + "step": 14212 + }, + { + "epoch": 2.7555168408826947, + "grad_norm": 4.241976737976074, + "learning_rate": 2.3216577588268073e-05, + "loss": 0.2323, + "step": 14235 + }, + { + "epoch": 2.7680991095625243, + "grad_norm": 8.405671119689941, + "learning_rate": 2.2840978152936186e-05, + "loss": 0.2952, + "step": 14300 + }, + { + "epoch": 2.780681378242354, + "grad_norm": 7.208241939544678, + "learning_rate": 2.2458487775250408e-05, + "loss": 0.2626, + "step": 14365 + }, + { + "epoch": 2.7932636469221834, + "grad_norm": 0.3114963173866272, + "learning_rate": 2.2069442601937893e-05, + "loss": 0.2741, + "step": 14430 + }, + { + "epoch": 2.805845915602013, + "grad_norm": 5.421462535858154, + "learning_rate": 2.16741845403232e-05, + "loss": 0.2424, + "step": 14495 + }, + { + "epoch": 2.818428184281843, + "grad_norm": 2.307614803314209, + "learning_rate": 2.127306095784752e-05, + "loss": 0.2544, + "step": 14560 + }, + { + "epoch": 2.8310104529616726, + "grad_norm": 11.653911590576172, + "learning_rate": 2.0866424376789318e-05, + "loss": 0.2889, + "step": 14625 + }, + { + "epoch": 2.8435927216415022, + "grad_norm": 10.62498950958252, + "learning_rate": 2.0454632164454574e-05, + "loss": 0.2222, + "step": 14690 + }, + { + "epoch": 2.856174990321332, + "grad_norm": 9.023911476135254, + "learning_rate": 2.003804621910928e-05, + "loss": 0.2335, + "step": 14755 + }, + { + "epoch": 2.8687572590011614, + "grad_norm": 15.53839111328125, + "learning_rate": 1.9617032651929685e-05, + "loss": 0.2741, + "step": 14820 + }, + { + "epoch": 2.881339527680991, + "grad_norm": 12.925213813781738, + "learning_rate": 1.919196146525036e-05, + "loss": 0.2814, + "step": 14885 + }, + { + "epoch": 2.8939217963608206, + "grad_norm": 10.400435447692871, + "learning_rate": 1.8763206227392307e-05, + "loss": 0.2493, + "step": 14950 + }, + { + "epoch": 2.90650406504065, + "grad_norm": 0.2103821039199829, + "learning_rate": 1.833114374435731e-05, + "loss": 0.3201, + "step": 15015 + }, + { + "epoch": 2.91908633372048, + "grad_norm": 5.022848606109619, + "learning_rate": 1.7896153728676896e-05, + "loss": 0.3087, + "step": 15080 + }, + { + "epoch": 2.93166860240031, + "grad_norm": 0.6551499366760254, + "learning_rate": 1.7458618465706787e-05, + "loss": 0.2326, + "step": 15145 + }, + { + "epoch": 2.9442508710801394, + "grad_norm": 0.16861538589000702, + "learning_rate": 1.7018922477660426e-05, + "loss": 0.335, + "step": 15210 + }, + { + "epoch": 2.956833139759969, + "grad_norm": 1.4138991832733154, + "learning_rate": 1.6577452185676585e-05, + "loss": 0.2784, + "step": 15275 + }, + { + "epoch": 2.9694154084397986, + "grad_norm": 5.632164001464844, + "learning_rate": 1.613459557021811e-05, + "loss": 0.2806, + "step": 15340 + }, + { + "epoch": 2.9819976771196286, + "grad_norm": 3.5955312252044678, + "learning_rate": 1.5690741830100336e-05, + "loss": 0.185, + "step": 15405 + }, + { + "epoch": 2.994579945799458, + "grad_norm": 0.3451797068119049, + "learning_rate": 1.5246281040448716e-05, + "loss": 0.2096, + "step": 15470 + }, + { + "epoch": 3.0011614401858306, + "eval_StS-test_pearson_cosine": 0.87492343918585, + "eval_StS-test_pearson_dot": 0.7692943484657655, + "eval_StS-test_pearson_euclidean": 0.8576504398982409, + "eval_StS-test_pearson_manhattan": 0.8566044794239935, + "eval_StS-test_pearson_max": 0.87492343918585, + "eval_StS-test_spearman_cosine": 0.8910954765225738, + "eval_StS-test_spearman_dot": 0.7676996572528597, + "eval_StS-test_spearman_euclidean": 0.8619204921788826, + "eval_StS-test_spearman_manhattan": 0.8625681099677316, + "eval_StS-test_spearman_max": 0.8910954765225738, + "eval_Vitaminc-test_cosine_accuracy": 0.5710526315789474, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7102299928665161, + "eval_Vitaminc-test_cosine_ap": 0.5511272681050597, + "eval_Vitaminc-test_cosine_f1": 0.6741154562383612, + "eval_Vitaminc-test_cosine_f1_threshold": 0.4787493944168091, + "eval_Vitaminc-test_cosine_precision": 0.5142045454545454, + "eval_Vitaminc-test_cosine_recall": 0.9783783783783784, + "eval_Vitaminc-test_dot_accuracy": 0.5552631578947368, + "eval_Vitaminc-test_dot_accuracy_threshold": 15.257586479187012, + "eval_Vitaminc-test_dot_ap": 0.5382903253286273, + "eval_Vitaminc-test_dot_f1": 0.6718446601941749, + "eval_Vitaminc-test_dot_f1_threshold": 11.204628944396973, + "eval_Vitaminc-test_dot_precision": 0.5242424242424243, + "eval_Vitaminc-test_dot_recall": 0.9351351351351351, + "eval_Vitaminc-test_euclidean_accuracy": 0.5657894736842105, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.175092935562134, + "eval_Vitaminc-test_euclidean_ap": 0.5490562334663566, + "eval_Vitaminc-test_euclidean_f1": 0.6642468239564427, + "eval_Vitaminc-test_euclidean_f1_threshold": 5.7642717361450195, + "eval_Vitaminc-test_euclidean_precision": 0.5, + "eval_Vitaminc-test_euclidean_recall": 0.9891891891891892, + "eval_Vitaminc-test_manhattan_accuracy": 0.5684210526315789, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 66.32759094238281, + "eval_Vitaminc-test_manhattan_ap": 0.5526446299587435, + "eval_Vitaminc-test_manhattan_f1": 0.6678966789667896, + "eval_Vitaminc-test_manhattan_f1_threshold": 113.66679382324219, + "eval_Vitaminc-test_manhattan_precision": 0.5070028011204482, + "eval_Vitaminc-test_manhattan_recall": 0.9783783783783784, + "eval_Vitaminc-test_max_accuracy": 0.5710526315789474, + "eval_Vitaminc-test_max_accuracy_threshold": 66.32759094238281, + "eval_Vitaminc-test_max_ap": 0.5526446299587435, + "eval_Vitaminc-test_max_f1": 0.6741154562383612, + "eval_Vitaminc-test_max_f1_threshold": 113.66679382324219, + "eval_Vitaminc-test_max_precision": 0.5242424242424243, + "eval_Vitaminc-test_max_recall": 0.9891891891891892, + "eval_mrpc-test_cosine_accuracy": 0.7394736842105263, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7056349515914917, + "eval_mrpc-test_cosine_ap": 0.848334505377635, + "eval_mrpc-test_cosine_f1": 0.8237347294938918, + "eval_mrpc-test_cosine_f1_threshold": 0.6817628145217896, + "eval_mrpc-test_cosine_precision": 0.7261538461538461, + "eval_mrpc-test_cosine_recall": 0.9516129032258065, + "eval_mrpc-test_dot_accuracy": 0.6868421052631579, + "eval_mrpc-test_dot_accuracy_threshold": 8.654559135437012, + "eval_mrpc-test_dot_ap": 0.7545877870990263, + "eval_mrpc-test_dot_f1": 0.8045602605863192, + "eval_mrpc-test_dot_f1_threshold": 6.8338165283203125, + "eval_mrpc-test_dot_precision": 0.674863387978142, + "eval_mrpc-test_dot_recall": 0.9959677419354839, + "eval_mrpc-test_euclidean_accuracy": 0.7421052631578947, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.064603805541992, + "eval_mrpc-test_euclidean_ap": 0.8262105822554444, + "eval_mrpc-test_euclidean_f1": 0.8231046931407942, + "eval_mrpc-test_euclidean_f1_threshold": 3.064603805541992, + "eval_mrpc-test_euclidean_precision": 0.7450980392156863, + "eval_mrpc-test_euclidean_recall": 0.9193548387096774, + "eval_mrpc-test_manhattan_accuracy": 0.7421052631578947, + "eval_mrpc-test_manhattan_accuracy_threshold": 61.945098876953125, + "eval_mrpc-test_manhattan_ap": 0.8276315163519761, + "eval_mrpc-test_manhattan_f1": 0.825, + "eval_mrpc-test_manhattan_f1_threshold": 62.05877685546875, + "eval_mrpc-test_manhattan_precision": 0.7403846153846154, + "eval_mrpc-test_manhattan_recall": 0.9314516129032258, + "eval_mrpc-test_max_accuracy": 0.7421052631578947, + "eval_mrpc-test_max_accuracy_threshold": 61.945098876953125, + "eval_mrpc-test_max_ap": 0.848334505377635, + "eval_mrpc-test_max_f1": 0.825, + "eval_mrpc-test_max_f1_threshold": 62.05877685546875, + "eval_mrpc-test_max_precision": 0.7450980392156863, + "eval_mrpc-test_max_recall": 0.9959677419354839, + "eval_nli-pairs_loss": 0.7936088442802429, + "eval_nli-pairs_runtime": 3.1903, + "eval_nli-pairs_samples_per_second": 50.152, + "eval_nli-pairs_steps_per_second": 1.567, + "eval_sequential_score": 0.5526446299587435, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_vitaminc-pairs_loss": 5.943141937255859, + "eval_vitaminc-pairs_runtime": 1.6452, + "eval_vitaminc-pairs_samples_per_second": 80.84, + "eval_vitaminc-pairs_steps_per_second": 3.039, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_qnli-contrastive_loss": 0.13478858768939972, + "eval_qnli-contrastive_runtime": 0.5401, + "eval_qnli-contrastive_samples_per_second": 296.259, + "eval_qnli-contrastive_steps_per_second": 9.258, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_scitail-pairs-qa_loss": 0.03971510007977486, + "eval_scitail-pairs-qa_runtime": 1.3721, + "eval_scitail-pairs-qa_samples_per_second": 116.61, + "eval_scitail-pairs-qa_steps_per_second": 3.644, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_scitail-pairs-pos_loss": 0.2736285924911499, + "eval_scitail-pairs-pos_runtime": 2.8426, + "eval_scitail-pairs-pos_samples_per_second": 56.286, + "eval_scitail-pairs-pos_steps_per_second": 1.759, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_xsum-pairs_loss": 0.27379804849624634, + "eval_xsum-pairs_runtime": 1.4208, + "eval_xsum-pairs_samples_per_second": 112.611, + "eval_xsum-pairs_steps_per_second": 3.519, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_compression-pairs_loss": 0.055795930325984955, + "eval_compression-pairs_runtime": 0.4254, + "eval_compression-pairs_samples_per_second": 376.141, + "eval_compression-pairs_steps_per_second": 11.754, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_sciq_pairs_loss": 0.20566284656524658, + "eval_sciq_pairs_runtime": 8.2359, + "eval_sciq_pairs_samples_per_second": 19.427, + "eval_sciq_pairs_steps_per_second": 0.607, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_qasc_pairs_loss": 0.13599181175231934, + "eval_qasc_pairs_runtime": 1.4778, + "eval_qasc_pairs_samples_per_second": 108.271, + "eval_qasc_pairs_steps_per_second": 3.383, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_qasc_facts_sym_loss": 0.11813600361347198, + "eval_qasc_facts_sym_runtime": 0.3695, + "eval_qasc_facts_sym_samples_per_second": 432.998, + "eval_qasc_facts_sym_steps_per_second": 13.531, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_openbookqa_pairs_loss": 1.5948731899261475, + "eval_openbookqa_pairs_runtime": 1.3515, + "eval_openbookqa_pairs_samples_per_second": 118.389, + "eval_openbookqa_pairs_steps_per_second": 3.7, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_msmarco_pairs_loss": 0.48051756620407104, + "eval_msmarco_pairs_runtime": 3.3017, + "eval_msmarco_pairs_samples_per_second": 48.46, + "eval_msmarco_pairs_steps_per_second": 1.514, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_nq_pairs_loss": 0.38785767555236816, + "eval_nq_pairs_runtime": 7.8378, + "eval_nq_pairs_samples_per_second": 20.414, + "eval_nq_pairs_steps_per_second": 0.638, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_trivia_pairs_loss": 0.6309649348258972, + "eval_trivia_pairs_runtime": 10.3017, + "eval_trivia_pairs_samples_per_second": 15.531, + "eval_trivia_pairs_steps_per_second": 0.485, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_quora_pairs_loss": 0.175103560090065, + "eval_quora_pairs_runtime": 4.0555, + "eval_quora_pairs_samples_per_second": 166.441, + "eval_quora_pairs_steps_per_second": 5.425, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_gooaq_pairs_loss": 0.42644429206848145, + "eval_gooaq_pairs_runtime": 2.2853, + "eval_gooaq_pairs_samples_per_second": 70.011, + "eval_gooaq_pairs_steps_per_second": 2.188, + "step": 15504 + }, + { + "epoch": 3.0011614401858306, + "eval_mrpc_pairs_loss": 0.02719317004084587, + "eval_mrpc_pairs_runtime": 0.3819, + "eval_mrpc_pairs_samples_per_second": 418.95, + "eval_mrpc_pairs_steps_per_second": 13.092, + "step": 15504 + }, + { + "epoch": 3.0071622144792878, + "grad_norm": 0.18203336000442505, + "learning_rate": 1.480160380988634e-05, + "loss": 0.2311, + "step": 15535 + }, + { + "epoch": 3.0197444831591174, + "grad_norm": 4.591137409210205, + "learning_rate": 1.4357100937252624e-05, + "loss": 0.1838, + "step": 15600 + }, + { + "epoch": 3.032326751838947, + "grad_norm": 0.6497881412506104, + "learning_rate": 1.391316306815472e-05, + "loss": 0.2076, + "step": 15665 + }, + { + "epoch": 3.0449090205187765, + "grad_norm": 10.108341217041016, + "learning_rate": 1.3470180351653784e-05, + "loss": 0.2866, + "step": 15730 + }, + { + "epoch": 3.057491289198606, + "grad_norm": 0.8622055649757385, + "learning_rate": 1.302854209738737e-05, + "loss": 0.2769, + "step": 15795 + }, + { + "epoch": 3.0700735578784357, + "grad_norm": 15.875243186950684, + "learning_rate": 1.258863643342987e-05, + "loss": 0.2398, + "step": 15860 + }, + { + "epoch": 3.0826558265582658, + "grad_norm": 7.3159708976745605, + "learning_rate": 1.2150849965190985e-05, + "loss": 0.3044, + "step": 15925 + }, + { + "epoch": 3.0952380952380953, + "grad_norm": 9.284073829650879, + "learning_rate": 1.1715567435652663e-05, + "loss": 0.261, + "step": 15990 + }, + { + "epoch": 3.107820363917925, + "grad_norm": 10.888318061828613, + "learning_rate": 1.1283171387242675e-05, + "loss": 0.2806, + "step": 16055 + }, + { + "epoch": 3.1204026325977545, + "grad_norm": 2.058668851852417, + "learning_rate": 1.0854041825642137e-05, + "loss": 0.2093, + "step": 16120 + }, + { + "epoch": 3.132984901277584, + "grad_norm": 7.410093307495117, + "learning_rate": 1.042855588582242e-05, + "loss": 0.1835, + "step": 16185 + }, + { + "epoch": 3.1455671699574137, + "grad_norm": 7.427304744720459, + "learning_rate": 1.0007087500604994e-05, + "loss": 0.2615, + "step": 16250 + }, + { + "epoch": 3.1581494386372437, + "grad_norm": 7.91264533996582, + "learning_rate": 9.590007072035367e-06, + "loss": 0.3112, + "step": 16315 + }, + { + "epoch": 3.1707317073170733, + "grad_norm": 0.32049843668937683, + "learning_rate": 9.177681145859981e-06, + "loss": 0.2247, + "step": 16380 + }, + { + "epoch": 3.183313975996903, + "grad_norm": 0.22129254043102264, + "learning_rate": 8.77047208939235e-06, + "loss": 0.2808, + "step": 16445 + }, + { + "epoch": 3.1958962446767325, + "grad_norm": 17.339345932006836, + "learning_rate": 8.368737773051066e-06, + "loss": 0.2256, + "step": 16510 + }, + { + "epoch": 3.208478513356562, + "grad_norm": 0.13545936346054077, + "learning_rate": 7.972831255850175e-06, + "loss": 0.266, + "step": 16575 + }, + { + "epoch": 3.2210607820363917, + "grad_norm": 7.021492004394531, + "learning_rate": 7.583100475117644e-06, + "loss": 0.2396, + "step": 16640 + }, + { + "epoch": 3.2336430507162213, + "grad_norm": 2.81817889213562, + "learning_rate": 7.1998879407153136e-06, + "loss": 0.2493, + "step": 16705 + }, + { + "epoch": 3.2462253193960513, + "grad_norm": 1.4829257726669312, + "learning_rate": 6.823530434028392e-06, + "loss": 0.2642, + "step": 16770 + }, + { + "epoch": 3.251258226867983, + "eval_StS-test_pearson_cosine": 0.880777931752262, + "eval_StS-test_pearson_dot": 0.7745050681792695, + "eval_StS-test_pearson_euclidean": 0.8649445292928557, + "eval_StS-test_pearson_manhattan": 0.8633416689400311, + "eval_StS-test_pearson_max": 0.880777931752262, + "eval_StS-test_spearman_cosine": 0.8965067413194601, + "eval_StS-test_spearman_dot": 0.7722399877819839, + "eval_StS-test_spearman_euclidean": 0.8683118316991572, + "eval_StS-test_spearman_manhattan": 0.8680132006430414, + "eval_StS-test_spearman_max": 0.8965067413194601, + "eval_Vitaminc-test_cosine_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7058641910552979, + "eval_Vitaminc-test_cosine_ap": 0.5596672617305367, + "eval_Vitaminc-test_cosine_f1": 0.6766355140186916, + "eval_Vitaminc-test_cosine_f1_threshold": 0.43707022070884705, + "eval_Vitaminc-test_cosine_precision": 0.5171428571428571, + "eval_Vitaminc-test_cosine_recall": 0.9783783783783784, + "eval_Vitaminc-test_dot_accuracy": 0.5631578947368421, + "eval_Vitaminc-test_dot_accuracy_threshold": 12.52048110961914, + "eval_Vitaminc-test_dot_ap": 0.5494024707334075, + "eval_Vitaminc-test_dot_f1": 0.6703096539162112, + "eval_Vitaminc-test_dot_f1_threshold": 7.3003621101379395, + "eval_Vitaminc-test_dot_precision": 0.5054945054945055, + "eval_Vitaminc-test_dot_recall": 0.9945945945945946, + "eval_Vitaminc-test_euclidean_accuracy": 0.5605263157894737, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.11747145652771, + "eval_Vitaminc-test_euclidean_ap": 0.547341808113373, + "eval_Vitaminc-test_euclidean_f1": 0.6678832116788321, + "eval_Vitaminc-test_euclidean_f1_threshold": 5.6768622398376465, + "eval_Vitaminc-test_euclidean_precision": 0.5041322314049587, + "eval_Vitaminc-test_euclidean_recall": 0.9891891891891892, + "eval_Vitaminc-test_manhattan_accuracy": 0.5684210526315789, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 63.42762756347656, + "eval_Vitaminc-test_manhattan_ap": 0.5490956668956571, + "eval_Vitaminc-test_manhattan_f1": 0.6654545454545455, + "eval_Vitaminc-test_manhattan_f1_threshold": 118.83499908447266, + "eval_Vitaminc-test_manhattan_precision": 0.5013698630136987, + "eval_Vitaminc-test_manhattan_recall": 0.9891891891891892, + "eval_Vitaminc-test_max_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_max_accuracy_threshold": 63.42762756347656, + "eval_Vitaminc-test_max_ap": 0.5596672617305367, + "eval_Vitaminc-test_max_f1": 0.6766355140186916, + "eval_Vitaminc-test_max_f1_threshold": 118.83499908447266, + "eval_Vitaminc-test_max_precision": 0.5171428571428571, + "eval_Vitaminc-test_max_recall": 0.9945945945945946, + "eval_mrpc-test_cosine_accuracy": 0.7315789473684211, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7081175446510315, + "eval_mrpc-test_cosine_ap": 0.8471091006168546, + "eval_mrpc-test_cosine_f1": 0.8204225352112677, + "eval_mrpc-test_cosine_f1_threshold": 0.7004733085632324, + "eval_mrpc-test_cosine_precision": 0.728125, + "eval_mrpc-test_cosine_recall": 0.9395161290322581, + "eval_mrpc-test_dot_accuracy": 0.6921052631578948, + "eval_mrpc-test_dot_accuracy_threshold": 7.834476470947266, + "eval_mrpc-test_dot_ap": 0.7544989265631823, + "eval_mrpc-test_dot_f1": 0.8072487644151566, + "eval_mrpc-test_dot_f1_threshold": 7.739718437194824, + "eval_mrpc-test_dot_precision": 0.6824512534818942, + "eval_mrpc-test_dot_recall": 0.9879032258064516, + "eval_mrpc-test_euclidean_accuracy": 0.7421052631578947, + "eval_mrpc-test_euclidean_accuracy_threshold": 3.0166122913360596, + "eval_mrpc-test_euclidean_ap": 0.8252951646149229, + "eval_mrpc-test_euclidean_f1": 0.8231046931407942, + "eval_mrpc-test_euclidean_f1_threshold": 3.0470428466796875, + "eval_mrpc-test_euclidean_precision": 0.7450980392156863, + "eval_mrpc-test_euclidean_recall": 0.9193548387096774, + "eval_mrpc-test_manhattan_accuracy": 0.7394736842105263, + "eval_mrpc-test_manhattan_accuracy_threshold": 60.53413391113281, + "eval_mrpc-test_manhattan_ap": 0.8247798907091635, + "eval_mrpc-test_manhattan_f1": 0.8220640569395017, + "eval_mrpc-test_manhattan_f1_threshold": 62.061195373535156, + "eval_mrpc-test_manhattan_precision": 0.7356687898089171, + "eval_mrpc-test_manhattan_recall": 0.9314516129032258, + "eval_mrpc-test_max_accuracy": 0.7421052631578947, + "eval_mrpc-test_max_accuracy_threshold": 60.53413391113281, + "eval_mrpc-test_max_ap": 0.8471091006168546, + "eval_mrpc-test_max_f1": 0.8231046931407942, + "eval_mrpc-test_max_f1_threshold": 62.061195373535156, + "eval_mrpc-test_max_precision": 0.7450980392156863, + "eval_mrpc-test_max_recall": 0.9879032258064516, + "eval_nli-pairs_loss": 0.7501987814903259, + "eval_nli-pairs_runtime": 3.0677, + "eval_nli-pairs_samples_per_second": 52.156, + "eval_nli-pairs_steps_per_second": 1.63, + "eval_sequential_score": 0.5596672617305367, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_vitaminc-pairs_loss": 6.001572132110596, + "eval_vitaminc-pairs_runtime": 1.6281, + "eval_vitaminc-pairs_samples_per_second": 81.691, + "eval_vitaminc-pairs_steps_per_second": 3.071, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_qnli-contrastive_loss": 0.1142602190375328, + "eval_qnli-contrastive_runtime": 0.5259, + "eval_qnli-contrastive_samples_per_second": 304.23, + "eval_qnli-contrastive_steps_per_second": 9.507, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_scitail-pairs-qa_loss": 0.03872662037611008, + "eval_scitail-pairs-qa_runtime": 1.2213, + "eval_scitail-pairs-qa_samples_per_second": 131.01, + "eval_scitail-pairs-qa_steps_per_second": 4.094, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_scitail-pairs-pos_loss": 0.2690686285495758, + "eval_scitail-pairs-pos_runtime": 2.3945, + "eval_scitail-pairs-pos_samples_per_second": 66.82, + "eval_scitail-pairs-pos_steps_per_second": 2.088, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_xsum-pairs_loss": 0.2524264454841614, + "eval_xsum-pairs_runtime": 1.4129, + "eval_xsum-pairs_samples_per_second": 113.239, + "eval_xsum-pairs_steps_per_second": 3.539, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_compression-pairs_loss": 0.053744763135910034, + "eval_compression-pairs_runtime": 0.4045, + "eval_compression-pairs_samples_per_second": 395.564, + "eval_compression-pairs_steps_per_second": 12.361, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_sciq_pairs_loss": 0.2147907316684723, + "eval_sciq_pairs_runtime": 8.0242, + "eval_sciq_pairs_samples_per_second": 19.94, + "eval_sciq_pairs_steps_per_second": 0.623, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_qasc_pairs_loss": 0.1279141902923584, + "eval_qasc_pairs_runtime": 1.4536, + "eval_qasc_pairs_samples_per_second": 110.074, + "eval_qasc_pairs_steps_per_second": 3.44, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_qasc_facts_sym_loss": 0.11657889187335968, + "eval_qasc_facts_sym_runtime": 0.33, + "eval_qasc_facts_sym_samples_per_second": 484.857, + "eval_qasc_facts_sym_steps_per_second": 15.152, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_openbookqa_pairs_loss": 1.5521962642669678, + "eval_openbookqa_pairs_runtime": 1.2491, + "eval_openbookqa_pairs_samples_per_second": 128.096, + "eval_openbookqa_pairs_steps_per_second": 4.003, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_msmarco_pairs_loss": 0.45534777641296387, + "eval_msmarco_pairs_runtime": 3.1507, + "eval_msmarco_pairs_samples_per_second": 50.783, + "eval_msmarco_pairs_steps_per_second": 1.587, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_nq_pairs_loss": 0.3408084511756897, + "eval_nq_pairs_runtime": 7.7393, + "eval_nq_pairs_samples_per_second": 20.674, + "eval_nq_pairs_steps_per_second": 0.646, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_trivia_pairs_loss": 0.6149572134017944, + "eval_trivia_pairs_runtime": 10.2599, + "eval_trivia_pairs_samples_per_second": 15.595, + "eval_trivia_pairs_steps_per_second": 0.487, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_quora_pairs_loss": 0.16969382762908936, + "eval_quora_pairs_runtime": 3.9818, + "eval_quora_pairs_samples_per_second": 169.521, + "eval_quora_pairs_steps_per_second": 5.525, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_gooaq_pairs_loss": 0.3949827253818512, + "eval_gooaq_pairs_runtime": 2.286, + "eval_gooaq_pairs_samples_per_second": 69.99, + "eval_gooaq_pairs_steps_per_second": 2.187, + "step": 16796 + }, + { + "epoch": 3.251258226867983, + "eval_mrpc_pairs_loss": 0.025785308331251144, + "eval_mrpc_pairs_runtime": 0.3822, + "eval_mrpc_pairs_samples_per_second": 418.675, + "eval_mrpc_pairs_steps_per_second": 13.084, + "step": 16796 + }, + { + "epoch": 3.258807588075881, + "grad_norm": 0.9428536295890808, + "learning_rate": 6.4543587119895626e-06, + "loss": 0.2925, + "step": 16835 + }, + { + "epoch": 3.2713898567557105, + "grad_norm": 0.49789130687713623, + "learning_rate": 6.092697216397478e-06, + "loss": 0.2983, + "step": 16900 + }, + { + "epoch": 3.28397212543554, + "grad_norm": 0.5712814927101135, + "learning_rate": 5.7388637887853255e-06, + "loss": 0.2797, + "step": 16965 + }, + { + "epoch": 3.2965543941153697, + "grad_norm": 0.5957891345024109, + "learning_rate": 5.393169391089869e-06, + "loss": 0.2351, + "step": 17030 + }, + { + "epoch": 3.3091366627951992, + "grad_norm": 0.17854127287864685, + "learning_rate": 5.055917832366551e-06, + "loss": 0.2431, + "step": 17095 + }, + { + "epoch": 3.3217189314750293, + "grad_norm": 0.43127721548080444, + "learning_rate": 4.727405501790925e-06, + "loss": 0.2406, + "step": 17160 + }, + { + "epoch": 3.334301200154859, + "grad_norm": 0.40976136922836304, + "learning_rate": 4.407921108180699e-06, + "loss": 0.2354, + "step": 17225 + }, + { + "epoch": 3.3468834688346885, + "grad_norm": 4.054069519042969, + "learning_rate": 4.097745426267878e-06, + "loss": 0.1614, + "step": 17290 + }, + { + "epoch": 3.359465737514518, + "grad_norm": 0.5118033289909363, + "learning_rate": 3.797151049943393e-06, + "loss": 0.1372, + "step": 17355 + }, + { + "epoch": 3.3720480061943476, + "grad_norm": 0.12063035368919373, + "learning_rate": 3.5064021526915996e-06, + "loss": 0.2192, + "step": 17420 + }, + { + "epoch": 3.3846302748741772, + "grad_norm": 1.3429920673370361, + "learning_rate": 3.225754255424692e-06, + "loss": 0.1628, + "step": 17485 + }, + { + "epoch": 3.397212543554007, + "grad_norm": 6.818206310272217, + "learning_rate": 2.9554540019215848e-06, + "loss": 0.1496, + "step": 17550 + }, + { + "epoch": 3.4097948122338364, + "grad_norm": 0.1729680895805359, + "learning_rate": 2.6957389420681094e-06, + "loss": 0.1813, + "step": 17615 + }, + { + "epoch": 3.4223770809136664, + "grad_norm": 2.8938841819763184, + "learning_rate": 2.446837323089423e-06, + "loss": 0.1914, + "step": 17680 + }, + { + "epoch": 3.434959349593496, + "grad_norm": 0.3303506076335907, + "learning_rate": 2.208967888957853e-06, + "loss": 0.196, + "step": 17745 + }, + { + "epoch": 3.4475416182733256, + "grad_norm": 3.809260606765747, + "learning_rate": 1.982339688152608e-06, + "loss": 0.1858, + "step": 17810 + }, + { + "epoch": 3.460123886953155, + "grad_norm": 3.7293314933776855, + "learning_rate": 1.7671518899402124e-06, + "loss": 0.19, + "step": 17875 + }, + { + "epoch": 3.472706155632985, + "grad_norm": 0.11209724098443985, + "learning_rate": 1.5635936093371428e-06, + "loss": 0.166, + "step": 17940 + }, + { + "epoch": 3.4852884243128144, + "grad_norm": 1.0682430267333984, + "learning_rate": 1.3718437409086144e-06, + "loss": 0.1681, + "step": 18005 + }, + { + "epoch": 3.4978706929926444, + "grad_norm": 0.11844446510076523, + "learning_rate": 1.1920708015492954e-06, + "loss": 0.1615, + "step": 18070 + }, + { + "epoch": 3.5013550135501355, + "eval_StS-test_pearson_cosine": 0.8813824345718434, + "eval_StS-test_pearson_dot": 0.7717544169066424, + "eval_StS-test_pearson_euclidean": 0.8642445322185591, + "eval_StS-test_pearson_manhattan": 0.8626796723170516, + "eval_StS-test_pearson_max": 0.8813824345718434, + "eval_StS-test_spearman_cosine": 0.8968829857988841, + "eval_StS-test_spearman_dot": 0.7725145356884126, + "eval_StS-test_spearman_euclidean": 0.868454031751171, + "eval_StS-test_spearman_manhattan": 0.8683110654171217, + "eval_StS-test_spearman_max": 0.8968829857988841, + "eval_Vitaminc-test_cosine_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7279139161109924, + "eval_Vitaminc-test_cosine_ap": 0.5599139775011265, + "eval_Vitaminc-test_cosine_f1": 0.6741996233521658, + "eval_Vitaminc-test_cosine_f1_threshold": 0.4386724531650543, + "eval_Vitaminc-test_cosine_precision": 0.5173410404624278, + "eval_Vitaminc-test_cosine_recall": 0.9675675675675676, + "eval_Vitaminc-test_dot_accuracy": 0.5657894736842105, + "eval_Vitaminc-test_dot_accuracy_threshold": 12.463031768798828, + "eval_Vitaminc-test_dot_ap": 0.5548755673509098, + "eval_Vitaminc-test_dot_f1": 0.6691042047531992, + "eval_Vitaminc-test_dot_f1_threshold": 6.735769271850586, + "eval_Vitaminc-test_dot_precision": 0.505524861878453, + "eval_Vitaminc-test_dot_recall": 0.9891891891891892, + "eval_Vitaminc-test_euclidean_accuracy": 0.5657894736842105, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.1054494380950928, + "eval_Vitaminc-test_euclidean_ap": 0.5460311037339075, + "eval_Vitaminc-test_euclidean_f1": 0.667870036101083, + "eval_Vitaminc-test_euclidean_f1_threshold": 5.527202606201172, + "eval_Vitaminc-test_euclidean_precision": 0.5013550135501355, + "eval_Vitaminc-test_euclidean_recall": 1.0, + "eval_Vitaminc-test_manhattan_accuracy": 0.5657894736842105, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 61.94684600830078, + "eval_Vitaminc-test_manhattan_ap": 0.5511678056221634, + "eval_Vitaminc-test_manhattan_f1": 0.667870036101083, + "eval_Vitaminc-test_manhattan_f1_threshold": 113.38865661621094, + "eval_Vitaminc-test_manhattan_precision": 0.5013550135501355, + "eval_Vitaminc-test_manhattan_recall": 1.0, + "eval_Vitaminc-test_max_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_max_accuracy_threshold": 61.94684600830078, + "eval_Vitaminc-test_max_ap": 0.5599139775011265, + "eval_Vitaminc-test_max_f1": 0.6741996233521658, + "eval_Vitaminc-test_max_f1_threshold": 113.38865661621094, + "eval_Vitaminc-test_max_precision": 0.5173410404624278, + "eval_Vitaminc-test_max_recall": 1.0, + "eval_mrpc-test_cosine_accuracy": 0.7289473684210527, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7165935635566711, + "eval_mrpc-test_cosine_ap": 0.8464822906177363, + "eval_mrpc-test_cosine_f1": 0.8193979933110368, + "eval_mrpc-test_cosine_f1_threshold": 0.6241698265075684, + "eval_mrpc-test_cosine_precision": 0.7, + "eval_mrpc-test_cosine_recall": 0.9879032258064516, + "eval_mrpc-test_dot_accuracy": 0.6894736842105263, + "eval_mrpc-test_dot_accuracy_threshold": 6.886929512023926, + "eval_mrpc-test_dot_ap": 0.7573697765974963, + "eval_mrpc-test_dot_f1": 0.805921052631579, + "eval_mrpc-test_dot_f1_threshold": 6.723896026611328, + "eval_mrpc-test_dot_precision": 0.6805555555555556, + "eval_mrpc-test_dot_recall": 0.9879032258064516, + "eval_mrpc-test_euclidean_accuracy": 0.7368421052631579, + "eval_mrpc-test_euclidean_accuracy_threshold": 2.9247827529907227, + "eval_mrpc-test_euclidean_ap": 0.8222108381881498, + "eval_mrpc-test_euclidean_f1": 0.8214285714285715, + "eval_mrpc-test_euclidean_f1_threshold": 3.0410943031311035, + "eval_mrpc-test_euclidean_precision": 0.7371794871794872, + "eval_mrpc-test_euclidean_recall": 0.9274193548387096, + "eval_mrpc-test_manhattan_accuracy": 0.7421052631578947, + "eval_mrpc-test_manhattan_accuracy_threshold": 59.87488555908203, + "eval_mrpc-test_manhattan_ap": 0.8232871898556722, + "eval_mrpc-test_manhattan_f1": 0.8262411347517731, + "eval_mrpc-test_manhattan_f1_threshold": 59.87488555908203, + "eval_mrpc-test_manhattan_precision": 0.7373417721518988, + "eval_mrpc-test_manhattan_recall": 0.9395161290322581, + "eval_mrpc-test_max_accuracy": 0.7421052631578947, + "eval_mrpc-test_max_accuracy_threshold": 59.87488555908203, + "eval_mrpc-test_max_ap": 0.8464822906177363, + "eval_mrpc-test_max_f1": 0.8262411347517731, + "eval_mrpc-test_max_f1_threshold": 59.87488555908203, + "eval_mrpc-test_max_precision": 0.7373417721518988, + "eval_mrpc-test_max_recall": 0.9879032258064516, + "eval_nli-pairs_loss": 0.7530286908149719, + "eval_nli-pairs_runtime": 3.1538, + "eval_nli-pairs_samples_per_second": 50.732, + "eval_nli-pairs_steps_per_second": 1.585, + "eval_sequential_score": 0.5599139775011265, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_vitaminc-pairs_loss": 6.0385422706604, + "eval_vitaminc-pairs_runtime": 1.7005, + "eval_vitaminc-pairs_samples_per_second": 78.211, + "eval_vitaminc-pairs_steps_per_second": 2.94, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_qnli-contrastive_loss": 0.13678142428398132, + "eval_qnli-contrastive_runtime": 0.554, + "eval_qnli-contrastive_samples_per_second": 288.79, + "eval_qnli-contrastive_steps_per_second": 9.025, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_scitail-pairs-qa_loss": 0.03590356931090355, + "eval_scitail-pairs-qa_runtime": 1.3971, + "eval_scitail-pairs-qa_samples_per_second": 114.52, + "eval_scitail-pairs-qa_steps_per_second": 3.579, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_scitail-pairs-pos_loss": 0.27764028310775757, + "eval_scitail-pairs-pos_runtime": 2.6797, + "eval_scitail-pairs-pos_samples_per_second": 59.709, + "eval_scitail-pairs-pos_steps_per_second": 1.866, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_xsum-pairs_loss": 0.24325545132160187, + "eval_xsum-pairs_runtime": 1.4199, + "eval_xsum-pairs_samples_per_second": 112.683, + "eval_xsum-pairs_steps_per_second": 3.521, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_compression-pairs_loss": 0.051899444311857224, + "eval_compression-pairs_runtime": 0.4875, + "eval_compression-pairs_samples_per_second": 328.228, + "eval_compression-pairs_steps_per_second": 10.257, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_sciq_pairs_loss": 0.21445715427398682, + "eval_sciq_pairs_runtime": 8.2376, + "eval_sciq_pairs_samples_per_second": 19.423, + "eval_sciq_pairs_steps_per_second": 0.607, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_qasc_pairs_loss": 0.1192399114370346, + "eval_qasc_pairs_runtime": 1.6296, + "eval_qasc_pairs_samples_per_second": 98.182, + "eval_qasc_pairs_steps_per_second": 3.068, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_qasc_facts_sym_loss": 0.11172313988208771, + "eval_qasc_facts_sym_runtime": 0.3505, + "eval_qasc_facts_sym_samples_per_second": 456.501, + "eval_qasc_facts_sym_steps_per_second": 14.266, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_openbookqa_pairs_loss": 1.5495846271514893, + "eval_openbookqa_pairs_runtime": 1.4896, + "eval_openbookqa_pairs_samples_per_second": 107.414, + "eval_openbookqa_pairs_steps_per_second": 3.357, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_msmarco_pairs_loss": 0.4486960768699646, + "eval_msmarco_pairs_runtime": 3.2657, + "eval_msmarco_pairs_samples_per_second": 48.995, + "eval_msmarco_pairs_steps_per_second": 1.531, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_nq_pairs_loss": 0.3250276446342468, + "eval_nq_pairs_runtime": 7.7729, + "eval_nq_pairs_samples_per_second": 20.584, + "eval_nq_pairs_steps_per_second": 0.643, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_trivia_pairs_loss": 0.56200110912323, + "eval_trivia_pairs_runtime": 10.4885, + "eval_trivia_pairs_samples_per_second": 15.255, + "eval_trivia_pairs_steps_per_second": 0.477, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_quora_pairs_loss": 0.1906571239233017, + "eval_quora_pairs_runtime": 4.1399, + "eval_quora_pairs_samples_per_second": 163.049, + "eval_quora_pairs_steps_per_second": 5.314, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_gooaq_pairs_loss": 0.40848416090011597, + "eval_gooaq_pairs_runtime": 2.2935, + "eval_gooaq_pairs_samples_per_second": 69.763, + "eval_gooaq_pairs_steps_per_second": 2.18, + "step": 18088 + }, + { + "epoch": 3.5013550135501355, + "eval_mrpc_pairs_loss": 0.024090532213449478, + "eval_mrpc_pairs_runtime": 0.3833, + "eval_mrpc_pairs_samples_per_second": 417.394, + "eval_mrpc_pairs_steps_per_second": 13.044, + "step": 18088 + }, + { + "epoch": 3.510452961672474, + "grad_norm": 0.44903191924095154, + "learning_rate": 1.024432782384515e-06, + "loss": 0.136, + "step": 18135 + }, + { + "epoch": 3.5230352303523036, + "grad_norm": 0.4198465645313263, + "learning_rate": 8.690770099217205e-07, + "loss": 0.1507, + "step": 18200 + }, + { + "epoch": 3.535617499032133, + "grad_norm": 9.77443790435791, + "learning_rate": 7.26140016574553e-07, + "loss": 0.207, + "step": 18265 + }, + { + "epoch": 3.5481997677119628, + "grad_norm": 4.438516616821289, + "learning_rate": 5.957474206730273e-07, + "loss": 0.2192, + "step": 18330 + }, + { + "epoch": 3.5607820363917924, + "grad_norm": 1.4462083578109741, + "learning_rate": 4.78013816065549e-07, + "loss": 0.1574, + "step": 18395 + }, + { + "epoch": 3.573364305071622, + "grad_norm": 0.18752352893352509, + "learning_rate": 3.730426714095514e-07, + "loss": 0.1859, + "step": 18460 + }, + { + "epoch": 3.5859465737514515, + "grad_norm": 0.1693798154592514, + "learning_rate": 2.809262392394196e-07, + "loss": 0.13, + "step": 18525 + }, + { + "epoch": 3.5985288424312816, + "grad_norm": 1.442649006843567, + "learning_rate": 2.0174547489152985e-07, + "loss": 0.1555, + "step": 18590 + }, + { + "epoch": 3.611111111111111, + "grad_norm": 2.7645862102508545, + "learning_rate": 1.3556996535771416e-07, + "loss": 0.2212, + "step": 18655 + }, + { + "epoch": 3.6236933797909407, + "grad_norm": 0.19195497035980225, + "learning_rate": 8.317578698789685e-08, + "loss": 0.1647, + "step": 18720 + }, + { + "epoch": 3.6362756484707703, + "grad_norm": 0.15366147458553314, + "learning_rate": 4.2971789421488715e-08, + "loss": 0.1529, + "step": 18785 + }, + { + "epoch": 3.6488579171506, + "grad_norm": 2.829648733139038, + "learning_rate": 1.591258287146735e-08, + "loss": 0.1512, + "step": 18850 + }, + { + "epoch": 3.66144018583043, + "grad_norm": 0.14906300604343414, + "learning_rate": 2.0219479719363333e-09, + "loss": 0.1788, + "step": 18915 + }, + { + "epoch": 3.6740224545102595, + "grad_norm": 0.3509635031223297, + "learning_rate": 2.9998687907672072e-05, + "loss": 0.184, + "step": 18980 + }, + { + "epoch": 3.686604723190089, + "grad_norm": 1.5732251405715942, + "learning_rate": 2.9986216360213095e-05, + "loss": 0.2365, + "step": 19045 + }, + { + "epoch": 3.6991869918699187, + "grad_norm": 4.517373561859131, + "learning_rate": 2.9960574370109496e-05, + "loss": 0.1695, + "step": 19110 + }, + { + "epoch": 3.7117692605497483, + "grad_norm": 2.6720194816589355, + "learning_rate": 2.992178447249302e-05, + "loss": 0.2396, + "step": 19175 + }, + { + "epoch": 3.724351529229578, + "grad_norm": 1.2681653499603271, + "learning_rate": 2.986988075736407e-05, + "loss": 0.232, + "step": 19240 + }, + { + "epoch": 3.7369337979094075, + "grad_norm": 7.320454120635986, + "learning_rate": 2.980490883963215e-05, + "loss": 0.2263, + "step": 19305 + }, + { + "epoch": 3.749516066589237, + "grad_norm": 0.3230324387550354, + "learning_rate": 2.9726925819027805e-05, + "loss": 0.1583, + "step": 19370 + }, + { + "epoch": 3.751451800232288, + "eval_StS-test_pearson_cosine": 0.8727518027563739, + "eval_StS-test_pearson_dot": 0.7544441009977498, + "eval_StS-test_pearson_euclidean": 0.8528252057552665, + "eval_StS-test_pearson_manhattan": 0.8503574183592293, + "eval_StS-test_pearson_max": 0.8727518027563739, + "eval_StS-test_spearman_cosine": 0.888096795979988, + "eval_StS-test_spearman_dot": 0.7570130879859552, + "eval_StS-test_spearman_euclidean": 0.858368008662364, + "eval_StS-test_spearman_manhattan": 0.8576093894472316, + "eval_StS-test_spearman_max": 0.888096795979988, + "eval_Vitaminc-test_cosine_accuracy": 0.5815789473684211, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7381868362426758, + "eval_Vitaminc-test_cosine_ap": 0.5598426418690512, + "eval_Vitaminc-test_cosine_f1": 0.6716417910447761, + "eval_Vitaminc-test_cosine_f1_threshold": 0.4170137643814087, + "eval_Vitaminc-test_cosine_precision": 0.5128205128205128, + "eval_Vitaminc-test_cosine_recall": 0.972972972972973, + "eval_Vitaminc-test_dot_accuracy": 0.5605263157894737, + "eval_Vitaminc-test_dot_accuracy_threshold": 13.731411933898926, + "eval_Vitaminc-test_dot_ap": 0.5487102325187027, + "eval_Vitaminc-test_dot_f1": 0.6691588785046729, + "eval_Vitaminc-test_dot_f1_threshold": 7.848942756652832, + "eval_Vitaminc-test_dot_precision": 0.5114285714285715, + "eval_Vitaminc-test_dot_recall": 0.9675675675675676, + "eval_Vitaminc-test_euclidean_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.16796612739563, + "eval_Vitaminc-test_euclidean_ap": 0.5499801975077707, + "eval_Vitaminc-test_euclidean_f1": 0.6654676258992805, + "eval_Vitaminc-test_euclidean_f1_threshold": 5.5990071296691895, + "eval_Vitaminc-test_euclidean_precision": 0.49865229110512127, + "eval_Vitaminc-test_euclidean_recall": 1.0, + "eval_Vitaminc-test_manhattan_accuracy": 0.5842105263157895, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 62.038818359375, + "eval_Vitaminc-test_manhattan_ap": 0.5517991149943702, + "eval_Vitaminc-test_manhattan_f1": 0.6642599277978339, + "eval_Vitaminc-test_manhattan_f1_threshold": 111.26097106933594, + "eval_Vitaminc-test_manhattan_precision": 0.4986449864498645, + "eval_Vitaminc-test_manhattan_recall": 0.9945945945945946, + "eval_Vitaminc-test_max_accuracy": 0.5842105263157895, + "eval_Vitaminc-test_max_accuracy_threshold": 62.038818359375, + "eval_Vitaminc-test_max_ap": 0.5598426418690512, + "eval_Vitaminc-test_max_f1": 0.6716417910447761, + "eval_Vitaminc-test_max_f1_threshold": 111.26097106933594, + "eval_Vitaminc-test_max_precision": 0.5128205128205128, + "eval_Vitaminc-test_max_recall": 1.0, + "eval_mrpc-test_cosine_accuracy": 0.7394736842105263, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7163187265396118, + "eval_mrpc-test_cosine_ap": 0.8453611137786435, + "eval_mrpc-test_cosine_f1": 0.8266199649737302, + "eval_mrpc-test_cosine_f1_threshold": 0.6975057721138, + "eval_mrpc-test_cosine_precision": 0.7306501547987616, + "eval_mrpc-test_cosine_recall": 0.9516129032258065, + "eval_mrpc-test_dot_accuracy": 0.6894736842105263, + "eval_mrpc-test_dot_accuracy_threshold": 7.592863082885742, + "eval_mrpc-test_dot_ap": 0.7556545113140425, + "eval_mrpc-test_dot_f1": 0.8039538714991763, + "eval_mrpc-test_dot_f1_threshold": 7.086015224456787, + "eval_mrpc-test_dot_precision": 0.6796657381615598, + "eval_mrpc-test_dot_recall": 0.9838709677419355, + "eval_mrpc-test_euclidean_accuracy": 0.7394736842105263, + "eval_mrpc-test_euclidean_accuracy_threshold": 2.8540396690368652, + "eval_mrpc-test_euclidean_ap": 0.8243914030378248, + "eval_mrpc-test_euclidean_f1": 0.8186714542190305, + "eval_mrpc-test_euclidean_f1_threshold": 2.9808104038238525, + "eval_mrpc-test_euclidean_precision": 0.7378640776699029, + "eval_mrpc-test_euclidean_recall": 0.9193548387096774, + "eval_mrpc-test_manhattan_accuracy": 0.7421052631578947, + "eval_mrpc-test_manhattan_accuracy_threshold": 57.382469177246094, + "eval_mrpc-test_manhattan_ap": 0.8242227392776778, + "eval_mrpc-test_manhattan_f1": 0.8231046931407942, + "eval_mrpc-test_manhattan_f1_threshold": 58.107505798339844, + "eval_mrpc-test_manhattan_precision": 0.7450980392156863, + "eval_mrpc-test_manhattan_recall": 0.9193548387096774, + "eval_mrpc-test_max_accuracy": 0.7421052631578947, + "eval_mrpc-test_max_accuracy_threshold": 57.382469177246094, + "eval_mrpc-test_max_ap": 0.8453611137786435, + "eval_mrpc-test_max_f1": 0.8266199649737302, + "eval_mrpc-test_max_f1_threshold": 58.107505798339844, + "eval_mrpc-test_max_precision": 0.7450980392156863, + "eval_mrpc-test_max_recall": 0.9838709677419355, + "eval_nli-pairs_loss": 0.8451490998268127, + "eval_nli-pairs_runtime": 3.108, + "eval_nli-pairs_samples_per_second": 51.479, + "eval_nli-pairs_steps_per_second": 1.609, + "eval_sequential_score": 0.5598426418690512, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_vitaminc-pairs_loss": 6.042627334594727, + "eval_vitaminc-pairs_runtime": 1.5811, + "eval_vitaminc-pairs_samples_per_second": 84.119, + "eval_vitaminc-pairs_steps_per_second": 3.162, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_qnli-contrastive_loss": 0.14806851744651794, + "eval_qnli-contrastive_runtime": 0.5234, + "eval_qnli-contrastive_samples_per_second": 305.715, + "eval_qnli-contrastive_steps_per_second": 9.554, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_scitail-pairs-qa_loss": 0.03740249201655388, + "eval_scitail-pairs-qa_runtime": 1.22, + "eval_scitail-pairs-qa_samples_per_second": 131.142, + "eval_scitail-pairs-qa_steps_per_second": 4.098, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_scitail-pairs-pos_loss": 0.27747786045074463, + "eval_scitail-pairs-pos_runtime": 2.4093, + "eval_scitail-pairs-pos_samples_per_second": 66.41, + "eval_scitail-pairs-pos_steps_per_second": 2.075, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_xsum-pairs_loss": 0.2729683518409729, + "eval_xsum-pairs_runtime": 1.4228, + "eval_xsum-pairs_samples_per_second": 112.455, + "eval_xsum-pairs_steps_per_second": 3.514, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_compression-pairs_loss": 0.055649857968091965, + "eval_compression-pairs_runtime": 0.4125, + "eval_compression-pairs_samples_per_second": 387.884, + "eval_compression-pairs_steps_per_second": 12.121, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_sciq_pairs_loss": 0.22532808780670166, + "eval_sciq_pairs_runtime": 7.9946, + "eval_sciq_pairs_samples_per_second": 20.013, + "eval_sciq_pairs_steps_per_second": 0.625, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_qasc_pairs_loss": 0.13675269484519958, + "eval_qasc_pairs_runtime": 1.4473, + "eval_qasc_pairs_samples_per_second": 110.553, + "eval_qasc_pairs_steps_per_second": 3.455, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_qasc_facts_sym_loss": 0.10979139804840088, + "eval_qasc_facts_sym_runtime": 0.3399, + "eval_qasc_facts_sym_samples_per_second": 470.673, + "eval_qasc_facts_sym_steps_per_second": 14.709, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_openbookqa_pairs_loss": 1.6146221160888672, + "eval_openbookqa_pairs_runtime": 1.2438, + "eval_openbookqa_pairs_samples_per_second": 128.633, + "eval_openbookqa_pairs_steps_per_second": 4.02, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_msmarco_pairs_loss": 0.5042445063591003, + "eval_msmarco_pairs_runtime": 3.1207, + "eval_msmarco_pairs_samples_per_second": 51.271, + "eval_msmarco_pairs_steps_per_second": 1.602, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_nq_pairs_loss": 0.34877270460128784, + "eval_nq_pairs_runtime": 7.7338, + "eval_nq_pairs_samples_per_second": 20.688, + "eval_nq_pairs_steps_per_second": 0.647, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_trivia_pairs_loss": 0.6838275790214539, + "eval_trivia_pairs_runtime": 10.2133, + "eval_trivia_pairs_samples_per_second": 15.666, + "eval_trivia_pairs_steps_per_second": 0.49, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_quora_pairs_loss": 0.16937024891376495, + "eval_quora_pairs_runtime": 3.9408, + "eval_quora_pairs_samples_per_second": 171.286, + "eval_quora_pairs_steps_per_second": 5.583, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_gooaq_pairs_loss": 0.44292718172073364, + "eval_gooaq_pairs_runtime": 2.2878, + "eval_gooaq_pairs_samples_per_second": 69.935, + "eval_gooaq_pairs_steps_per_second": 2.185, + "step": 19380 + }, + { + "epoch": 3.751451800232288, + "eval_mrpc_pairs_loss": 0.025487110018730164, + "eval_mrpc_pairs_runtime": 0.3866, + "eval_mrpc_pairs_samples_per_second": 413.842, + "eval_mrpc_pairs_steps_per_second": 12.933, + "step": 19380 + }, + { + "epoch": 3.762098335269067, + "grad_norm": 1.5713428258895874, + "learning_rate": 2.9636000229921248e-05, + "loss": 0.1775, + "step": 19435 + }, + { + "epoch": 3.7746806039488967, + "grad_norm": 1.9037290811538696, + "learning_rate": 2.9532211981091813e-05, + "loss": 0.179, + "step": 19500 + }, + { + "epoch": 3.7872628726287263, + "grad_norm": 0.3006834387779236, + "learning_rate": 2.9415652285501175e-05, + "loss": 0.225, + "step": 19565 + }, + { + "epoch": 3.799845141308556, + "grad_norm": 4.1254754066467285, + "learning_rate": 2.9288507113735076e-05, + "loss": 0.168, + "step": 19630 + }, + { + "epoch": 3.8124274099883855, + "grad_norm": 1.7347090244293213, + "learning_rate": 2.9146915214559173e-05, + "loss": 0.2078, + "step": 19695 + }, + { + "epoch": 3.8250096786682155, + "grad_norm": 0.538968026638031, + "learning_rate": 2.899289048170777e-05, + "loss": 0.1772, + "step": 19760 + }, + { + "epoch": 3.837591947348045, + "grad_norm": 2.847313404083252, + "learning_rate": 2.8826568277827517e-05, + "loss": 0.1729, + "step": 19825 + }, + { + "epoch": 3.8501742160278747, + "grad_norm": 3.284494638442993, + "learning_rate": 2.8648094773038627e-05, + "loss": 0.1674, + "step": 19890 + }, + { + "epoch": 3.8627564847077043, + "grad_norm": 5.920264720916748, + "learning_rate": 2.8457626816475117e-05, + "loss": 0.1964, + "step": 19955 + }, + { + "epoch": 3.875338753387534, + "grad_norm": 0.21167056262493134, + "learning_rate": 2.8255331798439983e-05, + "loss": 0.1475, + "step": 20020 + }, + { + "epoch": 3.8879210220673635, + "grad_norm": 1.4526911973953247, + "learning_rate": 2.804138750329644e-05, + "loss": 0.2376, + "step": 20085 + }, + { + "epoch": 3.900503290747193, + "grad_norm": 4.268107891082764, + "learning_rate": 2.7815981953224388e-05, + "loss": 0.1967, + "step": 20150 + }, + { + "epoch": 3.9130855594270226, + "grad_norm": 1.4916267395019531, + "learning_rate": 2.757931324297952e-05, + "loss": 0.238, + "step": 20215 + }, + { + "epoch": 3.925667828106852, + "grad_norm": 2.0566771030426025, + "learning_rate": 2.733158936580037e-05, + "loss": 0.2088, + "step": 20280 + }, + { + "epoch": 3.9382500967866823, + "grad_norm": 0.14723509550094604, + "learning_rate": 2.707302803061613e-05, + "loss": 0.2234, + "step": 20345 + }, + { + "epoch": 3.950832365466512, + "grad_norm": 4.088009834289551, + "learning_rate": 2.68038564707159e-05, + "loss": 0.2084, + "step": 20410 + }, + { + "epoch": 3.9634146341463414, + "grad_norm": 5.46230936050415, + "learning_rate": 2.652431124404776e-05, + "loss": 0.276, + "step": 20475 + }, + { + "epoch": 3.975996902826171, + "grad_norm": 0.2442116141319275, + "learning_rate": 2.6234638025322753e-05, + "loss": 0.1641, + "step": 20540 + }, + { + "epoch": 3.9885791715060006, + "grad_norm": 3.910892963409424, + "learning_rate": 2.593509139010695e-05, + "loss": 0.1624, + "step": 20605 + }, + { + "epoch": 4.001161440185831, + "grad_norm": 0.3063616156578064, + "learning_rate": 2.5625934591090952e-05, + "loss": 0.2049, + "step": 20670 + }, + { + "epoch": 4.00154858691444, + "eval_StS-test_pearson_cosine": 0.8750931016826595, + "eval_StS-test_pearson_dot": 0.7528798867496775, + "eval_StS-test_pearson_euclidean": 0.8596040324559132, + "eval_StS-test_pearson_manhattan": 0.8576391829193253, + "eval_StS-test_pearson_max": 0.8750931016826595, + "eval_StS-test_spearman_cosine": 0.8938139167778957, + "eval_StS-test_spearman_dot": 0.7453007954801198, + "eval_StS-test_spearman_euclidean": 0.8649762059986812, + "eval_StS-test_spearman_manhattan": 0.8633117319487409, + "eval_StS-test_spearman_max": 0.8938139167778957, + "eval_Vitaminc-test_cosine_accuracy": 0.5710526315789474, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.785514235496521, + "eval_Vitaminc-test_cosine_ap": 0.5538123804323005, + "eval_Vitaminc-test_cosine_f1": 0.6778398510242085, + "eval_Vitaminc-test_cosine_f1_threshold": 0.45756226778030396, + "eval_Vitaminc-test_cosine_precision": 0.5170454545454546, + "eval_Vitaminc-test_cosine_recall": 0.9837837837837838, + "eval_Vitaminc-test_dot_accuracy": 0.5552631578947368, + "eval_Vitaminc-test_dot_accuracy_threshold": 11.234939575195312, + "eval_Vitaminc-test_dot_ap": 0.5407038172991465, + "eval_Vitaminc-test_dot_f1": 0.6715328467153285, + "eval_Vitaminc-test_dot_f1_threshold": 7.919400215148926, + "eval_Vitaminc-test_dot_precision": 0.5068870523415978, + "eval_Vitaminc-test_dot_recall": 0.9945945945945946, + "eval_Vitaminc-test_euclidean_accuracy": 0.5763157894736842, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.191226005554199, + "eval_Vitaminc-test_euclidean_ap": 0.5517470320076553, + "eval_Vitaminc-test_euclidean_f1": 0.6703703703703704, + "eval_Vitaminc-test_euclidean_f1_threshold": 5.36379337310791, + "eval_Vitaminc-test_euclidean_precision": 0.5098591549295775, + "eval_Vitaminc-test_euclidean_recall": 0.9783783783783784, + "eval_Vitaminc-test_manhattan_accuracy": 0.5815789473684211, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 64.27323913574219, + "eval_Vitaminc-test_manhattan_ap": 0.5526594530059742, + "eval_Vitaminc-test_manhattan_f1": 0.6654411764705882, + "eval_Vitaminc-test_manhattan_f1_threshold": 114.15983581542969, + "eval_Vitaminc-test_manhattan_precision": 0.5041782729805014, + "eval_Vitaminc-test_manhattan_recall": 0.9783783783783784, + "eval_Vitaminc-test_max_accuracy": 0.5815789473684211, + "eval_Vitaminc-test_max_accuracy_threshold": 64.27323913574219, + "eval_Vitaminc-test_max_ap": 0.5538123804323005, + "eval_Vitaminc-test_max_f1": 0.6778398510242085, + "eval_Vitaminc-test_max_f1_threshold": 114.15983581542969, + "eval_Vitaminc-test_max_precision": 0.5170454545454546, + "eval_Vitaminc-test_max_recall": 0.9945945945945946, + "eval_mrpc-test_cosine_accuracy": 0.7368421052631579, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7361759543418884, + "eval_mrpc-test_cosine_ap": 0.8453284249119957, + "eval_mrpc-test_cosine_f1": 0.823321554770318, + "eval_mrpc-test_cosine_f1_threshold": 0.7002996206283569, + "eval_mrpc-test_cosine_precision": 0.7327044025157232, + "eval_mrpc-test_cosine_recall": 0.9395161290322581, + "eval_mrpc-test_dot_accuracy": 0.6973684210526315, + "eval_mrpc-test_dot_accuracy_threshold": 8.213135719299316, + "eval_mrpc-test_dot_ap": 0.7568205630034259, + "eval_mrpc-test_dot_f1": 0.8099173553719008, + "eval_mrpc-test_dot_f1_threshold": 7.905356407165527, + "eval_mrpc-test_dot_precision": 0.6862745098039216, + "eval_mrpc-test_dot_recall": 0.9879032258064516, + "eval_mrpc-test_euclidean_accuracy": 0.7315789473684211, + "eval_mrpc-test_euclidean_accuracy_threshold": 2.9826178550720215, + "eval_mrpc-test_euclidean_ap": 0.8276910261659405, + "eval_mrpc-test_euclidean_f1": 0.8165467625899281, + "eval_mrpc-test_euclidean_f1_threshold": 3.032474994659424, + "eval_mrpc-test_euclidean_precision": 0.737012987012987, + "eval_mrpc-test_euclidean_recall": 0.9153225806451613, + "eval_mrpc-test_manhattan_accuracy": 0.7342105263157894, + "eval_mrpc-test_manhattan_accuracy_threshold": 58.31708526611328, + "eval_mrpc-test_manhattan_ap": 0.8277820763569756, + "eval_mrpc-test_manhattan_f1": 0.818018018018018, + "eval_mrpc-test_manhattan_f1_threshold": 59.713478088378906, + "eval_mrpc-test_manhattan_precision": 0.739413680781759, + "eval_mrpc-test_manhattan_recall": 0.9153225806451613, + "eval_mrpc-test_max_accuracy": 0.7368421052631579, + "eval_mrpc-test_max_accuracy_threshold": 58.31708526611328, + "eval_mrpc-test_max_ap": 0.8453284249119957, + "eval_mrpc-test_max_f1": 0.823321554770318, + "eval_mrpc-test_max_f1_threshold": 59.713478088378906, + "eval_mrpc-test_max_precision": 0.739413680781759, + "eval_mrpc-test_max_recall": 0.9879032258064516, + "eval_nli-pairs_loss": 0.8549426198005676, + "eval_nli-pairs_runtime": 3.2134, + "eval_nli-pairs_samples_per_second": 49.792, + "eval_nli-pairs_steps_per_second": 1.556, + "eval_sequential_score": 0.5538123804323005, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_vitaminc-pairs_loss": 5.757671356201172, + "eval_vitaminc-pairs_runtime": 1.6437, + "eval_vitaminc-pairs_samples_per_second": 80.913, + "eval_vitaminc-pairs_steps_per_second": 3.042, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_qnli-contrastive_loss": 0.14272728562355042, + "eval_qnli-contrastive_runtime": 0.5575, + "eval_qnli-contrastive_samples_per_second": 287.005, + "eval_qnli-contrastive_steps_per_second": 8.969, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_scitail-pairs-qa_loss": 0.037104763090610504, + "eval_scitail-pairs-qa_runtime": 1.2412, + "eval_scitail-pairs-qa_samples_per_second": 128.906, + "eval_scitail-pairs-qa_steps_per_second": 4.028, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_scitail-pairs-pos_loss": 0.2630062401294708, + "eval_scitail-pairs-pos_runtime": 2.7981, + "eval_scitail-pairs-pos_samples_per_second": 57.182, + "eval_scitail-pairs-pos_steps_per_second": 1.787, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_xsum-pairs_loss": 0.2699679732322693, + "eval_xsum-pairs_runtime": 1.4262, + "eval_xsum-pairs_samples_per_second": 112.188, + "eval_xsum-pairs_steps_per_second": 3.506, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_compression-pairs_loss": 0.05051081255078316, + "eval_compression-pairs_runtime": 0.4202, + "eval_compression-pairs_samples_per_second": 380.805, + "eval_compression-pairs_steps_per_second": 11.9, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_sciq_pairs_loss": 0.22690464556217194, + "eval_sciq_pairs_runtime": 8.2116, + "eval_sciq_pairs_samples_per_second": 19.485, + "eval_sciq_pairs_steps_per_second": 0.609, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_qasc_pairs_loss": 0.14700038731098175, + "eval_qasc_pairs_runtime": 1.5727, + "eval_qasc_pairs_samples_per_second": 101.735, + "eval_qasc_pairs_steps_per_second": 3.179, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_qasc_facts_sym_loss": 0.1227593868970871, + "eval_qasc_facts_sym_runtime": 0.3467, + "eval_qasc_facts_sym_samples_per_second": 461.554, + "eval_qasc_facts_sym_steps_per_second": 14.424, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_openbookqa_pairs_loss": 1.6408637762069702, + "eval_openbookqa_pairs_runtime": 1.382, + "eval_openbookqa_pairs_samples_per_second": 115.772, + "eval_openbookqa_pairs_steps_per_second": 3.618, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_msmarco_pairs_loss": 0.508281409740448, + "eval_msmarco_pairs_runtime": 3.2197, + "eval_msmarco_pairs_samples_per_second": 49.694, + "eval_msmarco_pairs_steps_per_second": 1.553, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_nq_pairs_loss": 0.4071750044822693, + "eval_nq_pairs_runtime": 7.8411, + "eval_nq_pairs_samples_per_second": 20.405, + "eval_nq_pairs_steps_per_second": 0.638, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_trivia_pairs_loss": 0.6768335103988647, + "eval_trivia_pairs_runtime": 10.4692, + "eval_trivia_pairs_samples_per_second": 15.283, + "eval_trivia_pairs_steps_per_second": 0.478, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_quora_pairs_loss": 0.1442810744047165, + "eval_quora_pairs_runtime": 4.0313, + "eval_quora_pairs_samples_per_second": 167.439, + "eval_quora_pairs_steps_per_second": 5.457, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_gooaq_pairs_loss": 0.3749992251396179, + "eval_gooaq_pairs_runtime": 2.3247, + "eval_gooaq_pairs_samples_per_second": 68.827, + "eval_gooaq_pairs_steps_per_second": 2.151, + "step": 20672 + }, + { + "epoch": 4.00154858691444, + "eval_mrpc_pairs_loss": 0.02576417662203312, + "eval_mrpc_pairs_runtime": 0.388, + "eval_mrpc_pairs_samples_per_second": 412.418, + "eval_mrpc_pairs_steps_per_second": 12.888, + "step": 20672 + }, + { + "epoch": 4.01374370886566, + "grad_norm": 2.267441987991333, + "learning_rate": 2.530743932673362e-05, + "loss": 0.1442, + "step": 20735 + }, + { + "epoch": 4.02632597754549, + "grad_norm": 5.09822940826416, + "learning_rate": 2.497988550248347e-05, + "loss": 0.1301, + "step": 20800 + }, + { + "epoch": 4.038908246225319, + "grad_norm": 2.3969829082489014, + "learning_rate": 2.464356098478738e-05, + "loss": 0.2229, + "step": 20865 + }, + { + "epoch": 4.051490514905149, + "grad_norm": 0.22940613329410553, + "learning_rate": 2.4298761348102794e-05, + "loss": 0.2103, + "step": 20930 + }, + { + "epoch": 4.064072783584979, + "grad_norm": 1.4287828207015991, + "learning_rate": 2.394578961513602e-05, + "loss": 0.2235, + "step": 20995 + }, + { + "epoch": 4.076655052264808, + "grad_norm": 4.12492036819458, + "learning_rate": 2.3584955990534625e-05, + "loss": 0.2242, + "step": 21060 + }, + { + "epoch": 4.089237320944638, + "grad_norm": 1.9469503164291382, + "learning_rate": 2.3216577588268062e-05, + "loss": 0.2803, + "step": 21125 + }, + { + "epoch": 4.101819589624467, + "grad_norm": 0.6210708618164062, + "learning_rate": 2.2840978152936186e-05, + "loss": 0.2094, + "step": 21190 + }, + { + "epoch": 4.114401858304297, + "grad_norm": 8.642997741699219, + "learning_rate": 2.2458487775250408e-05, + "loss": 0.2196, + "step": 21255 + }, + { + "epoch": 4.1269841269841265, + "grad_norm": 0.306015282869339, + "learning_rate": 2.20694426019379e-05, + "loss": 0.1926, + "step": 21320 + }, + { + "epoch": 4.139566395663957, + "grad_norm": 15.104653358459473, + "learning_rate": 2.16741845403232e-05, + "loss": 0.1866, + "step": 21385 + }, + { + "epoch": 4.152148664343787, + "grad_norm": 16.314868927001953, + "learning_rate": 2.127306095784754e-05, + "loss": 0.2873, + "step": 21450 + }, + { + "epoch": 4.164730933023616, + "grad_norm": 2.250847578048706, + "learning_rate": 2.08664243767893e-05, + "loss": 0.2281, + "step": 21515 + }, + { + "epoch": 4.177313201703446, + "grad_norm": 0.29784318804740906, + "learning_rate": 2.0454632164454574e-05, + "loss": 0.2539, + "step": 21580 + }, + { + "epoch": 4.189895470383275, + "grad_norm": 3.4132087230682373, + "learning_rate": 2.003804621910928e-05, + "loss": 0.2232, + "step": 21645 + }, + { + "epoch": 4.202477739063105, + "grad_norm": 0.33203577995300293, + "learning_rate": 1.9617032651929685e-05, + "loss": 0.2111, + "step": 21710 + }, + { + "epoch": 4.2150600077429345, + "grad_norm": 3.6074330806732178, + "learning_rate": 1.919196146525035e-05, + "loss": 0.2502, + "step": 21775 + }, + { + "epoch": 4.227642276422764, + "grad_norm": 4.078161716461182, + "learning_rate": 1.8763206227392307e-05, + "loss": 0.2432, + "step": 21840 + }, + { + "epoch": 4.240224545102594, + "grad_norm": 6.575666904449463, + "learning_rate": 1.8337814009344705e-05, + "loss": 0.2261, + "step": 21905 + }, + { + "epoch": 4.251645373596594, + "eval_StS-test_pearson_cosine": 0.8738325263775029, + "eval_StS-test_pearson_dot": 0.7708615429651562, + "eval_StS-test_pearson_euclidean": 0.8601389926129998, + "eval_StS-test_pearson_manhattan": 0.8578889010939907, + "eval_StS-test_pearson_max": 0.8738325263775029, + "eval_StS-test_spearman_cosine": 0.8918479654814222, + "eval_StS-test_spearman_dot": 0.7754460023496, + "eval_StS-test_spearman_euclidean": 0.864699468715008, + "eval_StS-test_spearman_manhattan": 0.8639355949944897, + "eval_StS-test_spearman_max": 0.8918479654814222, + "eval_Vitaminc-test_cosine_accuracy": 0.5842105263157895, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.6854072213172913, + "eval_Vitaminc-test_cosine_ap": 0.5568954471097083, + "eval_Vitaminc-test_cosine_f1": 0.6779661016949152, + "eval_Vitaminc-test_cosine_f1_threshold": 0.4551093280315399, + "eval_Vitaminc-test_cosine_precision": 0.5202312138728323, + "eval_Vitaminc-test_cosine_recall": 0.972972972972973, + "eval_Vitaminc-test_dot_accuracy": 0.5605263157894737, + "eval_Vitaminc-test_dot_accuracy_threshold": 12.5089750289917, + "eval_Vitaminc-test_dot_ap": 0.538198865243154, + "eval_Vitaminc-test_dot_f1": 0.6715867158671587, + "eval_Vitaminc-test_dot_f1_threshold": 7.747126579284668, + "eval_Vitaminc-test_dot_precision": 0.5098039215686274, + "eval_Vitaminc-test_dot_recall": 0.9837837837837838, + "eval_Vitaminc-test_euclidean_accuracy": 0.5657894736842105, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 3.2167530059814453, + "eval_Vitaminc-test_euclidean_ap": 0.550658853361431, + "eval_Vitaminc-test_euclidean_f1": 0.6666666666666666, + "eval_Vitaminc-test_euclidean_f1_threshold": 5.500119686126709, + "eval_Vitaminc-test_euclidean_precision": 0.5027472527472527, + "eval_Vitaminc-test_euclidean_recall": 0.9891891891891892, + "eval_Vitaminc-test_manhattan_accuracy": 0.5684210526315789, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 63.02822494506836, + "eval_Vitaminc-test_manhattan_ap": 0.5526335700218852, + "eval_Vitaminc-test_manhattan_f1": 0.6666666666666666, + "eval_Vitaminc-test_manhattan_f1_threshold": 113.22319030761719, + "eval_Vitaminc-test_manhattan_precision": 0.5027472527472527, + "eval_Vitaminc-test_manhattan_recall": 0.9891891891891892, + "eval_Vitaminc-test_max_accuracy": 0.5842105263157895, + "eval_Vitaminc-test_max_accuracy_threshold": 63.02822494506836, + "eval_Vitaminc-test_max_ap": 0.5568954471097083, + "eval_Vitaminc-test_max_f1": 0.6779661016949152, + "eval_Vitaminc-test_max_f1_threshold": 113.22319030761719, + "eval_Vitaminc-test_max_precision": 0.5202312138728323, + "eval_Vitaminc-test_max_recall": 0.9891891891891892, + "eval_mrpc-test_cosine_accuracy": 0.7368421052631579, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7062541842460632, + "eval_mrpc-test_cosine_ap": 0.8471734940379884, + "eval_mrpc-test_cosine_f1": 0.8226950354609929, + "eval_mrpc-test_cosine_f1_threshold": 0.6996989250183105, + "eval_mrpc-test_cosine_precision": 0.7341772151898734, + "eval_mrpc-test_cosine_recall": 0.9354838709677419, + "eval_mrpc-test_dot_accuracy": 0.6921052631578948, + "eval_mrpc-test_dot_accuracy_threshold": 7.777188301086426, + "eval_mrpc-test_dot_ap": 0.7490093903788431, + "eval_mrpc-test_dot_f1": 0.8072487644151566, + "eval_mrpc-test_dot_f1_threshold": 6.582016944885254, + "eval_mrpc-test_dot_precision": 0.6824512534818942, + "eval_mrpc-test_dot_recall": 0.9879032258064516, + "eval_mrpc-test_euclidean_accuracy": 0.7342105263157894, + "eval_mrpc-test_euclidean_accuracy_threshold": 2.782139539718628, + "eval_mrpc-test_euclidean_ap": 0.827030801717203, + "eval_mrpc-test_euclidean_f1": 0.8193202146690519, + "eval_mrpc-test_euclidean_f1_threshold": 2.9623780250549316, + "eval_mrpc-test_euclidean_precision": 0.7363344051446945, + "eval_mrpc-test_euclidean_recall": 0.9233870967741935, + "eval_mrpc-test_manhattan_accuracy": 0.7421052631578947, + "eval_mrpc-test_manhattan_accuracy_threshold": 55.787437438964844, + "eval_mrpc-test_manhattan_ap": 0.8262345455513962, + "eval_mrpc-test_manhattan_f1": 0.8209764918625678, + "eval_mrpc-test_manhattan_f1_threshold": 56.79448318481445, + "eval_mrpc-test_manhattan_precision": 0.7442622950819672, + "eval_mrpc-test_manhattan_recall": 0.9153225806451613, + "eval_mrpc-test_max_accuracy": 0.7421052631578947, + "eval_mrpc-test_max_accuracy_threshold": 55.787437438964844, + "eval_mrpc-test_max_ap": 0.8471734940379884, + "eval_mrpc-test_max_f1": 0.8226950354609929, + "eval_mrpc-test_max_f1_threshold": 56.79448318481445, + "eval_mrpc-test_max_precision": 0.7442622950819672, + "eval_mrpc-test_max_recall": 0.9879032258064516, + "eval_nli-pairs_loss": 0.7817401885986328, + "eval_nli-pairs_runtime": 3.0345, + "eval_nli-pairs_samples_per_second": 52.727, + "eval_nli-pairs_steps_per_second": 1.648, + "eval_sequential_score": 0.5568954471097083, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_vitaminc-pairs_loss": 5.9666008949279785, + "eval_vitaminc-pairs_runtime": 1.5793, + "eval_vitaminc-pairs_samples_per_second": 84.215, + "eval_vitaminc-pairs_steps_per_second": 3.166, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_qnli-contrastive_loss": 0.10523457825183868, + "eval_qnli-contrastive_runtime": 0.5394, + "eval_qnli-contrastive_samples_per_second": 296.625, + "eval_qnli-contrastive_steps_per_second": 9.27, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_scitail-pairs-qa_loss": 0.03577850013971329, + "eval_scitail-pairs-qa_runtime": 1.2305, + "eval_scitail-pairs-qa_samples_per_second": 130.026, + "eval_scitail-pairs-qa_steps_per_second": 4.063, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_scitail-pairs-pos_loss": 0.2494029849767685, + "eval_scitail-pairs-pos_runtime": 2.4173, + "eval_scitail-pairs-pos_samples_per_second": 66.188, + "eval_scitail-pairs-pos_steps_per_second": 2.068, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_xsum-pairs_loss": 0.26588425040245056, + "eval_xsum-pairs_runtime": 1.4156, + "eval_xsum-pairs_samples_per_second": 113.027, + "eval_xsum-pairs_steps_per_second": 3.532, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_compression-pairs_loss": 0.05197296291589737, + "eval_compression-pairs_runtime": 0.4041, + "eval_compression-pairs_samples_per_second": 395.981, + "eval_compression-pairs_steps_per_second": 12.374, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_sciq_pairs_loss": 0.22115518152713776, + "eval_sciq_pairs_runtime": 7.9696, + "eval_sciq_pairs_samples_per_second": 20.076, + "eval_sciq_pairs_steps_per_second": 0.627, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_qasc_pairs_loss": 0.12823519110679626, + "eval_qasc_pairs_runtime": 1.4565, + "eval_qasc_pairs_samples_per_second": 109.856, + "eval_qasc_pairs_steps_per_second": 3.433, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_qasc_facts_sym_loss": 0.1139301210641861, + "eval_qasc_facts_sym_runtime": 0.3301, + "eval_qasc_facts_sym_samples_per_second": 484.765, + "eval_qasc_facts_sym_steps_per_second": 15.149, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_openbookqa_pairs_loss": 1.6211611032485962, + "eval_openbookqa_pairs_runtime": 1.2439, + "eval_openbookqa_pairs_samples_per_second": 128.625, + "eval_openbookqa_pairs_steps_per_second": 4.02, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_msmarco_pairs_loss": 0.5529596209526062, + "eval_msmarco_pairs_runtime": 3.1232, + "eval_msmarco_pairs_samples_per_second": 51.229, + "eval_msmarco_pairs_steps_per_second": 1.601, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_nq_pairs_loss": 0.36157506704330444, + "eval_nq_pairs_runtime": 7.7301, + "eval_nq_pairs_samples_per_second": 20.698, + "eval_nq_pairs_steps_per_second": 0.647, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_trivia_pairs_loss": 0.6664417386054993, + "eval_trivia_pairs_runtime": 10.1737, + "eval_trivia_pairs_samples_per_second": 15.727, + "eval_trivia_pairs_steps_per_second": 0.491, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_quora_pairs_loss": 0.16971223056316376, + "eval_quora_pairs_runtime": 3.9513, + "eval_quora_pairs_samples_per_second": 170.83, + "eval_quora_pairs_steps_per_second": 5.568, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_gooaq_pairs_loss": 0.4112943112850189, + "eval_gooaq_pairs_runtime": 2.2892, + "eval_gooaq_pairs_samples_per_second": 69.895, + "eval_gooaq_pairs_steps_per_second": 2.184, + "step": 21964 + }, + { + "epoch": 4.251645373596594, + "eval_mrpc_pairs_loss": 0.023606743663549423, + "eval_mrpc_pairs_runtime": 0.3846, + "eval_mrpc_pairs_samples_per_second": 415.966, + "eval_mrpc_pairs_steps_per_second": 12.999, + "step": 21964 + }, + { + "epoch": 4.252806813782423, + "grad_norm": 0.3098312020301819, + "learning_rate": 1.7902866143381558e-05, + "loss": 0.2843, + "step": 21970 + }, + { + "epoch": 4.265389082462253, + "grad_norm": 1.2247912883758545, + "learning_rate": 1.7465367131009776e-05, + "loss": 0.2221, + "step": 22035 + }, + { + "epoch": 4.2779713511420825, + "grad_norm": 4.876120567321777, + "learning_rate": 1.702570146258441e-05, + "loss": 0.3073, + "step": 22100 + }, + { + "epoch": 4.290553619821912, + "grad_norm": 16.738489151000977, + "learning_rate": 1.6584255532598227e-05, + "loss": 0.2695, + "step": 22165 + }, + { + "epoch": 4.303135888501743, + "grad_norm": 0.21949075162410736, + "learning_rate": 1.6141417300103866e-05, + "loss": 0.2286, + "step": 22230 + }, + { + "epoch": 4.315718157181572, + "grad_norm": 16.98850440979004, + "learning_rate": 1.569757594776104e-05, + "loss": 0.2236, + "step": 22295 + }, + { + "epoch": 4.328300425861402, + "grad_norm": 0.08736617863178253, + "learning_rate": 1.525312153980833e-05, + "loss": 0.311, + "step": 22360 + }, + { + "epoch": 4.340882694541231, + "grad_norm": 0.7681021690368652, + "learning_rate": 1.48084446792604e-05, + "loss": 0.1523, + "step": 22425 + }, + { + "epoch": 4.353464963221061, + "grad_norm": 0.5627409815788269, + "learning_rate": 1.4363936164631424e-05, + "loss": 0.137, + "step": 22490 + }, + { + "epoch": 4.3660472319008905, + "grad_norm": 12.019976615905762, + "learning_rate": 1.3919986646487015e-05, + "loss": 0.1888, + "step": 22555 + }, + { + "epoch": 4.37862950058072, + "grad_norm": 0.4080559015274048, + "learning_rate": 1.347698628412585e-05, + "loss": 0.1886, + "step": 22620 + }, + { + "epoch": 4.39121176926055, + "grad_norm": 1.7257754802703857, + "learning_rate": 1.3035324402693452e-05, + "loss": 0.1539, + "step": 22685 + }, + { + "epoch": 4.403794037940379, + "grad_norm": 1.4404957294464111, + "learning_rate": 1.2595389151028544e-05, + "loss": 0.1141, + "step": 22750 + }, + { + "epoch": 4.416376306620209, + "grad_norm": 0.5989357829093933, + "learning_rate": 1.2157567160543606e-05, + "loss": 0.2414, + "step": 22815 + }, + { + "epoch": 4.4289585753000384, + "grad_norm": 1.9353371858596802, + "learning_rate": 1.1722243205438878e-05, + "loss": 0.1844, + "step": 22880 + }, + { + "epoch": 4.441540843979868, + "grad_norm": 0.2850472033023834, + "learning_rate": 1.1289799864548468e-05, + "loss": 0.1882, + "step": 22945 + }, + { + "epoch": 4.454123112659698, + "grad_norm": 4.457968235015869, + "learning_rate": 1.0860617185115858e-05, + "loss": 0.2267, + "step": 23010 + }, + { + "epoch": 4.466705381339528, + "grad_norm": 2.144094944000244, + "learning_rate": 1.0435072348794313e-05, + "loss": 0.1586, + "step": 23075 + }, + { + "epoch": 4.479287650019358, + "grad_norm": 3.16145658493042, + "learning_rate": 1.0013539340165799e-05, + "loss": 0.1833, + "step": 23140 + }, + { + "epoch": 4.491869918699187, + "grad_norm": 5.41245174407959, + "learning_rate": 9.596388618069195e-06, + "loss": 0.1505, + "step": 23205 + }, + { + "epoch": 4.501742160278746, + "eval_StS-test_pearson_cosine": 0.8816702865221286, + "eval_StS-test_pearson_dot": 0.7894071456966113, + "eval_StS-test_pearson_euclidean": 0.8662344171069792, + "eval_StS-test_pearson_manhattan": 0.8645345748649402, + "eval_StS-test_pearson_max": 0.8816702865221286, + "eval_StS-test_spearman_cosine": 0.8968976546264207, + "eval_StS-test_spearman_dot": 0.7860133600255552, + "eval_StS-test_spearman_euclidean": 0.8683799213314533, + "eval_StS-test_spearman_manhattan": 0.8675563870810289, + "eval_StS-test_spearman_max": 0.8968976546264207, + "eval_Vitaminc-test_cosine_accuracy": 0.5842105263157895, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.6739277243614197, + "eval_Vitaminc-test_cosine_ap": 0.5589703949384612, + "eval_Vitaminc-test_cosine_f1": 0.6716981132075472, + "eval_Vitaminc-test_cosine_f1_threshold": 0.432912677526474, + "eval_Vitaminc-test_cosine_precision": 0.5159420289855072, + "eval_Vitaminc-test_cosine_recall": 0.9621621621621622, + "eval_Vitaminc-test_dot_accuracy": 0.5578947368421052, + "eval_Vitaminc-test_dot_accuracy_threshold": 10.692544937133789, + "eval_Vitaminc-test_dot_ap": 0.5593745513398436, + "eval_Vitaminc-test_dot_f1": 0.6691312384473197, + "eval_Vitaminc-test_dot_f1_threshold": 6.927554607391357, + "eval_Vitaminc-test_dot_precision": 0.5084269662921348, + "eval_Vitaminc-test_dot_recall": 0.9783783783783784, + "eval_Vitaminc-test_euclidean_accuracy": 0.5736842105263158, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 2.90975284576416, + "eval_Vitaminc-test_euclidean_ap": 0.5526828083289721, + "eval_Vitaminc-test_euclidean_f1": 0.6666666666666666, + "eval_Vitaminc-test_euclidean_f1_threshold": 5.231945037841797, + "eval_Vitaminc-test_euclidean_precision": 0.5013623978201635, + "eval_Vitaminc-test_euclidean_recall": 0.9945945945945946, + "eval_Vitaminc-test_manhattan_accuracy": 0.5736842105263158, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 59.42338180541992, + "eval_Vitaminc-test_manhattan_ap": 0.5548476157371564, + "eval_Vitaminc-test_manhattan_f1": 0.6678765880217786, + "eval_Vitaminc-test_manhattan_f1_threshold": 106.84965515136719, + "eval_Vitaminc-test_manhattan_precision": 0.5027322404371585, + "eval_Vitaminc-test_manhattan_recall": 0.9945945945945946, + "eval_Vitaminc-test_max_accuracy": 0.5842105263157895, + "eval_Vitaminc-test_max_accuracy_threshold": 59.42338180541992, + "eval_Vitaminc-test_max_ap": 0.5593745513398436, + "eval_Vitaminc-test_max_f1": 0.6716981132075472, + "eval_Vitaminc-test_max_f1_threshold": 106.84965515136719, + "eval_Vitaminc-test_max_precision": 0.5159420289855072, + "eval_Vitaminc-test_max_recall": 0.9945945945945946, + "eval_mrpc-test_cosine_accuracy": 0.7289473684210527, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7656036615371704, + "eval_mrpc-test_cosine_ap": 0.8388786109810218, + "eval_mrpc-test_cosine_f1": 0.8180300500834724, + "eval_mrpc-test_cosine_f1_threshold": 0.6135265827178955, + "eval_mrpc-test_cosine_precision": 0.698005698005698, + "eval_mrpc-test_cosine_recall": 0.9879032258064516, + "eval_mrpc-test_dot_accuracy": 0.6947368421052632, + "eval_mrpc-test_dot_accuracy_threshold": 7.60512638092041, + "eval_mrpc-test_dot_ap": 0.7526445714043273, + "eval_mrpc-test_dot_f1": 0.804635761589404, + "eval_mrpc-test_dot_f1_threshold": 6.214287757873535, + "eval_mrpc-test_dot_precision": 0.6825842696629213, + "eval_mrpc-test_dot_recall": 0.9798387096774194, + "eval_mrpc-test_euclidean_accuracy": 0.7289473684210527, + "eval_mrpc-test_euclidean_accuracy_threshold": 2.930023670196533, + "eval_mrpc-test_euclidean_ap": 0.8236665039155787, + "eval_mrpc-test_euclidean_f1": 0.8189806678383128, + "eval_mrpc-test_euclidean_f1_threshold": 2.930023670196533, + "eval_mrpc-test_euclidean_precision": 0.7258566978193146, + "eval_mrpc-test_euclidean_recall": 0.9395161290322581, + "eval_mrpc-test_manhattan_accuracy": 0.7342105263157894, + "eval_mrpc-test_manhattan_accuracy_threshold": 55.630340576171875, + "eval_mrpc-test_manhattan_ap": 0.8215020127627809, + "eval_mrpc-test_manhattan_f1": 0.8188153310104529, + "eval_mrpc-test_manhattan_f1_threshold": 59.091949462890625, + "eval_mrpc-test_manhattan_precision": 0.7208588957055214, + "eval_mrpc-test_manhattan_recall": 0.9475806451612904, + "eval_mrpc-test_max_accuracy": 0.7342105263157894, + "eval_mrpc-test_max_accuracy_threshold": 55.630340576171875, + "eval_mrpc-test_max_ap": 0.8388786109810218, + "eval_mrpc-test_max_f1": 0.8189806678383128, + "eval_mrpc-test_max_f1_threshold": 59.091949462890625, + "eval_mrpc-test_max_precision": 0.7258566978193146, + "eval_mrpc-test_max_recall": 0.9879032258064516, + "eval_nli-pairs_loss": 0.7658803462982178, + "eval_nli-pairs_runtime": 3.343, + "eval_nli-pairs_samples_per_second": 47.862, + "eval_nli-pairs_steps_per_second": 1.496, + "eval_sequential_score": 0.5593745513398436, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_vitaminc-pairs_loss": 6.087721824645996, + "eval_vitaminc-pairs_runtime": 1.6495, + "eval_vitaminc-pairs_samples_per_second": 80.629, + "eval_vitaminc-pairs_steps_per_second": 3.031, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_qnli-contrastive_loss": 0.16503135859966278, + "eval_qnli-contrastive_runtime": 0.5683, + "eval_qnli-contrastive_samples_per_second": 281.564, + "eval_qnli-contrastive_steps_per_second": 8.799, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_scitail-pairs-qa_loss": 0.0311884768307209, + "eval_scitail-pairs-qa_runtime": 1.3796, + "eval_scitail-pairs-qa_samples_per_second": 115.973, + "eval_scitail-pairs-qa_steps_per_second": 3.624, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_scitail-pairs-pos_loss": 0.2662835419178009, + "eval_scitail-pairs-pos_runtime": 2.8353, + "eval_scitail-pairs-pos_samples_per_second": 56.432, + "eval_scitail-pairs-pos_steps_per_second": 1.763, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_xsum-pairs_loss": 0.2549589276313782, + "eval_xsum-pairs_runtime": 1.48, + "eval_xsum-pairs_samples_per_second": 108.107, + "eval_xsum-pairs_steps_per_second": 3.378, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_compression-pairs_loss": 0.0466877818107605, + "eval_compression-pairs_runtime": 0.4094, + "eval_compression-pairs_samples_per_second": 390.835, + "eval_compression-pairs_steps_per_second": 12.214, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_sciq_pairs_loss": 0.21744883060455322, + "eval_sciq_pairs_runtime": 8.389, + "eval_sciq_pairs_samples_per_second": 19.073, + "eval_sciq_pairs_steps_per_second": 0.596, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_qasc_pairs_loss": 0.11268162727355957, + "eval_qasc_pairs_runtime": 1.5933, + "eval_qasc_pairs_samples_per_second": 100.418, + "eval_qasc_pairs_steps_per_second": 3.138, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_qasc_facts_sym_loss": 0.10673767328262329, + "eval_qasc_facts_sym_runtime": 0.3455, + "eval_qasc_facts_sym_samples_per_second": 463.139, + "eval_qasc_facts_sym_steps_per_second": 14.473, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_openbookqa_pairs_loss": 1.5697686672210693, + "eval_openbookqa_pairs_runtime": 1.3837, + "eval_openbookqa_pairs_samples_per_second": 115.633, + "eval_openbookqa_pairs_steps_per_second": 3.614, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_msmarco_pairs_loss": 0.4660285413265228, + "eval_msmarco_pairs_runtime": 3.212, + "eval_msmarco_pairs_samples_per_second": 49.813, + "eval_msmarco_pairs_steps_per_second": 1.557, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_nq_pairs_loss": 0.3551401197910309, + "eval_nq_pairs_runtime": 7.8628, + "eval_nq_pairs_samples_per_second": 20.349, + "eval_nq_pairs_steps_per_second": 0.636, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_trivia_pairs_loss": 0.557299017906189, + "eval_trivia_pairs_runtime": 10.3596, + "eval_trivia_pairs_samples_per_second": 15.445, + "eval_trivia_pairs_steps_per_second": 0.483, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_quora_pairs_loss": 0.17031456530094147, + "eval_quora_pairs_runtime": 4.0048, + "eval_quora_pairs_samples_per_second": 168.546, + "eval_quora_pairs_steps_per_second": 5.493, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_gooaq_pairs_loss": 0.36391741037368774, + "eval_gooaq_pairs_runtime": 2.3095, + "eval_gooaq_pairs_samples_per_second": 69.278, + "eval_gooaq_pairs_steps_per_second": 2.165, + "step": 23256 + }, + { + "epoch": 4.501742160278746, + "eval_mrpc_pairs_loss": 0.021536488085985184, + "eval_mrpc_pairs_runtime": 0.3852, + "eval_mrpc_pairs_samples_per_second": 415.417, + "eval_mrpc_pairs_steps_per_second": 12.982, + "step": 23256 + }, + { + "epoch": 4.504452187379017, + "grad_norm": 0.9454036951065063, + "learning_rate": 9.183986790027585e-06, + "loss": 0.1539, + "step": 23270 + }, + { + "epoch": 4.5170344560588465, + "grad_norm": 0.7937942147254944, + "learning_rate": 8.776696290059776e-06, + "loss": 0.1661, + "step": 23335 + }, + { + "epoch": 4.529616724738676, + "grad_norm": 2.6932709217071533, + "learning_rate": 8.374875060160005e-06, + "loss": 0.1719, + "step": 23400 + }, + { + "epoch": 4.542198993418506, + "grad_norm": 1.9167306423187256, + "learning_rate": 7.978876235725097e-06, + "loss": 0.2357, + "step": 23465 + }, + { + "epoch": 4.554781262098335, + "grad_norm": 0.4528341591358185, + "learning_rate": 7.5890478352061e-06, + "loss": 0.1764, + "step": 23530 + }, + { + "epoch": 4.567363530778165, + "grad_norm": 15.758140563964844, + "learning_rate": 7.205732454256714e-06, + "loss": 0.1741, + "step": 23595 + }, + { + "epoch": 4.579945799457994, + "grad_norm": 0.13203494250774384, + "learning_rate": 6.8292669646475355e-06, + "loss": 0.1251, + "step": 23660 + }, + { + "epoch": 4.592528068137824, + "grad_norm": 0.5477350354194641, + "learning_rate": 6.459982218210615e-06, + "loss": 0.1734, + "step": 23725 + }, + { + "epoch": 4.605110336817654, + "grad_norm": 0.11947894096374512, + "learning_rate": 6.098202756074636e-06, + "loss": 0.1539, + "step": 23790 + }, + { + "epoch": 4.617692605497483, + "grad_norm": 2.542245864868164, + "learning_rate": 5.744246523446167e-06, + "loss": 0.1804, + "step": 23855 + }, + { + "epoch": 4.630274874177314, + "grad_norm": 1.1431196928024292, + "learning_rate": 5.398424590187569e-06, + "loss": 0.1678, + "step": 23920 + }, + { + "epoch": 4.642857142857143, + "grad_norm": 3.039541721343994, + "learning_rate": 5.061040877437311e-06, + "loss": 0.1526, + "step": 23985 + }, + { + "epoch": 4.655439411536973, + "grad_norm": 6.675982475280762, + "learning_rate": 4.7323918905127275e-06, + "loss": 0.1954, + "step": 24050 + }, + { + "epoch": 4.668021680216802, + "grad_norm": 11.611279487609863, + "learning_rate": 4.412766458330265e-06, + "loss": 0.1876, + "step": 24115 + }, + { + "epoch": 4.680603948896632, + "grad_norm": 1.0078673362731934, + "learning_rate": 4.1024454795717974e-06, + "loss": 0.1567, + "step": 24180 + }, + { + "epoch": 4.693186217576462, + "grad_norm": 0.6833789944648743, + "learning_rate": 3.801701675820568e-06, + "loss": 0.0897, + "step": 24245 + }, + { + "epoch": 4.705768486256291, + "grad_norm": 0.08990409970283508, + "learning_rate": 3.5107993518832366e-06, + "loss": 0.1338, + "step": 24310 + }, + { + "epoch": 4.718350754936121, + "grad_norm": 2.577784299850464, + "learning_rate": 3.2299941635091016e-06, + "loss": 0.1611, + "step": 24375 + }, + { + "epoch": 4.73093302361595, + "grad_norm": 2.1565229892730713, + "learning_rate": 2.959532892710328e-06, + "loss": 0.1611, + "step": 24440 + }, + { + "epoch": 4.74351529229578, + "grad_norm": 1.29633367061615, + "learning_rate": 2.699653230880698e-06, + "loss": 0.1218, + "step": 24505 + }, + { + "epoch": 4.751838946960898, + "eval_StS-test_pearson_cosine": 0.8792609870436672, + "eval_StS-test_pearson_dot": 0.7762148715297421, + "eval_StS-test_pearson_euclidean": 0.8598391820879342, + "eval_StS-test_pearson_manhattan": 0.8576575270233212, + "eval_StS-test_pearson_max": 0.8792609870436672, + "eval_StS-test_spearman_cosine": 0.895012053474811, + "eval_StS-test_spearman_dot": 0.7765599574914741, + "eval_StS-test_spearman_euclidean": 0.8626746231706642, + "eval_StS-test_spearman_manhattan": 0.8613839852851796, + "eval_StS-test_spearman_max": 0.895012053474811, + "eval_Vitaminc-test_cosine_accuracy": 0.5763157894736842, + "eval_Vitaminc-test_cosine_accuracy_threshold": 0.7384297847747803, + "eval_Vitaminc-test_cosine_ap": 0.5582610977968616, + "eval_Vitaminc-test_cosine_f1": 0.6716981132075472, + "eval_Vitaminc-test_cosine_f1_threshold": 0.4387393295764923, + "eval_Vitaminc-test_cosine_precision": 0.5159420289855072, + "eval_Vitaminc-test_cosine_recall": 0.9621621621621622, + "eval_Vitaminc-test_dot_accuracy": 0.5526315789473685, + "eval_Vitaminc-test_dot_accuracy_threshold": 10.325508117675781, + "eval_Vitaminc-test_dot_ap": 0.558739606374971, + "eval_Vitaminc-test_dot_f1": 0.6704545454545454, + "eval_Vitaminc-test_dot_f1_threshold": 6.89784049987793, + "eval_Vitaminc-test_dot_precision": 0.5160349854227405, + "eval_Vitaminc-test_dot_recall": 0.9567567567567568, + "eval_Vitaminc-test_euclidean_accuracy": 0.5842105263157895, + "eval_Vitaminc-test_euclidean_accuracy_threshold": 2.8687446117401123, + "eval_Vitaminc-test_euclidean_ap": 0.5521974352836823, + "eval_Vitaminc-test_euclidean_f1": 0.6654478976234005, + "eval_Vitaminc-test_euclidean_f1_threshold": 4.835182189941406, + "eval_Vitaminc-test_euclidean_precision": 0.5027624309392266, + "eval_Vitaminc-test_euclidean_recall": 0.9837837837837838, + "eval_Vitaminc-test_manhattan_accuracy": 0.5789473684210527, + "eval_Vitaminc-test_manhattan_accuracy_threshold": 56.989898681640625, + "eval_Vitaminc-test_manhattan_ap": 0.5519449989523413, + "eval_Vitaminc-test_manhattan_f1": 0.6654611211573237, + "eval_Vitaminc-test_manhattan_f1_threshold": 102.37645721435547, + "eval_Vitaminc-test_manhattan_precision": 0.5, + "eval_Vitaminc-test_manhattan_recall": 0.9945945945945946, + "eval_Vitaminc-test_max_accuracy": 0.5842105263157895, + "eval_Vitaminc-test_max_accuracy_threshold": 56.989898681640625, + "eval_Vitaminc-test_max_ap": 0.558739606374971, + "eval_Vitaminc-test_max_f1": 0.6716981132075472, + "eval_Vitaminc-test_max_f1_threshold": 102.37645721435547, + "eval_Vitaminc-test_max_precision": 0.5160349854227405, + "eval_Vitaminc-test_max_recall": 0.9945945945945946, + "eval_mrpc-test_cosine_accuracy": 0.7315789473684211, + "eval_mrpc-test_cosine_accuracy_threshold": 0.7699094414710999, + "eval_mrpc-test_cosine_ap": 0.8427559709422051, + "eval_mrpc-test_cosine_f1": 0.8216783216783218, + "eval_mrpc-test_cosine_f1_threshold": 0.6925866007804871, + "eval_mrpc-test_cosine_precision": 0.7253086419753086, + "eval_mrpc-test_cosine_recall": 0.9475806451612904, + "eval_mrpc-test_dot_accuracy": 0.6973684210526315, + "eval_mrpc-test_dot_accuracy_threshold": 6.628315448760986, + "eval_mrpc-test_dot_ap": 0.7544902048971998, + "eval_mrpc-test_dot_f1": 0.8040885860306644, + "eval_mrpc-test_dot_f1_threshold": 6.628315448760986, + "eval_mrpc-test_dot_precision": 0.696165191740413, + "eval_mrpc-test_dot_recall": 0.9516129032258065, + "eval_mrpc-test_euclidean_accuracy": 0.7289473684210527, + "eval_mrpc-test_euclidean_accuracy_threshold": 2.609426975250244, + "eval_mrpc-test_euclidean_ap": 0.825937819365436, + "eval_mrpc-test_euclidean_f1": 0.8148148148148148, + "eval_mrpc-test_euclidean_f1_threshold": 2.775911569595337, + "eval_mrpc-test_euclidean_precision": 0.7241379310344828, + "eval_mrpc-test_euclidean_recall": 0.9314516129032258, + "eval_mrpc-test_manhattan_accuracy": 0.7342105263157894, + "eval_mrpc-test_manhattan_accuracy_threshold": 52.07958221435547, + "eval_mrpc-test_manhattan_ap": 0.8243923902541919, + "eval_mrpc-test_manhattan_f1": 0.8185053380782917, + "eval_mrpc-test_manhattan_f1_threshold": 53.8984489440918, + "eval_mrpc-test_manhattan_precision": 0.732484076433121, + "eval_mrpc-test_manhattan_recall": 0.9274193548387096, + "eval_mrpc-test_max_accuracy": 0.7342105263157894, + "eval_mrpc-test_max_accuracy_threshold": 52.07958221435547, + "eval_mrpc-test_max_ap": 0.8427559709422051, + "eval_mrpc-test_max_f1": 0.8216783216783218, + "eval_mrpc-test_max_f1_threshold": 53.8984489440918, + "eval_mrpc-test_max_precision": 0.732484076433121, + "eval_mrpc-test_max_recall": 0.9516129032258065, + "eval_nli-pairs_loss": 0.7502982020378113, + "eval_nli-pairs_runtime": 3.0243, + "eval_nli-pairs_samples_per_second": 52.905, + "eval_nli-pairs_steps_per_second": 1.653, + "eval_sequential_score": 0.558739606374971, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_vitaminc-pairs_loss": 6.014657497406006, + "eval_vitaminc-pairs_runtime": 1.6124, + "eval_vitaminc-pairs_samples_per_second": 82.484, + "eval_vitaminc-pairs_steps_per_second": 3.101, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_qnli-contrastive_loss": 0.1470354050397873, + "eval_qnli-contrastive_runtime": 0.5367, + "eval_qnli-contrastive_samples_per_second": 298.11, + "eval_qnli-contrastive_steps_per_second": 9.316, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_scitail-pairs-qa_loss": 0.028492147102952003, + "eval_scitail-pairs-qa_runtime": 1.2207, + "eval_scitail-pairs-qa_samples_per_second": 131.069, + "eval_scitail-pairs-qa_steps_per_second": 4.096, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_scitail-pairs-pos_loss": 0.2434484362602234, + "eval_scitail-pairs-pos_runtime": 2.394, + "eval_scitail-pairs-pos_samples_per_second": 66.835, + "eval_scitail-pairs-pos_steps_per_second": 2.089, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_xsum-pairs_loss": 0.2599203288555145, + "eval_xsum-pairs_runtime": 1.4307, + "eval_xsum-pairs_samples_per_second": 111.832, + "eval_xsum-pairs_steps_per_second": 3.495, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_compression-pairs_loss": 0.04551355913281441, + "eval_compression-pairs_runtime": 0.4296, + "eval_compression-pairs_samples_per_second": 372.43, + "eval_compression-pairs_steps_per_second": 11.638, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_sciq_pairs_loss": 0.2139088362455368, + "eval_sciq_pairs_runtime": 8.0693, + "eval_sciq_pairs_samples_per_second": 19.828, + "eval_sciq_pairs_steps_per_second": 0.62, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_qasc_pairs_loss": 0.11057784408330917, + "eval_qasc_pairs_runtime": 1.4817, + "eval_qasc_pairs_samples_per_second": 107.982, + "eval_qasc_pairs_steps_per_second": 3.374, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_qasc_facts_sym_loss": 0.10339337587356567, + "eval_qasc_facts_sym_runtime": 0.3423, + "eval_qasc_facts_sym_samples_per_second": 467.489, + "eval_qasc_facts_sym_steps_per_second": 14.609, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_openbookqa_pairs_loss": 1.5395641326904297, + "eval_openbookqa_pairs_runtime": 1.2771, + "eval_openbookqa_pairs_samples_per_second": 125.281, + "eval_openbookqa_pairs_steps_per_second": 3.915, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_msmarco_pairs_loss": 0.4512082636356354, + "eval_msmarco_pairs_runtime": 3.1793, + "eval_msmarco_pairs_samples_per_second": 50.326, + "eval_msmarco_pairs_steps_per_second": 1.573, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_nq_pairs_loss": 0.3511877954006195, + "eval_nq_pairs_runtime": 7.7842, + "eval_nq_pairs_samples_per_second": 20.554, + "eval_nq_pairs_steps_per_second": 0.642, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_trivia_pairs_loss": 0.5725733041763306, + "eval_trivia_pairs_runtime": 10.2494, + "eval_trivia_pairs_samples_per_second": 15.611, + "eval_trivia_pairs_steps_per_second": 0.488, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_quora_pairs_loss": 0.17618750035762787, + "eval_quora_pairs_runtime": 4.0433, + "eval_quora_pairs_samples_per_second": 166.942, + "eval_quora_pairs_steps_per_second": 5.441, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_gooaq_pairs_loss": 0.35624396800994873, + "eval_gooaq_pairs_runtime": 2.3577, + "eval_gooaq_pairs_samples_per_second": 67.864, + "eval_gooaq_pairs_steps_per_second": 2.121, + "step": 24548 + }, + { + "epoch": 4.751838946960898, + "eval_mrpc_pairs_loss": 0.01982388086616993, + "eval_mrpc_pairs_runtime": 0.3996, + "eval_mrpc_pairs_samples_per_second": 400.421, + "eval_mrpc_pairs_steps_per_second": 12.513, + "step": 24548 + }, + { + "epoch": 4.7560975609756095, + "grad_norm": 0.15101125836372375, + "learning_rate": 2.4505835699037006e-06, + "loss": 0.0839, + "step": 24570 + }, + { + "epoch": 4.768679829655439, + "grad_norm": 0.5723939538002014, + "learning_rate": 2.2125428014332173e-06, + "loss": 0.1318, + "step": 24635 + }, + { + "epoch": 4.781262098335269, + "grad_norm": 0.13490521907806396, + "learning_rate": 1.985740124523413e-06, + "loss": 0.129, + "step": 24700 + }, + { + "epoch": 4.793844367015099, + "grad_norm": 0.1680539846420288, + "learning_rate": 1.7703748617768645e-06, + "loss": 0.1326, + "step": 24765 + }, + { + "epoch": 4.806426635694928, + "grad_norm": 0.649811863899231, + "learning_rate": 1.5666362841724996e-06, + "loss": 0.103, + "step": 24830 + }, + { + "epoch": 4.819008904374758, + "grad_norm": 0.36529257893562317, + "learning_rate": 1.3747034447271883e-06, + "loss": 0.1251, + "step": 24895 + }, + { + "epoch": 4.831591173054588, + "grad_norm": 2.842799186706543, + "learning_rate": 1.1947450211373478e-06, + "loss": 0.115, + "step": 24960 + }, + { + "epoch": 4.8441734417344176, + "grad_norm": 0.2723024785518646, + "learning_rate": 1.0269191675386908e-06, + "loss": 0.1023, + "step": 25025 + }, + { + "epoch": 4.856755710414247, + "grad_norm": 1.8682302236557007, + "learning_rate": 8.713733755145653e-07, + "loss": 0.1003, + "step": 25090 + }, + { + "epoch": 4.869337979094077, + "grad_norm": 2.646700620651245, + "learning_rate": 7.282443444748149e-07, + "loss": 0.1094, + "step": 25155 + }, + { + "epoch": 4.881920247773906, + "grad_norm": 0.5898552536964417, + "learning_rate": 5.97657861519324e-07, + "loss": 0.133, + "step": 25220 + }, + { + "epoch": 4.894502516453736, + "grad_norm": 7.81072473526001, + "learning_rate": 4.797286908916226e-07, + "loss": 0.1164, + "step": 25285 + }, + { + "epoch": 4.9070847851335655, + "grad_norm": 3.0226893424987793, + "learning_rate": 3.7456047311982044e-07, + "loss": 0.1303, + "step": 25350 + }, + { + "epoch": 4.919667053813395, + "grad_norm": 0.5438377261161804, + "learning_rate": 2.822456339334556e-07, + "loss": 0.1446, + "step": 25415 + }, + { + "epoch": 4.932249322493225, + "grad_norm": 2.313892126083374, + "learning_rate": 2.028653030363481e-07, + "loss": 0.1189, + "step": 25480 + }, + { + "epoch": 4.944831591173054, + "grad_norm": 0.06484068930149078, + "learning_rate": 1.3648924280681707e-07, + "loss": 0.1272, + "step": 25545 + }, + { + "epoch": 4.957413859852885, + "grad_norm": 0.3085618019104004, + "learning_rate": 8.317578698789685e-08, + "loss": 0.1283, + "step": 25610 + }, + { + "epoch": 4.969996128532713, + "grad_norm": 0.0721147358417511, + "learning_rate": 4.2971789421488715e-08, + "loss": 0.1473, + "step": 25675 + }, + { + "epoch": 4.982578397212544, + "grad_norm": 2.4143240451812744, + "learning_rate": 1.5912582871465686e-08, + "loss": 0.0909, + "step": 25740 + }, + { + "epoch": 4.9951606658923735, + "grad_norm": 0.8749054670333862, + "learning_rate": 2.0219479719363333e-09, + "loss": 0.1042, + "step": 25805 + } + ], + "logging_steps": 65, + "max_steps": 25830, + "num_input_tokens_seen": 0, + "num_train_epochs": 5, + "save_steps": 2583, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": true + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 32, + "trial_name": null, + "trial_params": null +}