diff --git "a/checkpoint-392/trainer_state.json" "b/checkpoint-392/trainer_state.json" new file mode 100644--- /dev/null +++ "b/checkpoint-392/trainer_state.json" @@ -0,0 +1,7795 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.4004085801838611, + "eval_steps": 15, + "global_step": 392, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0010214504596527069, + "grad_norm": 0.5273555517196655, + "learning_rate": 4.636785162287481e-08, + "loss": 0.0203, + "step": 1 + }, + { + "epoch": 0.0020429009193054137, + "grad_norm": 6.3600592613220215, + "learning_rate": 9.273570324574961e-08, + "loss": 0.1339, + "step": 2 + }, + { + "epoch": 0.0030643513789581204, + "grad_norm": 7.497765064239502, + "learning_rate": 1.3910355486862442e-07, + "loss": 0.1229, + "step": 3 + }, + { + "epoch": 0.0040858018386108275, + "grad_norm": 4.310150623321533, + "learning_rate": 1.8547140649149923e-07, + "loss": 0.0947, + "step": 4 + }, + { + "epoch": 0.005107252298263534, + "grad_norm": 5.7333173751831055, + "learning_rate": 2.3183925811437404e-07, + "loss": 0.1468, + "step": 5 + }, + { + "epoch": 0.006128702757916241, + "grad_norm": 11.430774688720703, + "learning_rate": 2.7820710973724884e-07, + "loss": 0.2783, + "step": 6 + }, + { + "epoch": 0.007150153217568948, + "grad_norm": 7.56001615524292, + "learning_rate": 3.245749613601236e-07, + "loss": 0.2316, + "step": 7 + }, + { + "epoch": 0.008171603677221655, + "grad_norm": 6.071979522705078, + "learning_rate": 3.7094281298299846e-07, + "loss": 0.092, + "step": 8 + }, + { + "epoch": 0.009193054136874362, + "grad_norm": 10.170219421386719, + "learning_rate": 4.173106646058733e-07, + "loss": 0.2354, + "step": 9 + }, + { + "epoch": 0.010214504596527068, + "grad_norm": 8.175680160522461, + "learning_rate": 4.636785162287481e-07, + "loss": 0.2881, + "step": 10 + }, + { + "epoch": 0.011235955056179775, + "grad_norm": 0.9932138323783875, + "learning_rate": 5.100463678516229e-07, + "loss": 0.0046, + "step": 11 + }, + { + "epoch": 0.012257405515832482, + "grad_norm": 5.045660018920898, + "learning_rate": 5.564142194744977e-07, + "loss": 0.1022, + "step": 12 + }, + { + "epoch": 0.013278855975485188, + "grad_norm": 1.2583826780319214, + "learning_rate": 6.027820710973725e-07, + "loss": 0.0723, + "step": 13 + }, + { + "epoch": 0.014300306435137897, + "grad_norm": 5.627937316894531, + "learning_rate": 6.491499227202473e-07, + "loss": 0.1052, + "step": 14 + }, + { + "epoch": 0.015321756894790603, + "grad_norm": 9.369796752929688, + "learning_rate": 6.955177743431222e-07, + "loss": 0.4409, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6497001647949219, + "eval_Qnli-dev_cosine_ap": 0.7172090984704772, + "eval_Qnli-dev_cosine_f1": 0.6793650793650794, + "eval_Qnli-dev_cosine_f1_threshold": 0.47885602712631226, + "eval_Qnli-dev_cosine_precision": 0.5431472081218274, + "eval_Qnli-dev_cosine_recall": 0.9067796610169492, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 319.010009765625, + "eval_Qnli-dev_dot_ap": 0.6795003806274902, + "eval_Qnli-dev_dot_f1": 0.671850699844479, + "eval_Qnli-dev_dot_f1_threshold": 198.62274169921875, + "eval_Qnli-dev_dot_precision": 0.5307125307125307, + "eval_Qnli-dev_dot_recall": 0.9152542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.42666244506836, + "eval_Qnli-dev_euclidean_ap": 0.7289935892292956, + "eval_Qnli-dev_euclidean_f1": 0.6826758147512865, + "eval_Qnli-dev_euclidean_f1_threshold": 20.348825454711914, + "eval_Qnli-dev_euclidean_precision": 0.5734870317002881, + "eval_Qnli-dev_euclidean_recall": 0.8432203389830508, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 368.4004211425781, + "eval_Qnli-dev_manhattan_ap": 0.7319806620902767, + "eval_Qnli-dev_manhattan_f1": 0.6842105263157895, + "eval_Qnli-dev_manhattan_f1_threshold": 444.519775390625, + "eval_Qnli-dev_manhattan_precision": 0.5591397849462365, + "eval_Qnli-dev_manhattan_recall": 0.8813559322033898, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 368.4004211425781, + "eval_Qnli-dev_max_ap": 0.7319806620902767, + "eval_Qnli-dev_max_f1": 0.6842105263157895, + "eval_Qnli-dev_max_f1_threshold": 444.519775390625, + "eval_Qnli-dev_max_precision": 0.5734870317002881, + "eval_Qnli-dev_max_recall": 0.9152542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.740234375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7800098657608032, + "eval_allNLI-dev_cosine_ap": 0.6345698412091827, + "eval_allNLI-dev_cosine_f1": 0.6422018348623854, + "eval_allNLI-dev_cosine_f1_threshold": 0.6016973257064819, + "eval_allNLI-dev_cosine_precision": 0.532319391634981, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.724609375, + "eval_allNLI-dev_dot_accuracy_threshold": 307.7762145996094, + "eval_allNLI-dev_dot_ap": 0.5946818715216496, + "eval_allNLI-dev_dot_f1": 0.6083499005964215, + "eval_allNLI-dev_dot_f1_threshold": 213.3328857421875, + "eval_allNLI-dev_dot_precision": 0.4636363636363636, + "eval_allNLI-dev_dot_recall": 0.884393063583815, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.864477157592773, + "eval_allNLI-dev_euclidean_ap": 0.6426144847815969, + "eval_allNLI-dev_euclidean_f1": 0.656319290465632, + "eval_allNLI-dev_euclidean_f1_threshold": 18.424442291259766, + "eval_allNLI-dev_euclidean_precision": 0.5323741007194245, + "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, + "eval_allNLI-dev_manhattan_accuracy": 0.748046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 313.14306640625, + "eval_allNLI-dev_manhattan_ap": 0.6424665602334335, + "eval_allNLI-dev_manhattan_f1": 0.6558891454965358, + "eval_allNLI-dev_manhattan_f1_threshold": 380.26483154296875, + "eval_allNLI-dev_manhattan_precision": 0.5461538461538461, + "eval_allNLI-dev_manhattan_recall": 0.8208092485549133, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 313.14306640625, + "eval_allNLI-dev_max_ap": 0.6426144847815969, + "eval_allNLI-dev_max_f1": 0.656319290465632, + "eval_allNLI-dev_max_f1_threshold": 380.26483154296875, + "eval_allNLI-dev_max_precision": 0.5461538461538461, + "eval_allNLI-dev_max_recall": 0.884393063583815, + "eval_sequential_score": 0.7319806620902767, + "eval_sts-test_pearson_cosine": 0.8861801638022422, + "eval_sts-test_pearson_dot": 0.8769526160236782, + "eval_sts-test_pearson_euclidean": 0.9080896258111713, + "eval_sts-test_pearson_manhattan": 0.9087821292284366, + "eval_sts-test_pearson_max": 0.9087821292284366, + "eval_sts-test_spearman_cosine": 0.9077984013618918, + "eval_sts-test_spearman_dot": 0.879453355157932, + "eval_sts-test_spearman_euclidean": 0.9040312849292251, + "eval_sts-test_spearman_manhattan": 0.9044364180986016, + "eval_sts-test_spearman_max": 0.9077984013618918, + "eval_vitaminc-pairs_loss": 1.7410913705825806, + "eval_vitaminc-pairs_runtime": 3.1955, + "eval_vitaminc-pairs_samples_per_second": 40.056, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_negation-triplets_loss": 0.9541677236557007, + "eval_negation-triplets_runtime": 0.6938, + "eval_negation-triplets_samples_per_second": 184.492, + "eval_negation-triplets_steps_per_second": 1.441, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_scitail-pairs-pos_loss": 0.05452797934412956, + "eval_scitail-pairs-pos_runtime": 0.8174, + "eval_scitail-pairs-pos_samples_per_second": 156.595, + "eval_scitail-pairs-pos_steps_per_second": 1.223, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_scitail-pairs-qa_loss": 4.5379197217698675e-06, + "eval_scitail-pairs-qa_runtime": 0.5096, + "eval_scitail-pairs-qa_samples_per_second": 251.188, + "eval_scitail-pairs-qa_steps_per_second": 1.962, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_xsum-pairs_loss": 0.005140363238751888, + "eval_xsum-pairs_runtime": 2.71, + "eval_xsum-pairs_samples_per_second": 47.232, + "eval_xsum-pairs_steps_per_second": 0.369, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_sciq_pairs_loss": 0.02153397724032402, + "eval_sciq_pairs_runtime": 3.0528, + "eval_sciq_pairs_samples_per_second": 41.928, + "eval_sciq_pairs_steps_per_second": 0.328, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_qasc_pairs_loss": 0.14103390276432037, + "eval_qasc_pairs_runtime": 0.5644, + "eval_qasc_pairs_samples_per_second": 226.78, + "eval_qasc_pairs_steps_per_second": 1.772, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_openbookqa_pairs_loss": 0.8675450682640076, + "eval_openbookqa_pairs_runtime": 0.5752, + "eval_openbookqa_pairs_samples_per_second": 222.532, + "eval_openbookqa_pairs_steps_per_second": 1.739, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_msmarco_pairs_loss": 0.17242787778377533, + "eval_msmarco_pairs_runtime": 1.274, + "eval_msmarco_pairs_samples_per_second": 100.469, + "eval_msmarco_pairs_steps_per_second": 0.785, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_nq_pairs_loss": 0.0900416225194931, + "eval_nq_pairs_runtime": 2.7275, + "eval_nq_pairs_samples_per_second": 46.93, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_trivia_pairs_loss": 0.15126368403434753, + "eval_trivia_pairs_runtime": 3.178, + "eval_trivia_pairs_samples_per_second": 40.276, + "eval_trivia_pairs_steps_per_second": 0.315, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_gooaq_pairs_loss": 0.13123726844787598, + "eval_gooaq_pairs_runtime": 0.8688, + "eval_gooaq_pairs_samples_per_second": 147.324, + "eval_gooaq_pairs_steps_per_second": 1.151, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_paws-pos_loss": 0.02203337289392948, + "eval_paws-pos_runtime": 0.6801, + "eval_paws-pos_samples_per_second": 188.195, + "eval_paws-pos_steps_per_second": 1.47, + "step": 15 + }, + { + "epoch": 0.015321756894790603, + "eval_global_dataset_loss": 0.25319796800613403, + "eval_global_dataset_runtime": 18.1051, + "eval_global_dataset_samples_per_second": 30.157, + "eval_global_dataset_steps_per_second": 0.276, + "step": 15 + }, + { + "epoch": 0.01634320735444331, + "grad_norm": 0.023888731375336647, + "learning_rate": 7.418856259659969e-07, + "loss": 0.0001, + "step": 16 + }, + { + "epoch": 0.017364657814096015, + "grad_norm": 8.196227073669434, + "learning_rate": 7.882534775888718e-07, + "loss": 0.3306, + "step": 17 + }, + { + "epoch": 0.018386108273748723, + "grad_norm": 12.018414497375488, + "learning_rate": 8.346213292117466e-07, + "loss": 0.2889, + "step": 18 + }, + { + "epoch": 0.01940755873340143, + "grad_norm": 8.713894844055176, + "learning_rate": 8.809891808346213e-07, + "loss": 0.2016, + "step": 19 + }, + { + "epoch": 0.020429009193054137, + "grad_norm": 10.398391723632812, + "learning_rate": 9.273570324574961e-07, + "loss": 0.295, + "step": 20 + }, + { + "epoch": 0.021450459652706845, + "grad_norm": 6.799093246459961, + "learning_rate": 9.737248840803709e-07, + "loss": 0.1884, + "step": 21 + }, + { + "epoch": 0.02247191011235955, + "grad_norm": 7.390905380249023, + "learning_rate": 1.0200927357032457e-06, + "loss": 0.2173, + "step": 22 + }, + { + "epoch": 0.02349336057201226, + "grad_norm": 10.30966854095459, + "learning_rate": 1.0664605873261208e-06, + "loss": 0.2632, + "step": 23 + }, + { + "epoch": 0.024514811031664963, + "grad_norm": 2.211714744567871, + "learning_rate": 1.1128284389489954e-06, + "loss": 0.0168, + "step": 24 + }, + { + "epoch": 0.02553626149131767, + "grad_norm": 3.876884698867798, + "learning_rate": 1.1591962905718702e-06, + "loss": 0.0348, + "step": 25 + }, + { + "epoch": 0.026557711950970377, + "grad_norm": 6.142614841461182, + "learning_rate": 1.205564142194745e-06, + "loss": 0.1607, + "step": 26 + }, + { + "epoch": 0.027579162410623085, + "grad_norm": 6.944269180297852, + "learning_rate": 1.2519319938176199e-06, + "loss": 0.209, + "step": 27 + }, + { + "epoch": 0.028600612870275793, + "grad_norm": 6.37337589263916, + "learning_rate": 1.2982998454404945e-06, + "loss": 0.0724, + "step": 28 + }, + { + "epoch": 0.0296220633299285, + "grad_norm": 9.69383430480957, + "learning_rate": 1.3446676970633693e-06, + "loss": 0.345, + "step": 29 + }, + { + "epoch": 0.030643513789581207, + "grad_norm": 1.0400453805923462, + "learning_rate": 1.3910355486862444e-06, + "loss": 0.0812, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6521614789962769, + "eval_Qnli-dev_cosine_ap": 0.7185588524699033, + "eval_Qnli-dev_cosine_f1": 0.6782884310618067, + "eval_Qnli-dev_cosine_f1_threshold": 0.4795927405357361, + "eval_Qnli-dev_cosine_precision": 0.5417721518987342, + "eval_Qnli-dev_cosine_recall": 0.9067796610169492, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 318.37115478515625, + "eval_Qnli-dev_dot_ap": 0.6796807664319214, + "eval_Qnli-dev_dot_f1": 0.6720257234726689, + "eval_Qnli-dev_dot_f1_threshold": 212.82940673828125, + "eval_Qnli-dev_dot_precision": 0.5414507772020726, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.41230010986328, + "eval_Qnli-dev_euclidean_ap": 0.7291205386768884, + "eval_Qnli-dev_euclidean_f1": 0.6838487972508591, + "eval_Qnli-dev_euclidean_f1_threshold": 20.233570098876953, + "eval_Qnli-dev_euclidean_precision": 0.5751445086705202, + "eval_Qnli-dev_euclidean_recall": 0.8432203389830508, + "eval_Qnli-dev_manhattan_accuracy": 0.70703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 368.1832275390625, + "eval_Qnli-dev_manhattan_ap": 0.7324158083351404, + "eval_Qnli-dev_manhattan_f1": 0.6830870279146142, + "eval_Qnli-dev_manhattan_f1_threshold": 444.402099609375, + "eval_Qnli-dev_manhattan_precision": 0.5576407506702413, + "eval_Qnli-dev_manhattan_recall": 0.8813559322033898, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 368.1832275390625, + "eval_Qnli-dev_max_ap": 0.7324158083351404, + "eval_Qnli-dev_max_f1": 0.6838487972508591, + "eval_Qnli-dev_max_f1_threshold": 444.402099609375, + "eval_Qnli-dev_max_precision": 0.5751445086705202, + "eval_Qnli-dev_max_recall": 0.9067796610169492, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7892241477966309, + "eval_allNLI-dev_cosine_ap": 0.6348557517402963, + "eval_allNLI-dev_cosine_f1": 0.6407322654462242, + "eval_allNLI-dev_cosine_f1_threshold": 0.603339433670044, + "eval_allNLI-dev_cosine_precision": 0.5303030303030303, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.72265625, + "eval_allNLI-dev_dot_accuracy_threshold": 309.972900390625, + "eval_allNLI-dev_dot_ap": 0.5956869756457649, + "eval_allNLI-dev_dot_f1": 0.6059405940594059, + "eval_allNLI-dev_dot_f1_threshold": 214.9962158203125, + "eval_allNLI-dev_dot_precision": 0.4608433734939759, + "eval_allNLI-dev_dot_recall": 0.884393063583815, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.912679672241211, + "eval_allNLI-dev_euclidean_ap": 0.6429412122037375, + "eval_allNLI-dev_euclidean_f1": 0.65625, + "eval_allNLI-dev_euclidean_f1_threshold": 18.338790893554688, + "eval_allNLI-dev_euclidean_precision": 0.5345454545454545, + "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, + "eval_allNLI-dev_manhattan_accuracy": 0.748046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 313.21661376953125, + "eval_allNLI-dev_manhattan_ap": 0.6416697630123366, + "eval_allNLI-dev_manhattan_f1": 0.6551724137931034, + "eval_allNLI-dev_manhattan_f1_threshold": 368.69781494140625, + "eval_allNLI-dev_manhattan_precision": 0.5708154506437768, + "eval_allNLI-dev_manhattan_recall": 0.7687861271676301, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 313.21661376953125, + "eval_allNLI-dev_max_ap": 0.6429412122037375, + "eval_allNLI-dev_max_f1": 0.65625, + "eval_allNLI-dev_max_f1_threshold": 368.69781494140625, + "eval_allNLI-dev_max_precision": 0.5708154506437768, + "eval_allNLI-dev_max_recall": 0.884393063583815, + "eval_sequential_score": 0.7324158083351404, + "eval_sts-test_pearson_cosine": 0.8861788608003685, + "eval_sts-test_pearson_dot": 0.8767868547839528, + "eval_sts-test_pearson_euclidean": 0.9082972226360341, + "eval_sts-test_pearson_manhattan": 0.909001944469733, + "eval_sts-test_pearson_max": 0.909001944469733, + "eval_sts-test_spearman_cosine": 0.9079251816854137, + "eval_sts-test_spearman_dot": 0.8793625548485687, + "eval_sts-test_spearman_euclidean": 0.9042534749420336, + "eval_sts-test_spearman_manhattan": 0.9049142273164774, + "eval_sts-test_spearman_max": 0.9079251816854137, + "eval_vitaminc-pairs_loss": 1.7372864484786987, + "eval_vitaminc-pairs_runtime": 3.1766, + "eval_vitaminc-pairs_samples_per_second": 40.294, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_negation-triplets_loss": 0.9514555931091309, + "eval_negation-triplets_runtime": 0.6964, + "eval_negation-triplets_samples_per_second": 183.791, + "eval_negation-triplets_steps_per_second": 1.436, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_scitail-pairs-pos_loss": 0.0540882833302021, + "eval_scitail-pairs-pos_runtime": 0.7908, + "eval_scitail-pairs-pos_samples_per_second": 161.866, + "eval_scitail-pairs-pos_steps_per_second": 1.265, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_scitail-pairs-qa_loss": 4.433557478478178e-06, + "eval_scitail-pairs-qa_runtime": 0.5123, + "eval_scitail-pairs-qa_samples_per_second": 249.849, + "eval_scitail-pairs-qa_steps_per_second": 1.952, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_xsum-pairs_loss": 0.005012438166886568, + "eval_xsum-pairs_runtime": 2.7064, + "eval_xsum-pairs_samples_per_second": 47.296, + "eval_xsum-pairs_steps_per_second": 0.369, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_sciq_pairs_loss": 0.021487703546881676, + "eval_sciq_pairs_runtime": 3.0818, + "eval_sciq_pairs_samples_per_second": 41.534, + "eval_sciq_pairs_steps_per_second": 0.324, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_qasc_pairs_loss": 0.13895103335380554, + "eval_qasc_pairs_runtime": 0.5594, + "eval_qasc_pairs_samples_per_second": 228.826, + "eval_qasc_pairs_steps_per_second": 1.788, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_openbookqa_pairs_loss": 0.8709484338760376, + "eval_openbookqa_pairs_runtime": 0.5742, + "eval_openbookqa_pairs_samples_per_second": 222.935, + "eval_openbookqa_pairs_steps_per_second": 1.742, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_msmarco_pairs_loss": 0.17041827738285065, + "eval_msmarco_pairs_runtime": 1.2755, + "eval_msmarco_pairs_samples_per_second": 100.349, + "eval_msmarco_pairs_steps_per_second": 0.784, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_nq_pairs_loss": 0.08988925069570541, + "eval_nq_pairs_runtime": 2.7176, + "eval_nq_pairs_samples_per_second": 47.101, + "eval_nq_pairs_steps_per_second": 0.368, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_trivia_pairs_loss": 0.15233221650123596, + "eval_trivia_pairs_runtime": 3.1837, + "eval_trivia_pairs_samples_per_second": 40.205, + "eval_trivia_pairs_steps_per_second": 0.314, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_gooaq_pairs_loss": 0.1306401789188385, + "eval_gooaq_pairs_runtime": 0.8717, + "eval_gooaq_pairs_samples_per_second": 146.836, + "eval_gooaq_pairs_steps_per_second": 1.147, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_paws-pos_loss": 0.02202308550477028, + "eval_paws-pos_runtime": 0.6876, + "eval_paws-pos_samples_per_second": 186.144, + "eval_paws-pos_steps_per_second": 1.454, + "step": 30 + }, + { + "epoch": 0.030643513789581207, + "eval_global_dataset_loss": 0.24897590279579163, + "eval_global_dataset_runtime": 18.1089, + "eval_global_dataset_samples_per_second": 30.151, + "eval_global_dataset_steps_per_second": 0.276, + "step": 30 + }, + { + "epoch": 0.031664964249233915, + "grad_norm": 9.223188400268555, + "learning_rate": 1.437403400309119e-06, + "loss": 0.2389, + "step": 31 + }, + { + "epoch": 0.03268641470888662, + "grad_norm": 5.1906938552856445, + "learning_rate": 1.4837712519319938e-06, + "loss": 0.0954, + "step": 32 + }, + { + "epoch": 0.033707865168539325, + "grad_norm": 0.026071075350046158, + "learning_rate": 1.5301391035548687e-06, + "loss": 0.0002, + "step": 33 + }, + { + "epoch": 0.03472931562819203, + "grad_norm": 7.478715419769287, + "learning_rate": 1.5765069551777435e-06, + "loss": 0.2136, + "step": 34 + }, + { + "epoch": 0.03575076608784474, + "grad_norm": 8.305584907531738, + "learning_rate": 1.6228748068006181e-06, + "loss": 0.2434, + "step": 35 + }, + { + "epoch": 0.03677221654749745, + "grad_norm": 4.577242851257324, + "learning_rate": 1.6692426584234932e-06, + "loss": 0.0745, + "step": 36 + }, + { + "epoch": 0.03779366700715015, + "grad_norm": 13.501931190490723, + "learning_rate": 1.715610510046368e-06, + "loss": 0.3987, + "step": 37 + }, + { + "epoch": 0.03881511746680286, + "grad_norm": 4.866842746734619, + "learning_rate": 1.7619783616692426e-06, + "loss": 0.0742, + "step": 38 + }, + { + "epoch": 0.03983656792645557, + "grad_norm": 0.450527548789978, + "learning_rate": 1.8083462132921175e-06, + "loss": 0.0017, + "step": 39 + }, + { + "epoch": 0.04085801838610827, + "grad_norm": 8.321237564086914, + "learning_rate": 1.8547140649149923e-06, + "loss": 0.2109, + "step": 40 + }, + { + "epoch": 0.04187946884576098, + "grad_norm": 6.3697896003723145, + "learning_rate": 1.9010819165378671e-06, + "loss": 0.1791, + "step": 41 + }, + { + "epoch": 0.04290091930541369, + "grad_norm": 5.8197407722473145, + "learning_rate": 1.9474497681607418e-06, + "loss": 0.1338, + "step": 42 + }, + { + "epoch": 0.043922369765066395, + "grad_norm": 9.69156551361084, + "learning_rate": 1.993817619783617e-06, + "loss": 0.275, + "step": 43 + }, + { + "epoch": 0.0449438202247191, + "grad_norm": 8.05585765838623, + "learning_rate": 2.0401854714064914e-06, + "loss": 0.1818, + "step": 44 + }, + { + "epoch": 0.045965270684371805, + "grad_norm": 11.007800102233887, + "learning_rate": 2.086553323029366e-06, + "loss": 0.2393, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_Qnli-dev_cosine_accuracy": 0.693359375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6514263153076172, + "eval_Qnli-dev_cosine_ap": 0.7192595387298439, + "eval_Qnli-dev_cosine_f1": 0.6773618538324421, + "eval_Qnli-dev_cosine_f1_threshold": 0.5661306977272034, + "eval_Qnli-dev_cosine_precision": 0.5846153846153846, + "eval_Qnli-dev_cosine_recall": 0.8050847457627118, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 279.9789123535156, + "eval_Qnli-dev_dot_ap": 0.6794510195926304, + "eval_Qnli-dev_dot_f1": 0.6729559748427674, + "eval_Qnli-dev_dot_f1_threshold": 204.80381774902344, + "eval_Qnli-dev_dot_precision": 0.535, + "eval_Qnli-dev_dot_recall": 0.9067796610169492, + "eval_Qnli-dev_euclidean_accuracy": 0.7109375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.62104034423828, + "eval_Qnli-dev_euclidean_ap": 0.730044017119111, + "eval_Qnli-dev_euclidean_f1": 0.6844741235392321, + "eval_Qnli-dev_euclidean_f1_threshold": 20.66344451904297, + "eval_Qnli-dev_euclidean_precision": 0.5647382920110193, + "eval_Qnli-dev_euclidean_recall": 0.8686440677966102, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 367.0382080078125, + "eval_Qnli-dev_manhattan_ap": 0.7325536146405061, + "eval_Qnli-dev_manhattan_f1": 0.6820428336079077, + "eval_Qnli-dev_manhattan_f1_threshold": 443.19097900390625, + "eval_Qnli-dev_manhattan_precision": 0.5579514824797843, + "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, + "eval_Qnli-dev_max_accuracy": 0.7109375, + "eval_Qnli-dev_max_accuracy_threshold": 367.0382080078125, + "eval_Qnli-dev_max_ap": 0.7325536146405061, + "eval_Qnli-dev_max_f1": 0.6844741235392321, + "eval_Qnli-dev_max_f1_threshold": 443.19097900390625, + "eval_Qnli-dev_max_precision": 0.5846153846153846, + "eval_Qnli-dev_max_recall": 0.9067796610169492, + "eval_allNLI-dev_cosine_accuracy": 0.740234375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7886371612548828, + "eval_allNLI-dev_cosine_ap": 0.6342918116087624, + "eval_allNLI-dev_cosine_f1": 0.6407322654462242, + "eval_allNLI-dev_cosine_f1_threshold": 0.604407012462616, + "eval_allNLI-dev_cosine_precision": 0.5303030303030303, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.724609375, + "eval_allNLI-dev_dot_accuracy_threshold": 300.2228698730469, + "eval_allNLI-dev_dot_ap": 0.5959831337600967, + "eval_allNLI-dev_dot_f1": 0.6074950690335306, + "eval_allNLI-dev_dot_f1_threshold": 217.0308380126953, + "eval_allNLI-dev_dot_precision": 0.46107784431137727, + "eval_allNLI-dev_dot_recall": 0.8901734104046243, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.926891326904297, + "eval_allNLI-dev_euclidean_ap": 0.6432871260784624, + "eval_allNLI-dev_euclidean_f1": 0.6563876651982379, + "eval_allNLI-dev_euclidean_f1_threshold": 18.53801727294922, + "eval_allNLI-dev_euclidean_precision": 0.5302491103202847, + "eval_allNLI-dev_euclidean_recall": 0.861271676300578, + "eval_allNLI-dev_manhattan_accuracy": 0.74609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 309.91705322265625, + "eval_allNLI-dev_manhattan_ap": 0.6415562376489171, + "eval_allNLI-dev_manhattan_f1": 0.6550868486352356, + "eval_allNLI-dev_manhattan_f1_threshold": 367.0815734863281, + "eval_allNLI-dev_manhattan_precision": 0.5739130434782609, + "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, + "eval_allNLI-dev_max_accuracy": 0.74609375, + "eval_allNLI-dev_max_accuracy_threshold": 309.91705322265625, + "eval_allNLI-dev_max_ap": 0.6432871260784624, + "eval_allNLI-dev_max_f1": 0.6563876651982379, + "eval_allNLI-dev_max_f1_threshold": 367.0815734863281, + "eval_allNLI-dev_max_precision": 0.5739130434782609, + "eval_allNLI-dev_max_recall": 0.8901734104046243, + "eval_sequential_score": 0.7325536146405061, + "eval_sts-test_pearson_cosine": 0.8863450053129568, + "eval_sts-test_pearson_dot": 0.876803494766901, + "eval_sts-test_pearson_euclidean": 0.908863003692806, + "eval_sts-test_pearson_manhattan": 0.9095961164895561, + "eval_sts-test_pearson_max": 0.9095961164895561, + "eval_sts-test_spearman_cosine": 0.9084616598069739, + "eval_sts-test_spearman_dot": 0.8795719907518805, + "eval_sts-test_spearman_euclidean": 0.9051654161466675, + "eval_sts-test_spearman_manhattan": 0.9055802603397657, + "eval_sts-test_spearman_max": 0.9084616598069739, + "eval_vitaminc-pairs_loss": 1.734721064567566, + "eval_vitaminc-pairs_runtime": 3.1738, + "eval_vitaminc-pairs_samples_per_second": 40.331, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_negation-triplets_loss": 0.9482213258743286, + "eval_negation-triplets_runtime": 0.6936, + "eval_negation-triplets_samples_per_second": 184.556, + "eval_negation-triplets_steps_per_second": 1.442, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_scitail-pairs-pos_loss": 0.05361902713775635, + "eval_scitail-pairs-pos_runtime": 0.816, + "eval_scitail-pairs-pos_samples_per_second": 156.87, + "eval_scitail-pairs-pos_steps_per_second": 1.226, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_scitail-pairs-qa_loss": 4.589809577737469e-06, + "eval_scitail-pairs-qa_runtime": 0.5115, + "eval_scitail-pairs-qa_samples_per_second": 250.251, + "eval_scitail-pairs-qa_steps_per_second": 1.955, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_xsum-pairs_loss": 0.005083221010863781, + "eval_xsum-pairs_runtime": 2.734, + "eval_xsum-pairs_samples_per_second": 46.817, + "eval_xsum-pairs_steps_per_second": 0.366, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_sciq_pairs_loss": 0.021123090758919716, + "eval_sciq_pairs_runtime": 3.0729, + "eval_sciq_pairs_samples_per_second": 41.655, + "eval_sciq_pairs_steps_per_second": 0.325, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_qasc_pairs_loss": 0.13705024123191833, + "eval_qasc_pairs_runtime": 0.566, + "eval_qasc_pairs_samples_per_second": 226.167, + "eval_qasc_pairs_steps_per_second": 1.767, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_openbookqa_pairs_loss": 0.8665502071380615, + "eval_openbookqa_pairs_runtime": 0.5708, + "eval_openbookqa_pairs_samples_per_second": 224.25, + "eval_openbookqa_pairs_steps_per_second": 1.752, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_msmarco_pairs_loss": 0.16575750708580017, + "eval_msmarco_pairs_runtime": 1.2707, + "eval_msmarco_pairs_samples_per_second": 100.733, + "eval_msmarco_pairs_steps_per_second": 0.787, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_nq_pairs_loss": 0.09121595323085785, + "eval_nq_pairs_runtime": 2.7244, + "eval_nq_pairs_samples_per_second": 46.983, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_trivia_pairs_loss": 0.15335939824581146, + "eval_trivia_pairs_runtime": 3.1824, + "eval_trivia_pairs_samples_per_second": 40.221, + "eval_trivia_pairs_steps_per_second": 0.314, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_gooaq_pairs_loss": 0.12918642163276672, + "eval_gooaq_pairs_runtime": 0.8717, + "eval_gooaq_pairs_samples_per_second": 146.835, + "eval_gooaq_pairs_steps_per_second": 1.147, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_paws-pos_loss": 0.02210753969848156, + "eval_paws-pos_runtime": 0.6839, + "eval_paws-pos_samples_per_second": 187.164, + "eval_paws-pos_steps_per_second": 1.462, + "step": 45 + }, + { + "epoch": 0.045965270684371805, + "eval_global_dataset_loss": 0.2437790036201477, + "eval_global_dataset_runtime": 18.0895, + "eval_global_dataset_samples_per_second": 30.183, + "eval_global_dataset_steps_per_second": 0.276, + "step": 45 + }, + { + "epoch": 0.04698672114402452, + "grad_norm": 8.341979026794434, + "learning_rate": 2.1329211746522415e-06, + "loss": 0.2644, + "step": 46 + }, + { + "epoch": 0.04800817160367722, + "grad_norm": 3.7365353107452393, + "learning_rate": 2.179289026275116e-06, + "loss": 0.0184, + "step": 47 + }, + { + "epoch": 0.049029622063329927, + "grad_norm": 8.873991012573242, + "learning_rate": 2.2256568778979908e-06, + "loss": 0.4364, + "step": 48 + }, + { + "epoch": 0.05005107252298264, + "grad_norm": 11.165787696838379, + "learning_rate": 2.272024729520866e-06, + "loss": 0.3131, + "step": 49 + }, + { + "epoch": 0.05107252298263534, + "grad_norm": 9.491363525390625, + "learning_rate": 2.3183925811437404e-06, + "loss": 0.2334, + "step": 50 + }, + { + "epoch": 0.05209397344228805, + "grad_norm": 4.924949645996094, + "learning_rate": 2.364760432766615e-06, + "loss": 0.094, + "step": 51 + }, + { + "epoch": 0.05311542390194075, + "grad_norm": 6.319979190826416, + "learning_rate": 2.41112828438949e-06, + "loss": 0.2048, + "step": 52 + }, + { + "epoch": 0.054136874361593465, + "grad_norm": 5.863682270050049, + "learning_rate": 2.4574961360123647e-06, + "loss": 0.138, + "step": 53 + }, + { + "epoch": 0.05515832482124617, + "grad_norm": 9.261981964111328, + "learning_rate": 2.5038639876352398e-06, + "loss": 0.1472, + "step": 54 + }, + { + "epoch": 0.056179775280898875, + "grad_norm": 6.513923168182373, + "learning_rate": 2.5502318392581144e-06, + "loss": 0.1483, + "step": 55 + }, + { + "epoch": 0.05720122574055159, + "grad_norm": 8.526361465454102, + "learning_rate": 2.596599690880989e-06, + "loss": 0.1489, + "step": 56 + }, + { + "epoch": 0.05822267620020429, + "grad_norm": 8.871485710144043, + "learning_rate": 2.642967542503864e-06, + "loss": 0.2447, + "step": 57 + }, + { + "epoch": 0.059244126659857, + "grad_norm": 0.5958855748176575, + "learning_rate": 2.6893353941267387e-06, + "loss": 0.0023, + "step": 58 + }, + { + "epoch": 0.0602655771195097, + "grad_norm": 9.389046669006348, + "learning_rate": 2.7357032457496137e-06, + "loss": 0.2143, + "step": 59 + }, + { + "epoch": 0.06128702757916241, + "grad_norm": 0.17088650166988373, + "learning_rate": 2.7820710973724888e-06, + "loss": 0.0006, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_Qnli-dev_cosine_accuracy": 0.6953125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6521527767181396, + "eval_Qnli-dev_cosine_ap": 0.7203543055286272, + "eval_Qnli-dev_cosine_f1": 0.6822262118491921, + "eval_Qnli-dev_cosine_f1_threshold": 0.5753836035728455, + "eval_Qnli-dev_cosine_precision": 0.5919003115264797, + "eval_Qnli-dev_cosine_recall": 0.8050847457627118, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 280.61773681640625, + "eval_Qnli-dev_dot_ap": 0.6781746875562886, + "eval_Qnli-dev_dot_f1": 0.676056338028169, + "eval_Qnli-dev_dot_f1_threshold": 207.84335327148438, + "eval_Qnli-dev_dot_precision": 0.5359801488833746, + "eval_Qnli-dev_dot_recall": 0.9152542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.712890625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.638320922851562, + "eval_Qnli-dev_euclidean_ap": 0.7311449657521796, + "eval_Qnli-dev_euclidean_f1": 0.6878306878306878, + "eval_Qnli-dev_euclidean_f1_threshold": 19.743083953857422, + "eval_Qnli-dev_euclidean_precision": 0.5891238670694864, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 367.29486083984375, + "eval_Qnli-dev_manhattan_ap": 0.7341914384220252, + "eval_Qnli-dev_manhattan_f1": 0.6820428336079077, + "eval_Qnli-dev_manhattan_f1_threshold": 442.21612548828125, + "eval_Qnli-dev_manhattan_precision": 0.5579514824797843, + "eval_Qnli-dev_manhattan_recall": 0.8771186440677966, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 367.29486083984375, + "eval_Qnli-dev_max_ap": 0.7341914384220252, + "eval_Qnli-dev_max_f1": 0.6878306878306878, + "eval_Qnli-dev_max_f1_threshold": 442.21612548828125, + "eval_Qnli-dev_max_precision": 0.5919003115264797, + "eval_Qnli-dev_max_recall": 0.9152542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7895551919937134, + "eval_allNLI-dev_cosine_ap": 0.634559874010061, + "eval_allNLI-dev_cosine_f1": 0.6405529953917051, + "eval_allNLI-dev_cosine_f1_threshold": 0.6102022528648376, + "eval_allNLI-dev_cosine_precision": 0.5325670498084292, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.72265625, + "eval_allNLI-dev_dot_accuracy_threshold": 312.3458251953125, + "eval_allNLI-dev_dot_ap": 0.5956073487787255, + "eval_allNLI-dev_dot_f1": 0.6074950690335306, + "eval_allNLI-dev_dot_f1_threshold": 219.54769897460938, + "eval_allNLI-dev_dot_precision": 0.46107784431137727, + "eval_allNLI-dev_dot_recall": 0.8901734104046243, + "eval_allNLI-dev_euclidean_accuracy": 0.748046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 14.036979675292969, + "eval_allNLI-dev_euclidean_ap": 0.6423835707673824, + "eval_allNLI-dev_euclidean_f1": 0.6534216335540839, + "eval_allNLI-dev_euclidean_f1_threshold": 18.447982788085938, + "eval_allNLI-dev_euclidean_precision": 0.5285714285714286, + "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, + "eval_allNLI-dev_manhattan_accuracy": 0.74609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 310.88720703125, + "eval_allNLI-dev_manhattan_ap": 0.6409036075054013, + "eval_allNLI-dev_manhattan_f1": 0.6550868486352356, + "eval_allNLI-dev_manhattan_f1_threshold": 367.3871765136719, + "eval_allNLI-dev_manhattan_precision": 0.5739130434782609, + "eval_allNLI-dev_manhattan_recall": 0.7630057803468208, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 312.3458251953125, + "eval_allNLI-dev_max_ap": 0.6423835707673824, + "eval_allNLI-dev_max_f1": 0.6550868486352356, + "eval_allNLI-dev_max_f1_threshold": 367.3871765136719, + "eval_allNLI-dev_max_precision": 0.5739130434782609, + "eval_allNLI-dev_max_recall": 0.8901734104046243, + "eval_sequential_score": 0.7341914384220252, + "eval_sts-test_pearson_cosine": 0.8863472703816779, + "eval_sts-test_pearson_dot": 0.876887579550975, + "eval_sts-test_pearson_euclidean": 0.9092623315616009, + "eval_sts-test_pearson_manhattan": 0.9100878604950867, + "eval_sts-test_pearson_max": 0.9100878604950867, + "eval_sts-test_spearman_cosine": 0.90865194217041, + "eval_sts-test_spearman_dot": 0.8802936943443395, + "eval_sts-test_spearman_euclidean": 0.9056517728554692, + "eval_sts-test_spearman_manhattan": 0.9060887689229562, + "eval_sts-test_spearman_max": 0.90865194217041, + "eval_vitaminc-pairs_loss": 1.731228232383728, + "eval_vitaminc-pairs_runtime": 3.1795, + "eval_vitaminc-pairs_samples_per_second": 40.258, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_negation-triplets_loss": 0.948928713798523, + "eval_negation-triplets_runtime": 0.6893, + "eval_negation-triplets_samples_per_second": 185.691, + "eval_negation-triplets_steps_per_second": 1.451, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_scitail-pairs-pos_loss": 0.05333153158426285, + "eval_scitail-pairs-pos_runtime": 0.7791, + "eval_scitail-pairs-pos_samples_per_second": 164.298, + "eval_scitail-pairs-pos_steps_per_second": 1.284, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_scitail-pairs-qa_loss": 5.695792424376123e-06, + "eval_scitail-pairs-qa_runtime": 0.5102, + "eval_scitail-pairs-qa_samples_per_second": 250.884, + "eval_scitail-pairs-qa_steps_per_second": 1.96, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_xsum-pairs_loss": 0.005610933061689138, + "eval_xsum-pairs_runtime": 2.6982, + "eval_xsum-pairs_samples_per_second": 47.439, + "eval_xsum-pairs_steps_per_second": 0.371, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_sciq_pairs_loss": 0.02088981680572033, + "eval_sciq_pairs_runtime": 3.0926, + "eval_sciq_pairs_samples_per_second": 41.39, + "eval_sciq_pairs_steps_per_second": 0.323, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_qasc_pairs_loss": 0.13207125663757324, + "eval_qasc_pairs_runtime": 0.5696, + "eval_qasc_pairs_samples_per_second": 224.702, + "eval_qasc_pairs_steps_per_second": 1.755, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_openbookqa_pairs_loss": 0.8543967008590698, + "eval_openbookqa_pairs_runtime": 0.5786, + "eval_openbookqa_pairs_samples_per_second": 221.237, + "eval_openbookqa_pairs_steps_per_second": 1.728, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_msmarco_pairs_loss": 0.16006271541118622, + "eval_msmarco_pairs_runtime": 1.2709, + "eval_msmarco_pairs_samples_per_second": 100.713, + "eval_msmarco_pairs_steps_per_second": 0.787, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_nq_pairs_loss": 0.09258166700601578, + "eval_nq_pairs_runtime": 2.7222, + "eval_nq_pairs_samples_per_second": 47.021, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_trivia_pairs_loss": 0.15214744210243225, + "eval_trivia_pairs_runtime": 3.1772, + "eval_trivia_pairs_samples_per_second": 40.287, + "eval_trivia_pairs_steps_per_second": 0.315, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_gooaq_pairs_loss": 0.12777934968471527, + "eval_gooaq_pairs_runtime": 0.8701, + "eval_gooaq_pairs_samples_per_second": 147.115, + "eval_gooaq_pairs_steps_per_second": 1.149, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_paws-pos_loss": 0.022154221311211586, + "eval_paws-pos_runtime": 0.6827, + "eval_paws-pos_samples_per_second": 187.488, + "eval_paws-pos_steps_per_second": 1.465, + "step": 60 + }, + { + "epoch": 0.06128702757916241, + "eval_global_dataset_loss": 0.23715433478355408, + "eval_global_dataset_runtime": 18.1203, + "eval_global_dataset_samples_per_second": 30.132, + "eval_global_dataset_steps_per_second": 0.276, + "step": 60 + }, + { + "epoch": 0.06230847803881512, + "grad_norm": 8.801414489746094, + "learning_rate": 2.8284389489953634e-06, + "loss": 0.1404, + "step": 61 + }, + { + "epoch": 0.06332992849846783, + "grad_norm": 2.7502946853637695, + "learning_rate": 2.874806800618238e-06, + "loss": 0.0407, + "step": 62 + }, + { + "epoch": 0.06435137895812053, + "grad_norm": 0.7089954018592834, + "learning_rate": 2.921174652241113e-06, + "loss": 0.0295, + "step": 63 + }, + { + "epoch": 0.06537282941777324, + "grad_norm": 6.507052421569824, + "learning_rate": 2.9675425038639877e-06, + "loss": 0.1194, + "step": 64 + }, + { + "epoch": 0.06639427987742594, + "grad_norm": 0.9625360369682312, + "learning_rate": 3.0139103554868627e-06, + "loss": 0.007, + "step": 65 + }, + { + "epoch": 0.06741573033707865, + "grad_norm": 7.246578216552734, + "learning_rate": 3.0602782071097373e-06, + "loss": 0.1981, + "step": 66 + }, + { + "epoch": 0.06843718079673136, + "grad_norm": 9.406133651733398, + "learning_rate": 3.106646058732612e-06, + "loss": 0.3657, + "step": 67 + }, + { + "epoch": 0.06945863125638406, + "grad_norm": 10.381515502929688, + "learning_rate": 3.153013910355487e-06, + "loss": 0.3135, + "step": 68 + }, + { + "epoch": 0.07048008171603677, + "grad_norm": 6.522178649902344, + "learning_rate": 3.1993817619783616e-06, + "loss": 0.1054, + "step": 69 + }, + { + "epoch": 0.07150153217568948, + "grad_norm": 11.078179359436035, + "learning_rate": 3.2457496136012363e-06, + "loss": 0.2613, + "step": 70 + }, + { + "epoch": 0.07252298263534218, + "grad_norm": 1.6560922861099243, + "learning_rate": 3.2921174652241113e-06, + "loss": 0.0079, + "step": 71 + }, + { + "epoch": 0.0735444330949949, + "grad_norm": 2.070523977279663, + "learning_rate": 3.3384853168469863e-06, + "loss": 0.0157, + "step": 72 + }, + { + "epoch": 0.0745658835546476, + "grad_norm": 9.655231475830078, + "learning_rate": 3.384853168469861e-06, + "loss": 0.1849, + "step": 73 + }, + { + "epoch": 0.0755873340143003, + "grad_norm": 6.534660816192627, + "learning_rate": 3.431221020092736e-06, + "loss": 0.0975, + "step": 74 + }, + { + "epoch": 0.07660878447395301, + "grad_norm": 8.623287200927734, + "learning_rate": 3.4775888717156106e-06, + "loss": 0.2335, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6802465915679932, + "eval_Qnli-dev_cosine_ap": 0.7221645229256343, + "eval_Qnli-dev_cosine_f1": 0.6845878136200717, + "eval_Qnli-dev_cosine_f1_threshold": 0.5798860788345337, + "eval_Qnli-dev_cosine_precision": 0.593167701863354, + "eval_Qnli-dev_cosine_recall": 0.809322033898305, + "eval_Qnli-dev_dot_accuracy": 0.677734375, + "eval_Qnli-dev_dot_accuracy_threshold": 287.1238708496094, + "eval_Qnli-dev_dot_ap": 0.6784544411193656, + "eval_Qnli-dev_dot_f1": 0.6823161189358372, + "eval_Qnli-dev_dot_f1_threshold": 211.93714904785156, + "eval_Qnli-dev_dot_precision": 0.5409429280397022, + "eval_Qnli-dev_dot_recall": 0.923728813559322, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.4168758392334, + "eval_Qnli-dev_euclidean_ap": 0.7337542044152053, + "eval_Qnli-dev_euclidean_f1": 0.6923076923076922, + "eval_Qnli-dev_euclidean_f1_threshold": 19.817930221557617, + "eval_Qnli-dev_euclidean_precision": 0.5892857142857143, + "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, + "eval_Qnli-dev_manhattan_accuracy": 0.712890625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 366.0330810546875, + "eval_Qnli-dev_manhattan_ap": 0.7364803705680161, + "eval_Qnli-dev_manhattan_f1": 0.6861313868613139, + "eval_Qnli-dev_manhattan_f1_threshold": 409.19677734375, + "eval_Qnli-dev_manhattan_precision": 0.6025641025641025, + "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 366.0330810546875, + "eval_Qnli-dev_max_ap": 0.7364803705680161, + "eval_Qnli-dev_max_f1": 0.6923076923076922, + "eval_Qnli-dev_max_f1_threshold": 409.19677734375, + "eval_Qnli-dev_max_precision": 0.6025641025641025, + "eval_Qnli-dev_max_recall": 0.923728813559322, + "eval_allNLI-dev_cosine_accuracy": 0.740234375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7561130523681641, + "eval_allNLI-dev_cosine_ap": 0.6351820401860591, + "eval_allNLI-dev_cosine_f1": 0.6433566433566433, + "eval_allNLI-dev_cosine_f1_threshold": 0.6203993558883667, + "eval_allNLI-dev_cosine_precision": 0.5390625, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.724609375, + "eval_allNLI-dev_dot_accuracy_threshold": 319.7127380371094, + "eval_allNLI-dev_dot_ap": 0.5963612797297364, + "eval_allNLI-dev_dot_f1": 0.6080000000000001, + "eval_allNLI-dev_dot_f1_threshold": 228.60348510742188, + "eval_allNLI-dev_dot_precision": 0.4648318042813456, + "eval_allNLI-dev_dot_recall": 0.8786127167630058, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.950254440307617, + "eval_allNLI-dev_euclidean_ap": 0.6425996990727424, + "eval_allNLI-dev_euclidean_f1": 0.6519823788546256, + "eval_allNLI-dev_euclidean_f1_threshold": 18.43679428100586, + "eval_allNLI-dev_euclidean_precision": 0.5266903914590747, + "eval_allNLI-dev_euclidean_recall": 0.8554913294797688, + "eval_allNLI-dev_manhattan_accuracy": 0.748046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 310.272705078125, + "eval_allNLI-dev_manhattan_ap": 0.6411563319615046, + "eval_allNLI-dev_manhattan_f1": 0.6516290726817043, + "eval_allNLI-dev_manhattan_f1_threshold": 364.7450866699219, + "eval_allNLI-dev_manhattan_precision": 0.5752212389380531, + "eval_allNLI-dev_manhattan_recall": 0.7514450867052023, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 319.7127380371094, + "eval_allNLI-dev_max_ap": 0.6425996990727424, + "eval_allNLI-dev_max_f1": 0.6519823788546256, + "eval_allNLI-dev_max_f1_threshold": 364.7450866699219, + "eval_allNLI-dev_max_precision": 0.5752212389380531, + "eval_allNLI-dev_max_recall": 0.8786127167630058, + "eval_sequential_score": 0.7364803705680161, + "eval_sts-test_pearson_cosine": 0.885936912700247, + "eval_sts-test_pearson_dot": 0.8761949933067066, + "eval_sts-test_pearson_euclidean": 0.90948485299389, + "eval_sts-test_pearson_manhattan": 0.9103436334904699, + "eval_sts-test_pearson_max": 0.9103436334904699, + "eval_sts-test_spearman_cosine": 0.9085082906213438, + "eval_sts-test_spearman_dot": 0.8799422179373714, + "eval_sts-test_spearman_euclidean": 0.905969596313871, + "eval_sts-test_spearman_manhattan": 0.9065514074632206, + "eval_sts-test_spearman_max": 0.9085082906213438, + "eval_vitaminc-pairs_loss": 1.7180320024490356, + "eval_vitaminc-pairs_runtime": 3.1831, + "eval_vitaminc-pairs_samples_per_second": 40.213, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_negation-triplets_loss": 0.9486294984817505, + "eval_negation-triplets_runtime": 0.6946, + "eval_negation-triplets_samples_per_second": 184.281, + "eval_negation-triplets_steps_per_second": 1.44, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_scitail-pairs-pos_loss": 0.05202628672122955, + "eval_scitail-pairs-pos_runtime": 0.8111, + "eval_scitail-pairs-pos_samples_per_second": 157.818, + "eval_scitail-pairs-pos_steps_per_second": 1.233, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_scitail-pairs-qa_loss": 5.791543571831426e-06, + "eval_scitail-pairs-qa_runtime": 0.5181, + "eval_scitail-pairs-qa_samples_per_second": 247.053, + "eval_scitail-pairs-qa_steps_per_second": 1.93, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_xsum-pairs_loss": 0.005996571853756905, + "eval_xsum-pairs_runtime": 2.7137, + "eval_xsum-pairs_samples_per_second": 47.169, + "eval_xsum-pairs_steps_per_second": 0.369, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_sciq_pairs_loss": 0.020699353888630867, + "eval_sciq_pairs_runtime": 3.1312, + "eval_sciq_pairs_samples_per_second": 40.879, + "eval_sciq_pairs_steps_per_second": 0.319, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_qasc_pairs_loss": 0.12712860107421875, + "eval_qasc_pairs_runtime": 0.58, + "eval_qasc_pairs_samples_per_second": 220.704, + "eval_qasc_pairs_steps_per_second": 1.724, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_openbookqa_pairs_loss": 0.8392120599746704, + "eval_openbookqa_pairs_runtime": 0.5791, + "eval_openbookqa_pairs_samples_per_second": 221.046, + "eval_openbookqa_pairs_steps_per_second": 1.727, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_msmarco_pairs_loss": 0.15558885037899017, + "eval_msmarco_pairs_runtime": 1.276, + "eval_msmarco_pairs_samples_per_second": 100.315, + "eval_msmarco_pairs_steps_per_second": 0.784, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_nq_pairs_loss": 0.0976957157254219, + "eval_nq_pairs_runtime": 2.7247, + "eval_nq_pairs_samples_per_second": 46.978, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_trivia_pairs_loss": 0.15088817477226257, + "eval_trivia_pairs_runtime": 3.1861, + "eval_trivia_pairs_samples_per_second": 40.174, + "eval_trivia_pairs_steps_per_second": 0.314, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_gooaq_pairs_loss": 0.1258237063884735, + "eval_gooaq_pairs_runtime": 0.8729, + "eval_gooaq_pairs_samples_per_second": 146.632, + "eval_gooaq_pairs_steps_per_second": 1.146, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_paws-pos_loss": 0.02218741364777088, + "eval_paws-pos_runtime": 0.6862, + "eval_paws-pos_samples_per_second": 186.535, + "eval_paws-pos_steps_per_second": 1.457, + "step": 75 + }, + { + "epoch": 0.07660878447395301, + "eval_global_dataset_loss": 0.23313331604003906, + "eval_global_dataset_runtime": 18.0956, + "eval_global_dataset_samples_per_second": 30.173, + "eval_global_dataset_steps_per_second": 0.276, + "step": 75 + }, + { + "epoch": 0.07763023493360573, + "grad_norm": 3.260712146759033, + "learning_rate": 3.5239567233384853e-06, + "loss": 0.0662, + "step": 76 + }, + { + "epoch": 0.07865168539325842, + "grad_norm": 3.6062076091766357, + "learning_rate": 3.5703245749613603e-06, + "loss": 0.0915, + "step": 77 + }, + { + "epoch": 0.07967313585291114, + "grad_norm": 7.476528167724609, + "learning_rate": 3.616692426584235e-06, + "loss": 0.1385, + "step": 78 + }, + { + "epoch": 0.08069458631256383, + "grad_norm": 11.388545989990234, + "learning_rate": 3.66306027820711e-06, + "loss": 0.3163, + "step": 79 + }, + { + "epoch": 0.08171603677221655, + "grad_norm": 7.791510581970215, + "learning_rate": 3.7094281298299846e-06, + "loss": 0.185, + "step": 80 + }, + { + "epoch": 0.08273748723186926, + "grad_norm": 6.315013885498047, + "learning_rate": 3.7557959814528596e-06, + "loss": 0.1407, + "step": 81 + }, + { + "epoch": 0.08375893769152196, + "grad_norm": 6.31016206741333, + "learning_rate": 3.8021638330757343e-06, + "loss": 0.1441, + "step": 82 + }, + { + "epoch": 0.08478038815117467, + "grad_norm": 0.00047670298954471946, + "learning_rate": 3.848531684698609e-06, + "loss": 0.0, + "step": 83 + }, + { + "epoch": 0.08580183861082738, + "grad_norm": 3.731964111328125, + "learning_rate": 3.8948995363214835e-06, + "loss": 0.1092, + "step": 84 + }, + { + "epoch": 0.08682328907048008, + "grad_norm": 11.6514253616333, + "learning_rate": 3.941267387944358e-06, + "loss": 0.2534, + "step": 85 + }, + { + "epoch": 0.08784473953013279, + "grad_norm": 5.441813945770264, + "learning_rate": 3.987635239567234e-06, + "loss": 0.0641, + "step": 86 + }, + { + "epoch": 0.0888661899897855, + "grad_norm": 9.261086463928223, + "learning_rate": 4.034003091190108e-06, + "loss": 0.3494, + "step": 87 + }, + { + "epoch": 0.0898876404494382, + "grad_norm": 18.64848518371582, + "learning_rate": 4.080370942812983e-06, + "loss": 1.4592, + "step": 88 + }, + { + "epoch": 0.09090909090909091, + "grad_norm": 6.809045314788818, + "learning_rate": 4.1267387944358575e-06, + "loss": 0.1255, + "step": 89 + }, + { + "epoch": 0.09193054136874361, + "grad_norm": 7.5073628425598145, + "learning_rate": 4.173106646058732e-06, + "loss": 0.2629, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_Qnli-dev_cosine_accuracy": 0.703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6682207584381104, + "eval_Qnli-dev_cosine_ap": 0.7261848352945963, + "eval_Qnli-dev_cosine_f1": 0.6869409660107335, + "eval_Qnli-dev_cosine_f1_threshold": 0.581086277961731, + "eval_Qnli-dev_cosine_precision": 0.5944272445820433, + "eval_Qnli-dev_cosine_recall": 0.8135593220338984, + "eval_Qnli-dev_dot_accuracy": 0.67578125, + "eval_Qnli-dev_dot_accuracy_threshold": 287.95068359375, + "eval_Qnli-dev_dot_ap": 0.6806011135489268, + "eval_Qnli-dev_dot_f1": 0.6845425867507886, + "eval_Qnli-dev_dot_f1_threshold": 218.3216552734375, + "eval_Qnli-dev_dot_precision": 0.5452261306532663, + "eval_Qnli-dev_dot_recall": 0.9194915254237288, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.22848892211914, + "eval_Qnli-dev_euclidean_ap": 0.7376353333034853, + "eval_Qnli-dev_euclidean_f1": 0.6958041958041957, + "eval_Qnli-dev_euclidean_f1_threshold": 19.688594818115234, + "eval_Qnli-dev_euclidean_precision": 0.5922619047619048, + "eval_Qnli-dev_euclidean_recall": 0.8432203389830508, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 363.35137939453125, + "eval_Qnli-dev_manhattan_ap": 0.7404336390716968, + "eval_Qnli-dev_manhattan_f1": 0.6931407942238268, + "eval_Qnli-dev_manhattan_f1_threshold": 409.564453125, + "eval_Qnli-dev_manhattan_precision": 0.6037735849056604, + "eval_Qnli-dev_manhattan_recall": 0.8135593220338984, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 363.35137939453125, + "eval_Qnli-dev_max_ap": 0.7404336390716968, + "eval_Qnli-dev_max_f1": 0.6958041958041957, + "eval_Qnli-dev_max_f1_threshold": 409.564453125, + "eval_Qnli-dev_max_precision": 0.6037735849056604, + "eval_Qnli-dev_max_recall": 0.9194915254237288, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7595493793487549, + "eval_allNLI-dev_cosine_ap": 0.6349171021559972, + "eval_allNLI-dev_cosine_f1": 0.6416861826697893, + "eval_allNLI-dev_cosine_f1_threshold": 0.6246594190597534, + "eval_allNLI-dev_cosine_precision": 0.5393700787401575, + "eval_allNLI-dev_cosine_recall": 0.791907514450867, + "eval_allNLI-dev_dot_accuracy": 0.7265625, + "eval_allNLI-dev_dot_accuracy_threshold": 321.2451171875, + "eval_allNLI-dev_dot_ap": 0.5928922686892528, + "eval_allNLI-dev_dot_f1": 0.6065934065934065, + "eval_allNLI-dev_dot_f1_threshold": 256.02032470703125, + "eval_allNLI-dev_dot_precision": 0.48936170212765956, + "eval_allNLI-dev_dot_recall": 0.7976878612716763, + "eval_allNLI-dev_euclidean_accuracy": 0.748046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 14.122276306152344, + "eval_allNLI-dev_euclidean_ap": 0.6424689489474189, + "eval_allNLI-dev_euclidean_f1": 0.6533333333333333, + "eval_allNLI-dev_euclidean_f1_threshold": 18.282554626464844, + "eval_allNLI-dev_euclidean_precision": 0.5306859205776173, + "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, + "eval_allNLI-dev_manhattan_accuracy": 0.74609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 311.0120849609375, + "eval_allNLI-dev_manhattan_ap": 0.6407360502207873, + "eval_allNLI-dev_manhattan_f1": 0.654911838790932, + "eval_allNLI-dev_manhattan_f1_threshold": 362.8153076171875, + "eval_allNLI-dev_manhattan_precision": 0.5803571428571429, + "eval_allNLI-dev_manhattan_recall": 0.7514450867052023, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 321.2451171875, + "eval_allNLI-dev_max_ap": 0.6424689489474189, + "eval_allNLI-dev_max_f1": 0.654911838790932, + "eval_allNLI-dev_max_f1_threshold": 362.8153076171875, + "eval_allNLI-dev_max_precision": 0.5803571428571429, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7404336390716968, + "eval_sts-test_pearson_cosine": 0.8856095601905809, + "eval_sts-test_pearson_dot": 0.8757631264974062, + "eval_sts-test_pearson_euclidean": 0.9094458129747329, + "eval_sts-test_pearson_manhattan": 0.9104588555059347, + "eval_sts-test_pearson_max": 0.9104588555059347, + "eval_sts-test_spearman_cosine": 0.9081662567305291, + "eval_sts-test_spearman_dot": 0.8802719452313033, + "eval_sts-test_spearman_euclidean": 0.9059896896302359, + "eval_sts-test_spearman_manhattan": 0.9068112332868588, + "eval_sts-test_spearman_max": 0.9081662567305291, + "eval_vitaminc-pairs_loss": 1.7091079950332642, + "eval_vitaminc-pairs_runtime": 3.1732, + "eval_vitaminc-pairs_samples_per_second": 40.338, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_negation-triplets_loss": 0.95356285572052, + "eval_negation-triplets_runtime": 0.6921, + "eval_negation-triplets_samples_per_second": 184.937, + "eval_negation-triplets_steps_per_second": 1.445, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_scitail-pairs-pos_loss": 0.05094476789236069, + "eval_scitail-pairs-pos_runtime": 0.8089, + "eval_scitail-pairs-pos_samples_per_second": 158.241, + "eval_scitail-pairs-pos_steps_per_second": 1.236, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_scitail-pairs-qa_loss": 7.773421202728059e-06, + "eval_scitail-pairs-qa_runtime": 0.5149, + "eval_scitail-pairs-qa_samples_per_second": 248.589, + "eval_scitail-pairs-qa_steps_per_second": 1.942, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_xsum-pairs_loss": 0.006860487163066864, + "eval_xsum-pairs_runtime": 2.7073, + "eval_xsum-pairs_samples_per_second": 47.28, + "eval_xsum-pairs_steps_per_second": 0.369, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_sciq_pairs_loss": 0.02071911282837391, + "eval_sciq_pairs_runtime": 3.0956, + "eval_sciq_pairs_samples_per_second": 41.35, + "eval_sciq_pairs_steps_per_second": 0.323, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_qasc_pairs_loss": 0.11688968539237976, + "eval_qasc_pairs_runtime": 0.5644, + "eval_qasc_pairs_samples_per_second": 226.808, + "eval_qasc_pairs_steps_per_second": 1.772, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_openbookqa_pairs_loss": 0.8204447031021118, + "eval_openbookqa_pairs_runtime": 0.5709, + "eval_openbookqa_pairs_samples_per_second": 224.214, + "eval_openbookqa_pairs_steps_per_second": 1.752, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_msmarco_pairs_loss": 0.15213926136493683, + "eval_msmarco_pairs_runtime": 1.2676, + "eval_msmarco_pairs_samples_per_second": 100.975, + "eval_msmarco_pairs_steps_per_second": 0.789, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_nq_pairs_loss": 0.10173202306032181, + "eval_nq_pairs_runtime": 2.718, + "eval_nq_pairs_samples_per_second": 47.094, + "eval_nq_pairs_steps_per_second": 0.368, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_trivia_pairs_loss": 0.16387580335140228, + "eval_trivia_pairs_runtime": 3.1868, + "eval_trivia_pairs_samples_per_second": 40.165, + "eval_trivia_pairs_steps_per_second": 0.314, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_gooaq_pairs_loss": 0.12467134743928909, + "eval_gooaq_pairs_runtime": 0.8719, + "eval_gooaq_pairs_samples_per_second": 146.81, + "eval_gooaq_pairs_steps_per_second": 1.147, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_paws-pos_loss": 0.022317279130220413, + "eval_paws-pos_runtime": 0.6835, + "eval_paws-pos_samples_per_second": 187.269, + "eval_paws-pos_steps_per_second": 1.463, + "step": 90 + }, + { + "epoch": 0.09193054136874361, + "eval_global_dataset_loss": 0.22744666039943695, + "eval_global_dataset_runtime": 18.1141, + "eval_global_dataset_samples_per_second": 30.142, + "eval_global_dataset_steps_per_second": 0.276, + "step": 90 + }, + { + "epoch": 0.09295199182839632, + "grad_norm": 9.237175941467285, + "learning_rate": 4.2194744976816075e-06, + "loss": 0.3228, + "step": 91 + }, + { + "epoch": 0.09397344228804903, + "grad_norm": 7.839476108551025, + "learning_rate": 4.265842349304483e-06, + "loss": 0.1874, + "step": 92 + }, + { + "epoch": 0.09499489274770173, + "grad_norm": 7.359750747680664, + "learning_rate": 4.312210200927358e-06, + "loss": 0.3143, + "step": 93 + }, + { + "epoch": 0.09601634320735444, + "grad_norm": 8.99244213104248, + "learning_rate": 4.358578052550232e-06, + "loss": 0.2283, + "step": 94 + }, + { + "epoch": 0.09703779366700716, + "grad_norm": 7.446261882781982, + "learning_rate": 4.404945904173107e-06, + "loss": 0.2747, + "step": 95 + }, + { + "epoch": 0.09805924412665985, + "grad_norm": 3.5370335578918457, + "learning_rate": 4.4513137557959815e-06, + "loss": 0.0362, + "step": 96 + }, + { + "epoch": 0.09908069458631256, + "grad_norm": 0.029940638691186905, + "learning_rate": 4.497681607418857e-06, + "loss": 0.0002, + "step": 97 + }, + { + "epoch": 0.10010214504596528, + "grad_norm": 11.456415176391602, + "learning_rate": 4.544049459041732e-06, + "loss": 0.3649, + "step": 98 + }, + { + "epoch": 0.10112359550561797, + "grad_norm": 11.45889663696289, + "learning_rate": 4.590417310664606e-06, + "loss": 0.3179, + "step": 99 + }, + { + "epoch": 0.10214504596527069, + "grad_norm": 3.849439859390259, + "learning_rate": 4.636785162287481e-06, + "loss": 0.0557, + "step": 100 + }, + { + "epoch": 0.10316649642492338, + "grad_norm": 8.395110130310059, + "learning_rate": 4.6831530139103555e-06, + "loss": 0.3263, + "step": 101 + }, + { + "epoch": 0.1041879468845761, + "grad_norm": 6.901643753051758, + "learning_rate": 4.72952086553323e-06, + "loss": 0.231, + "step": 102 + }, + { + "epoch": 0.10520939734422881, + "grad_norm": 8.926299095153809, + "learning_rate": 4.7758887171561056e-06, + "loss": 0.3294, + "step": 103 + }, + { + "epoch": 0.1062308478038815, + "grad_norm": 6.9586615562438965, + "learning_rate": 4.82225656877898e-06, + "loss": 0.0975, + "step": 104 + }, + { + "epoch": 0.10725229826353422, + "grad_norm": 5.96987771987915, + "learning_rate": 4.868624420401855e-06, + "loss": 0.1257, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_Qnli-dev_cosine_accuracy": 0.701171875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.671695351600647, + "eval_Qnli-dev_cosine_ap": 0.7264500557166101, + "eval_Qnli-dev_cosine_f1": 0.6905187835420393, + "eval_Qnli-dev_cosine_f1_threshold": 0.5879168510437012, + "eval_Qnli-dev_cosine_precision": 0.5975232198142415, + "eval_Qnli-dev_cosine_recall": 0.8177966101694916, + "eval_Qnli-dev_dot_accuracy": 0.677734375, + "eval_Qnli-dev_dot_accuracy_threshold": 294.4508972167969, + "eval_Qnli-dev_dot_ap": 0.6801812848758537, + "eval_Qnli-dev_dot_f1": 0.683464566929134, + "eval_Qnli-dev_dot_f1_threshold": 223.16122436523438, + "eval_Qnli-dev_dot_precision": 0.543859649122807, + "eval_Qnli-dev_dot_recall": 0.9194915254237288, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.090248107910156, + "eval_Qnli-dev_euclidean_ap": 0.7383477209782823, + "eval_Qnli-dev_euclidean_f1": 0.6951871657754012, + "eval_Qnli-dev_euclidean_f1_threshold": 19.287824630737305, + "eval_Qnli-dev_euclidean_precision": 0.6, + "eval_Qnli-dev_euclidean_recall": 0.826271186440678, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 360.9602966308594, + "eval_Qnli-dev_manhattan_ap": 0.7416445790176474, + "eval_Qnli-dev_manhattan_f1": 0.6933797909407665, + "eval_Qnli-dev_manhattan_f1_threshold": 417.87420654296875, + "eval_Qnli-dev_manhattan_precision": 0.5887573964497042, + "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 360.9602966308594, + "eval_Qnli-dev_max_ap": 0.7416445790176474, + "eval_Qnli-dev_max_f1": 0.6951871657754012, + "eval_Qnli-dev_max_f1_threshold": 417.87420654296875, + "eval_Qnli-dev_max_precision": 0.6, + "eval_Qnli-dev_max_recall": 0.9194915254237288, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.790839433670044, + "eval_allNLI-dev_cosine_ap": 0.6356228152937139, + "eval_allNLI-dev_cosine_f1": 0.6430260047281324, + "eval_allNLI-dev_cosine_f1_threshold": 0.6322557926177979, + "eval_allNLI-dev_cosine_precision": 0.544, + "eval_allNLI-dev_cosine_recall": 0.7861271676300579, + "eval_allNLI-dev_dot_accuracy": 0.72265625, + "eval_allNLI-dev_dot_accuracy_threshold": 322.9375, + "eval_allNLI-dev_dot_ap": 0.5927249736308424, + "eval_allNLI-dev_dot_f1": 0.6071428571428571, + "eval_allNLI-dev_dot_f1_threshold": 230.85830688476562, + "eval_allNLI-dev_dot_precision": 0.4622356495468278, + "eval_allNLI-dev_dot_recall": 0.884393063583815, + "eval_allNLI-dev_euclidean_accuracy": 0.748046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 14.109395027160645, + "eval_allNLI-dev_euclidean_ap": 0.6429383798229866, + "eval_allNLI-dev_euclidean_f1": 0.6518847006651886, + "eval_allNLI-dev_euclidean_f1_threshold": 18.274385452270508, + "eval_allNLI-dev_euclidean_precision": 0.5287769784172662, + "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, + "eval_allNLI-dev_manhattan_accuracy": 0.744140625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 305.727783203125, + "eval_allNLI-dev_manhattan_ap": 0.6413280561278988, + "eval_allNLI-dev_manhattan_f1": 0.652482269503546, + "eval_allNLI-dev_manhattan_f1_threshold": 372.4857177734375, + "eval_allNLI-dev_manhattan_precision": 0.552, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 322.9375, + "eval_allNLI-dev_max_ap": 0.6429383798229866, + "eval_allNLI-dev_max_f1": 0.652482269503546, + "eval_allNLI-dev_max_f1_threshold": 372.4857177734375, + "eval_allNLI-dev_max_precision": 0.552, + "eval_allNLI-dev_max_recall": 0.884393063583815, + "eval_sequential_score": 0.7416445790176474, + "eval_sts-test_pearson_cosine": 0.885652248095579, + "eval_sts-test_pearson_dot": 0.8753806885053088, + "eval_sts-test_pearson_euclidean": 0.9098417986837866, + "eval_sts-test_pearson_manhattan": 0.9108840256261982, + "eval_sts-test_pearson_max": 0.9108840256261982, + "eval_sts-test_spearman_cosine": 0.9084155660077406, + "eval_sts-test_spearman_dot": 0.8800195481170557, + "eval_sts-test_spearman_euclidean": 0.9061952769250677, + "eval_sts-test_spearman_manhattan": 0.9071873228875922, + "eval_sts-test_spearman_max": 0.9084155660077406, + "eval_vitaminc-pairs_loss": 1.6902121305465698, + "eval_vitaminc-pairs_runtime": 3.174, + "eval_vitaminc-pairs_samples_per_second": 40.328, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_negation-triplets_loss": 0.9357818365097046, + "eval_negation-triplets_runtime": 0.6945, + "eval_negation-triplets_samples_per_second": 184.316, + "eval_negation-triplets_steps_per_second": 1.44, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_scitail-pairs-pos_loss": 0.04916258901357651, + "eval_scitail-pairs-pos_runtime": 0.8588, + "eval_scitail-pairs-pos_samples_per_second": 149.043, + "eval_scitail-pairs-pos_steps_per_second": 1.164, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_scitail-pairs-qa_loss": 1.0872381608351134e-05, + "eval_scitail-pairs-qa_runtime": 0.5126, + "eval_scitail-pairs-qa_samples_per_second": 249.702, + "eval_scitail-pairs-qa_steps_per_second": 1.951, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_xsum-pairs_loss": 0.00817815400660038, + "eval_xsum-pairs_runtime": 2.715, + "eval_xsum-pairs_samples_per_second": 47.145, + "eval_xsum-pairs_steps_per_second": 0.368, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_sciq_pairs_loss": 0.020444568246603012, + "eval_sciq_pairs_runtime": 3.0892, + "eval_sciq_pairs_samples_per_second": 41.434, + "eval_sciq_pairs_steps_per_second": 0.324, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_qasc_pairs_loss": 0.10777398943901062, + "eval_qasc_pairs_runtime": 0.5667, + "eval_qasc_pairs_samples_per_second": 225.871, + "eval_qasc_pairs_steps_per_second": 1.765, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_openbookqa_pairs_loss": 0.8025698661804199, + "eval_openbookqa_pairs_runtime": 0.5788, + "eval_openbookqa_pairs_samples_per_second": 221.166, + "eval_openbookqa_pairs_steps_per_second": 1.728, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_msmarco_pairs_loss": 0.15062634646892548, + "eval_msmarco_pairs_runtime": 1.2772, + "eval_msmarco_pairs_samples_per_second": 100.217, + "eval_msmarco_pairs_steps_per_second": 0.783, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_nq_pairs_loss": 0.10251187533140182, + "eval_nq_pairs_runtime": 2.7256, + "eval_nq_pairs_samples_per_second": 46.962, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_trivia_pairs_loss": 0.17246802151203156, + "eval_trivia_pairs_runtime": 3.1975, + "eval_trivia_pairs_samples_per_second": 40.032, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_gooaq_pairs_loss": 0.12505102157592773, + "eval_gooaq_pairs_runtime": 0.8764, + "eval_gooaq_pairs_samples_per_second": 146.059, + "eval_gooaq_pairs_steps_per_second": 1.141, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_paws-pos_loss": 0.02237733267247677, + "eval_paws-pos_runtime": 0.6938, + "eval_paws-pos_samples_per_second": 184.493, + "eval_paws-pos_steps_per_second": 1.441, + "step": 105 + }, + { + "epoch": 0.10725229826353422, + "eval_global_dataset_loss": 0.22049972414970398, + "eval_global_dataset_runtime": 18.1363, + "eval_global_dataset_samples_per_second": 30.105, + "eval_global_dataset_steps_per_second": 0.276, + "step": 105 + }, + { + "epoch": 0.10827374872318693, + "grad_norm": 4.5949201583862305, + "learning_rate": 4.914992272024729e-06, + "loss": 0.0841, + "step": 106 + }, + { + "epoch": 0.10929519918283963, + "grad_norm": 11.887070655822754, + "learning_rate": 4.961360123647604e-06, + "loss": 0.3928, + "step": 107 + }, + { + "epoch": 0.11031664964249234, + "grad_norm": 7.335819721221924, + "learning_rate": 5.0077279752704795e-06, + "loss": 0.1016, + "step": 108 + }, + { + "epoch": 0.11133810010214505, + "grad_norm": 9.527905464172363, + "learning_rate": 5.054095826893354e-06, + "loss": 0.2171, + "step": 109 + }, + { + "epoch": 0.11235955056179775, + "grad_norm": 5.980996131896973, + "learning_rate": 5.100463678516229e-06, + "loss": 0.1669, + "step": 110 + }, + { + "epoch": 0.11338100102145046, + "grad_norm": Infinity, + "learning_rate": 5.100463678516229e-06, + "loss": 1.3794, + "step": 111 + }, + { + "epoch": 0.11440245148110317, + "grad_norm": 8.145145416259766, + "learning_rate": 5.146831530139103e-06, + "loss": 0.1301, + "step": 112 + }, + { + "epoch": 0.11542390194075587, + "grad_norm": 8.083388328552246, + "learning_rate": 5.193199381761978e-06, + "loss": 0.1865, + "step": 113 + }, + { + "epoch": 0.11644535240040858, + "grad_norm": 8.202815055847168, + "learning_rate": 5.239567233384853e-06, + "loss": 0.1471, + "step": 114 + }, + { + "epoch": 0.11746680286006128, + "grad_norm": 6.991161346435547, + "learning_rate": 5.285935085007728e-06, + "loss": 0.1562, + "step": 115 + }, + { + "epoch": 0.118488253319714, + "grad_norm": 9.129571914672852, + "learning_rate": 5.332302936630603e-06, + "loss": 0.5139, + "step": 116 + }, + { + "epoch": 0.1195097037793667, + "grad_norm": 5.970907688140869, + "learning_rate": 5.378670788253477e-06, + "loss": 0.1164, + "step": 117 + }, + { + "epoch": 0.1205311542390194, + "grad_norm": 5.176352024078369, + "learning_rate": 5.425038639876353e-06, + "loss": 0.1798, + "step": 118 + }, + { + "epoch": 0.12155260469867211, + "grad_norm": 7.435749053955078, + "learning_rate": 5.4714064914992274e-06, + "loss": 0.1081, + "step": 119 + }, + { + "epoch": 0.12257405515832483, + "grad_norm": 4.030707836151123, + "learning_rate": 5.517774343122103e-06, + "loss": 0.1001, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_Qnli-dev_cosine_accuracy": 0.7109375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6581053733825684, + "eval_Qnli-dev_cosine_ap": 0.730918746452516, + "eval_Qnli-dev_cosine_f1": 0.6929982046678635, + "eval_Qnli-dev_cosine_f1_threshold": 0.6017528772354126, + "eval_Qnli-dev_cosine_precision": 0.6012461059190031, + "eval_Qnli-dev_cosine_recall": 0.8177966101694916, + "eval_Qnli-dev_dot_accuracy": 0.681640625, + "eval_Qnli-dev_dot_accuracy_threshold": 290.0906066894531, + "eval_Qnli-dev_dot_ap": 0.6821393209099462, + "eval_Qnli-dev_dot_f1": 0.6796747967479675, + "eval_Qnli-dev_dot_f1_threshold": 239.74539184570312, + "eval_Qnli-dev_dot_precision": 0.5514511873350924, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.720703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.121063232421875, + "eval_Qnli-dev_euclidean_ap": 0.7413521688570913, + "eval_Qnli-dev_euclidean_f1": 0.693661971830986, + "eval_Qnli-dev_euclidean_f1_threshold": 19.364234924316406, + "eval_Qnli-dev_euclidean_precision": 0.5933734939759037, + "eval_Qnli-dev_euclidean_recall": 0.8347457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 365.4123840332031, + "eval_Qnli-dev_manhattan_ap": 0.744185473419516, + "eval_Qnli-dev_manhattan_f1": 0.6927175843694494, + "eval_Qnli-dev_manhattan_f1_threshold": 408.70257568359375, + "eval_Qnli-dev_manhattan_precision": 0.5963302752293578, + "eval_Qnli-dev_manhattan_recall": 0.826271186440678, + "eval_Qnli-dev_max_accuracy": 0.720703125, + "eval_Qnli-dev_max_accuracy_threshold": 365.4123840332031, + "eval_Qnli-dev_max_ap": 0.744185473419516, + "eval_Qnli-dev_max_f1": 0.693661971830986, + "eval_Qnli-dev_max_f1_threshold": 408.70257568359375, + "eval_Qnli-dev_max_precision": 0.6012461059190031, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7558298707008362, + "eval_allNLI-dev_cosine_ap": 0.634573316291239, + "eval_allNLI-dev_cosine_f1": 0.6411483253588517, + "eval_allNLI-dev_cosine_f1_threshold": 0.6374086141586304, + "eval_allNLI-dev_cosine_precision": 0.5469387755102041, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.7265625, + "eval_allNLI-dev_dot_accuracy_threshold": 324.275390625, + "eval_allNLI-dev_dot_ap": 0.5951723271823494, + "eval_allNLI-dev_dot_f1": 0.607645875251509, + "eval_allNLI-dev_dot_f1_threshold": 238.57200622558594, + "eval_allNLI-dev_dot_precision": 0.4660493827160494, + "eval_allNLI-dev_dot_recall": 0.8728323699421965, + "eval_allNLI-dev_euclidean_accuracy": 0.748046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 14.189202308654785, + "eval_allNLI-dev_euclidean_ap": 0.6422651563449183, + "eval_allNLI-dev_euclidean_f1": 0.6475770925110133, + "eval_allNLI-dev_euclidean_f1_threshold": 18.37746238708496, + "eval_allNLI-dev_euclidean_precision": 0.5231316725978647, + "eval_allNLI-dev_euclidean_recall": 0.8497109826589595, + "eval_allNLI-dev_manhattan_accuracy": 0.744140625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 308.418212890625, + "eval_allNLI-dev_manhattan_ap": 0.6407897680060785, + "eval_allNLI-dev_manhattan_f1": 0.652482269503546, + "eval_allNLI-dev_manhattan_f1_threshold": 373.7538146972656, + "eval_allNLI-dev_manhattan_precision": 0.552, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 324.275390625, + "eval_allNLI-dev_max_ap": 0.6422651563449183, + "eval_allNLI-dev_max_f1": 0.652482269503546, + "eval_allNLI-dev_max_f1_threshold": 373.7538146972656, + "eval_allNLI-dev_max_precision": 0.552, + "eval_allNLI-dev_max_recall": 0.8728323699421965, + "eval_sequential_score": 0.744185473419516, + "eval_sts-test_pearson_cosine": 0.8861519539446183, + "eval_sts-test_pearson_dot": 0.8758723392585603, + "eval_sts-test_pearson_euclidean": 0.9102048001875452, + "eval_sts-test_pearson_manhattan": 0.9111635045482392, + "eval_sts-test_pearson_max": 0.9111635045482392, + "eval_sts-test_spearman_cosine": 0.9087610009944209, + "eval_sts-test_spearman_dot": 0.8812273399107483, + "eval_sts-test_spearman_euclidean": 0.9068150818953387, + "eval_sts-test_spearman_manhattan": 0.9076474106059574, + "eval_sts-test_spearman_max": 0.9087610009944209, + "eval_vitaminc-pairs_loss": 1.6894718408584595, + "eval_vitaminc-pairs_runtime": 3.1953, + "eval_vitaminc-pairs_samples_per_second": 40.059, + "eval_vitaminc-pairs_steps_per_second": 0.313, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_negation-triplets_loss": 0.9225427508354187, + "eval_negation-triplets_runtime": 0.691, + "eval_negation-triplets_samples_per_second": 185.233, + "eval_negation-triplets_steps_per_second": 1.447, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_scitail-pairs-pos_loss": 0.04938405379652977, + "eval_scitail-pairs-pos_runtime": 0.7887, + "eval_scitail-pairs-pos_samples_per_second": 162.296, + "eval_scitail-pairs-pos_steps_per_second": 1.268, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_scitail-pairs-qa_loss": 1.5053984498081263e-05, + "eval_scitail-pairs-qa_runtime": 0.5152, + "eval_scitail-pairs-qa_samples_per_second": 248.462, + "eval_scitail-pairs-qa_steps_per_second": 1.941, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_xsum-pairs_loss": 0.007213404402136803, + "eval_xsum-pairs_runtime": 2.7064, + "eval_xsum-pairs_samples_per_second": 47.295, + "eval_xsum-pairs_steps_per_second": 0.369, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_sciq_pairs_loss": 0.019917281344532967, + "eval_sciq_pairs_runtime": 3.1306, + "eval_sciq_pairs_samples_per_second": 40.887, + "eval_sciq_pairs_steps_per_second": 0.319, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_qasc_pairs_loss": 0.10944625735282898, + "eval_qasc_pairs_runtime": 0.5665, + "eval_qasc_pairs_samples_per_second": 225.958, + "eval_qasc_pairs_steps_per_second": 1.765, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_openbookqa_pairs_loss": 0.8018856048583984, + "eval_openbookqa_pairs_runtime": 0.5732, + "eval_openbookqa_pairs_samples_per_second": 223.316, + "eval_openbookqa_pairs_steps_per_second": 1.745, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_msmarco_pairs_loss": 0.15368959307670593, + "eval_msmarco_pairs_runtime": 1.2728, + "eval_msmarco_pairs_samples_per_second": 100.564, + "eval_msmarco_pairs_steps_per_second": 0.786, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_nq_pairs_loss": 0.10127946734428406, + "eval_nq_pairs_runtime": 2.7196, + "eval_nq_pairs_samples_per_second": 47.065, + "eval_nq_pairs_steps_per_second": 0.368, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_trivia_pairs_loss": 0.16999171674251556, + "eval_trivia_pairs_runtime": 3.1956, + "eval_trivia_pairs_samples_per_second": 40.055, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_gooaq_pairs_loss": 0.12600551545619965, + "eval_gooaq_pairs_runtime": 0.8717, + "eval_gooaq_pairs_samples_per_second": 146.839, + "eval_gooaq_pairs_steps_per_second": 1.147, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_paws-pos_loss": 0.02243974432349205, + "eval_paws-pos_runtime": 0.6916, + "eval_paws-pos_samples_per_second": 185.09, + "eval_paws-pos_steps_per_second": 1.446, + "step": 120 + }, + { + "epoch": 0.12257405515832483, + "eval_global_dataset_loss": 0.2185777723789215, + "eval_global_dataset_runtime": 18.1382, + "eval_global_dataset_samples_per_second": 30.102, + "eval_global_dataset_steps_per_second": 0.276, + "step": 120 + }, + { + "epoch": 0.12359550561797752, + "grad_norm": 5.014219760894775, + "learning_rate": 5.5641421947449775e-06, + "loss": 0.1866, + "step": 121 + }, + { + "epoch": 0.12461695607763024, + "grad_norm": 0.4840843975543976, + "learning_rate": 5.610510046367852e-06, + "loss": 0.0017, + "step": 122 + }, + { + "epoch": 0.12563840653728295, + "grad_norm": 1.2656071186065674, + "learning_rate": 5.656877897990727e-06, + "loss": 0.0127, + "step": 123 + }, + { + "epoch": 0.12665985699693566, + "grad_norm": 10.125456809997559, + "learning_rate": 5.703245749613601e-06, + "loss": 0.2586, + "step": 124 + }, + { + "epoch": 0.12768130745658834, + "grad_norm": 0.6021859645843506, + "learning_rate": 5.749613601236476e-06, + "loss": 0.0281, + "step": 125 + }, + { + "epoch": 0.12870275791624106, + "grad_norm": 2.960421562194824, + "learning_rate": 5.7959814528593515e-06, + "loss": 0.0289, + "step": 126 + }, + { + "epoch": 0.12972420837589377, + "grad_norm": 5.363409519195557, + "learning_rate": 5.842349304482226e-06, + "loss": 0.0659, + "step": 127 + }, + { + "epoch": 0.13074565883554648, + "grad_norm": 0.024240603670477867, + "learning_rate": 5.888717156105101e-06, + "loss": 0.0001, + "step": 128 + }, + { + "epoch": 0.1317671092951992, + "grad_norm": 6.389922618865967, + "learning_rate": 5.935085007727975e-06, + "loss": 0.132, + "step": 129 + }, + { + "epoch": 0.13278855975485188, + "grad_norm": 0.05200214684009552, + "learning_rate": 5.98145285935085e-06, + "loss": 0.0003, + "step": 130 + }, + { + "epoch": 0.1338100102145046, + "grad_norm": 7.617424488067627, + "learning_rate": 6.0278207109737254e-06, + "loss": 0.2092, + "step": 131 + }, + { + "epoch": 0.1348314606741573, + "grad_norm": 10.463836669921875, + "learning_rate": 6.0741885625966e-06, + "loss": 0.5796, + "step": 132 + }, + { + "epoch": 0.13585291113381, + "grad_norm": 8.952075958251953, + "learning_rate": 6.120556414219475e-06, + "loss": 0.4157, + "step": 133 + }, + { + "epoch": 0.13687436159346272, + "grad_norm": 8.41869831085205, + "learning_rate": 6.166924265842349e-06, + "loss": 0.2938, + "step": 134 + }, + { + "epoch": 0.13789581205311544, + "grad_norm": 4.022891044616699, + "learning_rate": 6.213292117465224e-06, + "loss": 0.0566, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_Qnli-dev_cosine_accuracy": 0.7109375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6704687476158142, + "eval_Qnli-dev_cosine_ap": 0.730031615326299, + "eval_Qnli-dev_cosine_f1": 0.6905187835420393, + "eval_Qnli-dev_cosine_f1_threshold": 0.6074261665344238, + "eval_Qnli-dev_cosine_precision": 0.5975232198142415, + "eval_Qnli-dev_cosine_recall": 0.8177966101694916, + "eval_Qnli-dev_dot_accuracy": 0.677734375, + "eval_Qnli-dev_dot_accuracy_threshold": 295.0644836425781, + "eval_Qnli-dev_dot_ap": 0.6809699685402962, + "eval_Qnli-dev_dot_f1": 0.6775777414075286, + "eval_Qnli-dev_dot_f1_threshold": 249.67672729492188, + "eval_Qnli-dev_dot_precision": 0.552, + "eval_Qnli-dev_dot_recall": 0.8771186440677966, + "eval_Qnli-dev_euclidean_accuracy": 0.71875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.497955322265625, + "eval_Qnli-dev_euclidean_ap": 0.7398687736208136, + "eval_Qnli-dev_euclidean_f1": 0.7012448132780082, + "eval_Qnli-dev_euclidean_f1_threshold": 17.674396514892578, + "eval_Qnli-dev_euclidean_precision": 0.6869918699186992, + "eval_Qnli-dev_euclidean_recall": 0.7161016949152542, + "eval_Qnli-dev_manhattan_accuracy": 0.71484375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 366.4118957519531, + "eval_Qnli-dev_manhattan_ap": 0.7418528688385038, + "eval_Qnli-dev_manhattan_f1": 0.693950177935943, + "eval_Qnli-dev_manhattan_f1_threshold": 405.3584899902344, + "eval_Qnli-dev_manhattan_precision": 0.598159509202454, + "eval_Qnli-dev_manhattan_recall": 0.826271186440678, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 366.4118957519531, + "eval_Qnli-dev_max_ap": 0.7418528688385038, + "eval_Qnli-dev_max_f1": 0.7012448132780082, + "eval_Qnli-dev_max_f1_threshold": 405.3584899902344, + "eval_Qnli-dev_max_precision": 0.6869918699186992, + "eval_Qnli-dev_max_recall": 0.8771186440677966, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7543807625770569, + "eval_allNLI-dev_cosine_ap": 0.6334878860942056, + "eval_allNLI-dev_cosine_f1": 0.6372093023255814, + "eval_allNLI-dev_cosine_f1_threshold": 0.6344075202941895, + "eval_allNLI-dev_cosine_precision": 0.5330739299610895, + "eval_allNLI-dev_cosine_recall": 0.791907514450867, + "eval_allNLI-dev_dot_accuracy": 0.7265625, + "eval_allNLI-dev_dot_accuracy_threshold": 312.7142333984375, + "eval_allNLI-dev_dot_ap": 0.5951908786217456, + "eval_allNLI-dev_dot_f1": 0.6099009900990099, + "eval_allNLI-dev_dot_f1_threshold": 240.46969604492188, + "eval_allNLI-dev_dot_precision": 0.463855421686747, + "eval_allNLI-dev_dot_recall": 0.8901734104046243, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 14.157598495483398, + "eval_allNLI-dev_euclidean_ap": 0.640386674178937, + "eval_allNLI-dev_euclidean_f1": 0.6431718061674009, + "eval_allNLI-dev_euclidean_f1_threshold": 18.330421447753906, + "eval_allNLI-dev_euclidean_precision": 0.5195729537366548, + "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, + "eval_allNLI-dev_manhattan_accuracy": 0.740234375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 309.08746337890625, + "eval_allNLI-dev_manhattan_ap": 0.6399125360163551, + "eval_allNLI-dev_manhattan_f1": 0.6443914081145585, + "eval_allNLI-dev_manhattan_f1_threshold": 371.892578125, + "eval_allNLI-dev_manhattan_precision": 0.5487804878048781, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.74609375, + "eval_allNLI-dev_max_accuracy_threshold": 312.7142333984375, + "eval_allNLI-dev_max_ap": 0.640386674178937, + "eval_allNLI-dev_max_f1": 0.6443914081145585, + "eval_allNLI-dev_max_f1_threshold": 371.892578125, + "eval_allNLI-dev_max_precision": 0.5487804878048781, + "eval_allNLI-dev_max_recall": 0.8901734104046243, + "eval_sequential_score": 0.7418528688385038, + "eval_sts-test_pearson_cosine": 0.8861128824276227, + "eval_sts-test_pearson_dot": 0.8748223670511182, + "eval_sts-test_pearson_euclidean": 0.9106891095404409, + "eval_sts-test_pearson_manhattan": 0.9114269537491446, + "eval_sts-test_pearson_max": 0.9114269537491446, + "eval_sts-test_spearman_cosine": 0.9087766191846463, + "eval_sts-test_spearman_dot": 0.8796734866127163, + "eval_sts-test_spearman_euclidean": 0.9071859803497505, + "eval_sts-test_spearman_manhattan": 0.9076761856670318, + "eval_sts-test_spearman_max": 0.9087766191846463, + "eval_vitaminc-pairs_loss": 1.6810442209243774, + "eval_vitaminc-pairs_runtime": 3.1686, + "eval_vitaminc-pairs_samples_per_second": 40.397, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_negation-triplets_loss": 0.9073691368103027, + "eval_negation-triplets_runtime": 0.6915, + "eval_negation-triplets_samples_per_second": 185.101, + "eval_negation-triplets_steps_per_second": 1.446, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_scitail-pairs-pos_loss": 0.05077136307954788, + "eval_scitail-pairs-pos_runtime": 0.7839, + "eval_scitail-pairs-pos_samples_per_second": 163.288, + "eval_scitail-pairs-pos_steps_per_second": 1.276, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_scitail-pairs-qa_loss": 1.8426315364195034e-05, + "eval_scitail-pairs-qa_runtime": 0.5104, + "eval_scitail-pairs-qa_samples_per_second": 250.771, + "eval_scitail-pairs-qa_steps_per_second": 1.959, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_xsum-pairs_loss": 0.005991346202790737, + "eval_xsum-pairs_runtime": 2.7124, + "eval_xsum-pairs_samples_per_second": 47.191, + "eval_xsum-pairs_steps_per_second": 0.369, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_sciq_pairs_loss": 0.019755717366933823, + "eval_sciq_pairs_runtime": 3.1056, + "eval_sciq_pairs_samples_per_second": 41.216, + "eval_sciq_pairs_steps_per_second": 0.322, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_qasc_pairs_loss": 0.1126415804028511, + "eval_qasc_pairs_runtime": 0.5629, + "eval_qasc_pairs_samples_per_second": 227.393, + "eval_qasc_pairs_steps_per_second": 1.777, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_openbookqa_pairs_loss": 0.8040855526924133, + "eval_openbookqa_pairs_runtime": 0.5711, + "eval_openbookqa_pairs_samples_per_second": 224.146, + "eval_openbookqa_pairs_steps_per_second": 1.751, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_msmarco_pairs_loss": 0.1620917171239853, + "eval_msmarco_pairs_runtime": 1.2699, + "eval_msmarco_pairs_samples_per_second": 100.794, + "eval_msmarco_pairs_steps_per_second": 0.787, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_nq_pairs_loss": 0.1051928922533989, + "eval_nq_pairs_runtime": 2.7259, + "eval_nq_pairs_samples_per_second": 46.956, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_trivia_pairs_loss": 0.1648341715335846, + "eval_trivia_pairs_runtime": 3.188, + "eval_trivia_pairs_samples_per_second": 40.151, + "eval_trivia_pairs_steps_per_second": 0.314, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_gooaq_pairs_loss": 0.12825490534305573, + "eval_gooaq_pairs_runtime": 0.8711, + "eval_gooaq_pairs_samples_per_second": 146.937, + "eval_gooaq_pairs_steps_per_second": 1.148, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_paws-pos_loss": 0.022437551990151405, + "eval_paws-pos_runtime": 0.6858, + "eval_paws-pos_samples_per_second": 186.633, + "eval_paws-pos_steps_per_second": 1.458, + "step": 135 + }, + { + "epoch": 0.13789581205311544, + "eval_global_dataset_loss": 0.21732056140899658, + "eval_global_dataset_runtime": 18.0997, + "eval_global_dataset_samples_per_second": 30.166, + "eval_global_dataset_steps_per_second": 0.276, + "step": 135 + }, + { + "epoch": 0.13891726251276812, + "grad_norm": 5.725745677947998, + "learning_rate": 6.2596599690880985e-06, + "loss": 0.0864, + "step": 136 + }, + { + "epoch": 0.13993871297242083, + "grad_norm": 9.995277404785156, + "learning_rate": 6.306027820710974e-06, + "loss": 0.3386, + "step": 137 + }, + { + "epoch": 0.14096016343207354, + "grad_norm": 5.895701885223389, + "learning_rate": 6.352395672333849e-06, + "loss": 0.103, + "step": 138 + }, + { + "epoch": 0.14198161389172625, + "grad_norm": 11.060420036315918, + "learning_rate": 6.398763523956723e-06, + "loss": 0.4119, + "step": 139 + }, + { + "epoch": 0.14300306435137897, + "grad_norm": 6.30234432220459, + "learning_rate": 6.445131375579598e-06, + "loss": 0.1294, + "step": 140 + }, + { + "epoch": 0.14402451481103168, + "grad_norm": 7.138073921203613, + "learning_rate": 6.4914992272024725e-06, + "loss": 0.1687, + "step": 141 + }, + { + "epoch": 0.14504596527068436, + "grad_norm": 7.051334381103516, + "learning_rate": 6.537867078825347e-06, + "loss": 0.167, + "step": 142 + }, + { + "epoch": 0.14606741573033707, + "grad_norm": 18.21676254272461, + "learning_rate": 6.584234930448223e-06, + "loss": 1.2691, + "step": 143 + }, + { + "epoch": 0.1470888661899898, + "grad_norm": 6.23491096496582, + "learning_rate": 6.630602782071098e-06, + "loss": 0.2091, + "step": 144 + }, + { + "epoch": 0.1481103166496425, + "grad_norm": 4.759470462799072, + "learning_rate": 6.676970633693973e-06, + "loss": 0.1221, + "step": 145 + }, + { + "epoch": 0.1491317671092952, + "grad_norm": 0.009023046121001244, + "learning_rate": 6.723338485316847e-06, + "loss": 0.0, + "step": 146 + }, + { + "epoch": 0.1501532175689479, + "grad_norm": 3.0002005100250244, + "learning_rate": 6.769706336939722e-06, + "loss": 0.0484, + "step": 147 + }, + { + "epoch": 0.1511746680286006, + "grad_norm": 6.540920734405518, + "learning_rate": 6.816074188562597e-06, + "loss": 0.1199, + "step": 148 + }, + { + "epoch": 0.15219611848825332, + "grad_norm": 12.14609432220459, + "learning_rate": 6.862442040185472e-06, + "loss": 0.4259, + "step": 149 + }, + { + "epoch": 0.15321756894790603, + "grad_norm": 4.47613525390625, + "learning_rate": 6.908809891808347e-06, + "loss": 0.0532, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6765823364257812, + "eval_Qnli-dev_cosine_ap": 0.7269659818531288, + "eval_Qnli-dev_cosine_f1": 0.6891651865008882, + "eval_Qnli-dev_cosine_f1_threshold": 0.6105165481567383, + "eval_Qnli-dev_cosine_precision": 0.5932721712538226, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 304.1693115234375, + "eval_Qnli-dev_dot_ap": 0.6777260329981589, + "eval_Qnli-dev_dot_f1": 0.6790123456790124, + "eval_Qnli-dev_dot_f1_threshold": 231.61270141601562, + "eval_Qnli-dev_dot_precision": 0.5339805825242718, + "eval_Qnli-dev_dot_recall": 0.9322033898305084, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.302841186523438, + "eval_Qnli-dev_euclidean_ap": 0.7386256429365343, + "eval_Qnli-dev_euclidean_f1": 0.6997929606625258, + "eval_Qnli-dev_euclidean_f1_threshold": 17.619129180908203, + "eval_Qnli-dev_euclidean_precision": 0.6842105263157895, + "eval_Qnli-dev_euclidean_recall": 0.7161016949152542, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 357.6455078125, + "eval_Qnli-dev_manhattan_ap": 0.7400297621960754, + "eval_Qnli-dev_manhattan_f1": 0.697080291970803, + "eval_Qnli-dev_manhattan_f1_threshold": 395.60101318359375, + "eval_Qnli-dev_manhattan_precision": 0.6121794871794872, + "eval_Qnli-dev_manhattan_recall": 0.809322033898305, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 357.6455078125, + "eval_Qnli-dev_max_ap": 0.7400297621960754, + "eval_Qnli-dev_max_f1": 0.6997929606625258, + "eval_Qnli-dev_max_f1_threshold": 395.60101318359375, + "eval_Qnli-dev_max_precision": 0.6842105263157895, + "eval_Qnli-dev_max_recall": 0.9322033898305084, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7804166674613953, + "eval_allNLI-dev_cosine_ap": 0.6354012132907776, + "eval_allNLI-dev_cosine_f1": 0.6333333333333333, + "eval_allNLI-dev_cosine_f1_threshold": 0.6445462703704834, + "eval_allNLI-dev_cosine_precision": 0.5384615384615384, + "eval_allNLI-dev_cosine_recall": 0.7687861271676301, + "eval_allNLI-dev_dot_accuracy": 0.72265625, + "eval_allNLI-dev_dot_accuracy_threshold": 338.0382385253906, + "eval_allNLI-dev_dot_ap": 0.5968781039551687, + "eval_allNLI-dev_dot_f1": 0.6090534979423868, + "eval_allNLI-dev_dot_f1_threshold": 252.47320556640625, + "eval_allNLI-dev_dot_precision": 0.4728434504792332, + "eval_allNLI-dev_dot_recall": 0.8554913294797688, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 14.17716121673584, + "eval_allNLI-dev_euclidean_ap": 0.6418933088908507, + "eval_allNLI-dev_euclidean_f1": 0.6431718061674009, + "eval_allNLI-dev_euclidean_f1_threshold": 18.30975914001465, + "eval_allNLI-dev_euclidean_precision": 0.5195729537366548, + "eval_allNLI-dev_euclidean_recall": 0.8439306358381503, + "eval_allNLI-dev_manhattan_accuracy": 0.744140625, + "eval_allNLI-dev_manhattan_accuracy_threshold": 336.6082458496094, + "eval_allNLI-dev_manhattan_ap": 0.6405645781297119, + "eval_allNLI-dev_manhattan_f1": 0.6459330143540669, + "eval_allNLI-dev_manhattan_f1_threshold": 371.16949462890625, + "eval_allNLI-dev_manhattan_precision": 0.5510204081632653, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.74609375, + "eval_allNLI-dev_max_accuracy_threshold": 338.0382385253906, + "eval_allNLI-dev_max_ap": 0.6418933088908507, + "eval_allNLI-dev_max_f1": 0.6459330143540669, + "eval_allNLI-dev_max_f1_threshold": 371.16949462890625, + "eval_allNLI-dev_max_precision": 0.5510204081632653, + "eval_allNLI-dev_max_recall": 0.8554913294797688, + "eval_sequential_score": 0.7400297621960754, + "eval_sts-test_pearson_cosine": 0.8869899637735537, + "eval_sts-test_pearson_dot": 0.8744314811311789, + "eval_sts-test_pearson_euclidean": 0.911952341281393, + "eval_sts-test_pearson_manhattan": 0.9126228158073495, + "eval_sts-test_pearson_max": 0.9126228158073495, + "eval_sts-test_spearman_cosine": 0.9093791054167589, + "eval_sts-test_spearman_dot": 0.878851271687172, + "eval_sts-test_spearman_euclidean": 0.9080618967889643, + "eval_sts-test_spearman_manhattan": 0.9088933752256164, + "eval_sts-test_spearman_max": 0.9093791054167589, + "eval_vitaminc-pairs_loss": 1.6756807565689087, + "eval_vitaminc-pairs_runtime": 3.1782, + "eval_vitaminc-pairs_samples_per_second": 40.275, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_negation-triplets_loss": 0.8964348435401917, + "eval_negation-triplets_runtime": 0.6917, + "eval_negation-triplets_samples_per_second": 185.048, + "eval_negation-triplets_steps_per_second": 1.446, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_scitail-pairs-pos_loss": 0.05355990678071976, + "eval_scitail-pairs-pos_runtime": 0.7905, + "eval_scitail-pairs-pos_samples_per_second": 161.933, + "eval_scitail-pairs-pos_steps_per_second": 1.265, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_scitail-pairs-qa_loss": 1.938045534188859e-05, + "eval_scitail-pairs-qa_runtime": 0.5155, + "eval_scitail-pairs-qa_samples_per_second": 248.323, + "eval_scitail-pairs-qa_steps_per_second": 1.94, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_xsum-pairs_loss": 0.005101817660033703, + "eval_xsum-pairs_runtime": 2.7131, + "eval_xsum-pairs_samples_per_second": 47.179, + "eval_xsum-pairs_steps_per_second": 0.369, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_sciq_pairs_loss": 0.018914926797151566, + "eval_sciq_pairs_runtime": 3.0725, + "eval_sciq_pairs_samples_per_second": 41.66, + "eval_sciq_pairs_steps_per_second": 0.325, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_qasc_pairs_loss": 0.10371944308280945, + "eval_qasc_pairs_runtime": 0.5639, + "eval_qasc_pairs_samples_per_second": 226.98, + "eval_qasc_pairs_steps_per_second": 1.773, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_openbookqa_pairs_loss": 0.7999364137649536, + "eval_openbookqa_pairs_runtime": 0.5733, + "eval_openbookqa_pairs_samples_per_second": 223.253, + "eval_openbookqa_pairs_steps_per_second": 1.744, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_msmarco_pairs_loss": 0.16489192843437195, + "eval_msmarco_pairs_runtime": 1.2708, + "eval_msmarco_pairs_samples_per_second": 100.728, + "eval_msmarco_pairs_steps_per_second": 0.787, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_nq_pairs_loss": 0.1105436161160469, + "eval_nq_pairs_runtime": 2.7298, + "eval_nq_pairs_samples_per_second": 46.89, + "eval_nq_pairs_steps_per_second": 0.366, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_trivia_pairs_loss": 0.16050007939338684, + "eval_trivia_pairs_runtime": 3.1907, + "eval_trivia_pairs_samples_per_second": 40.117, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_gooaq_pairs_loss": 0.12783268094062805, + "eval_gooaq_pairs_runtime": 0.8762, + "eval_gooaq_pairs_samples_per_second": 146.091, + "eval_gooaq_pairs_steps_per_second": 1.141, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_paws-pos_loss": 0.02260657772421837, + "eval_paws-pos_runtime": 0.682, + "eval_paws-pos_samples_per_second": 187.686, + "eval_paws-pos_steps_per_second": 1.466, + "step": 150 + }, + { + "epoch": 0.15321756894790603, + "eval_global_dataset_loss": 0.21319995820522308, + "eval_global_dataset_runtime": 18.1909, + "eval_global_dataset_samples_per_second": 30.015, + "eval_global_dataset_steps_per_second": 0.275, + "step": 150 + }, + { + "epoch": 0.15423901940755874, + "grad_norm": 9.414515495300293, + "learning_rate": 6.955177743431221e-06, + "loss": 0.3261, + "step": 151 + }, + { + "epoch": 0.15526046986721145, + "grad_norm": 6.8544602394104, + "learning_rate": 7.001545595054096e-06, + "loss": 0.1618, + "step": 152 + }, + { + "epoch": 0.15628192032686414, + "grad_norm": 8.55745792388916, + "learning_rate": 7.0479134466769705e-06, + "loss": 0.2373, + "step": 153 + }, + { + "epoch": 0.15730337078651685, + "grad_norm": 5.203763008117676, + "learning_rate": 7.094281298299846e-06, + "loss": 0.0943, + "step": 154 + }, + { + "epoch": 0.15832482124616956, + "grad_norm": 6.670897006988525, + "learning_rate": 7.140649149922721e-06, + "loss": 0.1366, + "step": 155 + }, + { + "epoch": 0.15934627170582227, + "grad_norm": 5.598010063171387, + "learning_rate": 7.187017001545595e-06, + "loss": 0.0941, + "step": 156 + }, + { + "epoch": 0.16036772216547499, + "grad_norm": 6.704461097717285, + "learning_rate": 7.23338485316847e-06, + "loss": 0.0914, + "step": 157 + }, + { + "epoch": 0.16138917262512767, + "grad_norm": 6.6563615798950195, + "learning_rate": 7.2797527047913445e-06, + "loss": 0.1362, + "step": 158 + }, + { + "epoch": 0.16241062308478038, + "grad_norm": 6.019044876098633, + "learning_rate": 7.32612055641422e-06, + "loss": 0.1623, + "step": 159 + }, + { + "epoch": 0.1634320735444331, + "grad_norm": 6.506950855255127, + "learning_rate": 7.3724884080370946e-06, + "loss": 0.167, + "step": 160 + }, + { + "epoch": 0.1644535240040858, + "grad_norm": 3.206223964691162, + "learning_rate": 7.418856259659969e-06, + "loss": 0.0605, + "step": 161 + }, + { + "epoch": 0.16547497446373852, + "grad_norm": 9.14065170288086, + "learning_rate": 7.465224111282844e-06, + "loss": 0.2929, + "step": 162 + }, + { + "epoch": 0.16649642492339123, + "grad_norm": 6.935881614685059, + "learning_rate": 7.511591962905719e-06, + "loss": 0.1709, + "step": 163 + }, + { + "epoch": 0.1675178753830439, + "grad_norm": 5.117907524108887, + "learning_rate": 7.557959814528593e-06, + "loss": 0.1022, + "step": 164 + }, + { + "epoch": 0.16853932584269662, + "grad_norm": 4.351495742797852, + "learning_rate": 7.6043276661514685e-06, + "loss": 0.1399, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6853021383285522, + "eval_Qnli-dev_cosine_ap": 0.7281780879660458, + "eval_Qnli-dev_cosine_f1": 0.687392055267703, + "eval_Qnli-dev_cosine_f1_threshold": 0.5926831960678101, + "eval_Qnli-dev_cosine_precision": 0.5801749271137027, + "eval_Qnli-dev_cosine_recall": 0.8432203389830508, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 313.0809326171875, + "eval_Qnli-dev_dot_ap": 0.6816504511044976, + "eval_Qnli-dev_dot_f1": 0.6808510638297872, + "eval_Qnli-dev_dot_f1_threshold": 257.91571044921875, + "eval_Qnli-dev_dot_precision": 0.5546666666666666, + "eval_Qnli-dev_dot_recall": 0.8813559322033898, + "eval_Qnli-dev_euclidean_accuracy": 0.712890625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.199846267700195, + "eval_Qnli-dev_euclidean_ap": 0.7391681982181382, + "eval_Qnli-dev_euclidean_f1": 0.6959847036328872, + "eval_Qnli-dev_euclidean_f1_threshold": 18.26412010192871, + "eval_Qnli-dev_euclidean_precision": 0.6341463414634146, + "eval_Qnli-dev_euclidean_recall": 0.7711864406779662, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 354.17962646484375, + "eval_Qnli-dev_manhattan_ap": 0.740502378667796, + "eval_Qnli-dev_manhattan_f1": 0.6962962962962964, + "eval_Qnli-dev_manhattan_f1_threshold": 392.7843017578125, + "eval_Qnli-dev_manhattan_precision": 0.618421052631579, + "eval_Qnli-dev_manhattan_recall": 0.7966101694915254, + "eval_Qnli-dev_max_accuracy": 0.712890625, + "eval_Qnli-dev_max_accuracy_threshold": 354.17962646484375, + "eval_Qnli-dev_max_ap": 0.740502378667796, + "eval_Qnli-dev_max_f1": 0.6962962962962964, + "eval_Qnli-dev_max_f1_threshold": 392.7843017578125, + "eval_Qnli-dev_max_precision": 0.6341463414634146, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.740234375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7809075713157654, + "eval_allNLI-dev_cosine_ap": 0.6367966833474741, + "eval_allNLI-dev_cosine_f1": 0.6330275229357799, + "eval_allNLI-dev_cosine_f1_threshold": 0.635560154914856, + "eval_allNLI-dev_cosine_precision": 0.5247148288973384, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.7265625, + "eval_allNLI-dev_dot_accuracy_threshold": 333.7015380859375, + "eval_allNLI-dev_dot_ap": 0.6051816324610505, + "eval_allNLI-dev_dot_f1": 0.6166328600405678, + "eval_allNLI-dev_dot_f1_threshold": 252.21017456054688, + "eval_allNLI-dev_dot_precision": 0.475, + "eval_allNLI-dev_dot_recall": 0.8786127167630058, + "eval_allNLI-dev_euclidean_accuracy": 0.748046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 14.007568359375, + "eval_allNLI-dev_euclidean_ap": 0.6439468476927049, + "eval_allNLI-dev_euclidean_f1": 0.6425339366515838, + "eval_allNLI-dev_euclidean_f1_threshold": 18.053264617919922, + "eval_allNLI-dev_euclidean_precision": 0.5278810408921933, + "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, + "eval_allNLI-dev_manhattan_accuracy": 0.740234375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 323.75823974609375, + "eval_allNLI-dev_manhattan_ap": 0.6419862420939141, + "eval_allNLI-dev_manhattan_f1": 0.6481481481481481, + "eval_allNLI-dev_manhattan_f1_threshold": 375.4950256347656, + "eval_allNLI-dev_manhattan_precision": 0.5405405405405406, + "eval_allNLI-dev_manhattan_recall": 0.8092485549132948, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 333.7015380859375, + "eval_allNLI-dev_max_ap": 0.6439468476927049, + "eval_allNLI-dev_max_f1": 0.6481481481481481, + "eval_allNLI-dev_max_f1_threshold": 375.4950256347656, + "eval_allNLI-dev_max_precision": 0.5405405405405406, + "eval_allNLI-dev_max_recall": 0.8786127167630058, + "eval_sequential_score": 0.740502378667796, + "eval_sts-test_pearson_cosine": 0.8870916057917433, + "eval_sts-test_pearson_dot": 0.8733368070135754, + "eval_sts-test_pearson_euclidean": 0.9124442677858347, + "eval_sts-test_pearson_manhattan": 0.912894163742058, + "eval_sts-test_pearson_max": 0.912894163742058, + "eval_sts-test_spearman_cosine": 0.9092496400175537, + "eval_sts-test_spearman_dot": 0.8767357900586777, + "eval_sts-test_spearman_euclidean": 0.9082903072271061, + "eval_sts-test_spearman_manhattan": 0.9088904663936261, + "eval_sts-test_spearman_max": 0.9092496400175537, + "eval_vitaminc-pairs_loss": 1.6712932586669922, + "eval_vitaminc-pairs_runtime": 3.1722, + "eval_vitaminc-pairs_samples_per_second": 40.35, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_negation-triplets_loss": 0.8837454915046692, + "eval_negation-triplets_runtime": 0.6856, + "eval_negation-triplets_samples_per_second": 186.687, + "eval_negation-triplets_steps_per_second": 1.458, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_scitail-pairs-pos_loss": 0.05162264406681061, + "eval_scitail-pairs-pos_runtime": 0.7899, + "eval_scitail-pairs-pos_samples_per_second": 162.046, + "eval_scitail-pairs-pos_steps_per_second": 1.266, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_scitail-pairs-qa_loss": 2.0812361981370486e-05, + "eval_scitail-pairs-qa_runtime": 0.5084, + "eval_scitail-pairs-qa_samples_per_second": 251.773, + "eval_scitail-pairs-qa_steps_per_second": 1.967, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_xsum-pairs_loss": 0.0045272475108504295, + "eval_xsum-pairs_runtime": 2.7125, + "eval_xsum-pairs_samples_per_second": 47.188, + "eval_xsum-pairs_steps_per_second": 0.369, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_sciq_pairs_loss": 0.01817404106259346, + "eval_sciq_pairs_runtime": 3.0609, + "eval_sciq_pairs_samples_per_second": 41.818, + "eval_sciq_pairs_steps_per_second": 0.327, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_qasc_pairs_loss": 0.08290781080722809, + "eval_qasc_pairs_runtime": 0.5675, + "eval_qasc_pairs_samples_per_second": 225.548, + "eval_qasc_pairs_steps_per_second": 1.762, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_openbookqa_pairs_loss": 0.781373143196106, + "eval_openbookqa_pairs_runtime": 0.5765, + "eval_openbookqa_pairs_samples_per_second": 222.023, + "eval_openbookqa_pairs_steps_per_second": 1.735, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_msmarco_pairs_loss": 0.15988104045391083, + "eval_msmarco_pairs_runtime": 1.2707, + "eval_msmarco_pairs_samples_per_second": 100.729, + "eval_msmarco_pairs_steps_per_second": 0.787, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_nq_pairs_loss": 0.11892935633659363, + "eval_nq_pairs_runtime": 2.7238, + "eval_nq_pairs_samples_per_second": 46.993, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_trivia_pairs_loss": 0.14718589186668396, + "eval_trivia_pairs_runtime": 3.1934, + "eval_trivia_pairs_samples_per_second": 40.083, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_gooaq_pairs_loss": 0.12683114409446716, + "eval_gooaq_pairs_runtime": 0.8748, + "eval_gooaq_pairs_samples_per_second": 146.311, + "eval_gooaq_pairs_steps_per_second": 1.143, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_paws-pos_loss": 0.02286490984261036, + "eval_paws-pos_runtime": 0.6821, + "eval_paws-pos_samples_per_second": 187.661, + "eval_paws-pos_steps_per_second": 1.466, + "step": 165 + }, + { + "epoch": 0.16853932584269662, + "eval_global_dataset_loss": 0.20614725351333618, + "eval_global_dataset_runtime": 18.129, + "eval_global_dataset_samples_per_second": 30.117, + "eval_global_dataset_steps_per_second": 0.276, + "step": 165 + }, + { + "epoch": 0.16956077630234934, + "grad_norm": 0.17191766202449799, + "learning_rate": 7.650695517774343e-06, + "loss": 0.0008, + "step": 166 + }, + { + "epoch": 0.17058222676200205, + "grad_norm": 2.301992654800415, + "learning_rate": 7.697063369397218e-06, + "loss": 0.0201, + "step": 167 + }, + { + "epoch": 0.17160367722165476, + "grad_norm": 5.538881778717041, + "learning_rate": 7.743431221020094e-06, + "loss": 0.1038, + "step": 168 + }, + { + "epoch": 0.17262512768130744, + "grad_norm": 5.807785511016846, + "learning_rate": 7.789799072642967e-06, + "loss": 0.1082, + "step": 169 + }, + { + "epoch": 0.17364657814096016, + "grad_norm": 8.542582511901855, + "learning_rate": 7.836166924265843e-06, + "loss": 0.2274, + "step": 170 + }, + { + "epoch": 0.17466802860061287, + "grad_norm": 6.932031154632568, + "learning_rate": 7.882534775888716e-06, + "loss": 0.211, + "step": 171 + }, + { + "epoch": 0.17568947906026558, + "grad_norm": 6.358954906463623, + "learning_rate": 7.928902627511593e-06, + "loss": 0.2527, + "step": 172 + }, + { + "epoch": 0.1767109295199183, + "grad_norm": 0.0030138411093503237, + "learning_rate": 7.975270479134467e-06, + "loss": 0.0, + "step": 173 + }, + { + "epoch": 0.177732379979571, + "grad_norm": 4.964352607727051, + "learning_rate": 8.021638330757342e-06, + "loss": 0.0565, + "step": 174 + }, + { + "epoch": 0.1787538304392237, + "grad_norm": 5.43158483505249, + "learning_rate": 8.068006182380216e-06, + "loss": 0.1456, + "step": 175 + }, + { + "epoch": 0.1797752808988764, + "grad_norm": 6.837294578552246, + "learning_rate": 8.114374034003091e-06, + "loss": 0.1498, + "step": 176 + }, + { + "epoch": 0.1807967313585291, + "grad_norm": 6.0040459632873535, + "learning_rate": 8.160741885625966e-06, + "loss": 0.1689, + "step": 177 + }, + { + "epoch": 0.18181818181818182, + "grad_norm": 4.130268573760986, + "learning_rate": 8.207109737248842e-06, + "loss": 0.1241, + "step": 178 + }, + { + "epoch": 0.18283963227783454, + "grad_norm": 5.505726337432861, + "learning_rate": 8.253477588871715e-06, + "loss": 0.1143, + "step": 179 + }, + { + "epoch": 0.18386108273748722, + "grad_norm": 4.157503128051758, + "learning_rate": 8.299845440494591e-06, + "loss": 0.0823, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6941273212432861, + "eval_Qnli-dev_cosine_ap": 0.7278692566993372, + "eval_Qnli-dev_cosine_f1": 0.6910420475319927, + "eval_Qnli-dev_cosine_f1_threshold": 0.6254246830940247, + "eval_Qnli-dev_cosine_precision": 0.6077170418006431, + "eval_Qnli-dev_cosine_recall": 0.8008474576271186, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 306.72845458984375, + "eval_Qnli-dev_dot_ap": 0.6811553720003177, + "eval_Qnli-dev_dot_f1": 0.685064935064935, + "eval_Qnli-dev_dot_f1_threshold": 255.69927978515625, + "eval_Qnli-dev_dot_precision": 0.5552631578947368, + "eval_Qnli-dev_dot_recall": 0.8940677966101694, + "eval_Qnli-dev_euclidean_accuracy": 0.716796875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 17.333099365234375, + "eval_Qnli-dev_euclidean_ap": 0.7383105665392877, + "eval_Qnli-dev_euclidean_f1": 0.6994328922495274, + "eval_Qnli-dev_euclidean_f1_threshold": 18.217266082763672, + "eval_Qnli-dev_euclidean_precision": 0.6313993174061433, + "eval_Qnli-dev_euclidean_recall": 0.7838983050847458, + "eval_Qnli-dev_manhattan_accuracy": 0.708984375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 355.33648681640625, + "eval_Qnli-dev_manhattan_ap": 0.740994353636266, + "eval_Qnli-dev_manhattan_f1": 0.7022058823529411, + "eval_Qnli-dev_manhattan_f1_threshold": 393.79034423828125, + "eval_Qnli-dev_manhattan_precision": 0.6201298701298701, + "eval_Qnli-dev_manhattan_recall": 0.809322033898305, + "eval_Qnli-dev_max_accuracy": 0.716796875, + "eval_Qnli-dev_max_accuracy_threshold": 355.33648681640625, + "eval_Qnli-dev_max_ap": 0.740994353636266, + "eval_Qnli-dev_max_f1": 0.7022058823529411, + "eval_Qnli-dev_max_f1_threshold": 393.79034423828125, + "eval_Qnli-dev_max_precision": 0.6313993174061433, + "eval_Qnli-dev_max_recall": 0.8940677966101694, + "eval_allNLI-dev_cosine_accuracy": 0.73828125, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.794581413269043, + "eval_allNLI-dev_cosine_ap": 0.637776986458248, + "eval_allNLI-dev_cosine_f1": 0.6291079812206573, + "eval_allNLI-dev_cosine_f1_threshold": 0.6438230276107788, + "eval_allNLI-dev_cosine_precision": 0.5296442687747036, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.720703125, + "eval_allNLI-dev_dot_accuracy_threshold": 329.1781921386719, + "eval_allNLI-dev_dot_ap": 0.6042529102133607, + "eval_allNLI-dev_dot_f1": 0.6153846153846154, + "eval_allNLI-dev_dot_f1_threshold": 251.23046875, + "eval_allNLI-dev_dot_precision": 0.4735202492211838, + "eval_allNLI-dev_dot_recall": 0.8786127167630058, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 14.006410598754883, + "eval_allNLI-dev_euclidean_ap": 0.6447653247811627, + "eval_allNLI-dev_euclidean_f1": 0.6413502109704641, + "eval_allNLI-dev_euclidean_f1_threshold": 18.953582763671875, + "eval_allNLI-dev_euclidean_precision": 0.5049833887043189, + "eval_allNLI-dev_euclidean_recall": 0.8786127167630058, + "eval_allNLI-dev_manhattan_accuracy": 0.73828125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 309.2313537597656, + "eval_allNLI-dev_manhattan_ap": 0.6436706967068377, + "eval_allNLI-dev_manhattan_f1": 0.6436781609195402, + "eval_allNLI-dev_manhattan_f1_threshold": 377.1278076171875, + "eval_allNLI-dev_manhattan_precision": 0.5343511450381679, + "eval_allNLI-dev_manhattan_recall": 0.8092485549132948, + "eval_allNLI-dev_max_accuracy": 0.74609375, + "eval_allNLI-dev_max_accuracy_threshold": 329.1781921386719, + "eval_allNLI-dev_max_ap": 0.6447653247811627, + "eval_allNLI-dev_max_f1": 0.6436781609195402, + "eval_allNLI-dev_max_f1_threshold": 377.1278076171875, + "eval_allNLI-dev_max_precision": 0.5343511450381679, + "eval_allNLI-dev_max_recall": 0.8786127167630058, + "eval_sequential_score": 0.740994353636266, + "eval_sts-test_pearson_cosine": 0.886658041728598, + "eval_sts-test_pearson_dot": 0.869927439518612, + "eval_sts-test_pearson_euclidean": 0.912539643849305, + "eval_sts-test_pearson_manhattan": 0.9127766227820869, + "eval_sts-test_pearson_max": 0.9127766227820869, + "eval_sts-test_spearman_cosine": 0.9083680401681431, + "eval_sts-test_spearman_dot": 0.8722390938115472, + "eval_sts-test_spearman_euclidean": 0.9078598000925209, + "eval_sts-test_spearman_manhattan": 0.9084068395117693, + "eval_sts-test_spearman_max": 0.9084068395117693, + "eval_vitaminc-pairs_loss": 1.6509770154953003, + "eval_vitaminc-pairs_runtime": 3.204, + "eval_vitaminc-pairs_samples_per_second": 39.95, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_negation-triplets_loss": 0.8805974125862122, + "eval_negation-triplets_runtime": 0.6901, + "eval_negation-triplets_samples_per_second": 185.479, + "eval_negation-triplets_steps_per_second": 1.449, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_scitail-pairs-pos_loss": 0.0494501069188118, + "eval_scitail-pairs-pos_runtime": 0.7889, + "eval_scitail-pairs-pos_samples_per_second": 162.259, + "eval_scitail-pairs-pos_steps_per_second": 1.268, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_scitail-pairs-qa_loss": 2.3127886379370466e-05, + "eval_scitail-pairs-qa_runtime": 0.5112, + "eval_scitail-pairs-qa_samples_per_second": 250.375, + "eval_scitail-pairs-qa_steps_per_second": 1.956, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_xsum-pairs_loss": 0.005493131931871176, + "eval_xsum-pairs_runtime": 2.7087, + "eval_xsum-pairs_samples_per_second": 47.255, + "eval_xsum-pairs_steps_per_second": 0.369, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_sciq_pairs_loss": 0.01821187324821949, + "eval_sciq_pairs_runtime": 3.0815, + "eval_sciq_pairs_samples_per_second": 41.538, + "eval_sciq_pairs_steps_per_second": 0.325, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_qasc_pairs_loss": 0.075060173869133, + "eval_qasc_pairs_runtime": 0.5713, + "eval_qasc_pairs_samples_per_second": 224.06, + "eval_qasc_pairs_steps_per_second": 1.75, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_openbookqa_pairs_loss": 0.7500714659690857, + "eval_openbookqa_pairs_runtime": 0.5735, + "eval_openbookqa_pairs_samples_per_second": 223.203, + "eval_openbookqa_pairs_steps_per_second": 1.744, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_msmarco_pairs_loss": 0.15402421355247498, + "eval_msmarco_pairs_runtime": 1.2825, + "eval_msmarco_pairs_samples_per_second": 99.808, + "eval_msmarco_pairs_steps_per_second": 0.78, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_nq_pairs_loss": 0.11884366720914841, + "eval_nq_pairs_runtime": 2.7188, + "eval_nq_pairs_samples_per_second": 47.079, + "eval_nq_pairs_steps_per_second": 0.368, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_trivia_pairs_loss": 0.15114806592464447, + "eval_trivia_pairs_runtime": 3.188, + "eval_trivia_pairs_samples_per_second": 40.151, + "eval_trivia_pairs_steps_per_second": 0.314, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_gooaq_pairs_loss": 0.13557007908821106, + "eval_gooaq_pairs_runtime": 0.8712, + "eval_gooaq_pairs_samples_per_second": 146.918, + "eval_gooaq_pairs_steps_per_second": 1.148, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_paws-pos_loss": 0.023051057010889053, + "eval_paws-pos_runtime": 0.6819, + "eval_paws-pos_samples_per_second": 187.7, + "eval_paws-pos_steps_per_second": 1.466, + "step": 180 + }, + { + "epoch": 0.18386108273748722, + "eval_global_dataset_loss": 0.19962206482887268, + "eval_global_dataset_runtime": 18.0903, + "eval_global_dataset_samples_per_second": 30.182, + "eval_global_dataset_steps_per_second": 0.276, + "step": 180 + }, + { + "epoch": 0.18488253319713993, + "grad_norm": 6.622176170349121, + "learning_rate": 8.346213292117464e-06, + "loss": 0.1072, + "step": 181 + }, + { + "epoch": 0.18590398365679264, + "grad_norm": 9.214537620544434, + "learning_rate": 8.39258114374034e-06, + "loss": 0.2119, + "step": 182 + }, + { + "epoch": 0.18692543411644535, + "grad_norm": 8.8944091796875, + "learning_rate": 8.438948995363215e-06, + "loss": 0.2067, + "step": 183 + }, + { + "epoch": 0.18794688457609807, + "grad_norm": 5.687060832977295, + "learning_rate": 8.48531684698609e-06, + "loss": 0.0849, + "step": 184 + }, + { + "epoch": 0.18896833503575078, + "grad_norm": 6.281179428100586, + "learning_rate": 8.531684698608966e-06, + "loss": 0.1698, + "step": 185 + }, + { + "epoch": 0.18998978549540346, + "grad_norm": 4.13387393951416, + "learning_rate": 8.578052550231839e-06, + "loss": 0.0729, + "step": 186 + }, + { + "epoch": 0.19101123595505617, + "grad_norm": 1.0750579833984375, + "learning_rate": 8.624420401854715e-06, + "loss": 0.0402, + "step": 187 + }, + { + "epoch": 0.1920326864147089, + "grad_norm": 5.998382568359375, + "learning_rate": 8.670788253477588e-06, + "loss": 0.0977, + "step": 188 + }, + { + "epoch": 0.1930541368743616, + "grad_norm": 5.961723804473877, + "learning_rate": 8.717156105100465e-06, + "loss": 0.0836, + "step": 189 + }, + { + "epoch": 0.1940755873340143, + "grad_norm": 9.096285820007324, + "learning_rate": 8.763523956723339e-06, + "loss": 0.264, + "step": 190 + }, + { + "epoch": 0.195097037793667, + "grad_norm": 4.752264976501465, + "learning_rate": 8.809891808346214e-06, + "loss": 0.1379, + "step": 191 + }, + { + "epoch": 0.1961184882533197, + "grad_norm": 7.471511363983154, + "learning_rate": 8.856259659969088e-06, + "loss": 0.1358, + "step": 192 + }, + { + "epoch": 0.19713993871297242, + "grad_norm": 5.222994804382324, + "learning_rate": 8.902627511591963e-06, + "loss": 0.0939, + "step": 193 + }, + { + "epoch": 0.19816138917262513, + "grad_norm": 9.64782428741455, + "learning_rate": 8.948995363214838e-06, + "loss": 0.4662, + "step": 194 + }, + { + "epoch": 0.19918283963227784, + "grad_norm": 1.159449815750122, + "learning_rate": 8.995363214837714e-06, + "loss": 0.0428, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_Qnli-dev_cosine_accuracy": 0.693359375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.6972202062606812, + "eval_Qnli-dev_cosine_ap": 0.7153827512813624, + "eval_Qnli-dev_cosine_f1": 0.6803418803418803, + "eval_Qnli-dev_cosine_f1_threshold": 0.5867342948913574, + "eval_Qnli-dev_cosine_precision": 0.5702005730659025, + "eval_Qnli-dev_cosine_recall": 0.8432203389830508, + "eval_Qnli-dev_dot_accuracy": 0.66015625, + "eval_Qnli-dev_dot_accuracy_threshold": 334.04241943359375, + "eval_Qnli-dev_dot_ap": 0.6681094375225486, + "eval_Qnli-dev_dot_f1": 0.6741935483870968, + "eval_Qnli-dev_dot_f1_threshold": 255.34799194335938, + "eval_Qnli-dev_dot_precision": 0.5442708333333334, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.576934814453125, + "eval_Qnli-dev_euclidean_ap": 0.7263623876399564, + "eval_Qnli-dev_euclidean_f1": 0.68796992481203, + "eval_Qnli-dev_euclidean_f1_threshold": 18.22495460510254, + "eval_Qnli-dev_euclidean_precision": 0.6182432432432432, + "eval_Qnli-dev_euclidean_recall": 0.7754237288135594, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 351.4493103027344, + "eval_Qnli-dev_manhattan_ap": 0.7282119097652248, + "eval_Qnli-dev_manhattan_f1": 0.6948853615520282, + "eval_Qnli-dev_manhattan_f1_threshold": 402.3414306640625, + "eval_Qnli-dev_manhattan_precision": 0.595166163141994, + "eval_Qnli-dev_manhattan_recall": 0.8347457627118644, + "eval_Qnli-dev_max_accuracy": 0.70703125, + "eval_Qnli-dev_max_accuracy_threshold": 351.4493103027344, + "eval_Qnli-dev_max_ap": 0.7282119097652248, + "eval_Qnli-dev_max_f1": 0.6948853615520282, + "eval_Qnli-dev_max_f1_threshold": 402.3414306640625, + "eval_Qnli-dev_max_precision": 0.6182432432432432, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.740234375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7975447177886963, + "eval_allNLI-dev_cosine_ap": 0.6377387373098401, + "eval_allNLI-dev_cosine_f1": 0.6323185011709601, + "eval_allNLI-dev_cosine_f1_threshold": 0.6594541072845459, + "eval_allNLI-dev_cosine_precision": 0.531496062992126, + "eval_allNLI-dev_cosine_recall": 0.7803468208092486, + "eval_allNLI-dev_dot_accuracy": 0.724609375, + "eval_allNLI-dev_dot_accuracy_threshold": 357.22686767578125, + "eval_allNLI-dev_dot_ap": 0.5991972031824899, + "eval_allNLI-dev_dot_f1": 0.6168958742632614, + "eval_allNLI-dev_dot_f1_threshold": 255.791015625, + "eval_allNLI-dev_dot_precision": 0.46726190476190477, + "eval_allNLI-dev_dot_recall": 0.9075144508670521, + "eval_allNLI-dev_euclidean_accuracy": 0.751953125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.69708251953125, + "eval_allNLI-dev_euclidean_ap": 0.6435648687925655, + "eval_allNLI-dev_euclidean_f1": 0.6431924882629108, + "eval_allNLI-dev_euclidean_f1_threshold": 17.385541915893555, + "eval_allNLI-dev_euclidean_precision": 0.541501976284585, + "eval_allNLI-dev_euclidean_recall": 0.791907514450867, + "eval_allNLI-dev_manhattan_accuracy": 0.7421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 304.86859130859375, + "eval_allNLI-dev_manhattan_ap": 0.6414859221224841, + "eval_allNLI-dev_manhattan_f1": 0.6432160804020101, + "eval_allNLI-dev_manhattan_f1_threshold": 354.99481201171875, + "eval_allNLI-dev_manhattan_precision": 0.5688888888888889, + "eval_allNLI-dev_manhattan_recall": 0.7398843930635838, + "eval_allNLI-dev_max_accuracy": 0.751953125, + "eval_allNLI-dev_max_accuracy_threshold": 357.22686767578125, + "eval_allNLI-dev_max_ap": 0.6435648687925655, + "eval_allNLI-dev_max_f1": 0.6432160804020101, + "eval_allNLI-dev_max_f1_threshold": 354.99481201171875, + "eval_allNLI-dev_max_precision": 0.5688888888888889, + "eval_allNLI-dev_max_recall": 0.9075144508670521, + "eval_sequential_score": 0.7282119097652248, + "eval_sts-test_pearson_cosine": 0.8835670131068663, + "eval_sts-test_pearson_dot": 0.8614909192586415, + "eval_sts-test_pearson_euclidean": 0.9109918303510968, + "eval_sts-test_pearson_manhattan": 0.9119339934951788, + "eval_sts-test_pearson_max": 0.9119339934951788, + "eval_sts-test_spearman_cosine": 0.9072833143432768, + "eval_sts-test_spearman_dot": 0.8645679220863562, + "eval_sts-test_spearman_euclidean": 0.9070972385984113, + "eval_sts-test_spearman_manhattan": 0.9080893293121973, + "eval_sts-test_spearman_max": 0.9080893293121973, + "eval_vitaminc-pairs_loss": 1.6089191436767578, + "eval_vitaminc-pairs_runtime": 3.1649, + "eval_vitaminc-pairs_samples_per_second": 40.444, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_negation-triplets_loss": 0.8759047985076904, + "eval_negation-triplets_runtime": 0.6912, + "eval_negation-triplets_samples_per_second": 185.186, + "eval_negation-triplets_steps_per_second": 1.447, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_scitail-pairs-pos_loss": 0.04824283719062805, + "eval_scitail-pairs-pos_runtime": 0.8278, + "eval_scitail-pairs-pos_samples_per_second": 154.632, + "eval_scitail-pairs-pos_steps_per_second": 1.208, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_scitail-pairs-qa_loss": 2.480510920577217e-05, + "eval_scitail-pairs-qa_runtime": 0.5405, + "eval_scitail-pairs-qa_samples_per_second": 236.82, + "eval_scitail-pairs-qa_steps_per_second": 1.85, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_xsum-pairs_loss": 0.006399333942681551, + "eval_xsum-pairs_runtime": 2.7351, + "eval_xsum-pairs_samples_per_second": 46.799, + "eval_xsum-pairs_steps_per_second": 0.366, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_sciq_pairs_loss": 0.017517240718007088, + "eval_sciq_pairs_runtime": 3.145, + "eval_sciq_pairs_samples_per_second": 40.699, + "eval_sciq_pairs_steps_per_second": 0.318, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_qasc_pairs_loss": 0.06335584819316864, + "eval_qasc_pairs_runtime": 0.5612, + "eval_qasc_pairs_samples_per_second": 228.098, + "eval_qasc_pairs_steps_per_second": 1.782, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_openbookqa_pairs_loss": 0.7464925050735474, + "eval_openbookqa_pairs_runtime": 0.5769, + "eval_openbookqa_pairs_samples_per_second": 221.875, + "eval_openbookqa_pairs_steps_per_second": 1.733, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_msmarco_pairs_loss": 0.16150762140750885, + "eval_msmarco_pairs_runtime": 1.2761, + "eval_msmarco_pairs_samples_per_second": 100.307, + "eval_msmarco_pairs_steps_per_second": 0.784, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_nq_pairs_loss": 0.11458850651979446, + "eval_nq_pairs_runtime": 2.7245, + "eval_nq_pairs_samples_per_second": 46.98, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_trivia_pairs_loss": 0.15422502160072327, + "eval_trivia_pairs_runtime": 3.1931, + "eval_trivia_pairs_samples_per_second": 40.087, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_gooaq_pairs_loss": 0.1594969928264618, + "eval_gooaq_pairs_runtime": 0.8722, + "eval_gooaq_pairs_samples_per_second": 146.749, + "eval_gooaq_pairs_steps_per_second": 1.146, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_paws-pos_loss": 0.02276989445090294, + "eval_paws-pos_runtime": 0.6806, + "eval_paws-pos_samples_per_second": 188.06, + "eval_paws-pos_steps_per_second": 1.469, + "step": 195 + }, + { + "epoch": 0.19918283963227784, + "eval_global_dataset_loss": 0.1976570039987564, + "eval_global_dataset_runtime": 18.0952, + "eval_global_dataset_samples_per_second": 30.174, + "eval_global_dataset_steps_per_second": 0.276, + "step": 195 + }, + { + "epoch": 0.20020429009193055, + "grad_norm": 8.014669418334961, + "learning_rate": 9.041731066460587e-06, + "loss": 0.3566, + "step": 196 + }, + { + "epoch": 0.20122574055158324, + "grad_norm": 0.01532956212759018, + "learning_rate": 9.088098918083463e-06, + "loss": 0.0001, + "step": 197 + }, + { + "epoch": 0.20224719101123595, + "grad_norm": 8.927170753479004, + "learning_rate": 9.134466769706336e-06, + "loss": 0.409, + "step": 198 + }, + { + "epoch": 0.20326864147088866, + "grad_norm": 8.353126525878906, + "learning_rate": 9.180834621329212e-06, + "loss": 0.2114, + "step": 199 + }, + { + "epoch": 0.20429009193054137, + "grad_norm": 6.29276704788208, + "learning_rate": 9.227202472952085e-06, + "loss": 0.1129, + "step": 200 + }, + { + "epoch": 0.20531154239019409, + "grad_norm": 4.237605571746826, + "learning_rate": 9.273570324574962e-06, + "loss": 0.0831, + "step": 201 + }, + { + "epoch": 0.20633299284984677, + "grad_norm": 4.7048163414001465, + "learning_rate": 9.319938176197836e-06, + "loss": 0.0788, + "step": 202 + }, + { + "epoch": 0.20735444330949948, + "grad_norm": 10.543294906616211, + "learning_rate": 9.366306027820711e-06, + "loss": 0.2791, + "step": 203 + }, + { + "epoch": 0.2083758937691522, + "grad_norm": 0.052854523062705994, + "learning_rate": 9.412673879443586e-06, + "loss": 0.0004, + "step": 204 + }, + { + "epoch": 0.2093973442288049, + "grad_norm": 3.1083083152770996, + "learning_rate": 9.45904173106646e-06, + "loss": 0.021, + "step": 205 + }, + { + "epoch": 0.21041879468845762, + "grad_norm": 7.927884101867676, + "learning_rate": 9.505409582689335e-06, + "loss": 0.1721, + "step": 206 + }, + { + "epoch": 0.21144024514811033, + "grad_norm": 4.124098777770996, + "learning_rate": 9.551777434312211e-06, + "loss": 0.0554, + "step": 207 + }, + { + "epoch": 0.212461695607763, + "grad_norm": 0.4861751198768616, + "learning_rate": 9.598145285935086e-06, + "loss": 0.0191, + "step": 208 + }, + { + "epoch": 0.21348314606741572, + "grad_norm": 4.265564918518066, + "learning_rate": 9.64451313755796e-06, + "loss": 0.0628, + "step": 209 + }, + { + "epoch": 0.21450459652706844, + "grad_norm": 7.434197425842285, + "learning_rate": 9.690880989180835e-06, + "loss": 0.4161, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7134166955947876, + "eval_Qnli-dev_cosine_ap": 0.7233398464616407, + "eval_Qnli-dev_cosine_f1": 0.6833631484794276, + "eval_Qnli-dev_cosine_f1_threshold": 0.6342331171035767, + "eval_Qnli-dev_cosine_precision": 0.5913312693498453, + "eval_Qnli-dev_cosine_recall": 0.809322033898305, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 342.7704162597656, + "eval_Qnli-dev_dot_ap": 0.6750835143966132, + "eval_Qnli-dev_dot_f1": 0.6751188589540412, + "eval_Qnli-dev_dot_f1_threshold": 258.71185302734375, + "eval_Qnli-dev_dot_precision": 0.5392405063291139, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.7109375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.788101196289062, + "eval_Qnli-dev_euclidean_ap": 0.733380297920513, + "eval_Qnli-dev_euclidean_f1": 0.6923076923076922, + "eval_Qnli-dev_euclidean_f1_threshold": 18.837051391601562, + "eval_Qnli-dev_euclidean_precision": 0.5892857142857143, + "eval_Qnli-dev_euclidean_recall": 0.8389830508474576, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 346.0721435546875, + "eval_Qnli-dev_manhattan_ap": 0.7363641648094164, + "eval_Qnli-dev_manhattan_f1": 0.6921739130434782, + "eval_Qnli-dev_manhattan_f1_threshold": 406.31085205078125, + "eval_Qnli-dev_manhattan_precision": 0.5870206489675516, + "eval_Qnli-dev_manhattan_recall": 0.8432203389830508, + "eval_Qnli-dev_max_accuracy": 0.7109375, + "eval_Qnli-dev_max_accuracy_threshold": 346.0721435546875, + "eval_Qnli-dev_max_ap": 0.7363641648094164, + "eval_Qnli-dev_max_f1": 0.6923076923076922, + "eval_Qnli-dev_max_f1_threshold": 406.31085205078125, + "eval_Qnli-dev_max_precision": 0.5913312693498453, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.744140625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8111903667449951, + "eval_allNLI-dev_cosine_ap": 0.6373750660324018, + "eval_allNLI-dev_cosine_f1": 0.6376146788990825, + "eval_allNLI-dev_cosine_f1_threshold": 0.6769773960113525, + "eval_allNLI-dev_cosine_precision": 0.5285171102661597, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.72265625, + "eval_allNLI-dev_dot_accuracy_threshold": 370.8502502441406, + "eval_allNLI-dev_dot_ap": 0.5987399260294886, + "eval_allNLI-dev_dot_f1": 0.6092184368737475, + "eval_allNLI-dev_dot_f1_threshold": 280.74951171875, + "eval_allNLI-dev_dot_precision": 0.4662576687116564, + "eval_allNLI-dev_dot_recall": 0.8786127167630058, + "eval_allNLI-dev_euclidean_accuracy": 0.75, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.421224594116211, + "eval_allNLI-dev_euclidean_ap": 0.6432449136306547, + "eval_allNLI-dev_euclidean_f1": 0.6539379474940333, + "eval_allNLI-dev_euclidean_f1_threshold": 16.949827194213867, + "eval_allNLI-dev_euclidean_precision": 0.556910569105691, + "eval_allNLI-dev_euclidean_recall": 0.791907514450867, + "eval_allNLI-dev_manhattan_accuracy": 0.7421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 279.84027099609375, + "eval_allNLI-dev_manhattan_ap": 0.6395472085879973, + "eval_allNLI-dev_manhattan_f1": 0.6485148514851485, + "eval_allNLI-dev_manhattan_f1_threshold": 349.93603515625, + "eval_allNLI-dev_manhattan_precision": 0.5670995670995671, + "eval_allNLI-dev_manhattan_recall": 0.7572254335260116, + "eval_allNLI-dev_max_accuracy": 0.75, + "eval_allNLI-dev_max_accuracy_threshold": 370.8502502441406, + "eval_allNLI-dev_max_ap": 0.6432449136306547, + "eval_allNLI-dev_max_f1": 0.6539379474940333, + "eval_allNLI-dev_max_f1_threshold": 349.93603515625, + "eval_allNLI-dev_max_precision": 0.5670995670995671, + "eval_allNLI-dev_max_recall": 0.8786127167630058, + "eval_sequential_score": 0.7363641648094164, + "eval_sts-test_pearson_cosine": 0.8793939580071624, + "eval_sts-test_pearson_dot": 0.8523094152522054, + "eval_sts-test_pearson_euclidean": 0.9087022157524076, + "eval_sts-test_pearson_manhattan": 0.9100918844631366, + "eval_sts-test_pearson_max": 0.9100918844631366, + "eval_sts-test_spearman_cosine": 0.9053907387477731, + "eval_sts-test_spearman_dot": 0.8564610520802587, + "eval_sts-test_spearman_euclidean": 0.9053789691660271, + "eval_sts-test_spearman_manhattan": 0.9068681568913488, + "eval_sts-test_spearman_max": 0.9068681568913488, + "eval_vitaminc-pairs_loss": 1.5809496641159058, + "eval_vitaminc-pairs_runtime": 3.2012, + "eval_vitaminc-pairs_samples_per_second": 39.985, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_negation-triplets_loss": 0.8745460510253906, + "eval_negation-triplets_runtime": 0.6944, + "eval_negation-triplets_samples_per_second": 184.338, + "eval_negation-triplets_steps_per_second": 1.44, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_scitail-pairs-pos_loss": 0.04675232246518135, + "eval_scitail-pairs-pos_runtime": 0.8042, + "eval_scitail-pairs-pos_samples_per_second": 159.155, + "eval_scitail-pairs-pos_steps_per_second": 1.243, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_scitail-pairs-qa_loss": 2.9356720915529877e-05, + "eval_scitail-pairs-qa_runtime": 0.5149, + "eval_scitail-pairs-qa_samples_per_second": 248.589, + "eval_scitail-pairs-qa_steps_per_second": 1.942, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_xsum-pairs_loss": 0.007236706092953682, + "eval_xsum-pairs_runtime": 2.715, + "eval_xsum-pairs_samples_per_second": 47.146, + "eval_xsum-pairs_steps_per_second": 0.368, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_sciq_pairs_loss": 0.017811505123972893, + "eval_sciq_pairs_runtime": 3.1121, + "eval_sciq_pairs_samples_per_second": 41.129, + "eval_sciq_pairs_steps_per_second": 0.321, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_qasc_pairs_loss": 0.06074891984462738, + "eval_qasc_pairs_runtime": 0.5693, + "eval_qasc_pairs_samples_per_second": 224.841, + "eval_qasc_pairs_steps_per_second": 1.757, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_openbookqa_pairs_loss": 0.7112113833427429, + "eval_openbookqa_pairs_runtime": 0.5758, + "eval_openbookqa_pairs_samples_per_second": 222.305, + "eval_openbookqa_pairs_steps_per_second": 1.737, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_msmarco_pairs_loss": 0.1478877067565918, + "eval_msmarco_pairs_runtime": 1.2703, + "eval_msmarco_pairs_samples_per_second": 100.765, + "eval_msmarco_pairs_steps_per_second": 0.787, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_nq_pairs_loss": 0.1155412495136261, + "eval_nq_pairs_runtime": 2.7413, + "eval_nq_pairs_samples_per_second": 46.693, + "eval_nq_pairs_steps_per_second": 0.365, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_trivia_pairs_loss": 0.14946851134300232, + "eval_trivia_pairs_runtime": 3.1982, + "eval_trivia_pairs_samples_per_second": 40.022, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_gooaq_pairs_loss": 0.16001321375370026, + "eval_gooaq_pairs_runtime": 0.8749, + "eval_gooaq_pairs_samples_per_second": 146.307, + "eval_gooaq_pairs_steps_per_second": 1.143, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_paws-pos_loss": 0.022760741412639618, + "eval_paws-pos_runtime": 0.6834, + "eval_paws-pos_samples_per_second": 187.302, + "eval_paws-pos_steps_per_second": 1.463, + "step": 210 + }, + { + "epoch": 0.21450459652706844, + "eval_global_dataset_loss": 0.19816936552524567, + "eval_global_dataset_runtime": 18.1611, + "eval_global_dataset_samples_per_second": 30.064, + "eval_global_dataset_steps_per_second": 0.275, + "step": 210 + }, + { + "epoch": 0.21552604698672115, + "grad_norm": 6.1790595054626465, + "learning_rate": 9.73724884080371e-06, + "loss": 0.2476, + "step": 211 + }, + { + "epoch": 0.21654749744637386, + "grad_norm": 6.0097761154174805, + "learning_rate": 9.783616692426586e-06, + "loss": 0.21, + "step": 212 + }, + { + "epoch": 0.21756894790602654, + "grad_norm": 6.066679000854492, + "learning_rate": 9.829984544049459e-06, + "loss": 0.2385, + "step": 213 + }, + { + "epoch": 0.21859039836567926, + "grad_norm": 4.435894966125488, + "learning_rate": 9.876352395672335e-06, + "loss": 0.0723, + "step": 214 + }, + { + "epoch": 0.21961184882533197, + "grad_norm": 3.2957065105438232, + "learning_rate": 9.922720247295208e-06, + "loss": 0.0261, + "step": 215 + }, + { + "epoch": 0.22063329928498468, + "grad_norm": 7.078932762145996, + "learning_rate": 9.969088098918084e-06, + "loss": 0.2101, + "step": 216 + }, + { + "epoch": 0.2216547497446374, + "grad_norm": 9.028351783752441, + "learning_rate": 1.0015455950540959e-05, + "loss": 0.3151, + "step": 217 + }, + { + "epoch": 0.2226762002042901, + "grad_norm": 2.8425967693328857, + "learning_rate": 1.0061823802163834e-05, + "loss": 0.0284, + "step": 218 + }, + { + "epoch": 0.2236976506639428, + "grad_norm": 4.209721565246582, + "learning_rate": 1.0108191653786708e-05, + "loss": 0.0524, + "step": 219 + }, + { + "epoch": 0.2247191011235955, + "grad_norm": 6.874032020568848, + "learning_rate": 1.0154559505409583e-05, + "loss": 0.1371, + "step": 220 + }, + { + "epoch": 0.2257405515832482, + "grad_norm": 6.712740421295166, + "learning_rate": 1.0200927357032458e-05, + "loss": 0.1681, + "step": 221 + }, + { + "epoch": 0.22676200204290092, + "grad_norm": 15.727431297302246, + "learning_rate": 1.0247295208655332e-05, + "loss": 1.1706, + "step": 222 + }, + { + "epoch": 0.22778345250255363, + "grad_norm": 6.990494728088379, + "learning_rate": 1.0293663060278207e-05, + "loss": 0.1822, + "step": 223 + }, + { + "epoch": 0.22880490296220635, + "grad_norm": 4.721086025238037, + "learning_rate": 1.0340030911901083e-05, + "loss": 0.0653, + "step": 224 + }, + { + "epoch": 0.22982635342185903, + "grad_norm": 6.331048488616943, + "learning_rate": 1.0386398763523956e-05, + "loss": 0.174, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_Qnli-dev_cosine_accuracy": 0.70703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7183182239532471, + "eval_Qnli-dev_cosine_ap": 0.7337777359341142, + "eval_Qnli-dev_cosine_f1": 0.6953271028037383, + "eval_Qnli-dev_cosine_f1_threshold": 0.6554988622665405, + "eval_Qnli-dev_cosine_precision": 0.6220735785953178, + "eval_Qnli-dev_cosine_recall": 0.788135593220339, + "eval_Qnli-dev_dot_accuracy": 0.6796875, + "eval_Qnli-dev_dot_accuracy_threshold": 338.3375549316406, + "eval_Qnli-dev_dot_ap": 0.6835358211203821, + "eval_Qnli-dev_dot_f1": 0.680577849117175, + "eval_Qnli-dev_dot_f1_threshold": 267.7759094238281, + "eval_Qnli-dev_dot_precision": 0.5478036175710594, + "eval_Qnli-dev_dot_recall": 0.8983050847457628, + "eval_Qnli-dev_euclidean_accuracy": 0.724609375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.682331085205078, + "eval_Qnli-dev_euclidean_ap": 0.7442475617870724, + "eval_Qnli-dev_euclidean_f1": 0.7045454545454546, + "eval_Qnli-dev_euclidean_f1_threshold": 17.7783145904541, + "eval_Qnli-dev_euclidean_precision": 0.636986301369863, + "eval_Qnli-dev_euclidean_recall": 0.788135593220339, + "eval_Qnli-dev_manhattan_accuracy": 0.716796875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 354.92193603515625, + "eval_Qnli-dev_manhattan_ap": 0.7454001982312933, + "eval_Qnli-dev_manhattan_f1": 0.6976744186046511, + "eval_Qnli-dev_manhattan_f1_threshold": 396.1661376953125, + "eval_Qnli-dev_manhattan_precision": 0.6037151702786377, + "eval_Qnli-dev_manhattan_recall": 0.826271186440678, + "eval_Qnli-dev_max_accuracy": 0.724609375, + "eval_Qnli-dev_max_accuracy_threshold": 354.92193603515625, + "eval_Qnli-dev_max_ap": 0.7454001982312933, + "eval_Qnli-dev_max_f1": 0.7045454545454546, + "eval_Qnli-dev_max_f1_threshold": 396.1661376953125, + "eval_Qnli-dev_max_precision": 0.636986301369863, + "eval_Qnli-dev_max_recall": 0.8983050847457628, + "eval_allNLI-dev_cosine_accuracy": 0.74609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.822114109992981, + "eval_allNLI-dev_cosine_ap": 0.640896878563738, + "eval_allNLI-dev_cosine_f1": 0.6384039900249376, + "eval_allNLI-dev_cosine_f1_threshold": 0.7093552350997925, + "eval_allNLI-dev_cosine_precision": 0.5614035087719298, + "eval_allNLI-dev_cosine_recall": 0.7398843930635838, + "eval_allNLI-dev_dot_accuracy": 0.7265625, + "eval_allNLI-dev_dot_accuracy_threshold": 376.04156494140625, + "eval_allNLI-dev_dot_ap": 0.6046563344821586, + "eval_allNLI-dev_dot_f1": 0.6103896103896104, + "eval_allNLI-dev_dot_f1_threshold": 301.3974304199219, + "eval_allNLI-dev_dot_precision": 0.48788927335640137, + "eval_allNLI-dev_dot_recall": 0.815028901734104, + "eval_allNLI-dev_euclidean_accuracy": 0.751953125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.431922912597656, + "eval_allNLI-dev_euclidean_ap": 0.6460994323255849, + "eval_allNLI-dev_euclidean_f1": 0.6538461538461539, + "eval_allNLI-dev_euclidean_f1_threshold": 16.651573181152344, + "eval_allNLI-dev_euclidean_precision": 0.5596707818930041, + "eval_allNLI-dev_euclidean_recall": 0.7861271676300579, + "eval_allNLI-dev_manhattan_accuracy": 0.74609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 308.3254699707031, + "eval_allNLI-dev_manhattan_ap": 0.6426540510912288, + "eval_allNLI-dev_manhattan_f1": 0.649164677804296, + "eval_allNLI-dev_manhattan_f1_threshold": 352.0777587890625, + "eval_allNLI-dev_manhattan_precision": 0.5528455284552846, + "eval_allNLI-dev_manhattan_recall": 0.7861271676300579, + "eval_allNLI-dev_max_accuracy": 0.751953125, + "eval_allNLI-dev_max_accuracy_threshold": 376.04156494140625, + "eval_allNLI-dev_max_ap": 0.6460994323255849, + "eval_allNLI-dev_max_f1": 0.6538461538461539, + "eval_allNLI-dev_max_f1_threshold": 352.0777587890625, + "eval_allNLI-dev_max_precision": 0.5614035087719298, + "eval_allNLI-dev_max_recall": 0.815028901734104, + "eval_sequential_score": 0.7454001982312933, + "eval_sts-test_pearson_cosine": 0.8762786229005802, + "eval_sts-test_pearson_dot": 0.8499115129219429, + "eval_sts-test_pearson_euclidean": 0.9059431367402514, + "eval_sts-test_pearson_manhattan": 0.9077374203191177, + "eval_sts-test_pearson_max": 0.9077374203191177, + "eval_sts-test_spearman_cosine": 0.9028545952621997, + "eval_sts-test_spearman_dot": 0.8549608555446349, + "eval_sts-test_spearman_euclidean": 0.9033077017837877, + "eval_sts-test_spearman_manhattan": 0.9048005591125434, + "eval_sts-test_spearman_max": 0.9048005591125434, + "eval_vitaminc-pairs_loss": 1.614261269569397, + "eval_vitaminc-pairs_runtime": 3.2333, + "eval_vitaminc-pairs_samples_per_second": 39.588, + "eval_vitaminc-pairs_steps_per_second": 0.309, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_negation-triplets_loss": 0.8812417984008789, + "eval_negation-triplets_runtime": 0.7047, + "eval_negation-triplets_samples_per_second": 181.64, + "eval_negation-triplets_steps_per_second": 1.419, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_scitail-pairs-pos_loss": 0.044328466057777405, + "eval_scitail-pairs-pos_runtime": 0.8195, + "eval_scitail-pairs-pos_samples_per_second": 156.195, + "eval_scitail-pairs-pos_steps_per_second": 1.22, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_scitail-pairs-qa_loss": 3.0502940717269666e-05, + "eval_scitail-pairs-qa_runtime": 0.5116, + "eval_scitail-pairs-qa_samples_per_second": 250.192, + "eval_scitail-pairs-qa_steps_per_second": 1.955, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_xsum-pairs_loss": 0.008906504139304161, + "eval_xsum-pairs_runtime": 2.7266, + "eval_xsum-pairs_samples_per_second": 46.945, + "eval_xsum-pairs_steps_per_second": 0.367, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_sciq_pairs_loss": 0.018010232597589493, + "eval_sciq_pairs_runtime": 3.1411, + "eval_sciq_pairs_samples_per_second": 40.75, + "eval_sciq_pairs_steps_per_second": 0.318, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_qasc_pairs_loss": 0.05174100026488304, + "eval_qasc_pairs_runtime": 0.5689, + "eval_qasc_pairs_samples_per_second": 224.986, + "eval_qasc_pairs_steps_per_second": 1.758, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_openbookqa_pairs_loss": 0.678128182888031, + "eval_openbookqa_pairs_runtime": 0.5887, + "eval_openbookqa_pairs_samples_per_second": 217.433, + "eval_openbookqa_pairs_steps_per_second": 1.699, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_msmarco_pairs_loss": 0.142665833234787, + "eval_msmarco_pairs_runtime": 1.2823, + "eval_msmarco_pairs_samples_per_second": 99.823, + "eval_msmarco_pairs_steps_per_second": 0.78, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_nq_pairs_loss": 0.1214456632733345, + "eval_nq_pairs_runtime": 2.7361, + "eval_nq_pairs_samples_per_second": 46.781, + "eval_nq_pairs_steps_per_second": 0.365, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_trivia_pairs_loss": 0.15045574307441711, + "eval_trivia_pairs_runtime": 3.2029, + "eval_trivia_pairs_samples_per_second": 39.964, + "eval_trivia_pairs_steps_per_second": 0.312, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_gooaq_pairs_loss": 0.14028428494930267, + "eval_gooaq_pairs_runtime": 0.8834, + "eval_gooaq_pairs_samples_per_second": 144.898, + "eval_gooaq_pairs_steps_per_second": 1.132, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_paws-pos_loss": 0.02274134010076523, + "eval_paws-pos_runtime": 0.6954, + "eval_paws-pos_samples_per_second": 184.066, + "eval_paws-pos_steps_per_second": 1.438, + "step": 225 + }, + { + "epoch": 0.22982635342185903, + "eval_global_dataset_loss": 0.1955849826335907, + "eval_global_dataset_runtime": 18.1838, + "eval_global_dataset_samples_per_second": 30.027, + "eval_global_dataset_steps_per_second": 0.275, + "step": 225 + }, + { + "epoch": 0.23084780388151174, + "grad_norm": 1.0896482467651367, + "learning_rate": 1.0432766615146832e-05, + "loss": 0.0096, + "step": 226 + }, + { + "epoch": 0.23186925434116445, + "grad_norm": 8.944127082824707, + "learning_rate": 1.0479134466769705e-05, + "loss": 0.3193, + "step": 227 + }, + { + "epoch": 0.23289070480081717, + "grad_norm": 7.446431636810303, + "learning_rate": 1.0525502318392582e-05, + "loss": 0.2018, + "step": 228 + }, + { + "epoch": 0.23391215526046988, + "grad_norm": 3.5281810760498047, + "learning_rate": 1.0571870170015456e-05, + "loss": 0.0791, + "step": 229 + }, + { + "epoch": 0.23493360572012256, + "grad_norm": 1.8670618534088135, + "learning_rate": 1.061823802163833e-05, + "loss": 0.0159, + "step": 230 + }, + { + "epoch": 0.23595505617977527, + "grad_norm": 4.8522047996521, + "learning_rate": 1.0664605873261205e-05, + "loss": 0.0611, + "step": 231 + }, + { + "epoch": 0.236976506639428, + "grad_norm": 9.525233268737793, + "learning_rate": 1.071097372488408e-05, + "loss": 0.2373, + "step": 232 + }, + { + "epoch": 0.2379979570990807, + "grad_norm": 9.784748077392578, + "learning_rate": 1.0757341576506955e-05, + "loss": 0.2841, + "step": 233 + }, + { + "epoch": 0.2390194075587334, + "grad_norm": 0.012696587480604649, + "learning_rate": 1.0803709428129831e-05, + "loss": 0.0001, + "step": 234 + }, + { + "epoch": 0.24004085801838612, + "grad_norm": 5.216586589813232, + "learning_rate": 1.0850077279752706e-05, + "loss": 0.1355, + "step": 235 + }, + { + "epoch": 0.2410623084780388, + "grad_norm": 5.044414520263672, + "learning_rate": 1.089644513137558e-05, + "loss": 0.1138, + "step": 236 + }, + { + "epoch": 0.24208375893769152, + "grad_norm": 12.089670181274414, + "learning_rate": 1.0942812982998455e-05, + "loss": 0.4906, + "step": 237 + }, + { + "epoch": 0.24310520939734423, + "grad_norm": 15.858445167541504, + "learning_rate": 1.098918083462133e-05, + "loss": 1.1317, + "step": 238 + }, + { + "epoch": 0.24412665985699694, + "grad_norm": 7.566562175750732, + "learning_rate": 1.1035548686244206e-05, + "loss": 0.1748, + "step": 239 + }, + { + "epoch": 0.24514811031664965, + "grad_norm": 6.732047080993652, + "learning_rate": 1.1081916537867079e-05, + "loss": 0.1975, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7315773367881775, + "eval_Qnli-dev_cosine_ap": 0.7282373083193608, + "eval_Qnli-dev_cosine_f1": 0.6914498141263942, + "eval_Qnli-dev_cosine_f1_threshold": 0.6570438146591187, + "eval_Qnli-dev_cosine_precision": 0.6158940397350994, + "eval_Qnli-dev_cosine_recall": 0.788135593220339, + "eval_Qnli-dev_dot_accuracy": 0.66796875, + "eval_Qnli-dev_dot_accuracy_threshold": 356.5875244140625, + "eval_Qnli-dev_dot_ap": 0.6775881609477787, + "eval_Qnli-dev_dot_f1": 0.6808510638297872, + "eval_Qnli-dev_dot_f1_threshold": 279.8411560058594, + "eval_Qnli-dev_dot_precision": 0.5546666666666666, + "eval_Qnli-dev_dot_recall": 0.8813559322033898, + "eval_Qnli-dev_euclidean_accuracy": 0.71484375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.39892578125, + "eval_Qnli-dev_euclidean_ap": 0.7381798057320368, + "eval_Qnli-dev_euclidean_f1": 0.6936416184971099, + "eval_Qnli-dev_euclidean_f1_threshold": 17.72010612487793, + "eval_Qnli-dev_euclidean_precision": 0.6360424028268551, + "eval_Qnli-dev_euclidean_recall": 0.7627118644067796, + "eval_Qnli-dev_manhattan_accuracy": 0.7109375, + "eval_Qnli-dev_manhattan_accuracy_threshold": 351.7536315917969, + "eval_Qnli-dev_manhattan_ap": 0.7409644341665307, + "eval_Qnli-dev_manhattan_f1": 0.6927175843694494, + "eval_Qnli-dev_manhattan_f1_threshold": 395.87384033203125, + "eval_Qnli-dev_manhattan_precision": 0.5963302752293578, + "eval_Qnli-dev_manhattan_recall": 0.826271186440678, + "eval_Qnli-dev_max_accuracy": 0.71484375, + "eval_Qnli-dev_max_accuracy_threshold": 356.5875244140625, + "eval_Qnli-dev_max_ap": 0.7409644341665307, + "eval_Qnli-dev_max_f1": 0.6936416184971099, + "eval_Qnli-dev_max_f1_threshold": 395.87384033203125, + "eval_Qnli-dev_max_precision": 0.6360424028268551, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.74609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8139472007751465, + "eval_allNLI-dev_cosine_ap": 0.6437583397109479, + "eval_allNLI-dev_cosine_f1": 0.6332574031890661, + "eval_allNLI-dev_cosine_f1_threshold": 0.6743019819259644, + "eval_allNLI-dev_cosine_precision": 0.5225563909774437, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.7265625, + "eval_allNLI-dev_dot_accuracy_threshold": 377.887451171875, + "eval_allNLI-dev_dot_ap": 0.6068344971514285, + "eval_allNLI-dev_dot_f1": 0.6112185686653772, + "eval_allNLI-dev_dot_f1_threshold": 269.2353820800781, + "eval_allNLI-dev_dot_precision": 0.45930232558139533, + "eval_allNLI-dev_dot_recall": 0.9132947976878613, + "eval_allNLI-dev_euclidean_accuracy": 0.748046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.499614715576172, + "eval_allNLI-dev_euclidean_ap": 0.6476001002915632, + "eval_allNLI-dev_euclidean_f1": 0.642857142857143, + "eval_allNLI-dev_euclidean_f1_threshold": 16.95008087158203, + "eval_allNLI-dev_euclidean_precision": 0.5465587044534413, + "eval_allNLI-dev_euclidean_recall": 0.7803468208092486, + "eval_allNLI-dev_manhattan_accuracy": 0.7421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 278.20916748046875, + "eval_allNLI-dev_manhattan_ap": 0.6454870451765974, + "eval_allNLI-dev_manhattan_f1": 0.6418219461697723, + "eval_allNLI-dev_manhattan_f1_threshold": 394.37567138671875, + "eval_allNLI-dev_manhattan_precision": 0.5, + "eval_allNLI-dev_manhattan_recall": 0.8959537572254336, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 377.887451171875, + "eval_allNLI-dev_max_ap": 0.6476001002915632, + "eval_allNLI-dev_max_f1": 0.642857142857143, + "eval_allNLI-dev_max_f1_threshold": 394.37567138671875, + "eval_allNLI-dev_max_precision": 0.5465587044534413, + "eval_allNLI-dev_max_recall": 0.9132947976878613, + "eval_sequential_score": 0.7409644341665307, + "eval_sts-test_pearson_cosine": 0.8811535388189442, + "eval_sts-test_pearson_dot": 0.8577909096806895, + "eval_sts-test_pearson_euclidean": 0.909367914436592, + "eval_sts-test_pearson_manhattan": 0.9106296873367065, + "eval_sts-test_pearson_max": 0.9106296873367065, + "eval_sts-test_spearman_cosine": 0.9059834692049022, + "eval_sts-test_spearman_dot": 0.8609735455226603, + "eval_sts-test_spearman_euclidean": 0.9060557424920492, + "eval_sts-test_spearman_manhattan": 0.9073055557201885, + "eval_sts-test_spearman_max": 0.9073055557201885, + "eval_vitaminc-pairs_loss": 1.6505483388900757, + "eval_vitaminc-pairs_runtime": 3.2059, + "eval_vitaminc-pairs_samples_per_second": 39.926, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_negation-triplets_loss": 0.863065779209137, + "eval_negation-triplets_runtime": 0.7017, + "eval_negation-triplets_samples_per_second": 182.41, + "eval_negation-triplets_steps_per_second": 1.425, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_scitail-pairs-pos_loss": 0.04052574932575226, + "eval_scitail-pairs-pos_runtime": 0.8047, + "eval_scitail-pairs-pos_samples_per_second": 159.059, + "eval_scitail-pairs-pos_steps_per_second": 1.243, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_scitail-pairs-qa_loss": 4.153175177634694e-05, + "eval_scitail-pairs-qa_runtime": 0.524, + "eval_scitail-pairs-qa_samples_per_second": 244.291, + "eval_scitail-pairs-qa_steps_per_second": 1.909, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_xsum-pairs_loss": 0.006929950788617134, + "eval_xsum-pairs_runtime": 2.7195, + "eval_xsum-pairs_samples_per_second": 47.067, + "eval_xsum-pairs_steps_per_second": 0.368, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_sciq_pairs_loss": 0.017397526651620865, + "eval_sciq_pairs_runtime": 3.1141, + "eval_sciq_pairs_samples_per_second": 41.104, + "eval_sciq_pairs_steps_per_second": 0.321, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_qasc_pairs_loss": 0.04533594101667404, + "eval_qasc_pairs_runtime": 0.5656, + "eval_qasc_pairs_samples_per_second": 226.318, + "eval_qasc_pairs_steps_per_second": 1.768, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_openbookqa_pairs_loss": 0.6543528437614441, + "eval_openbookqa_pairs_runtime": 0.5728, + "eval_openbookqa_pairs_samples_per_second": 223.447, + "eval_openbookqa_pairs_steps_per_second": 1.746, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_msmarco_pairs_loss": 0.1534004509449005, + "eval_msmarco_pairs_runtime": 1.2692, + "eval_msmarco_pairs_samples_per_second": 100.851, + "eval_msmarco_pairs_steps_per_second": 0.788, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_nq_pairs_loss": 0.12322381138801575, + "eval_nq_pairs_runtime": 2.7322, + "eval_nq_pairs_samples_per_second": 46.849, + "eval_nq_pairs_steps_per_second": 0.366, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_trivia_pairs_loss": 0.1417546272277832, + "eval_trivia_pairs_runtime": 3.194, + "eval_trivia_pairs_samples_per_second": 40.075, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_gooaq_pairs_loss": 0.13262256979942322, + "eval_gooaq_pairs_runtime": 0.8791, + "eval_gooaq_pairs_samples_per_second": 145.596, + "eval_gooaq_pairs_steps_per_second": 1.137, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_paws-pos_loss": 0.023073820397257805, + "eval_paws-pos_runtime": 0.6877, + "eval_paws-pos_samples_per_second": 186.123, + "eval_paws-pos_steps_per_second": 1.454, + "step": 240 + }, + { + "epoch": 0.24514811031664965, + "eval_global_dataset_loss": 0.186612069606781, + "eval_global_dataset_runtime": 18.1718, + "eval_global_dataset_samples_per_second": 30.047, + "eval_global_dataset_steps_per_second": 0.275, + "step": 240 + }, + { + "epoch": 0.24616956077630234, + "grad_norm": 2.5275423526763916, + "learning_rate": 1.1128284389489955e-05, + "loss": 0.0155, + "step": 241 + }, + { + "epoch": 0.24719101123595505, + "grad_norm": 8.447001457214355, + "learning_rate": 1.1174652241112828e-05, + "loss": 0.2976, + "step": 242 + }, + { + "epoch": 0.24821246169560776, + "grad_norm": 0.6705769896507263, + "learning_rate": 1.1221020092735704e-05, + "loss": 0.0152, + "step": 243 + }, + { + "epoch": 0.24923391215526047, + "grad_norm": 8.869510650634766, + "learning_rate": 1.1267387944358577e-05, + "loss": 0.2855, + "step": 244 + }, + { + "epoch": 0.25025536261491316, + "grad_norm": 9.213271141052246, + "learning_rate": 1.1313755795981454e-05, + "loss": 0.493, + "step": 245 + }, + { + "epoch": 0.2512768130745659, + "grad_norm": 7.719737529754639, + "learning_rate": 1.1360123647604328e-05, + "loss": 0.3024, + "step": 246 + }, + { + "epoch": 0.2522982635342186, + "grad_norm": 9.609936714172363, + "learning_rate": 1.1406491499227203e-05, + "loss": 0.32, + "step": 247 + }, + { + "epoch": 0.2533197139938713, + "grad_norm": 6.189861297607422, + "learning_rate": 1.1452859350850077e-05, + "loss": 0.1631, + "step": 248 + }, + { + "epoch": 0.254341164453524, + "grad_norm": 5.420290946960449, + "learning_rate": 1.1499227202472952e-05, + "loss": 0.1349, + "step": 249 + }, + { + "epoch": 0.2553626149131767, + "grad_norm": 2.40893816947937, + "learning_rate": 1.1545595054095827e-05, + "loss": 0.0319, + "step": 250 + }, + { + "epoch": 0.25638406537282943, + "grad_norm": 6.218864440917969, + "learning_rate": 1.1591962905718703e-05, + "loss": 0.1413, + "step": 251 + }, + { + "epoch": 0.2574055158324821, + "grad_norm": 5.625741481781006, + "learning_rate": 1.1638330757341576e-05, + "loss": 0.0825, + "step": 252 + }, + { + "epoch": 0.25842696629213485, + "grad_norm": 7.639575481414795, + "learning_rate": 1.1684698608964452e-05, + "loss": 0.2668, + "step": 253 + }, + { + "epoch": 0.25944841675178754, + "grad_norm": 9.167525291442871, + "learning_rate": 1.1731066460587325e-05, + "loss": 0.3149, + "step": 254 + }, + { + "epoch": 0.2604698672114402, + "grad_norm": 4.172579765319824, + "learning_rate": 1.1777434312210201e-05, + "loss": 0.0909, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.739615797996521, + "eval_Qnli-dev_cosine_ap": 0.7176513542552487, + "eval_Qnli-dev_cosine_f1": 0.6848381601362862, + "eval_Qnli-dev_cosine_f1_threshold": 0.6334110498428345, + "eval_Qnli-dev_cosine_precision": 0.5726495726495726, + "eval_Qnli-dev_cosine_recall": 0.8516949152542372, + "eval_Qnli-dev_dot_accuracy": 0.662109375, + "eval_Qnli-dev_dot_accuracy_threshold": 361.2568359375, + "eval_Qnli-dev_dot_ap": 0.6717189830271137, + "eval_Qnli-dev_dot_f1": 0.6751188589540412, + "eval_Qnli-dev_dot_f1_threshold": 280.51141357421875, + "eval_Qnli-dev_dot_precision": 0.5392405063291139, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.127859115600586, + "eval_Qnli-dev_euclidean_ap": 0.7253669149951798, + "eval_Qnli-dev_euclidean_f1": 0.6907216494845361, + "eval_Qnli-dev_euclidean_f1_threshold": 18.814456939697266, + "eval_Qnli-dev_euclidean_precision": 0.5809248554913294, + "eval_Qnli-dev_euclidean_recall": 0.8516949152542372, + "eval_Qnli-dev_manhattan_accuracy": 0.69921875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 343.5025939941406, + "eval_Qnli-dev_manhattan_ap": 0.7277372252549248, + "eval_Qnli-dev_manhattan_f1": 0.689655172413793, + "eval_Qnli-dev_manhattan_f1_threshold": 398.349609375, + "eval_Qnli-dev_manhattan_precision": 0.5813953488372093, + "eval_Qnli-dev_manhattan_recall": 0.847457627118644, + "eval_Qnli-dev_max_accuracy": 0.703125, + "eval_Qnli-dev_max_accuracy_threshold": 361.2568359375, + "eval_Qnli-dev_max_ap": 0.7277372252549248, + "eval_Qnli-dev_max_f1": 0.6907216494845361, + "eval_Qnli-dev_max_f1_threshold": 398.349609375, + "eval_Qnli-dev_max_precision": 0.5813953488372093, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.744140625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8239623308181763, + "eval_allNLI-dev_cosine_ap": 0.6404982967910067, + "eval_allNLI-dev_cosine_f1": 0.6340326340326341, + "eval_allNLI-dev_cosine_f1_threshold": 0.6908655166625977, + "eval_allNLI-dev_cosine_precision": 0.53125, + "eval_allNLI-dev_cosine_recall": 0.7861271676300579, + "eval_allNLI-dev_dot_accuracy": 0.7265625, + "eval_allNLI-dev_dot_accuracy_threshold": 390.59832763671875, + "eval_allNLI-dev_dot_ap": 0.6024622848402517, + "eval_allNLI-dev_dot_f1": 0.609375, + "eval_allNLI-dev_dot_f1_threshold": 285.86602783203125, + "eval_allNLI-dev_dot_precision": 0.46017699115044247, + "eval_allNLI-dev_dot_recall": 0.9017341040462428, + "eval_allNLI-dev_euclidean_accuracy": 0.744140625, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.379159927368164, + "eval_allNLI-dev_euclidean_ap": 0.6444607569194125, + "eval_allNLI-dev_euclidean_f1": 0.6451612903225806, + "eval_allNLI-dev_euclidean_f1_threshold": 16.701509475708008, + "eval_allNLI-dev_euclidean_precision": 0.5652173913043478, + "eval_allNLI-dev_euclidean_recall": 0.7514450867052023, + "eval_allNLI-dev_manhattan_accuracy": 0.7421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 268.04638671875, + "eval_allNLI-dev_manhattan_ap": 0.6408446994722032, + "eval_allNLI-dev_manhattan_f1": 0.6473214285714285, + "eval_allNLI-dev_manhattan_f1_threshold": 371.20526123046875, + "eval_allNLI-dev_manhattan_precision": 0.5272727272727272, + "eval_allNLI-dev_manhattan_recall": 0.838150289017341, + "eval_allNLI-dev_max_accuracy": 0.744140625, + "eval_allNLI-dev_max_accuracy_threshold": 390.59832763671875, + "eval_allNLI-dev_max_ap": 0.6444607569194125, + "eval_allNLI-dev_max_f1": 0.6473214285714285, + "eval_allNLI-dev_max_f1_threshold": 371.20526123046875, + "eval_allNLI-dev_max_precision": 0.5652173913043478, + "eval_allNLI-dev_max_recall": 0.9017341040462428, + "eval_sequential_score": 0.7277372252549248, + "eval_sts-test_pearson_cosine": 0.8816805778426262, + "eval_sts-test_pearson_dot": 0.8577562657690772, + "eval_sts-test_pearson_euclidean": 0.9106582467151493, + "eval_sts-test_pearson_manhattan": 0.9113569719592275, + "eval_sts-test_pearson_max": 0.9113569719592275, + "eval_sts-test_spearman_cosine": 0.9069367158238004, + "eval_sts-test_spearman_dot": 0.8602106260184599, + "eval_sts-test_spearman_euclidean": 0.9069664754126258, + "eval_sts-test_spearman_manhattan": 0.9080076135088967, + "eval_sts-test_spearman_max": 0.9080076135088967, + "eval_vitaminc-pairs_loss": 1.6110538244247437, + "eval_vitaminc-pairs_runtime": 3.1802, + "eval_vitaminc-pairs_samples_per_second": 40.249, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_negation-triplets_loss": 0.8394344449043274, + "eval_negation-triplets_runtime": 0.6936, + "eval_negation-triplets_samples_per_second": 184.555, + "eval_negation-triplets_steps_per_second": 1.442, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_scitail-pairs-pos_loss": 0.04331967607140541, + "eval_scitail-pairs-pos_runtime": 0.8066, + "eval_scitail-pairs-pos_samples_per_second": 158.692, + "eval_scitail-pairs-pos_steps_per_second": 1.24, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_scitail-pairs-qa_loss": 6.0038993979105726e-05, + "eval_scitail-pairs-qa_runtime": 0.5299, + "eval_scitail-pairs-qa_samples_per_second": 241.571, + "eval_scitail-pairs-qa_steps_per_second": 1.887, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_xsum-pairs_loss": 0.006882825866341591, + "eval_xsum-pairs_runtime": 2.7201, + "eval_xsum-pairs_samples_per_second": 47.058, + "eval_xsum-pairs_steps_per_second": 0.368, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_sciq_pairs_loss": 0.01721280999481678, + "eval_sciq_pairs_runtime": 3.1188, + "eval_sciq_pairs_samples_per_second": 41.041, + "eval_sciq_pairs_steps_per_second": 0.321, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_qasc_pairs_loss": 0.04551500454545021, + "eval_qasc_pairs_runtime": 0.5688, + "eval_qasc_pairs_samples_per_second": 225.039, + "eval_qasc_pairs_steps_per_second": 1.758, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_openbookqa_pairs_loss": 0.673441469669342, + "eval_openbookqa_pairs_runtime": 0.5762, + "eval_openbookqa_pairs_samples_per_second": 222.146, + "eval_openbookqa_pairs_steps_per_second": 1.736, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_msmarco_pairs_loss": 0.17468245327472687, + "eval_msmarco_pairs_runtime": 1.2693, + "eval_msmarco_pairs_samples_per_second": 100.842, + "eval_msmarco_pairs_steps_per_second": 0.788, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_nq_pairs_loss": 0.1132262647151947, + "eval_nq_pairs_runtime": 2.7333, + "eval_nq_pairs_samples_per_second": 46.83, + "eval_nq_pairs_steps_per_second": 0.366, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_trivia_pairs_loss": 0.12717972695827484, + "eval_trivia_pairs_runtime": 3.1931, + "eval_trivia_pairs_samples_per_second": 40.087, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_gooaq_pairs_loss": 0.1465369015932083, + "eval_gooaq_pairs_runtime": 0.8776, + "eval_gooaq_pairs_samples_per_second": 145.857, + "eval_gooaq_pairs_steps_per_second": 1.14, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_paws-pos_loss": 0.023481549695134163, + "eval_paws-pos_runtime": 0.6874, + "eval_paws-pos_samples_per_second": 186.211, + "eval_paws-pos_steps_per_second": 1.455, + "step": 255 + }, + { + "epoch": 0.2604698672114402, + "eval_global_dataset_loss": 0.18388400971889496, + "eval_global_dataset_runtime": 18.1565, + "eval_global_dataset_samples_per_second": 30.072, + "eval_global_dataset_steps_per_second": 0.275, + "step": 255 + }, + { + "epoch": 0.26149131767109296, + "grad_norm": 6.071037769317627, + "learning_rate": 1.1823802163833076e-05, + "loss": 0.1851, + "step": 256 + }, + { + "epoch": 0.26251276813074564, + "grad_norm": 1.952987790107727, + "learning_rate": 1.187017001545595e-05, + "loss": 0.0148, + "step": 257 + }, + { + "epoch": 0.2635342185903984, + "grad_norm": 2.3067948818206787, + "learning_rate": 1.1916537867078825e-05, + "loss": 0.0766, + "step": 258 + }, + { + "epoch": 0.26455566905005107, + "grad_norm": 4.001049995422363, + "learning_rate": 1.19629057187017e-05, + "loss": 0.1014, + "step": 259 + }, + { + "epoch": 0.26557711950970375, + "grad_norm": 9.999890327453613, + "learning_rate": 1.2009273570324576e-05, + "loss": 0.2848, + "step": 260 + }, + { + "epoch": 0.2665985699693565, + "grad_norm": 5.984616756439209, + "learning_rate": 1.2055641421947451e-05, + "loss": 0.1803, + "step": 261 + }, + { + "epoch": 0.2676200204290092, + "grad_norm": 3.0844433307647705, + "learning_rate": 1.2102009273570325e-05, + "loss": 0.058, + "step": 262 + }, + { + "epoch": 0.2686414708886619, + "grad_norm": 10.779717445373535, + "learning_rate": 1.21483771251932e-05, + "loss": 0.3695, + "step": 263 + }, + { + "epoch": 0.2696629213483146, + "grad_norm": 5.611161708831787, + "learning_rate": 1.2194744976816075e-05, + "loss": 0.1595, + "step": 264 + }, + { + "epoch": 0.27068437180796734, + "grad_norm": 6.025004863739014, + "learning_rate": 1.224111282843895e-05, + "loss": 0.2092, + "step": 265 + }, + { + "epoch": 0.27170582226762, + "grad_norm": 8.771313667297363, + "learning_rate": 1.2287480680061824e-05, + "loss": 0.3305, + "step": 266 + }, + { + "epoch": 0.2727272727272727, + "grad_norm": 11.752165794372559, + "learning_rate": 1.2333848531684699e-05, + "loss": 0.469, + "step": 267 + }, + { + "epoch": 0.27374872318692545, + "grad_norm": 4.256598472595215, + "learning_rate": 1.2380216383307575e-05, + "loss": 0.0758, + "step": 268 + }, + { + "epoch": 0.27477017364657813, + "grad_norm": 5.781081676483154, + "learning_rate": 1.2426584234930448e-05, + "loss": 0.1443, + "step": 269 + }, + { + "epoch": 0.27579162410623087, + "grad_norm": 3.7666285037994385, + "learning_rate": 1.2472952086553324e-05, + "loss": 0.0849, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_Qnli-dev_cosine_accuracy": 0.693359375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7265219688415527, + "eval_Qnli-dev_cosine_ap": 0.7163017451384, + "eval_Qnli-dev_cosine_f1": 0.6798029556650246, + "eval_Qnli-dev_cosine_f1_threshold": 0.6078540086746216, + "eval_Qnli-dev_cosine_precision": 0.5549597855227882, + "eval_Qnli-dev_cosine_recall": 0.8771186440677966, + "eval_Qnli-dev_dot_accuracy": 0.666015625, + "eval_Qnli-dev_dot_accuracy_threshold": 363.2034606933594, + "eval_Qnli-dev_dot_ap": 0.6769190142598542, + "eval_Qnli-dev_dot_f1": 0.6698717948717948, + "eval_Qnli-dev_dot_f1_threshold": 283.3017578125, + "eval_Qnli-dev_dot_precision": 0.538659793814433, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.705078125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.37971305847168, + "eval_Qnli-dev_euclidean_ap": 0.7234972362035788, + "eval_Qnli-dev_euclidean_f1": 0.6870748299319728, + "eval_Qnli-dev_euclidean_f1_threshold": 19.120662689208984, + "eval_Qnli-dev_euclidean_precision": 0.5738636363636364, + "eval_Qnli-dev_euclidean_recall": 0.8559322033898306, + "eval_Qnli-dev_manhattan_accuracy": 0.6953125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 349.92462158203125, + "eval_Qnli-dev_manhattan_ap": 0.7256945760740315, + "eval_Qnli-dev_manhattan_f1": 0.6825688073394496, + "eval_Qnli-dev_manhattan_f1_threshold": 384.43963623046875, + "eval_Qnli-dev_manhattan_precision": 0.6019417475728155, + "eval_Qnli-dev_manhattan_recall": 0.788135593220339, + "eval_Qnli-dev_max_accuracy": 0.705078125, + "eval_Qnli-dev_max_accuracy_threshold": 363.2034606933594, + "eval_Qnli-dev_max_ap": 0.7256945760740315, + "eval_Qnli-dev_max_f1": 0.6870748299319728, + "eval_Qnli-dev_max_f1_threshold": 384.43963623046875, + "eval_Qnli-dev_max_precision": 0.6019417475728155, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.744140625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8210654258728027, + "eval_allNLI-dev_cosine_ap": 0.6393577663437054, + "eval_allNLI-dev_cosine_f1": 0.6376146788990825, + "eval_allNLI-dev_cosine_f1_threshold": 0.6806380748748779, + "eval_allNLI-dev_cosine_precision": 0.5285171102661597, + "eval_allNLI-dev_cosine_recall": 0.8034682080924855, + "eval_allNLI-dev_dot_accuracy": 0.728515625, + "eval_allNLI-dev_dot_accuracy_threshold": 391.4153747558594, + "eval_allNLI-dev_dot_ap": 0.5988245527245092, + "eval_allNLI-dev_dot_f1": 0.6160164271047228, + "eval_allNLI-dev_dot_f1_threshold": 298.9134521484375, + "eval_allNLI-dev_dot_precision": 0.47770700636942676, + "eval_allNLI-dev_dot_recall": 0.8670520231213873, + "eval_allNLI-dev_euclidean_accuracy": 0.748046875, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.53447151184082, + "eval_allNLI-dev_euclidean_ap": 0.6437983794799559, + "eval_allNLI-dev_euclidean_f1": 0.6467661691542288, + "eval_allNLI-dev_euclidean_f1_threshold": 16.686382293701172, + "eval_allNLI-dev_euclidean_precision": 0.5676855895196506, + "eval_allNLI-dev_euclidean_recall": 0.7514450867052023, + "eval_allNLI-dev_manhattan_accuracy": 0.740234375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 262.890869140625, + "eval_allNLI-dev_manhattan_ap": 0.640705434946914, + "eval_allNLI-dev_manhattan_f1": 0.6428571428571429, + "eval_allNLI-dev_manhattan_f1_threshold": 373.98333740234375, + "eval_allNLI-dev_manhattan_precision": 0.5236363636363637, + "eval_allNLI-dev_manhattan_recall": 0.8323699421965318, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 391.4153747558594, + "eval_allNLI-dev_max_ap": 0.6437983794799559, + "eval_allNLI-dev_max_f1": 0.6467661691542288, + "eval_allNLI-dev_max_f1_threshold": 373.98333740234375, + "eval_allNLI-dev_max_precision": 0.5676855895196506, + "eval_allNLI-dev_max_recall": 0.8670520231213873, + "eval_sequential_score": 0.7256945760740315, + "eval_sts-test_pearson_cosine": 0.8788711413447038, + "eval_sts-test_pearson_dot": 0.8578009900444606, + "eval_sts-test_pearson_euclidean": 0.9085752978914952, + "eval_sts-test_pearson_manhattan": 0.9093466408033868, + "eval_sts-test_pearson_max": 0.9093466408033868, + "eval_sts-test_spearman_cosine": 0.9056817114493402, + "eval_sts-test_spearman_dot": 0.8613584063706264, + "eval_sts-test_spearman_euclidean": 0.90590202190917, + "eval_sts-test_spearman_manhattan": 0.9066481596903487, + "eval_sts-test_spearman_max": 0.9066481596903487, + "eval_vitaminc-pairs_loss": 1.549290657043457, + "eval_vitaminc-pairs_runtime": 3.1821, + "eval_vitaminc-pairs_samples_per_second": 40.225, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_negation-triplets_loss": 0.8520956635475159, + "eval_negation-triplets_runtime": 0.6945, + "eval_negation-triplets_samples_per_second": 184.31, + "eval_negation-triplets_steps_per_second": 1.44, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_scitail-pairs-pos_loss": 0.04591067135334015, + "eval_scitail-pairs-pos_runtime": 0.7953, + "eval_scitail-pairs-pos_samples_per_second": 160.95, + "eval_scitail-pairs-pos_steps_per_second": 1.257, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_scitail-pairs-qa_loss": 7.293735689017922e-05, + "eval_scitail-pairs-qa_runtime": 0.5152, + "eval_scitail-pairs-qa_samples_per_second": 248.448, + "eval_scitail-pairs-qa_steps_per_second": 1.941, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_xsum-pairs_loss": 0.004420202691107988, + "eval_xsum-pairs_runtime": 2.7244, + "eval_xsum-pairs_samples_per_second": 46.983, + "eval_xsum-pairs_steps_per_second": 0.367, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_sciq_pairs_loss": 0.01765809766948223, + "eval_sciq_pairs_runtime": 3.148, + "eval_sciq_pairs_samples_per_second": 40.661, + "eval_sciq_pairs_steps_per_second": 0.318, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_qasc_pairs_loss": 0.04713795706629753, + "eval_qasc_pairs_runtime": 0.5662, + "eval_qasc_pairs_samples_per_second": 226.069, + "eval_qasc_pairs_steps_per_second": 1.766, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_openbookqa_pairs_loss": 0.7042292356491089, + "eval_openbookqa_pairs_runtime": 0.5779, + "eval_openbookqa_pairs_samples_per_second": 221.475, + "eval_openbookqa_pairs_steps_per_second": 1.73, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_msmarco_pairs_loss": 0.17726396024227142, + "eval_msmarco_pairs_runtime": 1.2739, + "eval_msmarco_pairs_samples_per_second": 100.482, + "eval_msmarco_pairs_steps_per_second": 0.785, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_nq_pairs_loss": 0.09398603439331055, + "eval_nq_pairs_runtime": 2.7279, + "eval_nq_pairs_samples_per_second": 46.922, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_trivia_pairs_loss": 0.11321136355400085, + "eval_trivia_pairs_runtime": 3.1938, + "eval_trivia_pairs_samples_per_second": 40.077, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_gooaq_pairs_loss": 0.1668858677148819, + "eval_gooaq_pairs_runtime": 0.8748, + "eval_gooaq_pairs_samples_per_second": 146.314, + "eval_gooaq_pairs_steps_per_second": 1.143, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_paws-pos_loss": 0.023237815126776695, + "eval_paws-pos_runtime": 0.7121, + "eval_paws-pos_samples_per_second": 179.763, + "eval_paws-pos_steps_per_second": 1.404, + "step": 270 + }, + { + "epoch": 0.27579162410623087, + "eval_global_dataset_loss": 0.1838148534297943, + "eval_global_dataset_runtime": 18.1795, + "eval_global_dataset_samples_per_second": 30.034, + "eval_global_dataset_steps_per_second": 0.275, + "step": 270 + }, + { + "epoch": 0.27681307456588355, + "grad_norm": 4.110296726226807, + "learning_rate": 1.2519319938176197e-05, + "loss": 0.0903, + "step": 271 + }, + { + "epoch": 0.27783452502553624, + "grad_norm": 5.035268783569336, + "learning_rate": 1.2565687789799073e-05, + "loss": 0.143, + "step": 272 + }, + { + "epoch": 0.278855975485189, + "grad_norm": 0.013771588914096355, + "learning_rate": 1.2612055641421948e-05, + "loss": 0.0001, + "step": 273 + }, + { + "epoch": 0.27987742594484166, + "grad_norm": 0.044864725321531296, + "learning_rate": 1.2658423493044823e-05, + "loss": 0.0003, + "step": 274 + }, + { + "epoch": 0.2808988764044944, + "grad_norm": 6.8100199699401855, + "learning_rate": 1.2704791344667697e-05, + "loss": 0.1538, + "step": 275 + }, + { + "epoch": 0.2819203268641471, + "grad_norm": 14.402591705322266, + "learning_rate": 1.2751159196290572e-05, + "loss": 1.1346, + "step": 276 + }, + { + "epoch": 0.28294177732379977, + "grad_norm": 8.473956108093262, + "learning_rate": 1.2797527047913447e-05, + "loss": 0.371, + "step": 277 + }, + { + "epoch": 0.2839632277834525, + "grad_norm": 7.969709396362305, + "learning_rate": 1.2843894899536323e-05, + "loss": 0.276, + "step": 278 + }, + { + "epoch": 0.2849846782431052, + "grad_norm": 14.899836540222168, + "learning_rate": 1.2890262751159196e-05, + "loss": 1.2873, + "step": 279 + }, + { + "epoch": 0.28600612870275793, + "grad_norm": 6.117278575897217, + "learning_rate": 1.2936630602782072e-05, + "loss": 0.1981, + "step": 280 + }, + { + "epoch": 0.2870275791624106, + "grad_norm": 3.290231943130493, + "learning_rate": 1.2982998454404945e-05, + "loss": 0.0358, + "step": 281 + }, + { + "epoch": 0.28804902962206336, + "grad_norm": 0.31267091631889343, + "learning_rate": 1.3029366306027821e-05, + "loss": 0.0028, + "step": 282 + }, + { + "epoch": 0.28907048008171604, + "grad_norm": 6.556621551513672, + "learning_rate": 1.3075734157650694e-05, + "loss": 0.1649, + "step": 283 + }, + { + "epoch": 0.2900919305413687, + "grad_norm": 4.962923526763916, + "learning_rate": 1.312210200927357e-05, + "loss": 0.1078, + "step": 284 + }, + { + "epoch": 0.29111338100102147, + "grad_norm": 5.9970703125, + "learning_rate": 1.3168469860896445e-05, + "loss": 0.1261, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7337853908538818, + "eval_Qnli-dev_cosine_ap": 0.7217262945967701, + "eval_Qnli-dev_cosine_f1": 0.6820428336079077, + "eval_Qnli-dev_cosine_f1_threshold": 0.6166578531265259, + "eval_Qnli-dev_cosine_precision": 0.5579514824797843, + "eval_Qnli-dev_cosine_recall": 0.8771186440677966, + "eval_Qnli-dev_dot_accuracy": 0.6640625, + "eval_Qnli-dev_dot_accuracy_threshold": 363.1641845703125, + "eval_Qnli-dev_dot_ap": 0.6809353456868141, + "eval_Qnli-dev_dot_f1": 0.6774193548387097, + "eval_Qnli-dev_dot_f1_threshold": 290.4522399902344, + "eval_Qnli-dev_dot_precision": 0.546875, + "eval_Qnli-dev_dot_recall": 0.8898305084745762, + "eval_Qnli-dev_euclidean_accuracy": 0.70703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.211044311523438, + "eval_Qnli-dev_euclidean_ap": 0.7297399082524314, + "eval_Qnli-dev_euclidean_f1": 0.6879194630872484, + "eval_Qnli-dev_euclidean_f1_threshold": 18.96355438232422, + "eval_Qnli-dev_euclidean_precision": 0.5694444444444444, + "eval_Qnli-dev_euclidean_recall": 0.8686440677966102, + "eval_Qnli-dev_manhattan_accuracy": 0.697265625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 324.5723876953125, + "eval_Qnli-dev_manhattan_ap": 0.73303442115081, + "eval_Qnli-dev_manhattan_f1": 0.684981684981685, + "eval_Qnli-dev_manhattan_f1_threshold": 379.77838134765625, + "eval_Qnli-dev_manhattan_precision": 0.603225806451613, + "eval_Qnli-dev_manhattan_recall": 0.7923728813559322, + "eval_Qnli-dev_max_accuracy": 0.70703125, + "eval_Qnli-dev_max_accuracy_threshold": 363.1641845703125, + "eval_Qnli-dev_max_ap": 0.73303442115081, + "eval_Qnli-dev_max_f1": 0.6879194630872484, + "eval_Qnli-dev_max_f1_threshold": 379.77838134765625, + "eval_Qnli-dev_max_precision": 0.603225806451613, + "eval_Qnli-dev_max_recall": 0.8898305084745762, + "eval_allNLI-dev_cosine_accuracy": 0.74609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8103257417678833, + "eval_allNLI-dev_cosine_ap": 0.6367258359366549, + "eval_allNLI-dev_cosine_f1": 0.6438356164383561, + "eval_allNLI-dev_cosine_f1_threshold": 0.6819883584976196, + "eval_allNLI-dev_cosine_precision": 0.5320754716981132, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.732421875, + "eval_allNLI-dev_dot_accuracy_threshold": 389.21514892578125, + "eval_allNLI-dev_dot_ap": 0.5923693949749147, + "eval_allNLI-dev_dot_f1": 0.6147540983606558, + "eval_allNLI-dev_dot_f1_threshold": 297.75360107421875, + "eval_allNLI-dev_dot_precision": 0.47619047619047616, + "eval_allNLI-dev_dot_recall": 0.8670520231213873, + "eval_allNLI-dev_euclidean_accuracy": 0.751953125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.616144180297852, + "eval_allNLI-dev_euclidean_ap": 0.6433404840141702, + "eval_allNLI-dev_euclidean_f1": 0.6494117647058825, + "eval_allNLI-dev_euclidean_f1_threshold": 17.07449722290039, + "eval_allNLI-dev_euclidean_precision": 0.5476190476190477, + "eval_allNLI-dev_euclidean_recall": 0.7976878612716763, + "eval_allNLI-dev_manhattan_accuracy": 0.74609375, + "eval_allNLI-dev_manhattan_accuracy_threshold": 302.7928161621094, + "eval_allNLI-dev_manhattan_ap": 0.6398238460644392, + "eval_allNLI-dev_manhattan_f1": 0.6473214285714285, + "eval_allNLI-dev_manhattan_f1_threshold": 372.90765380859375, + "eval_allNLI-dev_manhattan_precision": 0.5272727272727272, + "eval_allNLI-dev_manhattan_recall": 0.838150289017341, + "eval_allNLI-dev_max_accuracy": 0.751953125, + "eval_allNLI-dev_max_accuracy_threshold": 389.21514892578125, + "eval_allNLI-dev_max_ap": 0.6433404840141702, + "eval_allNLI-dev_max_f1": 0.6494117647058825, + "eval_allNLI-dev_max_f1_threshold": 372.90765380859375, + "eval_allNLI-dev_max_precision": 0.5476190476190477, + "eval_allNLI-dev_max_recall": 0.8670520231213873, + "eval_sequential_score": 0.73303442115081, + "eval_sts-test_pearson_cosine": 0.8759428380123349, + "eval_sts-test_pearson_dot": 0.853814592484377, + "eval_sts-test_pearson_euclidean": 0.9064134959917913, + "eval_sts-test_pearson_manhattan": 0.9075111868821899, + "eval_sts-test_pearson_max": 0.9075111868821899, + "eval_sts-test_spearman_cosine": 0.9040790792763912, + "eval_sts-test_spearman_dot": 0.8570938796675944, + "eval_sts-test_spearman_euclidean": 0.9039949469049755, + "eval_sts-test_spearman_manhattan": 0.9050900102712229, + "eval_sts-test_spearman_max": 0.9050900102712229, + "eval_vitaminc-pairs_loss": 1.5489282608032227, + "eval_vitaminc-pairs_runtime": 3.1786, + "eval_vitaminc-pairs_samples_per_second": 40.269, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_negation-triplets_loss": 0.8852226138114929, + "eval_negation-triplets_runtime": 0.6982, + "eval_negation-triplets_samples_per_second": 183.33, + "eval_negation-triplets_steps_per_second": 1.432, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_scitail-pairs-pos_loss": 0.0468689501285553, + "eval_scitail-pairs-pos_runtime": 0.8592, + "eval_scitail-pairs-pos_samples_per_second": 148.981, + "eval_scitail-pairs-pos_steps_per_second": 1.164, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_scitail-pairs-qa_loss": 0.00011975038069067523, + "eval_scitail-pairs-qa_runtime": 0.5203, + "eval_scitail-pairs-qa_samples_per_second": 246.024, + "eval_scitail-pairs-qa_steps_per_second": 1.922, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_xsum-pairs_loss": 0.004035182297229767, + "eval_xsum-pairs_runtime": 2.7201, + "eval_xsum-pairs_samples_per_second": 47.057, + "eval_xsum-pairs_steps_per_second": 0.368, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_sciq_pairs_loss": 0.017545733600854874, + "eval_sciq_pairs_runtime": 3.1021, + "eval_sciq_pairs_samples_per_second": 41.262, + "eval_sciq_pairs_steps_per_second": 0.322, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_qasc_pairs_loss": 0.050791218876838684, + "eval_qasc_pairs_runtime": 0.5694, + "eval_qasc_pairs_samples_per_second": 224.794, + "eval_qasc_pairs_steps_per_second": 1.756, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_openbookqa_pairs_loss": 0.6752834320068359, + "eval_openbookqa_pairs_runtime": 0.5779, + "eval_openbookqa_pairs_samples_per_second": 221.498, + "eval_openbookqa_pairs_steps_per_second": 1.73, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_msmarco_pairs_loss": 0.1674027293920517, + "eval_msmarco_pairs_runtime": 1.2701, + "eval_msmarco_pairs_samples_per_second": 100.778, + "eval_msmarco_pairs_steps_per_second": 0.787, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_nq_pairs_loss": 0.0822448581457138, + "eval_nq_pairs_runtime": 2.7289, + "eval_nq_pairs_samples_per_second": 46.905, + "eval_nq_pairs_steps_per_second": 0.366, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_trivia_pairs_loss": 0.11886414885520935, + "eval_trivia_pairs_runtime": 3.2077, + "eval_trivia_pairs_samples_per_second": 39.904, + "eval_trivia_pairs_steps_per_second": 0.312, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_gooaq_pairs_loss": 0.1732579916715622, + "eval_gooaq_pairs_runtime": 0.8728, + "eval_gooaq_pairs_samples_per_second": 146.662, + "eval_gooaq_pairs_steps_per_second": 1.146, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_paws-pos_loss": 0.023297669366002083, + "eval_paws-pos_runtime": 0.6826, + "eval_paws-pos_samples_per_second": 187.523, + "eval_paws-pos_steps_per_second": 1.465, + "step": 285 + }, + { + "epoch": 0.29111338100102147, + "eval_global_dataset_loss": 0.18985512852668762, + "eval_global_dataset_runtime": 18.1604, + "eval_global_dataset_samples_per_second": 30.065, + "eval_global_dataset_steps_per_second": 0.275, + "step": 285 + }, + { + "epoch": 0.29213483146067415, + "grad_norm": 3.3900773525238037, + "learning_rate": 1.321483771251932e-05, + "loss": 0.1047, + "step": 286 + }, + { + "epoch": 0.2931562819203269, + "grad_norm": 6.067929267883301, + "learning_rate": 1.3261205564142196e-05, + "loss": 0.162, + "step": 287 + }, + { + "epoch": 0.2941777323799796, + "grad_norm": 0.8399752378463745, + "learning_rate": 1.3307573415765069e-05, + "loss": 0.0323, + "step": 288 + }, + { + "epoch": 0.29519918283963226, + "grad_norm": 6.48594856262207, + "learning_rate": 1.3353941267387945e-05, + "loss": 0.3326, + "step": 289 + }, + { + "epoch": 0.296220633299285, + "grad_norm": 5.543538570404053, + "learning_rate": 1.340030911901082e-05, + "loss": 0.126, + "step": 290 + }, + { + "epoch": 0.2972420837589377, + "grad_norm": 0.3173281252384186, + "learning_rate": 1.3446676970633695e-05, + "loss": 0.0085, + "step": 291 + }, + { + "epoch": 0.2982635342185904, + "grad_norm": 6.157264232635498, + "learning_rate": 1.349304482225657e-05, + "loss": 0.2076, + "step": 292 + }, + { + "epoch": 0.2992849846782431, + "grad_norm": 8.121578216552734, + "learning_rate": 1.3539412673879444e-05, + "loss": 0.2953, + "step": 293 + }, + { + "epoch": 0.3003064351378958, + "grad_norm": 9.3897066116333, + "learning_rate": 1.3585780525502318e-05, + "loss": 0.2242, + "step": 294 + }, + { + "epoch": 0.30132788559754853, + "grad_norm": 4.103500843048096, + "learning_rate": 1.3632148377125195e-05, + "loss": 0.0889, + "step": 295 + }, + { + "epoch": 0.3023493360572012, + "grad_norm": 0.307234525680542, + "learning_rate": 1.3678516228748068e-05, + "loss": 0.0033, + "step": 296 + }, + { + "epoch": 0.30337078651685395, + "grad_norm": 10.16430377960205, + "learning_rate": 1.3724884080370944e-05, + "loss": 0.3779, + "step": 297 + }, + { + "epoch": 0.30439223697650664, + "grad_norm": 6.08989143371582, + "learning_rate": 1.3771251931993817e-05, + "loss": 0.1762, + "step": 298 + }, + { + "epoch": 0.3054136874361593, + "grad_norm": 5.143807411193848, + "learning_rate": 1.3817619783616693e-05, + "loss": 0.1256, + "step": 299 + }, + { + "epoch": 0.30643513789581206, + "grad_norm": 7.561800479888916, + "learning_rate": 1.3863987635239568e-05, + "loss": 0.1733, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_Qnli-dev_cosine_accuracy": 0.705078125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.734356164932251, + "eval_Qnli-dev_cosine_ap": 0.7251388111056354, + "eval_Qnli-dev_cosine_f1": 0.6880570409982174, + "eval_Qnli-dev_cosine_f1_threshold": 0.666618824005127, + "eval_Qnli-dev_cosine_precision": 0.5938461538461538, + "eval_Qnli-dev_cosine_recall": 0.8177966101694916, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 356.3788757324219, + "eval_Qnli-dev_dot_ap": 0.68222304767165, + "eval_Qnli-dev_dot_f1": 0.6720516962843297, + "eval_Qnli-dev_dot_f1_threshold": 300.6175537109375, + "eval_Qnli-dev_dot_precision": 0.5430809399477807, + "eval_Qnli-dev_dot_recall": 0.8813559322033898, + "eval_Qnli-dev_euclidean_accuracy": 0.708984375, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.971199035644531, + "eval_Qnli-dev_euclidean_ap": 0.7342248530985087, + "eval_Qnli-dev_euclidean_f1": 0.6920415224913494, + "eval_Qnli-dev_euclidean_f1_threshold": 18.420867919921875, + "eval_Qnli-dev_euclidean_precision": 0.5847953216374269, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.701171875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 322.6009216308594, + "eval_Qnli-dev_manhattan_ap": 0.7364358394529807, + "eval_Qnli-dev_manhattan_f1": 0.6843971631205673, + "eval_Qnli-dev_manhattan_f1_threshold": 384.479248046875, + "eval_Qnli-dev_manhattan_precision": 0.5884146341463414, + "eval_Qnli-dev_manhattan_recall": 0.8177966101694916, + "eval_Qnli-dev_max_accuracy": 0.708984375, + "eval_Qnli-dev_max_accuracy_threshold": 356.3788757324219, + "eval_Qnli-dev_max_ap": 0.7364358394529807, + "eval_Qnli-dev_max_f1": 0.6920415224913494, + "eval_Qnli-dev_max_f1_threshold": 384.479248046875, + "eval_Qnli-dev_max_precision": 0.5938461538461538, + "eval_Qnli-dev_max_recall": 0.8813559322033898, + "eval_allNLI-dev_cosine_accuracy": 0.74609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8067381978034973, + "eval_allNLI-dev_cosine_ap": 0.6358773308113639, + "eval_allNLI-dev_cosine_f1": 0.6497695852534563, + "eval_allNLI-dev_cosine_f1_threshold": 0.6887349486351013, + "eval_allNLI-dev_cosine_precision": 0.5402298850574713, + "eval_allNLI-dev_cosine_recall": 0.815028901734104, + "eval_allNLI-dev_dot_accuracy": 0.7265625, + "eval_allNLI-dev_dot_accuracy_threshold": 390.38787841796875, + "eval_allNLI-dev_dot_ap": 0.5883350678492294, + "eval_allNLI-dev_dot_f1": 0.6176470588235294, + "eval_allNLI-dev_dot_f1_threshold": 306.22418212890625, + "eval_allNLI-dev_dot_precision": 0.48514851485148514, + "eval_allNLI-dev_dot_recall": 0.8497109826589595, + "eval_allNLI-dev_euclidean_accuracy": 0.755859375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 14.002582550048828, + "eval_allNLI-dev_euclidean_ap": 0.6414315119282512, + "eval_allNLI-dev_euclidean_f1": 0.6558891454965358, + "eval_allNLI-dev_euclidean_f1_threshold": 17.105127334594727, + "eval_allNLI-dev_euclidean_precision": 0.5461538461538461, + "eval_allNLI-dev_euclidean_recall": 0.8208092485549133, + "eval_allNLI-dev_manhattan_accuracy": 0.75, + "eval_allNLI-dev_manhattan_accuracy_threshold": 298.34423828125, + "eval_allNLI-dev_manhattan_ap": 0.6380997534211968, + "eval_allNLI-dev_manhattan_f1": 0.6495327102803738, + "eval_allNLI-dev_manhattan_f1_threshold": 360.8920593261719, + "eval_allNLI-dev_manhattan_precision": 0.5450980392156862, + "eval_allNLI-dev_manhattan_recall": 0.8034682080924855, + "eval_allNLI-dev_max_accuracy": 0.755859375, + "eval_allNLI-dev_max_accuracy_threshold": 390.38787841796875, + "eval_allNLI-dev_max_ap": 0.6414315119282512, + "eval_allNLI-dev_max_f1": 0.6558891454965358, + "eval_allNLI-dev_max_f1_threshold": 360.8920593261719, + "eval_allNLI-dev_max_precision": 0.5461538461538461, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7364358394529807, + "eval_sts-test_pearson_cosine": 0.8764071159390601, + "eval_sts-test_pearson_dot": 0.854284217185856, + "eval_sts-test_pearson_euclidean": 0.9064562902904141, + "eval_sts-test_pearson_manhattan": 0.9076733868441358, + "eval_sts-test_pearson_max": 0.9076733868441358, + "eval_sts-test_spearman_cosine": 0.9036925626317537, + "eval_sts-test_spearman_dot": 0.8560821431500577, + "eval_sts-test_spearman_euclidean": 0.9036303583784195, + "eval_sts-test_spearman_manhattan": 0.9048020806554308, + "eval_sts-test_spearman_max": 0.9048020806554308, + "eval_vitaminc-pairs_loss": 1.546329140663147, + "eval_vitaminc-pairs_runtime": 3.1864, + "eval_vitaminc-pairs_samples_per_second": 40.171, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_negation-triplets_loss": 0.8829111456871033, + "eval_negation-triplets_runtime": 0.6945, + "eval_negation-triplets_samples_per_second": 184.314, + "eval_negation-triplets_steps_per_second": 1.44, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_scitail-pairs-pos_loss": 0.05185386538505554, + "eval_scitail-pairs-pos_runtime": 0.8139, + "eval_scitail-pairs-pos_samples_per_second": 157.271, + "eval_scitail-pairs-pos_steps_per_second": 1.229, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_scitail-pairs-qa_loss": 0.00015708569844719023, + "eval_scitail-pairs-qa_runtime": 0.5183, + "eval_scitail-pairs-qa_samples_per_second": 246.984, + "eval_scitail-pairs-qa_steps_per_second": 1.93, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_xsum-pairs_loss": 0.004001606721431017, + "eval_xsum-pairs_runtime": 2.7355, + "eval_xsum-pairs_samples_per_second": 46.793, + "eval_xsum-pairs_steps_per_second": 0.366, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_sciq_pairs_loss": 0.01712801866233349, + "eval_sciq_pairs_runtime": 3.1466, + "eval_sciq_pairs_samples_per_second": 40.679, + "eval_sciq_pairs_steps_per_second": 0.318, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_qasc_pairs_loss": 0.06327986717224121, + "eval_qasc_pairs_runtime": 0.5671, + "eval_qasc_pairs_samples_per_second": 225.698, + "eval_qasc_pairs_steps_per_second": 1.763, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_openbookqa_pairs_loss": 0.710024893283844, + "eval_openbookqa_pairs_runtime": 0.5778, + "eval_openbookqa_pairs_samples_per_second": 221.516, + "eval_openbookqa_pairs_steps_per_second": 1.731, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_msmarco_pairs_loss": 0.18048645555973053, + "eval_msmarco_pairs_runtime": 1.273, + "eval_msmarco_pairs_samples_per_second": 100.55, + "eval_msmarco_pairs_steps_per_second": 0.786, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_nq_pairs_loss": 0.0711974948644638, + "eval_nq_pairs_runtime": 2.7311, + "eval_nq_pairs_samples_per_second": 46.868, + "eval_nq_pairs_steps_per_second": 0.366, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_trivia_pairs_loss": 0.12784315645694733, + "eval_trivia_pairs_runtime": 3.2114, + "eval_trivia_pairs_samples_per_second": 39.859, + "eval_trivia_pairs_steps_per_second": 0.311, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_gooaq_pairs_loss": 0.1734517365694046, + "eval_gooaq_pairs_runtime": 0.8762, + "eval_gooaq_pairs_samples_per_second": 146.088, + "eval_gooaq_pairs_steps_per_second": 1.141, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_paws-pos_loss": 0.02314586378633976, + "eval_paws-pos_runtime": 0.6881, + "eval_paws-pos_samples_per_second": 186.017, + "eval_paws-pos_steps_per_second": 1.453, + "step": 300 + }, + { + "epoch": 0.30643513789581206, + "eval_global_dataset_loss": 0.19011227786540985, + "eval_global_dataset_runtime": 18.177, + "eval_global_dataset_samples_per_second": 30.038, + "eval_global_dataset_steps_per_second": 0.275, + "step": 300 + }, + { + "epoch": 0.30745658835546474, + "grad_norm": 6.156424522399902, + "learning_rate": 1.3910355486862443e-05, + "loss": 0.19, + "step": 301 + }, + { + "epoch": 0.3084780388151175, + "grad_norm": 5.94158411026001, + "learning_rate": 1.3956723338485317e-05, + "loss": 0.2525, + "step": 302 + }, + { + "epoch": 0.30949948927477017, + "grad_norm": 4.755197525024414, + "learning_rate": 1.4003091190108192e-05, + "loss": 0.1157, + "step": 303 + }, + { + "epoch": 0.3105209397344229, + "grad_norm": 4.332251071929932, + "learning_rate": 1.4049459041731066e-05, + "loss": 0.1213, + "step": 304 + }, + { + "epoch": 0.3115423901940756, + "grad_norm": 5.716749668121338, + "learning_rate": 1.4095826893353941e-05, + "loss": 0.1931, + "step": 305 + }, + { + "epoch": 0.3125638406537283, + "grad_norm": 6.276858329772949, + "learning_rate": 1.4142194744976816e-05, + "loss": 0.153, + "step": 306 + }, + { + "epoch": 0.313585291113381, + "grad_norm": 6.499155044555664, + "learning_rate": 1.4188562596599692e-05, + "loss": 0.1416, + "step": 307 + }, + { + "epoch": 0.3146067415730337, + "grad_norm": 4.293001651763916, + "learning_rate": 1.4234930448222565e-05, + "loss": 0.1161, + "step": 308 + }, + { + "epoch": 0.31562819203268644, + "grad_norm": 4.688383102416992, + "learning_rate": 1.4281298299845441e-05, + "loss": 0.1887, + "step": 309 + }, + { + "epoch": 0.3166496424923391, + "grad_norm": 7.010243892669678, + "learning_rate": 1.4327666151468314e-05, + "loss": 0.2003, + "step": 310 + }, + { + "epoch": 0.3176710929519918, + "grad_norm": 8.255718231201172, + "learning_rate": 1.437403400309119e-05, + "loss": 0.2119, + "step": 311 + }, + { + "epoch": 0.31869254341164455, + "grad_norm": 3.2170424461364746, + "learning_rate": 1.4420401854714067e-05, + "loss": 0.088, + "step": 312 + }, + { + "epoch": 0.31971399387129723, + "grad_norm": 6.751638889312744, + "learning_rate": 1.446676970633694e-05, + "loss": 0.179, + "step": 313 + }, + { + "epoch": 0.32073544433094997, + "grad_norm": 14.980527877807617, + "learning_rate": 1.4513137557959816e-05, + "loss": 1.2446, + "step": 314 + }, + { + "epoch": 0.32175689479060265, + "grad_norm": 5.247725486755371, + "learning_rate": 1.4559505409582689e-05, + "loss": 0.0955, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_Qnli-dev_cosine_accuracy": 0.69921875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7298833131790161, + "eval_Qnli-dev_cosine_ap": 0.724998608118465, + "eval_Qnli-dev_cosine_f1": 0.6843971631205673, + "eval_Qnli-dev_cosine_f1_threshold": 0.656394362449646, + "eval_Qnli-dev_cosine_precision": 0.5884146341463414, + "eval_Qnli-dev_cosine_recall": 0.8177966101694916, + "eval_Qnli-dev_dot_accuracy": 0.662109375, + "eval_Qnli-dev_dot_accuracy_threshold": 384.91998291015625, + "eval_Qnli-dev_dot_ap": 0.6798698842777653, + "eval_Qnli-dev_dot_f1": 0.671850699844479, + "eval_Qnli-dev_dot_f1_threshold": 275.90283203125, + "eval_Qnli-dev_dot_precision": 0.5307125307125307, + "eval_Qnli-dev_dot_recall": 0.9152542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.705078125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.018356323242188, + "eval_Qnli-dev_euclidean_ap": 0.7335796410374035, + "eval_Qnli-dev_euclidean_f1": 0.6900175131348512, + "eval_Qnli-dev_euclidean_f1_threshold": 18.48751449584961, + "eval_Qnli-dev_euclidean_precision": 0.5880597014925373, + "eval_Qnli-dev_euclidean_recall": 0.8347457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.70703125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 347.44671630859375, + "eval_Qnli-dev_manhattan_ap": 0.7373674852724033, + "eval_Qnli-dev_manhattan_f1": 0.6867256637168142, + "eval_Qnli-dev_manhattan_f1_threshold": 388.9920654296875, + "eval_Qnli-dev_manhattan_precision": 0.5896656534954408, + "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, + "eval_Qnli-dev_max_accuracy": 0.70703125, + "eval_Qnli-dev_max_accuracy_threshold": 384.91998291015625, + "eval_Qnli-dev_max_ap": 0.7373674852724033, + "eval_Qnli-dev_max_f1": 0.6900175131348512, + "eval_Qnli-dev_max_f1_threshold": 388.9920654296875, + "eval_Qnli-dev_max_precision": 0.5896656534954408, + "eval_Qnli-dev_max_recall": 0.9152542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.744140625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.788373589515686, + "eval_allNLI-dev_cosine_ap": 0.6358233096395716, + "eval_allNLI-dev_cosine_f1": 0.6572769953051643, + "eval_allNLI-dev_cosine_f1_threshold": 0.698913037776947, + "eval_allNLI-dev_cosine_precision": 0.5533596837944664, + "eval_allNLI-dev_cosine_recall": 0.8092485549132948, + "eval_allNLI-dev_dot_accuracy": 0.728515625, + "eval_allNLI-dev_dot_accuracy_threshold": 358.747802734375, + "eval_allNLI-dev_dot_ap": 0.5916443958892955, + "eval_allNLI-dev_dot_f1": 0.6150442477876107, + "eval_allNLI-dev_dot_f1_threshold": 317.09967041015625, + "eval_allNLI-dev_dot_precision": 0.4982078853046595, + "eval_allNLI-dev_dot_recall": 0.8034682080924855, + "eval_allNLI-dev_euclidean_accuracy": 0.751953125, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.513811111450195, + "eval_allNLI-dev_euclidean_ap": 0.6421381548383314, + "eval_allNLI-dev_euclidean_f1": 0.6542056074766355, + "eval_allNLI-dev_euclidean_f1_threshold": 16.974750518798828, + "eval_allNLI-dev_euclidean_precision": 0.5490196078431373, + "eval_allNLI-dev_euclidean_recall": 0.8092485549132948, + "eval_allNLI-dev_manhattan_accuracy": 0.748046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 302.1901550292969, + "eval_allNLI-dev_manhattan_ap": 0.6396923464740729, + "eval_allNLI-dev_manhattan_f1": 0.6490765171503958, + "eval_allNLI-dev_manhattan_f1_threshold": 336.8003845214844, + "eval_allNLI-dev_manhattan_precision": 0.5970873786407767, + "eval_allNLI-dev_manhattan_recall": 0.7109826589595376, + "eval_allNLI-dev_max_accuracy": 0.751953125, + "eval_allNLI-dev_max_accuracy_threshold": 358.747802734375, + "eval_allNLI-dev_max_ap": 0.6421381548383314, + "eval_allNLI-dev_max_f1": 0.6572769953051643, + "eval_allNLI-dev_max_f1_threshold": 336.8003845214844, + "eval_allNLI-dev_max_precision": 0.5970873786407767, + "eval_allNLI-dev_max_recall": 0.8092485549132948, + "eval_sequential_score": 0.7373674852724033, + "eval_sts-test_pearson_cosine": 0.8775975441836863, + "eval_sts-test_pearson_dot": 0.8553432208508764, + "eval_sts-test_pearson_euclidean": 0.9076088707187173, + "eval_sts-test_pearson_manhattan": 0.908368070714995, + "eval_sts-test_pearson_max": 0.908368070714995, + "eval_sts-test_spearman_cosine": 0.9051328372283746, + "eval_sts-test_spearman_dot": 0.8587504818617813, + "eval_sts-test_spearman_euclidean": 0.9052312452521741, + "eval_sts-test_spearman_manhattan": 0.905854227562004, + "eval_sts-test_spearman_max": 0.905854227562004, + "eval_vitaminc-pairs_loss": 1.5383586883544922, + "eval_vitaminc-pairs_runtime": 3.2637, + "eval_vitaminc-pairs_samples_per_second": 39.219, + "eval_vitaminc-pairs_steps_per_second": 0.306, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_negation-triplets_loss": 0.8863205313682556, + "eval_negation-triplets_runtime": 0.6942, + "eval_negation-triplets_samples_per_second": 184.395, + "eval_negation-triplets_steps_per_second": 1.441, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_scitail-pairs-pos_loss": 0.04996222257614136, + "eval_scitail-pairs-pos_runtime": 0.8051, + "eval_scitail-pairs-pos_samples_per_second": 158.985, + "eval_scitail-pairs-pos_steps_per_second": 1.242, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_scitail-pairs-qa_loss": 0.00010935126920230687, + "eval_scitail-pairs-qa_runtime": 0.5183, + "eval_scitail-pairs-qa_samples_per_second": 246.958, + "eval_scitail-pairs-qa_steps_per_second": 1.929, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_xsum-pairs_loss": 0.0022275070659816265, + "eval_xsum-pairs_runtime": 2.7268, + "eval_xsum-pairs_samples_per_second": 46.942, + "eval_xsum-pairs_steps_per_second": 0.367, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_sciq_pairs_loss": 0.019459517672657967, + "eval_sciq_pairs_runtime": 3.1358, + "eval_sciq_pairs_samples_per_second": 40.819, + "eval_sciq_pairs_steps_per_second": 0.319, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_qasc_pairs_loss": 0.06486809998750687, + "eval_qasc_pairs_runtime": 0.5754, + "eval_qasc_pairs_samples_per_second": 222.457, + "eval_qasc_pairs_steps_per_second": 1.738, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_openbookqa_pairs_loss": 0.8444811105728149, + "eval_openbookqa_pairs_runtime": 0.5855, + "eval_openbookqa_pairs_samples_per_second": 218.634, + "eval_openbookqa_pairs_steps_per_second": 1.708, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_msmarco_pairs_loss": 0.1706206053495407, + "eval_msmarco_pairs_runtime": 1.2768, + "eval_msmarco_pairs_samples_per_second": 100.254, + "eval_msmarco_pairs_steps_per_second": 0.783, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_nq_pairs_loss": 0.07448726892471313, + "eval_nq_pairs_runtime": 2.7378, + "eval_nq_pairs_samples_per_second": 46.753, + "eval_nq_pairs_steps_per_second": 0.365, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_trivia_pairs_loss": 0.10676610469818115, + "eval_trivia_pairs_runtime": 3.2055, + "eval_trivia_pairs_samples_per_second": 39.931, + "eval_trivia_pairs_steps_per_second": 0.312, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_gooaq_pairs_loss": 0.17288200557231903, + "eval_gooaq_pairs_runtime": 0.8767, + "eval_gooaq_pairs_samples_per_second": 145.997, + "eval_gooaq_pairs_steps_per_second": 1.141, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_paws-pos_loss": 0.023032238706946373, + "eval_paws-pos_runtime": 0.6899, + "eval_paws-pos_samples_per_second": 185.534, + "eval_paws-pos_steps_per_second": 1.449, + "step": 315 + }, + { + "epoch": 0.32175689479060265, + "eval_global_dataset_loss": 0.18488678336143494, + "eval_global_dataset_runtime": 18.1871, + "eval_global_dataset_samples_per_second": 30.021, + "eval_global_dataset_steps_per_second": 0.275, + "step": 315 + }, + { + "epoch": 0.32277834525025534, + "grad_norm": 4.056392192840576, + "learning_rate": 1.4605873261205565e-05, + "loss": 0.0774, + "step": 316 + }, + { + "epoch": 0.3237997957099081, + "grad_norm": 6.029316425323486, + "learning_rate": 1.465224111282844e-05, + "loss": 0.2037, + "step": 317 + }, + { + "epoch": 0.32482124616956076, + "grad_norm": 9.636774063110352, + "learning_rate": 1.4698608964451314e-05, + "loss": 0.3577, + "step": 318 + }, + { + "epoch": 0.3258426966292135, + "grad_norm": 3.6292381286621094, + "learning_rate": 1.4744976816074189e-05, + "loss": 0.0613, + "step": 319 + }, + { + "epoch": 0.3268641470888662, + "grad_norm": 15.577709197998047, + "learning_rate": 1.4791344667697064e-05, + "loss": 1.5805, + "step": 320 + }, + { + "epoch": 0.32788559754851887, + "grad_norm": 10.113024711608887, + "learning_rate": 1.4837712519319938e-05, + "loss": 0.4659, + "step": 321 + }, + { + "epoch": 0.3289070480081716, + "grad_norm": 7.356515884399414, + "learning_rate": 1.4884080370942815e-05, + "loss": 0.1798, + "step": 322 + }, + { + "epoch": 0.3299284984678243, + "grad_norm": 5.009124279022217, + "learning_rate": 1.4930448222565688e-05, + "loss": 0.0925, + "step": 323 + }, + { + "epoch": 0.33094994892747703, + "grad_norm": 7.641706466674805, + "learning_rate": 1.4976816074188564e-05, + "loss": 0.2453, + "step": 324 + }, + { + "epoch": 0.3319713993871297, + "grad_norm": 6.878350257873535, + "learning_rate": 1.5023183925811439e-05, + "loss": 0.2034, + "step": 325 + }, + { + "epoch": 0.33299284984678246, + "grad_norm": 7.814288139343262, + "learning_rate": 1.5069551777434313e-05, + "loss": 0.1711, + "step": 326 + }, + { + "epoch": 0.33401430030643514, + "grad_norm": 7.0652289390563965, + "learning_rate": 1.5115919629057186e-05, + "loss": 0.1541, + "step": 327 + }, + { + "epoch": 0.3350357507660878, + "grad_norm": 5.543487071990967, + "learning_rate": 1.5162287480680064e-05, + "loss": 0.1005, + "step": 328 + }, + { + "epoch": 0.33605720122574056, + "grad_norm": 6.212592124938965, + "learning_rate": 1.5208655332302937e-05, + "loss": 0.1289, + "step": 329 + }, + { + "epoch": 0.33707865168539325, + "grad_norm": 5.460144996643066, + "learning_rate": 1.5255023183925812e-05, + "loss": 0.1242, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_Qnli-dev_cosine_accuracy": 0.693359375, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7256989479064941, + "eval_Qnli-dev_cosine_ap": 0.7135900243542426, + "eval_Qnli-dev_cosine_f1": 0.6835016835016835, + "eval_Qnli-dev_cosine_f1_threshold": 0.6368885040283203, + "eval_Qnli-dev_cosine_precision": 0.5670391061452514, + "eval_Qnli-dev_cosine_recall": 0.8601694915254238, + "eval_Qnli-dev_dot_accuracy": 0.654296875, + "eval_Qnli-dev_dot_accuracy_threshold": 392.331787109375, + "eval_Qnli-dev_dot_ap": 0.6672395054550004, + "eval_Qnli-dev_dot_f1": 0.6666666666666667, + "eval_Qnli-dev_dot_f1_threshold": 289.88958740234375, + "eval_Qnli-dev_dot_precision": 0.5345268542199488, + "eval_Qnli-dev_dot_recall": 0.885593220338983, + "eval_Qnli-dev_euclidean_accuracy": 0.705078125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 16.11324691772461, + "eval_Qnli-dev_euclidean_ap": 0.7222163069993965, + "eval_Qnli-dev_euclidean_f1": 0.6814310051107325, + "eval_Qnli-dev_euclidean_f1_threshold": 18.713932037353516, + "eval_Qnli-dev_euclidean_precision": 0.5698005698005698, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.697265625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 346.6036682128906, + "eval_Qnli-dev_manhattan_ap": 0.7251336785707464, + "eval_Qnli-dev_manhattan_f1": 0.6771378708551483, + "eval_Qnli-dev_manhattan_f1_threshold": 388.4070129394531, + "eval_Qnli-dev_manhattan_precision": 0.5756676557863502, + "eval_Qnli-dev_manhattan_recall": 0.8220338983050848, + "eval_Qnli-dev_max_accuracy": 0.705078125, + "eval_Qnli-dev_max_accuracy_threshold": 392.331787109375, + "eval_Qnli-dev_max_ap": 0.7251336785707464, + "eval_Qnli-dev_max_f1": 0.6835016835016835, + "eval_Qnli-dev_max_f1_threshold": 388.4070129394531, + "eval_Qnli-dev_max_precision": 0.5756676557863502, + "eval_Qnli-dev_max_recall": 0.885593220338983, + "eval_allNLI-dev_cosine_accuracy": 0.7421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.821968138217926, + "eval_allNLI-dev_cosine_ap": 0.6314074866903452, + "eval_allNLI-dev_cosine_f1": 0.6509433962264151, + "eval_allNLI-dev_cosine_f1_threshold": 0.7014142870903015, + "eval_allNLI-dev_cosine_precision": 0.549800796812749, + "eval_allNLI-dev_cosine_recall": 0.7976878612716763, + "eval_allNLI-dev_dot_accuracy": 0.720703125, + "eval_allNLI-dev_dot_accuracy_threshold": 371.16778564453125, + "eval_allNLI-dev_dot_ap": 0.5859582934275832, + "eval_allNLI-dev_dot_f1": 0.6163522012578616, + "eval_allNLI-dev_dot_f1_threshold": 309.29315185546875, + "eval_allNLI-dev_dot_precision": 0.48355263157894735, + "eval_allNLI-dev_dot_recall": 0.8497109826589595, + "eval_allNLI-dev_euclidean_accuracy": 0.75, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.6043701171875, + "eval_allNLI-dev_euclidean_ap": 0.6391791878103057, + "eval_allNLI-dev_euclidean_f1": 0.654292343387471, + "eval_allNLI-dev_euclidean_f1_threshold": 16.919557571411133, + "eval_allNLI-dev_euclidean_precision": 0.5465116279069767, + "eval_allNLI-dev_euclidean_recall": 0.815028901734104, + "eval_allNLI-dev_manhattan_accuracy": 0.75, + "eval_allNLI-dev_manhattan_accuracy_threshold": 296.8101806640625, + "eval_allNLI-dev_manhattan_ap": 0.6365525347632632, + "eval_allNLI-dev_manhattan_f1": 0.6463700234192038, + "eval_allNLI-dev_manhattan_f1_threshold": 358.01708984375, + "eval_allNLI-dev_manhattan_precision": 0.5433070866141733, + "eval_allNLI-dev_manhattan_recall": 0.7976878612716763, + "eval_allNLI-dev_max_accuracy": 0.75, + "eval_allNLI-dev_max_accuracy_threshold": 371.16778564453125, + "eval_allNLI-dev_max_ap": 0.6391791878103057, + "eval_allNLI-dev_max_f1": 0.654292343387471, + "eval_allNLI-dev_max_f1_threshold": 358.01708984375, + "eval_allNLI-dev_max_precision": 0.549800796812749, + "eval_allNLI-dev_max_recall": 0.8497109826589595, + "eval_sequential_score": 0.7251336785707464, + "eval_sts-test_pearson_cosine": 0.8767726605681215, + "eval_sts-test_pearson_dot": 0.8545745306875346, + "eval_sts-test_pearson_euclidean": 0.9078883780148814, + "eval_sts-test_pearson_manhattan": 0.908743377105566, + "eval_sts-test_pearson_max": 0.908743377105566, + "eval_sts-test_spearman_cosine": 0.9046651417955731, + "eval_sts-test_spearman_dot": 0.8575806838890101, + "eval_sts-test_spearman_euclidean": 0.904950520589466, + "eval_sts-test_spearman_manhattan": 0.9058202166033463, + "eval_sts-test_spearman_max": 0.9058202166033463, + "eval_vitaminc-pairs_loss": 1.5013313293457031, + "eval_vitaminc-pairs_runtime": 3.1835, + "eval_vitaminc-pairs_samples_per_second": 40.207, + "eval_vitaminc-pairs_steps_per_second": 0.314, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_negation-triplets_loss": 0.8697316646575928, + "eval_negation-triplets_runtime": 0.692, + "eval_negation-triplets_samples_per_second": 184.963, + "eval_negation-triplets_steps_per_second": 1.445, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_scitail-pairs-pos_loss": 0.051664724946022034, + "eval_scitail-pairs-pos_runtime": 0.8019, + "eval_scitail-pairs-pos_samples_per_second": 159.622, + "eval_scitail-pairs-pos_steps_per_second": 1.247, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_scitail-pairs-qa_loss": 7.885550439823419e-05, + "eval_scitail-pairs-qa_runtime": 0.5124, + "eval_scitail-pairs-qa_samples_per_second": 249.805, + "eval_scitail-pairs-qa_steps_per_second": 1.952, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_xsum-pairs_loss": 0.0016007832018658519, + "eval_xsum-pairs_runtime": 2.7211, + "eval_xsum-pairs_samples_per_second": 47.04, + "eval_xsum-pairs_steps_per_second": 0.367, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_sciq_pairs_loss": 0.0189877450466156, + "eval_sciq_pairs_runtime": 3.099, + "eval_sciq_pairs_samples_per_second": 41.304, + "eval_sciq_pairs_steps_per_second": 0.323, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_qasc_pairs_loss": 0.05424018204212189, + "eval_qasc_pairs_runtime": 0.5676, + "eval_qasc_pairs_samples_per_second": 225.499, + "eval_qasc_pairs_steps_per_second": 1.762, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_openbookqa_pairs_loss": 0.8975388407707214, + "eval_openbookqa_pairs_runtime": 0.5776, + "eval_openbookqa_pairs_samples_per_second": 221.597, + "eval_openbookqa_pairs_steps_per_second": 1.731, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_msmarco_pairs_loss": 0.18051746487617493, + "eval_msmarco_pairs_runtime": 1.272, + "eval_msmarco_pairs_samples_per_second": 100.626, + "eval_msmarco_pairs_steps_per_second": 0.786, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_nq_pairs_loss": 0.07745326310396194, + "eval_nq_pairs_runtime": 2.7275, + "eval_nq_pairs_samples_per_second": 46.929, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_trivia_pairs_loss": 0.08102034777402878, + "eval_trivia_pairs_runtime": 3.1942, + "eval_trivia_pairs_samples_per_second": 40.073, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_gooaq_pairs_loss": 0.1758139431476593, + "eval_gooaq_pairs_runtime": 0.8778, + "eval_gooaq_pairs_samples_per_second": 145.815, + "eval_gooaq_pairs_steps_per_second": 1.139, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_paws-pos_loss": 0.02289787121117115, + "eval_paws-pos_runtime": 0.6834, + "eval_paws-pos_samples_per_second": 187.29, + "eval_paws-pos_steps_per_second": 1.463, + "step": 330 + }, + { + "epoch": 0.33707865168539325, + "eval_global_dataset_loss": 0.18260228633880615, + "eval_global_dataset_runtime": 18.1683, + "eval_global_dataset_samples_per_second": 30.052, + "eval_global_dataset_steps_per_second": 0.275, + "step": 330 + }, + { + "epoch": 0.338100102145046, + "grad_norm": 9.6026029586792, + "learning_rate": 1.5301391035548686e-05, + "loss": 0.3312, + "step": 331 + }, + { + "epoch": 0.3391215526046987, + "grad_norm": 0.4827215075492859, + "learning_rate": 1.5347758887171563e-05, + "loss": 0.0054, + "step": 332 + }, + { + "epoch": 0.34014300306435136, + "grad_norm": 4.201548099517822, + "learning_rate": 1.5394126738794436e-05, + "loss": 0.1248, + "step": 333 + }, + { + "epoch": 0.3411644535240041, + "grad_norm": 8.177660942077637, + "learning_rate": 1.544049459041731e-05, + "loss": 0.2118, + "step": 334 + }, + { + "epoch": 0.3421859039836568, + "grad_norm": 5.3610358238220215, + "learning_rate": 1.5486862442040188e-05, + "loss": 0.1197, + "step": 335 + }, + { + "epoch": 0.3432073544433095, + "grad_norm": 5.404463768005371, + "learning_rate": 1.553323029366306e-05, + "loss": 0.2249, + "step": 336 + }, + { + "epoch": 0.3442288049029622, + "grad_norm": 9.293848037719727, + "learning_rate": 1.5579598145285934e-05, + "loss": 0.4465, + "step": 337 + }, + { + "epoch": 0.3452502553626149, + "grad_norm": 4.159994125366211, + "learning_rate": 1.562596599690881e-05, + "loss": 0.1024, + "step": 338 + }, + { + "epoch": 0.34627170582226763, + "grad_norm": 8.487483024597168, + "learning_rate": 1.5672333848531687e-05, + "loss": 0.285, + "step": 339 + }, + { + "epoch": 0.3472931562819203, + "grad_norm": 4.548951148986816, + "learning_rate": 1.571870170015456e-05, + "loss": 0.1373, + "step": 340 + }, + { + "epoch": 0.34831460674157305, + "grad_norm": 2.439406156539917, + "learning_rate": 1.5765069551777432e-05, + "loss": 0.0513, + "step": 341 + }, + { + "epoch": 0.34933605720122574, + "grad_norm": 5.205477714538574, + "learning_rate": 1.5811437403400312e-05, + "loss": 0.1769, + "step": 342 + }, + { + "epoch": 0.3503575076608784, + "grad_norm": 0.022974058985710144, + "learning_rate": 1.5857805255023185e-05, + "loss": 0.0002, + "step": 343 + }, + { + "epoch": 0.35137895812053116, + "grad_norm": 8.67325210571289, + "learning_rate": 1.5904173106646058e-05, + "loss": 0.2479, + "step": 344 + }, + { + "epoch": 0.35240040858018384, + "grad_norm": 3.9924445152282715, + "learning_rate": 1.5950540958268934e-05, + "loss": 0.093, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_Qnli-dev_cosine_accuracy": 0.697265625, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7564169764518738, + "eval_Qnli-dev_cosine_ap": 0.7233529514412451, + "eval_Qnli-dev_cosine_f1": 0.6776859504132232, + "eval_Qnli-dev_cosine_f1_threshold": 0.6330949068069458, + "eval_Qnli-dev_cosine_precision": 0.5555555555555556, + "eval_Qnli-dev_cosine_recall": 0.8686440677966102, + "eval_Qnli-dev_dot_accuracy": 0.654296875, + "eval_Qnli-dev_dot_accuracy_threshold": 403.67242431640625, + "eval_Qnli-dev_dot_ap": 0.6732448653169163, + "eval_Qnli-dev_dot_f1": 0.6707882534775889, + "eval_Qnli-dev_dot_f1_threshold": 280.5927734375, + "eval_Qnli-dev_dot_precision": 0.5279805352798054, + "eval_Qnli-dev_dot_recall": 0.9194915254237288, + "eval_Qnli-dev_euclidean_accuracy": 0.703125, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.930200576782227, + "eval_Qnli-dev_euclidean_ap": 0.7319092420829139, + "eval_Qnli-dev_euclidean_f1": 0.6836935166994106, + "eval_Qnli-dev_euclidean_f1_threshold": 16.93238067626953, + "eval_Qnli-dev_euclidean_precision": 0.6373626373626373, + "eval_Qnli-dev_euclidean_recall": 0.7372881355932204, + "eval_Qnli-dev_manhattan_accuracy": 0.705078125, + "eval_Qnli-dev_manhattan_accuracy_threshold": 317.0030822753906, + "eval_Qnli-dev_manhattan_ap": 0.734519776802989, + "eval_Qnli-dev_manhattan_f1": 0.6812386156648452, + "eval_Qnli-dev_manhattan_f1_threshold": 375.60125732421875, + "eval_Qnli-dev_manhattan_precision": 0.597444089456869, + "eval_Qnli-dev_manhattan_recall": 0.7923728813559322, + "eval_Qnli-dev_max_accuracy": 0.705078125, + "eval_Qnli-dev_max_accuracy_threshold": 403.67242431640625, + "eval_Qnli-dev_max_ap": 0.734519776802989, + "eval_Qnli-dev_max_f1": 0.6836935166994106, + "eval_Qnli-dev_max_f1_threshold": 375.60125732421875, + "eval_Qnli-dev_max_precision": 0.6373626373626373, + "eval_Qnli-dev_max_recall": 0.9194915254237288, + "eval_allNLI-dev_cosine_accuracy": 0.74609375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7974586486816406, + "eval_allNLI-dev_cosine_ap": 0.6348489677397605, + "eval_allNLI-dev_cosine_f1": 0.6476190476190476, + "eval_allNLI-dev_cosine_f1_threshold": 0.7176597118377686, + "eval_allNLI-dev_cosine_precision": 0.5506072874493927, + "eval_allNLI-dev_cosine_recall": 0.7861271676300579, + "eval_allNLI-dev_dot_accuracy": 0.72265625, + "eval_allNLI-dev_dot_accuracy_threshold": 360.510498046875, + "eval_allNLI-dev_dot_ap": 0.5871403961073137, + "eval_allNLI-dev_dot_f1": 0.6083499005964215, + "eval_allNLI-dev_dot_f1_threshold": 301.9743347167969, + "eval_allNLI-dev_dot_precision": 0.4636363636363636, + "eval_allNLI-dev_dot_recall": 0.884393063583815, + "eval_allNLI-dev_euclidean_accuracy": 0.75, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.944112777709961, + "eval_allNLI-dev_euclidean_ap": 0.6419251225642049, + "eval_allNLI-dev_euclidean_f1": 0.6485260770975058, + "eval_allNLI-dev_euclidean_f1_threshold": 16.85090446472168, + "eval_allNLI-dev_euclidean_precision": 0.5335820895522388, + "eval_allNLI-dev_euclidean_recall": 0.8265895953757225, + "eval_allNLI-dev_manhattan_accuracy": 0.751953125, + "eval_allNLI-dev_manhattan_accuracy_threshold": 320.1395263671875, + "eval_allNLI-dev_manhattan_ap": 0.6383854431509484, + "eval_allNLI-dev_manhattan_f1": 0.6430379746835444, + "eval_allNLI-dev_manhattan_f1_threshold": 334.00848388671875, + "eval_allNLI-dev_manhattan_precision": 0.5720720720720721, + "eval_allNLI-dev_manhattan_recall": 0.7341040462427746, + "eval_allNLI-dev_max_accuracy": 0.751953125, + "eval_allNLI-dev_max_accuracy_threshold": 360.510498046875, + "eval_allNLI-dev_max_ap": 0.6419251225642049, + "eval_allNLI-dev_max_f1": 0.6485260770975058, + "eval_allNLI-dev_max_f1_threshold": 334.00848388671875, + "eval_allNLI-dev_max_precision": 0.5720720720720721, + "eval_allNLI-dev_max_recall": 0.884393063583815, + "eval_sequential_score": 0.734519776802989, + "eval_sts-test_pearson_cosine": 0.876432196836719, + "eval_sts-test_pearson_dot": 0.8505402609476747, + "eval_sts-test_pearson_euclidean": 0.9079057923300784, + "eval_sts-test_pearson_manhattan": 0.9091772273551437, + "eval_sts-test_pearson_max": 0.9091772273551437, + "eval_sts-test_spearman_cosine": 0.9050215855925554, + "eval_sts-test_spearman_dot": 0.8534220386704301, + "eval_sts-test_spearman_euclidean": 0.9051156079927388, + "eval_sts-test_spearman_manhattan": 0.9065126976221168, + "eval_sts-test_spearman_max": 0.9065126976221168, + "eval_vitaminc-pairs_loss": 1.4504694938659668, + "eval_vitaminc-pairs_runtime": 3.179, + "eval_vitaminc-pairs_samples_per_second": 40.264, + "eval_vitaminc-pairs_steps_per_second": 0.315, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_negation-triplets_loss": 0.8716320395469666, + "eval_negation-triplets_runtime": 0.6906, + "eval_negation-triplets_samples_per_second": 185.356, + "eval_negation-triplets_steps_per_second": 1.448, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_scitail-pairs-pos_loss": 0.05133823677897453, + "eval_scitail-pairs-pos_runtime": 0.7962, + "eval_scitail-pairs-pos_samples_per_second": 160.768, + "eval_scitail-pairs-pos_steps_per_second": 1.256, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_scitail-pairs-qa_loss": 7.034523878246546e-05, + "eval_scitail-pairs-qa_runtime": 0.5153, + "eval_scitail-pairs-qa_samples_per_second": 248.398, + "eval_scitail-pairs-qa_steps_per_second": 1.941, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_xsum-pairs_loss": 0.0014645290793851018, + "eval_xsum-pairs_runtime": 2.7173, + "eval_xsum-pairs_samples_per_second": 47.105, + "eval_xsum-pairs_steps_per_second": 0.368, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_sciq_pairs_loss": 0.016533996909856796, + "eval_sciq_pairs_runtime": 3.1125, + "eval_sciq_pairs_samples_per_second": 41.124, + "eval_sciq_pairs_steps_per_second": 0.321, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_qasc_pairs_loss": 0.04721737653017044, + "eval_qasc_pairs_runtime": 0.5649, + "eval_qasc_pairs_samples_per_second": 226.595, + "eval_qasc_pairs_steps_per_second": 1.77, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_openbookqa_pairs_loss": 0.844680905342102, + "eval_openbookqa_pairs_runtime": 0.5758, + "eval_openbookqa_pairs_samples_per_second": 222.292, + "eval_openbookqa_pairs_steps_per_second": 1.737, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_msmarco_pairs_loss": 0.19032695889472961, + "eval_msmarco_pairs_runtime": 1.2721, + "eval_msmarco_pairs_samples_per_second": 100.621, + "eval_msmarco_pairs_steps_per_second": 0.786, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_nq_pairs_loss": 0.07721181213855743, + "eval_nq_pairs_runtime": 2.7284, + "eval_nq_pairs_samples_per_second": 46.914, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_trivia_pairs_loss": 0.09631358832120895, + "eval_trivia_pairs_runtime": 3.2036, + "eval_trivia_pairs_samples_per_second": 39.955, + "eval_trivia_pairs_steps_per_second": 0.312, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_gooaq_pairs_loss": 0.1446821391582489, + "eval_gooaq_pairs_runtime": 0.8729, + "eval_gooaq_pairs_samples_per_second": 146.643, + "eval_gooaq_pairs_steps_per_second": 1.146, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_paws-pos_loss": 0.022854922339320183, + "eval_paws-pos_runtime": 0.6908, + "eval_paws-pos_samples_per_second": 185.28, + "eval_paws-pos_steps_per_second": 1.448, + "step": 345 + }, + { + "epoch": 0.35240040858018384, + "eval_global_dataset_loss": 0.17787586152553558, + "eval_global_dataset_runtime": 18.1383, + "eval_global_dataset_samples_per_second": 30.102, + "eval_global_dataset_steps_per_second": 0.276, + "step": 345 + }, + { + "epoch": 0.3534218590398366, + "grad_norm": 3.014742136001587, + "learning_rate": 1.599690880989181e-05, + "loss": 0.0656, + "step": 346 + }, + { + "epoch": 0.35444330949948927, + "grad_norm": 13.554019927978516, + "learning_rate": 1.6043276661514684e-05, + "loss": 1.1389, + "step": 347 + }, + { + "epoch": 0.355464759959142, + "grad_norm": 1.1227961778640747, + "learning_rate": 1.6089644513137557e-05, + "loss": 0.0511, + "step": 348 + }, + { + "epoch": 0.3564862104187947, + "grad_norm": 9.689891815185547, + "learning_rate": 1.6136012364760433e-05, + "loss": 0.5599, + "step": 349 + }, + { + "epoch": 0.3575076608784474, + "grad_norm": 6.500387191772461, + "learning_rate": 1.618238021638331e-05, + "loss": 0.2122, + "step": 350 + }, + { + "epoch": 0.3585291113381001, + "grad_norm": 8.804593086242676, + "learning_rate": 1.6228748068006182e-05, + "loss": 0.2426, + "step": 351 + }, + { + "epoch": 0.3595505617977528, + "grad_norm": 0.05541450157761574, + "learning_rate": 1.627511591962906e-05, + "loss": 0.0005, + "step": 352 + }, + { + "epoch": 0.36057201225740554, + "grad_norm": 2.9838674068450928, + "learning_rate": 1.632148377125193e-05, + "loss": 0.058, + "step": 353 + }, + { + "epoch": 0.3615934627170582, + "grad_norm": 3.657801389694214, + "learning_rate": 1.6367851622874808e-05, + "loss": 0.0953, + "step": 354 + }, + { + "epoch": 0.3626149131767109, + "grad_norm": 6.8335161209106445, + "learning_rate": 1.6414219474497684e-05, + "loss": 0.1586, + "step": 355 + }, + { + "epoch": 0.36363636363636365, + "grad_norm": 7.608448505401611, + "learning_rate": 1.6460587326120557e-05, + "loss": 0.2778, + "step": 356 + }, + { + "epoch": 0.36465781409601633, + "grad_norm": 3.210745096206665, + "learning_rate": 1.650695517774343e-05, + "loss": 0.0863, + "step": 357 + }, + { + "epoch": 0.36567926455566907, + "grad_norm": 2.3987958431243896, + "learning_rate": 1.6553323029366306e-05, + "loss": 0.0341, + "step": 358 + }, + { + "epoch": 0.36670071501532175, + "grad_norm": 6.528378486633301, + "learning_rate": 1.6599690880989182e-05, + "loss": 0.2889, + "step": 359 + }, + { + "epoch": 0.36772216547497444, + "grad_norm": 10.558550834655762, + "learning_rate": 1.6646058732612055e-05, + "loss": 0.4869, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_Qnli-dev_cosine_accuracy": 0.716796875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7531403303146362, + "eval_Qnli-dev_cosine_ap": 0.7390258673180305, + "eval_Qnli-dev_cosine_f1": 0.6897810218978102, + "eval_Qnli-dev_cosine_f1_threshold": 0.6856715679168701, + "eval_Qnli-dev_cosine_precision": 0.6057692307692307, + "eval_Qnli-dev_cosine_recall": 0.8008474576271186, + "eval_Qnli-dev_dot_accuracy": 0.673828125, + "eval_Qnli-dev_dot_accuracy_threshold": 375.48236083984375, + "eval_Qnli-dev_dot_ap": 0.6872905848478156, + "eval_Qnli-dev_dot_f1": 0.6751188589540412, + "eval_Qnli-dev_dot_f1_threshold": 290.2564697265625, + "eval_Qnli-dev_dot_precision": 0.5392405063291139, + "eval_Qnli-dev_dot_recall": 0.902542372881356, + "eval_Qnli-dev_euclidean_accuracy": 0.73046875, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.566179275512695, + "eval_Qnli-dev_euclidean_ap": 0.7477086304537104, + "eval_Qnli-dev_euclidean_f1": 0.694949494949495, + "eval_Qnli-dev_euclidean_f1_threshold": 16.509952545166016, + "eval_Qnli-dev_euclidean_precision": 0.6640926640926641, + "eval_Qnli-dev_euclidean_recall": 0.7288135593220338, + "eval_Qnli-dev_manhattan_accuracy": 0.72265625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 325.6196594238281, + "eval_Qnli-dev_manhattan_ap": 0.7509129881995745, + "eval_Qnli-dev_manhattan_f1": 0.6964285714285714, + "eval_Qnli-dev_manhattan_f1_threshold": 375.9530334472656, + "eval_Qnli-dev_manhattan_precision": 0.6018518518518519, + "eval_Qnli-dev_manhattan_recall": 0.826271186440678, + "eval_Qnli-dev_max_accuracy": 0.73046875, + "eval_Qnli-dev_max_accuracy_threshold": 375.48236083984375, + "eval_Qnli-dev_max_ap": 0.7509129881995745, + "eval_Qnli-dev_max_f1": 0.6964285714285714, + "eval_Qnli-dev_max_f1_threshold": 375.9530334472656, + "eval_Qnli-dev_max_precision": 0.6640926640926641, + "eval_Qnli-dev_max_recall": 0.902542372881356, + "eval_allNLI-dev_cosine_accuracy": 0.7421875, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.7899453639984131, + "eval_allNLI-dev_cosine_ap": 0.6305021349782971, + "eval_allNLI-dev_cosine_f1": 0.6410835214446953, + "eval_allNLI-dev_cosine_f1_threshold": 0.7155214548110962, + "eval_allNLI-dev_cosine_precision": 0.5259259259259259, + "eval_allNLI-dev_cosine_recall": 0.8208092485549133, + "eval_allNLI-dev_dot_accuracy": 0.716796875, + "eval_allNLI-dev_dot_accuracy_threshold": 402.31121826171875, + "eval_allNLI-dev_dot_ap": 0.5835795658925824, + "eval_allNLI-dev_dot_f1": 0.6073752711496746, + "eval_allNLI-dev_dot_f1_threshold": 330.6991271972656, + "eval_allNLI-dev_dot_precision": 0.4861111111111111, + "eval_allNLI-dev_dot_recall": 0.8092485549132948, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.789877891540527, + "eval_allNLI-dev_euclidean_ap": 0.6381063053277856, + "eval_allNLI-dev_euclidean_f1": 0.6421568627450981, + "eval_allNLI-dev_euclidean_f1_threshold": 15.769261360168457, + "eval_allNLI-dev_euclidean_precision": 0.5574468085106383, + "eval_allNLI-dev_euclidean_recall": 0.7572254335260116, + "eval_allNLI-dev_manhattan_accuracy": 0.748046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 286.81292724609375, + "eval_allNLI-dev_manhattan_ap": 0.6361054194245931, + "eval_allNLI-dev_manhattan_f1": 0.6398104265402844, + "eval_allNLI-dev_manhattan_f1_threshold": 340.4715576171875, + "eval_allNLI-dev_manhattan_precision": 0.5421686746987951, + "eval_allNLI-dev_manhattan_recall": 0.7803468208092486, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 402.31121826171875, + "eval_allNLI-dev_max_ap": 0.6381063053277856, + "eval_allNLI-dev_max_f1": 0.6421568627450981, + "eval_allNLI-dev_max_f1_threshold": 340.4715576171875, + "eval_allNLI-dev_max_precision": 0.5574468085106383, + "eval_allNLI-dev_max_recall": 0.8208092485549133, + "eval_sequential_score": 0.7509129881995745, + "eval_sts-test_pearson_cosine": 0.8753092133540921, + "eval_sts-test_pearson_dot": 0.844372604612416, + "eval_sts-test_pearson_euclidean": 0.9062249021051101, + "eval_sts-test_pearson_manhattan": 0.9079214373207487, + "eval_sts-test_pearson_max": 0.9079214373207487, + "eval_sts-test_spearman_cosine": 0.9039105460259913, + "eval_sts-test_spearman_dot": 0.8477134335135591, + "eval_sts-test_spearman_euclidean": 0.9040858367168613, + "eval_sts-test_spearman_manhattan": 0.9060312635520682, + "eval_sts-test_spearman_max": 0.9060312635520682, + "eval_vitaminc-pairs_loss": 1.4861621856689453, + "eval_vitaminc-pairs_runtime": 3.2082, + "eval_vitaminc-pairs_samples_per_second": 39.898, + "eval_vitaminc-pairs_steps_per_second": 0.312, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_negation-triplets_loss": 0.8837717175483704, + "eval_negation-triplets_runtime": 0.6939, + "eval_negation-triplets_samples_per_second": 184.465, + "eval_negation-triplets_steps_per_second": 1.441, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_scitail-pairs-pos_loss": 0.05153922736644745, + "eval_scitail-pairs-pos_runtime": 0.8154, + "eval_scitail-pairs-pos_samples_per_second": 156.98, + "eval_scitail-pairs-pos_steps_per_second": 1.226, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_scitail-pairs-qa_loss": 5.517902536666952e-05, + "eval_scitail-pairs-qa_runtime": 0.512, + "eval_scitail-pairs-qa_samples_per_second": 250.01, + "eval_scitail-pairs-qa_steps_per_second": 1.953, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_xsum-pairs_loss": 0.002099951496347785, + "eval_xsum-pairs_runtime": 2.7116, + "eval_xsum-pairs_samples_per_second": 47.204, + "eval_xsum-pairs_steps_per_second": 0.369, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_sciq_pairs_loss": 0.015899181365966797, + "eval_sciq_pairs_runtime": 3.1093, + "eval_sciq_pairs_samples_per_second": 41.167, + "eval_sciq_pairs_steps_per_second": 0.322, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_qasc_pairs_loss": 0.0450480617582798, + "eval_qasc_pairs_runtime": 0.5653, + "eval_qasc_pairs_samples_per_second": 226.41, + "eval_qasc_pairs_steps_per_second": 1.769, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_openbookqa_pairs_loss": 0.7635648846626282, + "eval_openbookqa_pairs_runtime": 0.6039, + "eval_openbookqa_pairs_samples_per_second": 211.962, + "eval_openbookqa_pairs_steps_per_second": 1.656, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_msmarco_pairs_loss": 0.17312315106391907, + "eval_msmarco_pairs_runtime": 1.2711, + "eval_msmarco_pairs_samples_per_second": 100.702, + "eval_msmarco_pairs_steps_per_second": 0.787, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_nq_pairs_loss": 0.09537816792726517, + "eval_nq_pairs_runtime": 2.7322, + "eval_nq_pairs_samples_per_second": 46.849, + "eval_nq_pairs_steps_per_second": 0.366, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_trivia_pairs_loss": 0.1068655326962471, + "eval_trivia_pairs_runtime": 3.1995, + "eval_trivia_pairs_samples_per_second": 40.007, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_gooaq_pairs_loss": 0.1148056760430336, + "eval_gooaq_pairs_runtime": 0.8742, + "eval_gooaq_pairs_samples_per_second": 146.419, + "eval_gooaq_pairs_steps_per_second": 1.144, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_paws-pos_loss": 0.023134460672736168, + "eval_paws-pos_runtime": 0.682, + "eval_paws-pos_samples_per_second": 187.694, + "eval_paws-pos_steps_per_second": 1.466, + "step": 360 + }, + { + "epoch": 0.36772216547497444, + "eval_global_dataset_loss": 0.1904250830411911, + "eval_global_dataset_runtime": 18.1383, + "eval_global_dataset_samples_per_second": 30.102, + "eval_global_dataset_steps_per_second": 0.276, + "step": 360 + }, + { + "epoch": 0.3687436159346272, + "grad_norm": 5.853100299835205, + "learning_rate": 1.669242658423493e-05, + "loss": 0.1611, + "step": 361 + }, + { + "epoch": 0.36976506639427986, + "grad_norm": 8.181849479675293, + "learning_rate": 1.6738794435857808e-05, + "loss": 0.4088, + "step": 362 + }, + { + "epoch": 0.3707865168539326, + "grad_norm": 5.738925933837891, + "learning_rate": 1.678516228748068e-05, + "loss": 0.1824, + "step": 363 + }, + { + "epoch": 0.3718079673135853, + "grad_norm": 4.485989093780518, + "learning_rate": 1.6831530139103554e-05, + "loss": 0.0945, + "step": 364 + }, + { + "epoch": 0.372829417773238, + "grad_norm": 6.651602745056152, + "learning_rate": 1.687789799072643e-05, + "loss": 0.2917, + "step": 365 + }, + { + "epoch": 0.3738508682328907, + "grad_norm": 4.756803512573242, + "learning_rate": 1.6924265842349307e-05, + "loss": 0.0956, + "step": 366 + }, + { + "epoch": 0.3748723186925434, + "grad_norm": 5.1150593757629395, + "learning_rate": 1.697063369397218e-05, + "loss": 0.2052, + "step": 367 + }, + { + "epoch": 0.37589376915219613, + "grad_norm": 3.5622498989105225, + "learning_rate": 1.7017001545595052e-05, + "loss": 0.0672, + "step": 368 + }, + { + "epoch": 0.3769152196118488, + "grad_norm": 4.850342273712158, + "learning_rate": 1.7063369397217932e-05, + "loss": 0.1006, + "step": 369 + }, + { + "epoch": 0.37793667007150156, + "grad_norm": 4.534098148345947, + "learning_rate": 1.7109737248840805e-05, + "loss": 0.0767, + "step": 370 + }, + { + "epoch": 0.37895812053115424, + "grad_norm": 2.767317056655884, + "learning_rate": 1.7156105100463678e-05, + "loss": 0.0384, + "step": 371 + }, + { + "epoch": 0.3799795709908069, + "grad_norm": 4.798379898071289, + "learning_rate": 1.7202472952086554e-05, + "loss": 0.0769, + "step": 372 + }, + { + "epoch": 0.38100102145045966, + "grad_norm": 4.833694934844971, + "learning_rate": 1.724884080370943e-05, + "loss": 0.0819, + "step": 373 + }, + { + "epoch": 0.38202247191011235, + "grad_norm": 5.652013301849365, + "learning_rate": 1.7295208655332303e-05, + "loss": 0.247, + "step": 374 + }, + { + "epoch": 0.3830439223697651, + "grad_norm": 15.108179092407227, + "learning_rate": 1.7341576506955176e-05, + "loss": 1.3804, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_Qnli-dev_cosine_accuracy": 0.71875, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7465329766273499, + "eval_Qnli-dev_cosine_ap": 0.7439372561449934, + "eval_Qnli-dev_cosine_f1": 0.6990990990990992, + "eval_Qnli-dev_cosine_f1_threshold": 0.6727563142776489, + "eval_Qnli-dev_cosine_precision": 0.6081504702194357, + "eval_Qnli-dev_cosine_recall": 0.8220338983050848, + "eval_Qnli-dev_dot_accuracy": 0.697265625, + "eval_Qnli-dev_dot_accuracy_threshold": 363.0225524902344, + "eval_Qnli-dev_dot_ap": 0.6992161202137417, + "eval_Qnli-dev_dot_f1": 0.6804835924006908, + "eval_Qnli-dev_dot_f1_threshold": 311.5521545410156, + "eval_Qnli-dev_dot_precision": 0.5743440233236151, + "eval_Qnli-dev_dot_recall": 0.8347457627118644, + "eval_Qnli-dev_euclidean_accuracy": 0.7265625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.489303588867188, + "eval_Qnli-dev_euclidean_ap": 0.7514130972069869, + "eval_Qnli-dev_euclidean_f1": 0.700990099009901, + "eval_Qnli-dev_euclidean_f1_threshold": 16.8115234375, + "eval_Qnli-dev_euclidean_precision": 0.6579925650557621, + "eval_Qnli-dev_euclidean_recall": 0.75, + "eval_Qnli-dev_manhattan_accuracy": 0.72265625, + "eval_Qnli-dev_manhattan_accuracy_threshold": 327.23046875, + "eval_Qnli-dev_manhattan_ap": 0.756075770068998, + "eval_Qnli-dev_manhattan_f1": 0.699604743083004, + "eval_Qnli-dev_manhattan_f1_threshold": 356.86669921875, + "eval_Qnli-dev_manhattan_precision": 0.6555555555555556, + "eval_Qnli-dev_manhattan_recall": 0.75, + "eval_Qnli-dev_max_accuracy": 0.7265625, + "eval_Qnli-dev_max_accuracy_threshold": 363.0225524902344, + "eval_Qnli-dev_max_ap": 0.756075770068998, + "eval_Qnli-dev_max_f1": 0.700990099009901, + "eval_Qnli-dev_max_f1_threshold": 356.86669921875, + "eval_Qnli-dev_max_precision": 0.6579925650557621, + "eval_Qnli-dev_max_recall": 0.8347457627118644, + "eval_allNLI-dev_cosine_accuracy": 0.744140625, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8228938579559326, + "eval_allNLI-dev_cosine_ap": 0.6325020556077607, + "eval_allNLI-dev_cosine_f1": 0.6350710900473934, + "eval_allNLI-dev_cosine_f1_threshold": 0.7203170657157898, + "eval_allNLI-dev_cosine_precision": 0.5381526104417671, + "eval_allNLI-dev_cosine_recall": 0.7745664739884393, + "eval_allNLI-dev_dot_accuracy": 0.716796875, + "eval_allNLI-dev_dot_accuracy_threshold": 397.91845703125, + "eval_allNLI-dev_dot_ap": 0.5929864056396388, + "eval_allNLI-dev_dot_f1": 0.6136363636363636, + "eval_allNLI-dev_dot_f1_threshold": 331.270751953125, + "eval_allNLI-dev_dot_precision": 0.5056179775280899, + "eval_allNLI-dev_dot_recall": 0.7803468208092486, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 12.980132102966309, + "eval_allNLI-dev_euclidean_ap": 0.6382576318631519, + "eval_allNLI-dev_euclidean_f1": 0.6510538641686183, + "eval_allNLI-dev_euclidean_f1_threshold": 16.41952133178711, + "eval_allNLI-dev_euclidean_precision": 0.547244094488189, + "eval_allNLI-dev_euclidean_recall": 0.8034682080924855, + "eval_allNLI-dev_manhattan_accuracy": 0.7421875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 275.19342041015625, + "eval_allNLI-dev_manhattan_ap": 0.6353268586724142, + "eval_allNLI-dev_manhattan_f1": 0.6519607843137254, + "eval_allNLI-dev_manhattan_f1_threshold": 337.06695556640625, + "eval_allNLI-dev_manhattan_precision": 0.5659574468085107, + "eval_allNLI-dev_manhattan_recall": 0.7687861271676301, + "eval_allNLI-dev_max_accuracy": 0.74609375, + "eval_allNLI-dev_max_accuracy_threshold": 397.91845703125, + "eval_allNLI-dev_max_ap": 0.6382576318631519, + "eval_allNLI-dev_max_f1": 0.6519607843137254, + "eval_allNLI-dev_max_f1_threshold": 337.06695556640625, + "eval_allNLI-dev_max_precision": 0.5659574468085107, + "eval_allNLI-dev_max_recall": 0.8034682080924855, + "eval_sequential_score": 0.756075770068998, + "eval_sts-test_pearson_cosine": 0.879180487175544, + "eval_sts-test_pearson_dot": 0.8580216051117527, + "eval_sts-test_pearson_euclidean": 0.9082941648035145, + "eval_sts-test_pearson_manhattan": 0.9091102728500898, + "eval_sts-test_pearson_max": 0.9091102728500898, + "eval_sts-test_spearman_cosine": 0.9055812896187777, + "eval_sts-test_spearman_dot": 0.8606510231818126, + "eval_sts-test_spearman_euclidean": 0.9050969467167385, + "eval_sts-test_spearman_manhattan": 0.9064017144938663, + "eval_sts-test_spearman_max": 0.9064017144938663, + "eval_vitaminc-pairs_loss": 1.566162109375, + "eval_vitaminc-pairs_runtime": 3.1695, + "eval_vitaminc-pairs_samples_per_second": 40.385, + "eval_vitaminc-pairs_steps_per_second": 0.316, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_negation-triplets_loss": 0.8644618988037109, + "eval_negation-triplets_runtime": 0.6922, + "eval_negation-triplets_samples_per_second": 184.919, + "eval_negation-triplets_steps_per_second": 1.445, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_scitail-pairs-pos_loss": 0.050737157464027405, + "eval_scitail-pairs-pos_runtime": 0.811, + "eval_scitail-pairs-pos_samples_per_second": 157.837, + "eval_scitail-pairs-pos_steps_per_second": 1.233, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_scitail-pairs-qa_loss": 5.68832692806609e-05, + "eval_scitail-pairs-qa_runtime": 0.5172, + "eval_scitail-pairs-qa_samples_per_second": 247.467, + "eval_scitail-pairs-qa_steps_per_second": 1.933, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_xsum-pairs_loss": 0.0031360751017928123, + "eval_xsum-pairs_runtime": 2.7154, + "eval_xsum-pairs_samples_per_second": 47.138, + "eval_xsum-pairs_steps_per_second": 0.368, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_sciq_pairs_loss": 0.015375628136098385, + "eval_sciq_pairs_runtime": 3.1208, + "eval_sciq_pairs_samples_per_second": 41.015, + "eval_sciq_pairs_steps_per_second": 0.32, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_qasc_pairs_loss": 0.045548874884843826, + "eval_qasc_pairs_runtime": 0.5664, + "eval_qasc_pairs_samples_per_second": 225.989, + "eval_qasc_pairs_steps_per_second": 1.766, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_openbookqa_pairs_loss": 0.6701202392578125, + "eval_openbookqa_pairs_runtime": 0.577, + "eval_openbookqa_pairs_samples_per_second": 221.826, + "eval_openbookqa_pairs_steps_per_second": 1.733, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_msmarco_pairs_loss": 0.16577866673469543, + "eval_msmarco_pairs_runtime": 1.2896, + "eval_msmarco_pairs_samples_per_second": 99.253, + "eval_msmarco_pairs_steps_per_second": 0.775, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_nq_pairs_loss": 0.1191788837313652, + "eval_nq_pairs_runtime": 2.7257, + "eval_nq_pairs_samples_per_second": 46.96, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_trivia_pairs_loss": 0.09977059066295624, + "eval_trivia_pairs_runtime": 3.1932, + "eval_trivia_pairs_samples_per_second": 40.085, + "eval_trivia_pairs_steps_per_second": 0.313, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_gooaq_pairs_loss": 0.11558964848518372, + "eval_gooaq_pairs_runtime": 0.8862, + "eval_gooaq_pairs_samples_per_second": 144.441, + "eval_gooaq_pairs_steps_per_second": 1.128, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_paws-pos_loss": 0.02350064180791378, + "eval_paws-pos_runtime": 0.7068, + "eval_paws-pos_samples_per_second": 181.111, + "eval_paws-pos_steps_per_second": 1.415, + "step": 375 + }, + { + "epoch": 0.3830439223697651, + "eval_global_dataset_loss": 0.18980403244495392, + "eval_global_dataset_runtime": 18.132, + "eval_global_dataset_samples_per_second": 30.113, + "eval_global_dataset_steps_per_second": 0.276, + "step": 375 + }, + { + "epoch": 0.3840653728294178, + "grad_norm": 7.2509846687316895, + "learning_rate": 1.7387944358578053e-05, + "loss": 0.2688, + "step": 376 + }, + { + "epoch": 0.38508682328907046, + "grad_norm": 5.156454563140869, + "learning_rate": 1.743431221020093e-05, + "loss": 0.1713, + "step": 377 + }, + { + "epoch": 0.3861082737487232, + "grad_norm": 5.2640485763549805, + "learning_rate": 1.7480680061823802e-05, + "loss": 0.139, + "step": 378 + }, + { + "epoch": 0.3871297242083759, + "grad_norm": 4.831561088562012, + "learning_rate": 1.7527047913446678e-05, + "loss": 0.0889, + "step": 379 + }, + { + "epoch": 0.3881511746680286, + "grad_norm": 5.145294666290283, + "learning_rate": 1.757341576506955e-05, + "loss": 0.0858, + "step": 380 + }, + { + "epoch": 0.3891726251276813, + "grad_norm": 4.231915473937988, + "learning_rate": 1.7619783616692428e-05, + "loss": 0.0761, + "step": 381 + }, + { + "epoch": 0.390194075587334, + "grad_norm": 3.786268711090088, + "learning_rate": 1.76661514683153e-05, + "loss": 0.0665, + "step": 382 + }, + { + "epoch": 0.39121552604698673, + "grad_norm": 10.587583541870117, + "learning_rate": 1.7712519319938177e-05, + "loss": 0.458, + "step": 383 + }, + { + "epoch": 0.3922369765066394, + "grad_norm": 1.7961347103118896, + "learning_rate": 1.775888717156105e-05, + "loss": 0.0214, + "step": 384 + }, + { + "epoch": 0.39325842696629215, + "grad_norm": 3.9166648387908936, + "learning_rate": 1.7805255023183926e-05, + "loss": 0.0729, + "step": 385 + }, + { + "epoch": 0.39427987742594484, + "grad_norm": 5.538015365600586, + "learning_rate": 1.7851622874806802e-05, + "loss": 0.0986, + "step": 386 + }, + { + "epoch": 0.3953013278855976, + "grad_norm": 0.7619813084602356, + "learning_rate": 1.7897990726429675e-05, + "loss": 0.0448, + "step": 387 + }, + { + "epoch": 0.39632277834525026, + "grad_norm": 6.053344249725342, + "learning_rate": 1.7944358578052548e-05, + "loss": 0.1959, + "step": 388 + }, + { + "epoch": 0.39734422880490294, + "grad_norm": 7.9977192878723145, + "learning_rate": 1.7990726429675428e-05, + "loss": 0.328, + "step": 389 + }, + { + "epoch": 0.3983656792645557, + "grad_norm": 4.943056106567383, + "learning_rate": 1.80370942812983e-05, + "loss": 0.2057, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_Qnli-dev_cosine_accuracy": 0.70703125, + "eval_Qnli-dev_cosine_accuracy_threshold": 0.7331311702728271, + "eval_Qnli-dev_cosine_ap": 0.7292541077050789, + "eval_Qnli-dev_cosine_f1": 0.684, + "eval_Qnli-dev_cosine_f1_threshold": 0.7085533142089844, + "eval_Qnli-dev_cosine_precision": 0.6477272727272727, + "eval_Qnli-dev_cosine_recall": 0.7245762711864406, + "eval_Qnli-dev_dot_accuracy": 0.671875, + "eval_Qnli-dev_dot_accuracy_threshold": 384.9295959472656, + "eval_Qnli-dev_dot_ap": 0.6875512899729347, + "eval_Qnli-dev_dot_f1": 0.6799999999999999, + "eval_Qnli-dev_dot_f1_threshold": 278.6095275878906, + "eval_Qnli-dev_dot_precision": 0.533816425120773, + "eval_Qnli-dev_dot_recall": 0.9364406779661016, + "eval_Qnli-dev_euclidean_accuracy": 0.712890625, + "eval_Qnli-dev_euclidean_accuracy_threshold": 15.836432456970215, + "eval_Qnli-dev_euclidean_ap": 0.7382195625575553, + "eval_Qnli-dev_euclidean_f1": 0.689655172413793, + "eval_Qnli-dev_euclidean_f1_threshold": 18.449493408203125, + "eval_Qnli-dev_euclidean_precision": 0.5813953488372093, + "eval_Qnli-dev_euclidean_recall": 0.847457627118644, + "eval_Qnli-dev_manhattan_accuracy": 0.71875, + "eval_Qnli-dev_manhattan_accuracy_threshold": 342.0755615234375, + "eval_Qnli-dev_manhattan_ap": 0.7427653192203225, + "eval_Qnli-dev_manhattan_f1": 0.6889632107023412, + "eval_Qnli-dev_manhattan_f1_threshold": 398.22186279296875, + "eval_Qnli-dev_manhattan_precision": 0.569060773480663, + "eval_Qnli-dev_manhattan_recall": 0.8728813559322034, + "eval_Qnli-dev_max_accuracy": 0.71875, + "eval_Qnli-dev_max_accuracy_threshold": 384.9295959472656, + "eval_Qnli-dev_max_ap": 0.7427653192203225, + "eval_Qnli-dev_max_f1": 0.689655172413793, + "eval_Qnli-dev_max_f1_threshold": 398.22186279296875, + "eval_Qnli-dev_max_precision": 0.6477272727272727, + "eval_Qnli-dev_max_recall": 0.9364406779661016, + "eval_allNLI-dev_cosine_accuracy": 0.740234375, + "eval_allNLI-dev_cosine_accuracy_threshold": 0.8269187211990356, + "eval_allNLI-dev_cosine_ap": 0.6320714262495556, + "eval_allNLI-dev_cosine_f1": 0.6384976525821597, + "eval_allNLI-dev_cosine_f1_threshold": 0.7145483493804932, + "eval_allNLI-dev_cosine_precision": 0.5375494071146245, + "eval_allNLI-dev_cosine_recall": 0.7861271676300579, + "eval_allNLI-dev_dot_accuracy": 0.708984375, + "eval_allNLI-dev_dot_accuracy_threshold": 393.4770202636719, + "eval_allNLI-dev_dot_ap": 0.5936522366099091, + "eval_allNLI-dev_dot_f1": 0.616822429906542, + "eval_allNLI-dev_dot_f1_threshold": 334.7947998046875, + "eval_allNLI-dev_dot_precision": 0.5176470588235295, + "eval_allNLI-dev_dot_recall": 0.7630057803468208, + "eval_allNLI-dev_euclidean_accuracy": 0.74609375, + "eval_allNLI-dev_euclidean_accuracy_threshold": 13.205503463745117, + "eval_allNLI-dev_euclidean_ap": 0.6369031445369584, + "eval_allNLI-dev_euclidean_f1": 0.6405867970660145, + "eval_allNLI-dev_euclidean_f1_threshold": 16.006072998046875, + "eval_allNLI-dev_euclidean_precision": 0.5550847457627118, + "eval_allNLI-dev_euclidean_recall": 0.7572254335260116, + "eval_allNLI-dev_manhattan_accuracy": 0.748046875, + "eval_allNLI-dev_manhattan_accuracy_threshold": 314.0073547363281, + "eval_allNLI-dev_manhattan_ap": 0.6358952637625356, + "eval_allNLI-dev_manhattan_f1": 0.645631067961165, + "eval_allNLI-dev_manhattan_f1_threshold": 340.57635498046875, + "eval_allNLI-dev_manhattan_precision": 0.5564853556485355, + "eval_allNLI-dev_manhattan_recall": 0.7687861271676301, + "eval_allNLI-dev_max_accuracy": 0.748046875, + "eval_allNLI-dev_max_accuracy_threshold": 393.4770202636719, + "eval_allNLI-dev_max_ap": 0.6369031445369584, + "eval_allNLI-dev_max_f1": 0.645631067961165, + "eval_allNLI-dev_max_f1_threshold": 340.57635498046875, + "eval_allNLI-dev_max_precision": 0.5564853556485355, + "eval_allNLI-dev_max_recall": 0.7861271676300579, + "eval_sequential_score": 0.7427653192203225, + "eval_sts-test_pearson_cosine": 0.8780319252726332, + "eval_sts-test_pearson_dot": 0.8612936487527763, + "eval_sts-test_pearson_euclidean": 0.908229232214534, + "eval_sts-test_pearson_manhattan": 0.9090248955561852, + "eval_sts-test_pearson_max": 0.9090248955561852, + "eval_sts-test_spearman_cosine": 0.9058545408208336, + "eval_sts-test_spearman_dot": 0.8653780989225862, + "eval_sts-test_spearman_euclidean": 0.9051877470261016, + "eval_sts-test_spearman_manhattan": 0.906523214168544, + "eval_sts-test_spearman_max": 0.906523214168544, + "eval_vitaminc-pairs_loss": 1.51134192943573, + "eval_vitaminc-pairs_runtime": 3.2154, + "eval_vitaminc-pairs_samples_per_second": 39.808, + "eval_vitaminc-pairs_steps_per_second": 0.311, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_negation-triplets_loss": 0.8657093644142151, + "eval_negation-triplets_runtime": 0.7021, + "eval_negation-triplets_samples_per_second": 182.322, + "eval_negation-triplets_steps_per_second": 1.424, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_scitail-pairs-pos_loss": 0.052908435463905334, + "eval_scitail-pairs-pos_runtime": 0.8111, + "eval_scitail-pairs-pos_samples_per_second": 157.805, + "eval_scitail-pairs-pos_steps_per_second": 1.233, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_scitail-pairs-qa_loss": 6.008312266203575e-05, + "eval_scitail-pairs-qa_runtime": 0.52, + "eval_scitail-pairs-qa_samples_per_second": 246.163, + "eval_scitail-pairs-qa_steps_per_second": 1.923, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_xsum-pairs_loss": 0.002819963963702321, + "eval_xsum-pairs_runtime": 2.7277, + "eval_xsum-pairs_samples_per_second": 46.926, + "eval_xsum-pairs_steps_per_second": 0.367, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_sciq_pairs_loss": 0.016140233725309372, + "eval_sciq_pairs_runtime": 3.1734, + "eval_sciq_pairs_samples_per_second": 40.335, + "eval_sciq_pairs_steps_per_second": 0.315, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_qasc_pairs_loss": 0.04277563840150833, + "eval_qasc_pairs_runtime": 0.5666, + "eval_qasc_pairs_samples_per_second": 225.908, + "eval_qasc_pairs_steps_per_second": 1.765, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_openbookqa_pairs_loss": 0.7199844121932983, + "eval_openbookqa_pairs_runtime": 0.5886, + "eval_openbookqa_pairs_samples_per_second": 217.458, + "eval_openbookqa_pairs_steps_per_second": 1.699, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_msmarco_pairs_loss": 0.17293693125247955, + "eval_msmarco_pairs_runtime": 1.2807, + "eval_msmarco_pairs_samples_per_second": 99.945, + "eval_msmarco_pairs_steps_per_second": 0.781, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_nq_pairs_loss": 0.09607299417257309, + "eval_nq_pairs_runtime": 2.7275, + "eval_nq_pairs_samples_per_second": 46.929, + "eval_nq_pairs_steps_per_second": 0.367, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_trivia_pairs_loss": 0.08972787111997604, + "eval_trivia_pairs_runtime": 3.2091, + "eval_trivia_pairs_samples_per_second": 39.887, + "eval_trivia_pairs_steps_per_second": 0.312, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_gooaq_pairs_loss": 0.14858603477478027, + "eval_gooaq_pairs_runtime": 0.8785, + "eval_gooaq_pairs_samples_per_second": 145.709, + "eval_gooaq_pairs_steps_per_second": 1.138, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_paws-pos_loss": 0.02280971221625805, + "eval_paws-pos_runtime": 0.6899, + "eval_paws-pos_samples_per_second": 185.527, + "eval_paws-pos_steps_per_second": 1.449, + "step": 390 + }, + { + "epoch": 0.3983656792645557, + "eval_global_dataset_loss": 0.17271192371845245, + "eval_global_dataset_runtime": 18.1498, + "eval_global_dataset_samples_per_second": 30.083, + "eval_global_dataset_steps_per_second": 0.275, + "step": 390 + }, + { + "epoch": 0.39938712972420837, + "grad_norm": 3.0186429023742676, + "learning_rate": 1.8083462132921174e-05, + "loss": 0.1239, + "step": 391 + }, + { + "epoch": 0.4004085801838611, + "grad_norm": 5.241666316986084, + "learning_rate": 1.812982998454405e-05, + "loss": 0.1484, + "step": 392 + } + ], + "logging_steps": 1, + "max_steps": 1958, + "num_input_tokens_seen": 0, + "num_train_epochs": 2, + "save_steps": 196, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 0.0, + "train_batch_size": 128, + "trial_name": null, + "trial_params": null +}