{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.6508848779916296, "global_step": 100000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "accuracy": 12.0117, "active_queue_size": 16384.0, "cl_loss": 205.0697, "doc_norm": 8.422, "encoder_q-embeddings": 32726.8477, "encoder_q-layer.0": 40223.3984, "encoder_q-layer.1": 28549.7891, "encoder_q-layer.10": 80907.9141, "encoder_q-layer.11": 51684.8945, "encoder_q-layer.2": 32712.6719, "encoder_q-layer.3": 35076.6016, "encoder_q-layer.4": 40438.4062, "encoder_q-layer.5": 45207.9922, "encoder_q-layer.6": 58602.7773, "encoder_q-layer.7": 69649.3828, "encoder_q-layer.8": 88143.8828, "encoder_q-layer.9": 69553.7969, "epoch": 0.0, "inbatch_neg_score": 39.2948, "inbatch_pos_score": 47.3125, "learning_rate": 5.000000000000001e-07, "loss": 205.0697, "norm_diff": 0.2642, "norm_loss": 0.0, "num_token_doc": 66.773, "num_token_overlap": 11.6306, "num_token_query": 31.8418, "num_token_union": 65.3721, "num_word_context": 202.5435, "num_word_doc": 49.8487, "num_word_query": 23.5307, "postclip_grad_norm": 1.0, "preclip_grad_norm": 75397.5792, "preclip_grad_norm_avg": 0.0007, "q@queue_neg_score": 39.25, "query_norm": 8.1577, "queue_k_norm": 8.4205, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8418, "sent_len_1": 66.773, "sent_len_max_0": 127.4613, "sent_len_max_1": 190.3137, "stdk": 0.1799, "stdq": 0.1962, "stdqueue_k": 0.1803, "stdqueue_q": 0.0, "step": 100 }, { "accuracy": 12.207, "active_queue_size": 16384.0, "cl_loss": 131.6069, "doc_norm": 8.374, "encoder_q-embeddings": 9078.3477, "encoder_q-layer.0": 9086.2949, "encoder_q-layer.1": 11035.335, "encoder_q-layer.10": 23186.6641, "encoder_q-layer.11": 24105.5039, "encoder_q-layer.2": 12861.1494, "encoder_q-layer.3": 12292.7637, "encoder_q-layer.4": 12463.209, "encoder_q-layer.5": 13197.3398, "encoder_q-layer.6": 14923.377, "encoder_q-layer.7": 16361.3721, "encoder_q-layer.8": 20527.9648, "encoder_q-layer.9": 15739.5547, "epoch": 0.0, "inbatch_neg_score": 36.176, "inbatch_pos_score": 40.7188, "learning_rate": 1.0000000000000002e-06, "loss": 131.6069, "norm_diff": 1.0328, "norm_loss": 0.0, "num_token_doc": 66.801, "num_token_overlap": 11.7371, "num_token_query": 32.1278, "num_token_union": 65.4604, "num_word_context": 202.4328, "num_word_doc": 49.8574, "num_word_query": 23.7377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21716.8208, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 36.25, "query_norm": 7.3411, "queue_k_norm": 8.3505, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.1278, "sent_len_1": 66.801, "sent_len_max_0": 127.5025, "sent_len_max_1": 189.47, "stdk": 0.1784, "stdq": 0.1464, "stdqueue_k": 0.1777, "stdqueue_q": 0.0, "step": 200 }, { "accuracy": 8.9844, "active_queue_size": 16384.0, "cl_loss": 78.861, "doc_norm": 8.2104, "encoder_q-embeddings": 2448.1536, "encoder_q-layer.0": 2022.5718, "encoder_q-layer.1": 2714.1208, "encoder_q-layer.10": 5996.5918, "encoder_q-layer.11": 9828.8867, "encoder_q-layer.2": 3167.5586, "encoder_q-layer.3": 2961.6101, "encoder_q-layer.4": 3018.9746, "encoder_q-layer.5": 3138.9519, "encoder_q-layer.6": 3530.0549, "encoder_q-layer.7": 3550.6536, "encoder_q-layer.8": 4165.0303, "encoder_q-layer.9": 3345.332, "epoch": 0.0, "inbatch_neg_score": 34.9354, "inbatch_pos_score": 37.6562, "learning_rate": 1.5e-06, "loss": 78.861, "norm_diff": 1.1885, "norm_loss": 0.0, "num_token_doc": 66.9421, "num_token_overlap": 11.6964, "num_token_query": 31.9354, "num_token_union": 65.482, "num_word_context": 202.4479, "num_word_doc": 49.9525, "num_word_query": 23.5911, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6527.3727, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 34.8438, "query_norm": 7.0219, "queue_k_norm": 8.227, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9354, "sent_len_1": 66.9421, "sent_len_max_0": 127.5875, "sent_len_max_1": 188.77, "stdk": 0.1718, "stdq": 0.1128, "stdqueue_k": 0.1734, "stdqueue_q": 0.0, "step": 300 }, { "accuracy": 10.3516, "active_queue_size": 16384.0, "cl_loss": 55.6594, "doc_norm": 8.0761, "encoder_q-embeddings": 1532.4146, "encoder_q-layer.0": 1390.2769, "encoder_q-layer.1": 1447.5186, "encoder_q-layer.10": 3873.7288, "encoder_q-layer.11": 7197.2324, "encoder_q-layer.2": 1566.2478, "encoder_q-layer.3": 1607.1282, "encoder_q-layer.4": 1721.8683, "encoder_q-layer.5": 1840.074, "encoder_q-layer.6": 2164.6484, "encoder_q-layer.7": 2383.1626, "encoder_q-layer.8": 2922.6218, "encoder_q-layer.9": 2568.0862, "epoch": 0.0, "inbatch_neg_score": 32.8159, "inbatch_pos_score": 34.9062, "learning_rate": 2.0000000000000003e-06, "loss": 55.6594, "norm_diff": 1.0616, "norm_loss": 0.0, "num_token_doc": 66.6964, "num_token_overlap": 11.6495, "num_token_query": 31.8158, "num_token_union": 65.3259, "num_word_context": 202.328, "num_word_doc": 49.7822, "num_word_query": 23.4864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4637.7214, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 32.875, "query_norm": 7.0146, "queue_k_norm": 8.0694, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8158, "sent_len_1": 66.6964, "sent_len_max_0": 127.3462, "sent_len_max_1": 189.0613, "stdk": 0.1687, "stdq": 0.1016, "stdqueue_k": 0.1675, "stdqueue_q": 0.0, "step": 400 }, { "accuracy": 10.4492, "active_queue_size": 16384.0, "cl_loss": 41.8603, "doc_norm": 7.9044, "encoder_q-embeddings": 1212.3054, "encoder_q-layer.0": 1023.8399, "encoder_q-layer.1": 1210.9276, "encoder_q-layer.10": 3270.1272, "encoder_q-layer.11": 5550.2852, "encoder_q-layer.2": 1398.775, "encoder_q-layer.3": 1541.1379, "encoder_q-layer.4": 1709.3184, "encoder_q-layer.5": 1806.4252, "encoder_q-layer.6": 1992.3741, "encoder_q-layer.7": 2118.281, "encoder_q-layer.8": 2423.53, "encoder_q-layer.9": 2079.8342, "epoch": 0.0, "inbatch_neg_score": 30.6354, "inbatch_pos_score": 32.1875, "learning_rate": 2.5e-06, "loss": 41.8603, "norm_diff": 0.8284, "norm_loss": 0.0, "num_token_doc": 66.8076, "num_token_overlap": 11.6734, "num_token_query": 31.9743, "num_token_union": 65.3668, "num_word_context": 202.3013, "num_word_doc": 49.7952, "num_word_query": 23.6265, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3629.8225, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 30.6562, "query_norm": 7.076, "queue_k_norm": 7.9079, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9743, "sent_len_1": 66.8076, "sent_len_max_0": 127.5, "sent_len_max_1": 191.4462, "stdk": 0.1621, "stdq": 0.0955, "stdqueue_k": 0.1616, "stdqueue_q": 0.0, "step": 500 }, { "accuracy": 11.2305, "active_queue_size": 16384.0, "cl_loss": 33.8801, "doc_norm": 7.7257, "encoder_q-embeddings": 1906.0612, "encoder_q-layer.0": 1555.0142, "encoder_q-layer.1": 1877.787, "encoder_q-layer.10": 3418.6069, "encoder_q-layer.11": 4885.6426, "encoder_q-layer.2": 2178.5771, "encoder_q-layer.3": 2304.4421, "encoder_q-layer.4": 2440.3364, "encoder_q-layer.5": 2522.4031, "encoder_q-layer.6": 2531.6848, "encoder_q-layer.7": 2565.0383, "encoder_q-layer.8": 2746.0847, "encoder_q-layer.9": 2126.5571, "epoch": 0.0, "inbatch_neg_score": 28.0479, "inbatch_pos_score": 29.375, "learning_rate": 3e-06, "loss": 33.8801, "norm_diff": 0.9008, "norm_loss": 0.0, "num_token_doc": 66.7036, "num_token_overlap": 11.6464, "num_token_query": 31.8594, "num_token_union": 65.2977, "num_word_context": 202.2286, "num_word_doc": 49.764, "num_word_query": 23.5271, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3844.2112, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 28.0, "query_norm": 6.8248, "queue_k_norm": 7.7479, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8594, "sent_len_1": 66.7036, "sent_len_max_0": 127.5025, "sent_len_max_1": 189.915, "stdk": 0.1553, "stdq": 0.0905, "stdqueue_k": 0.1565, "stdqueue_q": 0.0, "step": 600 }, { "accuracy": 10.9375, "active_queue_size": 16384.0, "cl_loss": 28.7679, "doc_norm": 7.5791, "encoder_q-embeddings": 2049.249, "encoder_q-layer.0": 1964.1655, "encoder_q-layer.1": 2105.3442, "encoder_q-layer.10": 4020.9651, "encoder_q-layer.11": 5687.3003, "encoder_q-layer.2": 2369.9429, "encoder_q-layer.3": 2458.1599, "encoder_q-layer.4": 2945.5759, "encoder_q-layer.5": 2798.1582, "encoder_q-layer.6": 2648.3796, "encoder_q-layer.7": 2638.9253, "encoder_q-layer.8": 3038.8689, "encoder_q-layer.9": 2146.6919, "epoch": 0.0, "inbatch_neg_score": 24.516, "inbatch_pos_score": 25.5312, "learning_rate": 3.5000000000000004e-06, "loss": 28.7679, "norm_diff": 1.4729, "norm_loss": 0.0, "num_token_doc": 66.6576, "num_token_overlap": 11.6798, "num_token_query": 31.8259, "num_token_union": 65.1765, "num_word_context": 201.9315, "num_word_doc": 49.7089, "num_word_query": 23.4906, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4281.0756, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 24.5469, "query_norm": 6.1062, "queue_k_norm": 7.576, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8259, "sent_len_1": 66.6576, "sent_len_max_0": 127.5475, "sent_len_max_1": 188.5462, "stdk": 0.1513, "stdq": 0.0815, "stdqueue_k": 0.1496, "stdqueue_q": 0.0, "step": 700 }, { "accuracy": 10.7422, "active_queue_size": 16384.0, "cl_loss": 23.9759, "doc_norm": 7.4196, "encoder_q-embeddings": 2764.8113, "encoder_q-layer.0": 2222.5698, "encoder_q-layer.1": 2679.7373, "encoder_q-layer.10": 8853.8398, "encoder_q-layer.11": 11248.6201, "encoder_q-layer.2": 2966.478, "encoder_q-layer.3": 3447.3206, "encoder_q-layer.4": 4450.8931, "encoder_q-layer.5": 4873.2432, "encoder_q-layer.6": 5059.7808, "encoder_q-layer.7": 5155.002, "encoder_q-layer.8": 5852.4077, "encoder_q-layer.9": 4735.1709, "epoch": 0.01, "inbatch_neg_score": 18.5073, "inbatch_pos_score": 19.3594, "learning_rate": 4.000000000000001e-06, "loss": 23.9759, "norm_diff": 2.3072, "norm_loss": 0.0, "num_token_doc": 66.7958, "num_token_overlap": 11.6483, "num_token_query": 31.7086, "num_token_union": 65.2354, "num_word_context": 202.1712, "num_word_doc": 49.8703, "num_word_query": 23.4194, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7772.4085, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 18.4844, "query_norm": 5.1124, "queue_k_norm": 7.4222, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7086, "sent_len_1": 66.7958, "sent_len_max_0": 127.6513, "sent_len_max_1": 189.4638, "stdk": 0.1437, "stdq": 0.078, "stdqueue_k": 0.144, "stdqueue_q": 0.0, "step": 800 }, { "accuracy": 11.6211, "active_queue_size": 16384.0, "cl_loss": 20.148, "doc_norm": 7.2772, "encoder_q-embeddings": 2250.7339, "encoder_q-layer.0": 1886.5756, "encoder_q-layer.1": 2103.5637, "encoder_q-layer.10": 2648.3054, "encoder_q-layer.11": 4315.6558, "encoder_q-layer.2": 2374.2754, "encoder_q-layer.3": 2620.4998, "encoder_q-layer.4": 3118.459, "encoder_q-layer.5": 2891.5098, "encoder_q-layer.6": 2471.8198, "encoder_q-layer.7": 2293.0872, "encoder_q-layer.8": 2097.1394, "encoder_q-layer.9": 1332.3228, "epoch": 0.01, "inbatch_neg_score": 13.5894, "inbatch_pos_score": 14.2969, "learning_rate": 4.5e-06, "loss": 20.148, "norm_diff": 3.1803, "norm_loss": 0.0, "num_token_doc": 66.8331, "num_token_overlap": 11.6969, "num_token_query": 31.9122, "num_token_union": 65.375, "num_word_context": 202.0843, "num_word_doc": 49.8517, "num_word_query": 23.5594, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3768.8636, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 13.5781, "query_norm": 4.0969, "queue_k_norm": 7.2797, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9122, "sent_len_1": 66.8331, "sent_len_max_0": 127.4188, "sent_len_max_1": 187.0863, "stdk": 0.1385, "stdq": 0.0738, "stdqueue_k": 0.1385, "stdqueue_q": 0.0, "step": 900 }, { "accuracy": 12.207, "active_queue_size": 16384.0, "cl_loss": 17.2954, "doc_norm": 7.1454, "encoder_q-embeddings": 3000.6206, "encoder_q-layer.0": 2558.4192, "encoder_q-layer.1": 2920.2239, "encoder_q-layer.10": 2002.5332, "encoder_q-layer.11": 3424.9978, "encoder_q-layer.2": 3377.2437, "encoder_q-layer.3": 3738.7539, "encoder_q-layer.4": 3832.6304, "encoder_q-layer.5": 3946.4312, "encoder_q-layer.6": 3194.7161, "encoder_q-layer.7": 2224.7952, "encoder_q-layer.8": 1823.5233, "encoder_q-layer.9": 1056.5105, "epoch": 0.01, "inbatch_neg_score": 10.2139, "inbatch_pos_score": 10.8594, "learning_rate": 5e-06, "loss": 17.2954, "norm_diff": 3.7628, "norm_loss": 0.0, "num_token_doc": 66.8999, "num_token_overlap": 11.6663, "num_token_query": 32.0023, "num_token_union": 65.4752, "num_word_context": 202.5884, "num_word_doc": 49.9467, "num_word_query": 23.6124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4359.6729, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 10.1875, "query_norm": 3.3826, "queue_k_norm": 7.1449, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0023, "sent_len_1": 66.8999, "sent_len_max_0": 127.5738, "sent_len_max_1": 189.4162, "stdk": 0.1321, "stdq": 0.0718, "stdqueue_k": 0.1331, "stdqueue_q": 0.0, "step": 1000 }, { "accuracy": 11.5234, "active_queue_size": 16384.0, "cl_loss": 15.4846, "doc_norm": 7.0186, "encoder_q-embeddings": 3191.533, "encoder_q-layer.0": 2900.9983, "encoder_q-layer.1": 3295.1294, "encoder_q-layer.10": 3592.5652, "encoder_q-layer.11": 4417.6367, "encoder_q-layer.2": 3741.5896, "encoder_q-layer.3": 3866.3835, "encoder_q-layer.4": 4491.3999, "encoder_q-layer.5": 4900.7183, "encoder_q-layer.6": 3952.2542, "encoder_q-layer.7": 3115.5176, "encoder_q-layer.8": 2609.1555, "encoder_q-layer.9": 1162.8529, "epoch": 0.01, "inbatch_neg_score": 7.6243, "inbatch_pos_score": 8.1406, "learning_rate": 5.500000000000001e-06, "loss": 15.4846, "norm_diff": 4.146, "norm_loss": 0.0, "num_token_doc": 66.8372, "num_token_overlap": 11.6853, "num_token_query": 31.921, "num_token_union": 65.4078, "num_word_context": 202.7004, "num_word_doc": 49.9017, "num_word_query": 23.5796, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5194.0736, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 7.6133, "query_norm": 2.8726, "queue_k_norm": 7.0202, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.921, "sent_len_1": 66.8372, "sent_len_max_0": 127.5438, "sent_len_max_1": 188.575, "stdk": 0.1269, "stdq": 0.0699, "stdqueue_k": 0.1269, "stdqueue_q": 0.0, "step": 1100 }, { "accuracy": 12.0117, "active_queue_size": 16384.0, "cl_loss": 14.3443, "doc_norm": 6.9127, "encoder_q-embeddings": 2706.7764, "encoder_q-layer.0": 2341.7568, "encoder_q-layer.1": 2755.8118, "encoder_q-layer.10": 1912.699, "encoder_q-layer.11": 3380.3818, "encoder_q-layer.2": 3091.5164, "encoder_q-layer.3": 3234.3711, "encoder_q-layer.4": 3776.5867, "encoder_q-layer.5": 3916.1365, "encoder_q-layer.6": 3015.9722, "encoder_q-layer.7": 2336.7007, "encoder_q-layer.8": 2513.2812, "encoder_q-layer.9": 948.3696, "epoch": 0.01, "inbatch_neg_score": 5.1113, "inbatch_pos_score": 5.5898, "learning_rate": 6e-06, "loss": 14.3443, "norm_diff": 4.4836, "norm_loss": 0.0, "num_token_doc": 66.6442, "num_token_overlap": 11.6251, "num_token_query": 31.7986, "num_token_union": 65.2669, "num_word_context": 202.2885, "num_word_doc": 49.7614, "num_word_query": 23.4808, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4157.3942, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 5.1055, "query_norm": 2.4291, "queue_k_norm": 6.9128, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7986, "sent_len_1": 66.6442, "sent_len_max_0": 127.51, "sent_len_max_1": 188.555, "stdk": 0.1216, "stdq": 0.0631, "stdqueue_k": 0.1222, "stdqueue_q": 0.0, "step": 1200 }, { "accuracy": 11.3281, "active_queue_size": 16384.0, "cl_loss": 13.3173, "doc_norm": 6.7983, "encoder_q-embeddings": 4281.1055, "encoder_q-layer.0": 4142.6606, "encoder_q-layer.1": 4800.7534, "encoder_q-layer.10": 2663.626, "encoder_q-layer.11": 4462.0459, "encoder_q-layer.2": 5308.52, "encoder_q-layer.3": 5648.6582, "encoder_q-layer.4": 6693.3765, "encoder_q-layer.5": 7692.853, "encoder_q-layer.6": 6780.1865, "encoder_q-layer.7": 6298.377, "encoder_q-layer.8": 6904.4434, "encoder_q-layer.9": 1738.8402, "epoch": 0.01, "inbatch_neg_score": 2.0754, "inbatch_pos_score": 2.541, "learning_rate": 6.5000000000000004e-06, "loss": 13.3173, "norm_diff": 4.5295, "norm_loss": 0.0, "num_token_doc": 66.7136, "num_token_overlap": 11.7089, "num_token_query": 32.1508, "num_token_union": 65.4368, "num_word_context": 202.1953, "num_word_doc": 49.7725, "num_word_query": 23.7364, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7864.4856, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.0801, "query_norm": 2.2688, "queue_k_norm": 6.8072, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.1508, "sent_len_1": 66.7136, "sent_len_max_0": 127.6488, "sent_len_max_1": 188.855, "stdk": 0.1167, "stdq": 0.0626, "stdqueue_k": 0.1168, "stdqueue_q": 0.0, "step": 1300 }, { "accuracy": 11.6211, "active_queue_size": 16384.0, "cl_loss": 12.8375, "doc_norm": 6.695, "encoder_q-embeddings": 4003.7622, "encoder_q-layer.0": 3465.3708, "encoder_q-layer.1": 3929.7664, "encoder_q-layer.10": 2140.6865, "encoder_q-layer.11": 3767.894, "encoder_q-layer.2": 4856.5249, "encoder_q-layer.3": 4914.9614, "encoder_q-layer.4": 5284.103, "encoder_q-layer.5": 5271.1592, "encoder_q-layer.6": 4332.5811, "encoder_q-layer.7": 2659.8752, "encoder_q-layer.8": 1918.4426, "encoder_q-layer.9": 919.2321, "epoch": 0.01, "inbatch_neg_score": 2.1592, "inbatch_pos_score": 2.584, "learning_rate": 7.000000000000001e-06, "loss": 12.8375, "norm_diff": 4.5278, "norm_loss": 0.0, "num_token_doc": 66.819, "num_token_overlap": 11.6887, "num_token_query": 32.021, "num_token_union": 65.4087, "num_word_context": 202.4863, "num_word_doc": 49.8686, "num_word_query": 23.6496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5747.0324, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 2.1699, "query_norm": 2.1673, "queue_k_norm": 6.7131, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.021, "sent_len_1": 66.819, "sent_len_max_0": 127.5837, "sent_len_max_1": 188.5037, "stdk": 0.1116, "stdq": 0.0602, "stdqueue_k": 0.112, "stdqueue_q": 0.0, "step": 1400 }, { "accuracy": 13.4766, "active_queue_size": 16384.0, "cl_loss": 12.2273, "doc_norm": 6.6061, "encoder_q-embeddings": 3089.9255, "encoder_q-layer.0": 2707.6409, "encoder_q-layer.1": 3047.5801, "encoder_q-layer.10": 1652.4001, "encoder_q-layer.11": 3165.0203, "encoder_q-layer.2": 3260.6045, "encoder_q-layer.3": 3414.915, "encoder_q-layer.4": 3797.6941, "encoder_q-layer.5": 4354.7671, "encoder_q-layer.6": 3167.4092, "encoder_q-layer.7": 2646.4419, "encoder_q-layer.8": 2330.4736, "encoder_q-layer.9": 964.3807, "epoch": 0.01, "inbatch_neg_score": 4.1487, "inbatch_pos_score": 4.6094, "learning_rate": 7.5e-06, "loss": 12.2273, "norm_diff": 4.3657, "norm_loss": 0.0, "num_token_doc": 66.5709, "num_token_overlap": 11.6298, "num_token_query": 31.7366, "num_token_union": 65.1618, "num_word_context": 202.0384, "num_word_doc": 49.6914, "num_word_query": 23.44, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4330.7032, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 4.1445, "query_norm": 2.2404, "queue_k_norm": 6.6186, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7366, "sent_len_1": 66.5709, "sent_len_max_0": 127.4375, "sent_len_max_1": 188.89, "stdk": 0.1071, "stdq": 0.0591, "stdqueue_k": 0.1071, "stdqueue_q": 0.0, "step": 1500 }, { "accuracy": 13.7695, "active_queue_size": 16384.0, "cl_loss": 12.0696, "doc_norm": 6.5298, "encoder_q-embeddings": 2940.9326, "encoder_q-layer.0": 2523.8179, "encoder_q-layer.1": 2909.3875, "encoder_q-layer.10": 1154.337, "encoder_q-layer.11": 2375.0593, "encoder_q-layer.2": 3109.9268, "encoder_q-layer.3": 2983.5522, "encoder_q-layer.4": 3104.1543, "encoder_q-layer.5": 3091.7903, "encoder_q-layer.6": 2837.2356, "encoder_q-layer.7": 2350.1047, "encoder_q-layer.8": 1842.6564, "encoder_q-layer.9": 636.3265, "epoch": 0.01, "inbatch_neg_score": 2.2136, "inbatch_pos_score": 2.6406, "learning_rate": 8.000000000000001e-06, "loss": 12.0696, "norm_diff": 4.3814, "norm_loss": 0.0, "num_token_doc": 66.9562, "num_token_overlap": 11.6988, "num_token_query": 31.9857, "num_token_union": 65.5141, "num_word_context": 202.792, "num_word_doc": 49.9373, "num_word_query": 23.6242, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3831.8849, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 2.2031, "query_norm": 2.1485, "queue_k_norm": 6.5293, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9857, "sent_len_1": 66.9562, "sent_len_max_0": 127.525, "sent_len_max_1": 191.4062, "stdk": 0.1015, "stdq": 0.0594, "stdqueue_k": 0.1024, "stdqueue_q": 0.0, "step": 1600 }, { "accuracy": 13.2812, "active_queue_size": 16384.0, "cl_loss": 11.6384, "doc_norm": 6.4366, "encoder_q-embeddings": 2667.8315, "encoder_q-layer.0": 2197.7961, "encoder_q-layer.1": 2728.5061, "encoder_q-layer.10": 2642.8057, "encoder_q-layer.11": 3715.2808, "encoder_q-layer.2": 3192.6943, "encoder_q-layer.3": 3327.7866, "encoder_q-layer.4": 3562.2993, "encoder_q-layer.5": 3675.634, "encoder_q-layer.6": 3101.4207, "encoder_q-layer.7": 2605.9878, "encoder_q-layer.8": 2778.7031, "encoder_q-layer.9": 1505.2811, "epoch": 0.01, "inbatch_neg_score": 2.377, "inbatch_pos_score": 2.8086, "learning_rate": 8.500000000000002e-06, "loss": 11.6384, "norm_diff": 4.2652, "norm_loss": 0.0, "num_token_doc": 66.871, "num_token_overlap": 11.6798, "num_token_query": 31.8838, "num_token_union": 65.4486, "num_word_context": 202.3565, "num_word_doc": 49.9452, "num_word_query": 23.5499, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4211.5743, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 2.3574, "query_norm": 2.1714, "queue_k_norm": 6.4375, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8838, "sent_len_1": 66.871, "sent_len_max_0": 127.4087, "sent_len_max_1": 186.9125, "stdk": 0.0966, "stdq": 0.0602, "stdqueue_k": 0.0981, "stdqueue_q": 0.0, "step": 1700 }, { "accuracy": 13.8672, "active_queue_size": 16384.0, "cl_loss": 11.8247, "doc_norm": 6.3272, "encoder_q-embeddings": 2545.3838, "encoder_q-layer.0": 2184.7878, "encoder_q-layer.1": 2512.4392, "encoder_q-layer.10": 1392.4027, "encoder_q-layer.11": 2955.0142, "encoder_q-layer.2": 2797.0808, "encoder_q-layer.3": 2760.3914, "encoder_q-layer.4": 2937.3882, "encoder_q-layer.5": 2871.5129, "encoder_q-layer.6": 2387.8813, "encoder_q-layer.7": 1836.6306, "encoder_q-layer.8": 1982.6448, "encoder_q-layer.9": 839.6464, "epoch": 0.01, "inbatch_neg_score": 2.6284, "inbatch_pos_score": 3.0352, "learning_rate": 9e-06, "loss": 11.8247, "norm_diff": 4.0718, "norm_loss": 0.0, "num_token_doc": 66.6889, "num_token_overlap": 11.6478, "num_token_query": 31.9018, "num_token_union": 65.293, "num_word_context": 202.2621, "num_word_doc": 49.7647, "num_word_query": 23.5548, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3519.6715, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 2.6348, "query_norm": 2.2554, "queue_k_norm": 6.3385, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9018, "sent_len_1": 66.6889, "sent_len_max_0": 127.48, "sent_len_max_1": 189.6738, "stdk": 0.0943, "stdq": 0.0614, "stdqueue_k": 0.0936, "stdqueue_q": 0.0, "step": 1800 }, { "accuracy": 12.4023, "active_queue_size": 16384.0, "cl_loss": 11.511, "doc_norm": 6.2118, "encoder_q-embeddings": 2231.6389, "encoder_q-layer.0": 1749.906, "encoder_q-layer.1": 2224.7534, "encoder_q-layer.10": 4037.416, "encoder_q-layer.11": 4693.0034, "encoder_q-layer.2": 2873.5774, "encoder_q-layer.3": 3272.7039, "encoder_q-layer.4": 4112.8618, "encoder_q-layer.5": 4966.416, "encoder_q-layer.6": 5064.5938, "encoder_q-layer.7": 4711.9985, "encoder_q-layer.8": 4925.3457, "encoder_q-layer.9": 2720.1602, "epoch": 0.01, "inbatch_neg_score": 1.4159, "inbatch_pos_score": 1.8203, "learning_rate": 9.5e-06, "loss": 11.511, "norm_diff": 3.9388, "norm_loss": 0.0, "num_token_doc": 66.9125, "num_token_overlap": 11.6522, "num_token_query": 31.8723, "num_token_union": 65.4093, "num_word_context": 202.2678, "num_word_doc": 49.9063, "num_word_query": 23.5317, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5290.7557, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.4043, "query_norm": 2.273, "queue_k_norm": 6.2248, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8723, "sent_len_1": 66.9125, "sent_len_max_0": 127.425, "sent_len_max_1": 191.0712, "stdk": 0.0889, "stdq": 0.0641, "stdqueue_k": 0.0898, "stdqueue_q": 0.0, "step": 1900 }, { "accuracy": 12.4023, "active_queue_size": 16384.0, "cl_loss": 11.2776, "doc_norm": 6.0612, "encoder_q-embeddings": 2668.2446, "encoder_q-layer.0": 2166.5667, "encoder_q-layer.1": 2330.5068, "encoder_q-layer.10": 1143.2104, "encoder_q-layer.11": 2160.1265, "encoder_q-layer.2": 2732.0835, "encoder_q-layer.3": 2545.4277, "encoder_q-layer.4": 2556.6467, "encoder_q-layer.5": 2187.2676, "encoder_q-layer.6": 1733.889, "encoder_q-layer.7": 1332.7659, "encoder_q-layer.8": 1276.1434, "encoder_q-layer.9": 966.5012, "epoch": 0.01, "inbatch_neg_score": 1.7881, "inbatch_pos_score": 2.1934, "learning_rate": 1e-05, "loss": 11.2776, "norm_diff": 3.6255, "norm_loss": 0.0, "num_token_doc": 66.6308, "num_token_overlap": 11.6831, "num_token_query": 31.8859, "num_token_union": 65.23, "num_word_context": 201.7009, "num_word_doc": 49.6829, "num_word_query": 23.5503, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3122.1981, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.7783, "query_norm": 2.4358, "queue_k_norm": 6.0914, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8859, "sent_len_1": 66.6308, "sent_len_max_0": 127.5525, "sent_len_max_1": 189.1188, "stdk": 0.0857, "stdq": 0.0691, "stdqueue_k": 0.086, "stdqueue_q": 0.0, "step": 2000 }, { "accuracy": 13.5742, "active_queue_size": 16384.0, "cl_loss": 10.881, "doc_norm": 5.9271, "encoder_q-embeddings": 2778.9009, "encoder_q-layer.0": 2447.8633, "encoder_q-layer.1": 2780.6116, "encoder_q-layer.10": 3298.4661, "encoder_q-layer.11": 3784.6028, "encoder_q-layer.2": 3248.2483, "encoder_q-layer.3": 3294.042, "encoder_q-layer.4": 3679.0737, "encoder_q-layer.5": 3810.249, "encoder_q-layer.6": 3985.9797, "encoder_q-layer.7": 4031.9709, "encoder_q-layer.8": 3974.0095, "encoder_q-layer.9": 2628.3416, "epoch": 0.01, "inbatch_neg_score": 1.0249, "inbatch_pos_score": 1.4199, "learning_rate": 1.05e-05, "loss": 10.881, "norm_diff": 3.614, "norm_loss": 0.0, "num_token_doc": 66.8103, "num_token_overlap": 11.6058, "num_token_query": 31.752, "num_token_union": 65.3227, "num_word_context": 202.3693, "num_word_doc": 49.7959, "num_word_query": 23.4434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4724.9562, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.0137, "query_norm": 2.3131, "queue_k_norm": 5.9324, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.752, "sent_len_1": 66.8103, "sent_len_max_0": 127.3925, "sent_len_max_1": 190.3913, "stdk": 0.082, "stdq": 0.0669, "stdqueue_k": 0.0826, "stdqueue_q": 0.0, "step": 2100 }, { "accuracy": 14.2578, "active_queue_size": 16384.0, "cl_loss": 10.3784, "doc_norm": 5.7336, "encoder_q-embeddings": 1116.2257, "encoder_q-layer.0": 941.7418, "encoder_q-layer.1": 1079.0164, "encoder_q-layer.10": 4769.7109, "encoder_q-layer.11": 4696.5493, "encoder_q-layer.2": 1286.1998, "encoder_q-layer.3": 1371.5051, "encoder_q-layer.4": 1587.1577, "encoder_q-layer.5": 1700.6489, "encoder_q-layer.6": 2093.9673, "encoder_q-layer.7": 2272.002, "encoder_q-layer.8": 2916.5376, "encoder_q-layer.9": 3210.4067, "epoch": 0.01, "inbatch_neg_score": 1.2619, "inbatch_pos_score": 1.6719, "learning_rate": 1.1000000000000001e-05, "loss": 10.3784, "norm_diff": 3.4152, "norm_loss": 0.0, "num_token_doc": 66.8375, "num_token_overlap": 11.6462, "num_token_query": 31.8711, "num_token_union": 65.3982, "num_word_context": 202.3853, "num_word_doc": 49.9107, "num_word_query": 23.545, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3366.148, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 1.2422, "query_norm": 2.3185, "queue_k_norm": 5.7462, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8711, "sent_len_1": 66.8375, "sent_len_max_0": 127.6575, "sent_len_max_1": 189.0263, "stdk": 0.0779, "stdq": 0.067, "stdqueue_k": 0.0794, "stdqueue_q": 0.0, "step": 2200 }, { "accuracy": 14.6484, "active_queue_size": 16384.0, "cl_loss": 9.8475, "doc_norm": 5.5223, "encoder_q-embeddings": 3572.3196, "encoder_q-layer.0": 3286.3499, "encoder_q-layer.1": 3653.0684, "encoder_q-layer.10": 2681.231, "encoder_q-layer.11": 3646.3804, "encoder_q-layer.2": 4024.3079, "encoder_q-layer.3": 4030.6284, "encoder_q-layer.4": 4322.6396, "encoder_q-layer.5": 4777.1504, "encoder_q-layer.6": 5513.4116, "encoder_q-layer.7": 5534.8511, "encoder_q-layer.8": 5272.1265, "encoder_q-layer.9": 3682.075, "epoch": 0.01, "inbatch_neg_score": 0.5759, "inbatch_pos_score": 0.9648, "learning_rate": 1.1500000000000002e-05, "loss": 9.8475, "norm_diff": 3.2347, "norm_loss": 0.0, "num_token_doc": 66.7445, "num_token_overlap": 11.6484, "num_token_query": 31.7655, "num_token_union": 65.2776, "num_word_context": 202.145, "num_word_doc": 49.8318, "num_word_query": 23.45, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5983.7602, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5669, "query_norm": 2.2876, "queue_k_norm": 5.5352, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7655, "sent_len_1": 66.7445, "sent_len_max_0": 127.6262, "sent_len_max_1": 189.3462, "stdk": 0.0756, "stdq": 0.0662, "stdqueue_k": 0.0762, "stdqueue_q": 0.0, "step": 2300 }, { "accuracy": 14.5508, "active_queue_size": 16384.0, "cl_loss": 9.3347, "doc_norm": 5.2707, "encoder_q-embeddings": 1442.2197, "encoder_q-layer.0": 1236.5505, "encoder_q-layer.1": 1307.1864, "encoder_q-layer.10": 1685.6726, "encoder_q-layer.11": 2970.3091, "encoder_q-layer.2": 1386.4841, "encoder_q-layer.3": 1308.3115, "encoder_q-layer.4": 1256.0439, "encoder_q-layer.5": 1131.2875, "encoder_q-layer.6": 1082.7408, "encoder_q-layer.7": 1078.5262, "encoder_q-layer.8": 1297.8911, "encoder_q-layer.9": 1247.1794, "epoch": 0.02, "inbatch_neg_score": 0.9033, "inbatch_pos_score": 1.293, "learning_rate": 1.2e-05, "loss": 9.3347, "norm_diff": 3.0304, "norm_loss": 0.0, "num_token_doc": 66.6657, "num_token_overlap": 11.671, "num_token_query": 31.9515, "num_token_union": 65.3368, "num_word_context": 202.4092, "num_word_doc": 49.7788, "num_word_query": 23.6052, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2133.1132, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8979, "query_norm": 2.2402, "queue_k_norm": 5.3082, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9515, "sent_len_1": 66.6657, "sent_len_max_0": 127.4175, "sent_len_max_1": 188.145, "stdk": 0.0735, "stdq": 0.065, "stdqueue_k": 0.0733, "stdqueue_q": 0.0, "step": 2400 }, { "accuracy": 16.6016, "active_queue_size": 16384.0, "cl_loss": 9.1773, "doc_norm": 5.0617, "encoder_q-embeddings": 7640.2876, "encoder_q-layer.0": 7775.6162, "encoder_q-layer.1": 7564.333, "encoder_q-layer.10": 9940.5098, "encoder_q-layer.11": 9090.2051, "encoder_q-layer.2": 6220.0854, "encoder_q-layer.3": 5698.8838, "encoder_q-layer.4": 4884.3047, "encoder_q-layer.5": 3893.7532, "encoder_q-layer.6": 3884.6182, "encoder_q-layer.7": 4285.7993, "encoder_q-layer.8": 5309.4014, "encoder_q-layer.9": 6592.3662, "epoch": 0.02, "inbatch_neg_score": 0.8152, "inbatch_pos_score": 1.2197, "learning_rate": 1.25e-05, "loss": 9.1773, "norm_diff": 2.8534, "norm_loss": 0.0, "num_token_doc": 66.7351, "num_token_overlap": 11.6781, "num_token_query": 31.8418, "num_token_union": 65.3246, "num_word_context": 202.5033, "num_word_doc": 49.8347, "num_word_query": 23.5546, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9440.8409, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8101, "query_norm": 2.2082, "queue_k_norm": 5.0691, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8418, "sent_len_1": 66.7351, "sent_len_max_0": 127.5512, "sent_len_max_1": 187.9863, "stdk": 0.0703, "stdq": 0.0653, "stdqueue_k": 0.0707, "stdqueue_q": 0.0, "step": 2500 }, { "accuracy": 15.7227, "active_queue_size": 16384.0, "cl_loss": 8.9865, "doc_norm": 4.7886, "encoder_q-embeddings": 2302.0657, "encoder_q-layer.0": 2247.8403, "encoder_q-layer.1": 2516.9758, "encoder_q-layer.10": 5582.9897, "encoder_q-layer.11": 5118.2515, "encoder_q-layer.2": 2934.2102, "encoder_q-layer.3": 2834.5466, "encoder_q-layer.4": 2888.5806, "encoder_q-layer.5": 2900.2249, "encoder_q-layer.6": 3134.8867, "encoder_q-layer.7": 3704.6101, "encoder_q-layer.8": 4376.0967, "encoder_q-layer.9": 4625.7373, "epoch": 0.02, "inbatch_neg_score": 0.6721, "inbatch_pos_score": 1.0645, "learning_rate": 1.3000000000000001e-05, "loss": 8.9865, "norm_diff": 2.6075, "norm_loss": 0.0, "num_token_doc": 67.0481, "num_token_overlap": 11.7188, "num_token_query": 32.0077, "num_token_union": 65.5088, "num_word_context": 202.238, "num_word_doc": 50.051, "num_word_query": 23.6412, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4894.2615, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6631, "query_norm": 2.1811, "queue_k_norm": 4.8252, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0077, "sent_len_1": 67.0481, "sent_len_max_0": 127.6688, "sent_len_max_1": 189.0712, "stdk": 0.0686, "stdq": 0.0637, "stdqueue_k": 0.0686, "stdqueue_q": 0.0, "step": 2600 }, { "accuracy": 18.6523, "active_queue_size": 16384.0, "cl_loss": 8.5047, "doc_norm": 4.5632, "encoder_q-embeddings": 1663.757, "encoder_q-layer.0": 1393.5201, "encoder_q-layer.1": 1534.4094, "encoder_q-layer.10": 6372.3789, "encoder_q-layer.11": 5914.1577, "encoder_q-layer.2": 1867.6373, "encoder_q-layer.3": 2070.8462, "encoder_q-layer.4": 2711.2859, "encoder_q-layer.5": 3454.7747, "encoder_q-layer.6": 4981.7656, "encoder_q-layer.7": 5990.0137, "encoder_q-layer.8": 6789.6841, "encoder_q-layer.9": 6459.7539, "epoch": 0.02, "inbatch_neg_score": 0.9898, "inbatch_pos_score": 1.4014, "learning_rate": 1.3500000000000001e-05, "loss": 8.5047, "norm_diff": 2.4055, "norm_loss": 0.0, "num_token_doc": 66.5284, "num_token_overlap": 11.6188, "num_token_query": 31.7812, "num_token_union": 65.1602, "num_word_context": 202.0157, "num_word_doc": 49.6183, "num_word_query": 23.48, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5878.597, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.981, "query_norm": 2.1577, "queue_k_norm": 4.5891, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7812, "sent_len_1": 66.5284, "sent_len_max_0": 127.5, "sent_len_max_1": 189.8837, "stdk": 0.066, "stdq": 0.0613, "stdqueue_k": 0.0666, "stdqueue_q": 0.0, "step": 2700 }, { "accuracy": 19.043, "active_queue_size": 16384.0, "cl_loss": 8.1816, "doc_norm": 4.3517, "encoder_q-embeddings": 2685.5723, "encoder_q-layer.0": 3077.9246, "encoder_q-layer.1": 2745.656, "encoder_q-layer.10": 3803.8018, "encoder_q-layer.11": 3710.7197, "encoder_q-layer.2": 2609.3347, "encoder_q-layer.3": 2666.4968, "encoder_q-layer.4": 2598.146, "encoder_q-layer.5": 2804.1755, "encoder_q-layer.6": 3745.8247, "encoder_q-layer.7": 4508.7749, "encoder_q-layer.8": 4933.9795, "encoder_q-layer.9": 4115.6104, "epoch": 0.02, "inbatch_neg_score": 0.9556, "inbatch_pos_score": 1.3242, "learning_rate": 1.4000000000000001e-05, "loss": 8.1816, "norm_diff": 2.23, "norm_loss": 0.0, "num_token_doc": 66.6683, "num_token_overlap": 11.7338, "num_token_query": 31.9174, "num_token_union": 65.2271, "num_word_context": 202.1758, "num_word_doc": 49.7492, "num_word_query": 23.5538, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4892.9721, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.9438, "query_norm": 2.1217, "queue_k_norm": 4.3747, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9174, "sent_len_1": 66.6683, "sent_len_max_0": 127.4425, "sent_len_max_1": 190.005, "stdk": 0.0643, "stdq": 0.0614, "stdqueue_k": 0.0649, "stdqueue_q": 0.0, "step": 2800 }, { "accuracy": 20.3125, "active_queue_size": 16384.0, "cl_loss": 8.159, "doc_norm": 4.1437, "encoder_q-embeddings": 1864.9047, "encoder_q-layer.0": 1688.2, "encoder_q-layer.1": 1581.8759, "encoder_q-layer.10": 4538.2017, "encoder_q-layer.11": 3972.8401, "encoder_q-layer.2": 1531.9858, "encoder_q-layer.3": 1416.3698, "encoder_q-layer.4": 1613.4254, "encoder_q-layer.5": 1985.0894, "encoder_q-layer.6": 2795.0613, "encoder_q-layer.7": 3519.8, "encoder_q-layer.8": 4521.7231, "encoder_q-layer.9": 4381.4883, "epoch": 0.02, "inbatch_neg_score": 0.7072, "inbatch_pos_score": 1.1172, "learning_rate": 1.45e-05, "loss": 8.159, "norm_diff": 2.0775, "norm_loss": 0.0, "num_token_doc": 66.9335, "num_token_overlap": 11.7497, "num_token_query": 31.9884, "num_token_union": 65.3959, "num_word_context": 202.4239, "num_word_doc": 49.922, "num_word_query": 23.6242, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4002.2087, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7021, "query_norm": 2.0662, "queue_k_norm": 4.1633, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9884, "sent_len_1": 66.9335, "sent_len_max_0": 127.5537, "sent_len_max_1": 188.1362, "stdk": 0.0634, "stdq": 0.0583, "stdqueue_k": 0.0631, "stdqueue_q": 0.0, "step": 2900 }, { "accuracy": 19.6289, "active_queue_size": 16384.0, "cl_loss": 7.8978, "doc_norm": 3.9621, "encoder_q-embeddings": 942.2388, "encoder_q-layer.0": 834.9678, "encoder_q-layer.1": 841.024, "encoder_q-layer.10": 1477.5515, "encoder_q-layer.11": 2309.3621, "encoder_q-layer.2": 879.0493, "encoder_q-layer.3": 849.0429, "encoder_q-layer.4": 839.9254, "encoder_q-layer.5": 824.0525, "encoder_q-layer.6": 860.2535, "encoder_q-layer.7": 872.3854, "encoder_q-layer.8": 1160.3378, "encoder_q-layer.9": 1157.8108, "epoch": 0.02, "inbatch_neg_score": 0.8356, "inbatch_pos_score": 1.2207, "learning_rate": 1.5e-05, "loss": 7.8978, "norm_diff": 1.8918, "norm_loss": 0.0, "num_token_doc": 66.751, "num_token_overlap": 11.7089, "num_token_query": 31.9332, "num_token_union": 65.2813, "num_word_context": 202.2803, "num_word_doc": 49.7988, "num_word_query": 23.583, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1601.7877, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8252, "query_norm": 2.0703, "queue_k_norm": 3.9645, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9332, "sent_len_1": 66.751, "sent_len_max_0": 127.5588, "sent_len_max_1": 188.1925, "stdk": 0.0612, "stdq": 0.0588, "stdqueue_k": 0.0618, "stdqueue_q": 0.0, "step": 3000 }, { "accuracy": 20.6055, "active_queue_size": 16384.0, "cl_loss": 7.7275, "doc_norm": 3.7955, "encoder_q-embeddings": 1595.3286, "encoder_q-layer.0": 1392.6277, "encoder_q-layer.1": 1382.0269, "encoder_q-layer.10": 4684.7354, "encoder_q-layer.11": 4052.502, "encoder_q-layer.2": 1312.5521, "encoder_q-layer.3": 1287.3383, "encoder_q-layer.4": 1430.5477, "encoder_q-layer.5": 1495.0853, "encoder_q-layer.6": 1885.4283, "encoder_q-layer.7": 2494.7952, "encoder_q-layer.8": 3299.4502, "encoder_q-layer.9": 3610.3806, "epoch": 0.02, "inbatch_neg_score": 0.64, "inbatch_pos_score": 1.0244, "learning_rate": 1.55e-05, "loss": 7.7275, "norm_diff": 1.7346, "norm_loss": 0.0, "num_token_doc": 66.9101, "num_token_overlap": 11.7219, "num_token_query": 31.969, "num_token_union": 65.4199, "num_word_context": 202.1856, "num_word_doc": 49.9282, "num_word_query": 23.6156, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3413.7152, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6362, "query_norm": 2.061, "queue_k_norm": 3.79, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.969, "sent_len_1": 66.9101, "sent_len_max_0": 127.5537, "sent_len_max_1": 190.2637, "stdk": 0.0609, "stdq": 0.0581, "stdqueue_k": 0.0607, "stdqueue_q": 0.0, "step": 3100 }, { "accuracy": 19.4336, "active_queue_size": 16384.0, "cl_loss": 7.3855, "doc_norm": 3.6033, "encoder_q-embeddings": 3995.4731, "encoder_q-layer.0": 3898.8279, "encoder_q-layer.1": 3502.5544, "encoder_q-layer.10": 11909.3086, "encoder_q-layer.11": 9370.7051, "encoder_q-layer.2": 2613.1221, "encoder_q-layer.3": 2517.8765, "encoder_q-layer.4": 3342.8528, "encoder_q-layer.5": 4448.3242, "encoder_q-layer.6": 6215.4097, "encoder_q-layer.7": 8012.2598, "encoder_q-layer.8": 9726.1484, "encoder_q-layer.9": 10082.793, "epoch": 0.02, "inbatch_neg_score": 0.9602, "inbatch_pos_score": 1.3613, "learning_rate": 1.6000000000000003e-05, "loss": 7.3855, "norm_diff": 1.5645, "norm_loss": 0.0, "num_token_doc": 66.845, "num_token_overlap": 11.6838, "num_token_query": 31.9484, "num_token_union": 65.4012, "num_word_context": 202.3795, "num_word_doc": 49.8971, "num_word_query": 23.5843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9100.8166, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.957, "query_norm": 2.0388, "queue_k_norm": 3.6254, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9484, "sent_len_1": 66.845, "sent_len_max_0": 127.6112, "sent_len_max_1": 188.9238, "stdk": 0.0598, "stdq": 0.0575, "stdqueue_k": 0.0596, "stdqueue_q": 0.0, "step": 3200 }, { "accuracy": 19.8242, "active_queue_size": 16384.0, "cl_loss": 7.271, "doc_norm": 3.4766, "encoder_q-embeddings": 1065.6029, "encoder_q-layer.0": 905.804, "encoder_q-layer.1": 1078.934, "encoder_q-layer.10": 7312.5459, "encoder_q-layer.11": 6073.9077, "encoder_q-layer.2": 1317.9629, "encoder_q-layer.3": 1453.5439, "encoder_q-layer.4": 1852.5154, "encoder_q-layer.5": 2426.3962, "encoder_q-layer.6": 3448.1719, "encoder_q-layer.7": 4911.1821, "encoder_q-layer.8": 6445.77, "encoder_q-layer.9": 6649.7734, "epoch": 0.02, "inbatch_neg_score": 0.6278, "inbatch_pos_score": 1.0293, "learning_rate": 1.65e-05, "loss": 7.271, "norm_diff": 1.5146, "norm_loss": 0.0, "num_token_doc": 66.8196, "num_token_overlap": 11.6534, "num_token_query": 31.876, "num_token_union": 65.3998, "num_word_context": 202.2281, "num_word_doc": 49.8529, "num_word_query": 23.5444, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5516.8908, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6201, "query_norm": 1.962, "queue_k_norm": 3.4742, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.876, "sent_len_1": 66.8196, "sent_len_max_0": 127.5625, "sent_len_max_1": 188.5625, "stdk": 0.0583, "stdq": 0.055, "stdqueue_k": 0.0588, "stdqueue_q": 0.0, "step": 3300 }, { "accuracy": 24.5117, "active_queue_size": 16384.0, "cl_loss": 7.0583, "doc_norm": 3.3089, "encoder_q-embeddings": 1090.7194, "encoder_q-layer.0": 1004.7092, "encoder_q-layer.1": 1002.2723, "encoder_q-layer.10": 3586.9373, "encoder_q-layer.11": 3639.9998, "encoder_q-layer.2": 1090.4409, "encoder_q-layer.3": 1140.1885, "encoder_q-layer.4": 1202.2881, "encoder_q-layer.5": 1293.3363, "encoder_q-layer.6": 1827.2235, "encoder_q-layer.7": 2479.9001, "encoder_q-layer.8": 3262.3325, "encoder_q-layer.9": 3139.2844, "epoch": 0.02, "inbatch_neg_score": 0.566, "inbatch_pos_score": 0.9707, "learning_rate": 1.7000000000000003e-05, "loss": 7.0583, "norm_diff": 1.3588, "norm_loss": 0.0, "num_token_doc": 66.638, "num_token_overlap": 11.6696, "num_token_query": 31.9391, "num_token_union": 65.33, "num_word_context": 202.036, "num_word_doc": 49.7321, "num_word_query": 23.5912, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2998.4911, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5586, "query_norm": 1.9501, "queue_k_norm": 3.3313, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9391, "sent_len_1": 66.638, "sent_len_max_0": 127.6188, "sent_len_max_1": 187.5813, "stdk": 0.0581, "stdq": 0.0558, "stdqueue_k": 0.0579, "stdqueue_q": 0.0, "step": 3400 }, { "accuracy": 22.7539, "active_queue_size": 16384.0, "cl_loss": 6.799, "doc_norm": 3.1915, "encoder_q-embeddings": 2156.3486, "encoder_q-layer.0": 2063.0042, "encoder_q-layer.1": 1888.588, "encoder_q-layer.10": 2531.7766, "encoder_q-layer.11": 2673.2393, "encoder_q-layer.2": 1642.5555, "encoder_q-layer.3": 1497.5662, "encoder_q-layer.4": 1363.1597, "encoder_q-layer.5": 1280.5649, "encoder_q-layer.6": 1394.2969, "encoder_q-layer.7": 1708.7971, "encoder_q-layer.8": 2007.1515, "encoder_q-layer.9": 2167.5376, "epoch": 0.02, "inbatch_neg_score": 0.6687, "inbatch_pos_score": 1.0576, "learning_rate": 1.75e-05, "loss": 6.799, "norm_diff": 1.2936, "norm_loss": 0.0, "num_token_doc": 66.8219, "num_token_overlap": 11.6994, "num_token_query": 32.0078, "num_token_union": 65.4348, "num_word_context": 202.4362, "num_word_doc": 49.8329, "num_word_query": 23.6251, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2778.7306, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6655, "query_norm": 1.8979, "queue_k_norm": 3.1995, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0078, "sent_len_1": 66.8219, "sent_len_max_0": 127.44, "sent_len_max_1": 189.9688, "stdk": 0.0571, "stdq": 0.0534, "stdqueue_k": 0.0571, "stdqueue_q": 0.0, "step": 3500 }, { "accuracy": 20.8008, "active_queue_size": 16384.0, "cl_loss": 6.6275, "doc_norm": 3.0735, "encoder_q-embeddings": 1224.207, "encoder_q-layer.0": 1090.2598, "encoder_q-layer.1": 1129.3584, "encoder_q-layer.10": 5294.8477, "encoder_q-layer.11": 4845.5605, "encoder_q-layer.2": 1154.0454, "encoder_q-layer.3": 1129.3838, "encoder_q-layer.4": 1269.2375, "encoder_q-layer.5": 1543.3362, "encoder_q-layer.6": 2233.7266, "encoder_q-layer.7": 3227.719, "encoder_q-layer.8": 4043.4771, "encoder_q-layer.9": 4520.4727, "epoch": 0.02, "inbatch_neg_score": 0.6083, "inbatch_pos_score": 0.9863, "learning_rate": 1.8e-05, "loss": 6.6275, "norm_diff": 1.1983, "norm_loss": 0.0, "num_token_doc": 66.9682, "num_token_overlap": 11.6939, "num_token_query": 31.9496, "num_token_union": 65.4903, "num_word_context": 202.8111, "num_word_doc": 49.9519, "num_word_query": 23.6113, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3927.1563, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6055, "query_norm": 1.8751, "queue_k_norm": 3.0785, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9496, "sent_len_1": 66.9682, "sent_len_max_0": 127.5875, "sent_len_max_1": 189.4863, "stdk": 0.0564, "stdq": 0.0516, "stdqueue_k": 0.0563, "stdqueue_q": 0.0, "step": 3600 }, { "accuracy": 24.3164, "active_queue_size": 16384.0, "cl_loss": 6.5224, "doc_norm": 2.9297, "encoder_q-embeddings": 1645.49, "encoder_q-layer.0": 1616.4651, "encoder_q-layer.1": 1444.3759, "encoder_q-layer.10": 4245.812, "encoder_q-layer.11": 4105.3965, "encoder_q-layer.2": 1267.6582, "encoder_q-layer.3": 1223.8943, "encoder_q-layer.4": 1465.1274, "encoder_q-layer.5": 1866.9062, "encoder_q-layer.6": 2713.4229, "encoder_q-layer.7": 3544.4587, "encoder_q-layer.8": 4139.9683, "encoder_q-layer.9": 3782.2141, "epoch": 0.02, "inbatch_neg_score": 0.5314, "inbatch_pos_score": 0.9468, "learning_rate": 1.85e-05, "loss": 6.5224, "norm_diff": 1.0421, "norm_loss": 0.0, "num_token_doc": 66.9505, "num_token_overlap": 11.6709, "num_token_query": 31.948, "num_token_union": 65.4505, "num_word_context": 202.4648, "num_word_doc": 49.911, "num_word_query": 23.5861, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3812.1304, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5254, "query_norm": 1.8876, "queue_k_norm": 2.9513, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.948, "sent_len_1": 66.9505, "sent_len_max_0": 127.465, "sent_len_max_1": 191.555, "stdk": 0.0555, "stdq": 0.0528, "stdqueue_k": 0.0557, "stdqueue_q": 0.0, "step": 3700 }, { "accuracy": 27.5391, "active_queue_size": 16384.0, "cl_loss": 6.3521, "doc_norm": 2.8379, "encoder_q-embeddings": 1590.819, "encoder_q-layer.0": 1298.3898, "encoder_q-layer.1": 1223.2782, "encoder_q-layer.10": 1357.8093, "encoder_q-layer.11": 1925.1952, "encoder_q-layer.2": 1143.6892, "encoder_q-layer.3": 1088.8524, "encoder_q-layer.4": 911.0908, "encoder_q-layer.5": 689.785, "encoder_q-layer.6": 637.3288, "encoder_q-layer.7": 724.6076, "encoder_q-layer.8": 823.5762, "encoder_q-layer.9": 726.8301, "epoch": 0.02, "inbatch_neg_score": 0.4901, "inbatch_pos_score": 0.916, "learning_rate": 1.9e-05, "loss": 6.3521, "norm_diff": 1.0354, "norm_loss": 0.0, "num_token_doc": 66.6645, "num_token_overlap": 11.6641, "num_token_query": 31.8489, "num_token_union": 65.2601, "num_word_context": 202.3027, "num_word_doc": 49.7282, "num_word_query": 23.5171, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1720.9303, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4846, "query_norm": 1.8025, "queue_k_norm": 2.8362, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8489, "sent_len_1": 66.6645, "sent_len_max_0": 127.5675, "sent_len_max_1": 189.175, "stdk": 0.0552, "stdq": 0.0499, "stdqueue_k": 0.0553, "stdqueue_q": 0.0, "step": 3800 }, { "accuracy": 26.3672, "active_queue_size": 16384.0, "cl_loss": 6.2237, "doc_norm": 2.7058, "encoder_q-embeddings": 2293.5208, "encoder_q-layer.0": 2140.6128, "encoder_q-layer.1": 1734.2682, "encoder_q-layer.10": 2295.7788, "encoder_q-layer.11": 2493.8005, "encoder_q-layer.2": 1389.6406, "encoder_q-layer.3": 1237.2246, "encoder_q-layer.4": 1049.0144, "encoder_q-layer.5": 895.5275, "encoder_q-layer.6": 1107.2507, "encoder_q-layer.7": 1357.6709, "encoder_q-layer.8": 1569.7633, "encoder_q-layer.9": 1561.2484, "epoch": 0.03, "inbatch_neg_score": 0.3927, "inbatch_pos_score": 0.7988, "learning_rate": 1.9500000000000003e-05, "loss": 6.2237, "norm_diff": 0.9178, "norm_loss": 0.0, "num_token_doc": 67.0218, "num_token_overlap": 11.7107, "num_token_query": 31.8938, "num_token_union": 65.478, "num_word_context": 202.5761, "num_word_doc": 50.002, "num_word_query": 23.5272, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2505.3864, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3862, "query_norm": 1.7881, "queue_k_norm": 2.7176, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8938, "sent_len_1": 67.0218, "sent_len_max_0": 127.4788, "sent_len_max_1": 190.175, "stdk": 0.0542, "stdq": 0.0495, "stdqueue_k": 0.0544, "stdqueue_q": 0.0, "step": 3900 }, { "accuracy": 27.1484, "active_queue_size": 16384.0, "cl_loss": 6.1397, "doc_norm": 2.5991, "encoder_q-embeddings": 1314.6952, "encoder_q-layer.0": 1170.1249, "encoder_q-layer.1": 1043.1571, "encoder_q-layer.10": 1743.0364, "encoder_q-layer.11": 2462.5776, "encoder_q-layer.2": 1200.2086, "encoder_q-layer.3": 1074.1323, "encoder_q-layer.4": 985.7745, "encoder_q-layer.5": 906.7318, "encoder_q-layer.6": 847.7744, "encoder_q-layer.7": 841.9675, "encoder_q-layer.8": 1009.2193, "encoder_q-layer.9": 1002.1323, "epoch": 0.03, "inbatch_neg_score": 0.4279, "inbatch_pos_score": 0.8296, "learning_rate": 2e-05, "loss": 6.1397, "norm_diff": 0.7725, "norm_loss": 0.0, "num_token_doc": 66.6174, "num_token_overlap": 11.6947, "num_token_query": 32.0051, "num_token_union": 65.2993, "num_word_context": 202.3744, "num_word_doc": 49.6764, "num_word_query": 23.6378, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1841.7456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4199, "query_norm": 1.8266, "queue_k_norm": 2.6101, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0051, "sent_len_1": 66.6174, "sent_len_max_0": 127.6437, "sent_len_max_1": 188.8262, "stdk": 0.0537, "stdq": 0.0501, "stdqueue_k": 0.054, "stdqueue_q": 0.0, "step": 4000 }, { "accuracy": 24.9023, "active_queue_size": 16384.0, "cl_loss": 6.1563, "doc_norm": 2.5043, "encoder_q-embeddings": 2071.9746, "encoder_q-layer.0": 1880.4088, "encoder_q-layer.1": 1721.3859, "encoder_q-layer.10": 3732.4465, "encoder_q-layer.11": 3799.4915, "encoder_q-layer.2": 1400.1964, "encoder_q-layer.3": 1314.8468, "encoder_q-layer.4": 1488.0145, "encoder_q-layer.5": 1808.8608, "encoder_q-layer.6": 2579.3796, "encoder_q-layer.7": 2941.8645, "encoder_q-layer.8": 3257.748, "encoder_q-layer.9": 3149.5579, "epoch": 0.03, "inbatch_neg_score": 0.4462, "inbatch_pos_score": 0.8418, "learning_rate": 2.05e-05, "loss": 6.1563, "norm_diff": 0.6432, "norm_loss": 0.0, "num_token_doc": 67.1459, "num_token_overlap": 11.7184, "num_token_query": 31.9671, "num_token_union": 65.5532, "num_word_context": 202.4607, "num_word_doc": 50.1097, "num_word_query": 23.6207, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3507.0954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4368, "query_norm": 1.8611, "queue_k_norm": 2.5087, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9671, "sent_len_1": 67.1459, "sent_len_max_0": 127.63, "sent_len_max_1": 190.1312, "stdk": 0.053, "stdq": 0.0512, "stdqueue_k": 0.0534, "stdqueue_q": 0.0, "step": 4100 }, { "accuracy": 26.4648, "active_queue_size": 16384.0, "cl_loss": 6.0497, "doc_norm": 2.408, "encoder_q-embeddings": 770.8534, "encoder_q-layer.0": 684.2087, "encoder_q-layer.1": 692.8184, "encoder_q-layer.10": 1396.2843, "encoder_q-layer.11": 1693.0162, "encoder_q-layer.2": 739.9814, "encoder_q-layer.3": 711.0517, "encoder_q-layer.4": 702.5658, "encoder_q-layer.5": 632.4464, "encoder_q-layer.6": 711.3911, "encoder_q-layer.7": 781.9481, "encoder_q-layer.8": 1046.2751, "encoder_q-layer.9": 1154.5271, "epoch": 0.03, "inbatch_neg_score": 0.3925, "inbatch_pos_score": 0.8066, "learning_rate": 2.1e-05, "loss": 6.0497, "norm_diff": 0.5812, "norm_loss": 0.0, "num_token_doc": 66.8998, "num_token_overlap": 11.6934, "num_token_query": 31.9341, "num_token_union": 65.4772, "num_word_context": 202.4728, "num_word_doc": 49.9291, "num_word_query": 23.5858, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1332.3839, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3857, "query_norm": 1.8268, "queue_k_norm": 2.4107, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9341, "sent_len_1": 66.8998, "sent_len_max_0": 127.4875, "sent_len_max_1": 188.4725, "stdk": 0.053, "stdq": 0.0497, "stdqueue_k": 0.0529, "stdqueue_q": 0.0, "step": 4200 }, { "accuracy": 29.7852, "active_queue_size": 16384.0, "cl_loss": 5.9443, "doc_norm": 2.313, "encoder_q-embeddings": 1661.2754, "encoder_q-layer.0": 1390.2098, "encoder_q-layer.1": 1335.2872, "encoder_q-layer.10": 2906.1753, "encoder_q-layer.11": 3441.5466, "encoder_q-layer.2": 1478.5444, "encoder_q-layer.3": 1228.9659, "encoder_q-layer.4": 1248.1729, "encoder_q-layer.5": 1561.0422, "encoder_q-layer.6": 2117.8691, "encoder_q-layer.7": 2929.4468, "encoder_q-layer.8": 3326.9883, "encoder_q-layer.9": 3040.6653, "epoch": 0.03, "inbatch_neg_score": 0.4453, "inbatch_pos_score": 0.8799, "learning_rate": 2.15e-05, "loss": 5.9443, "norm_diff": 0.5121, "norm_loss": 0.0, "num_token_doc": 66.7388, "num_token_overlap": 11.7285, "num_token_query": 32.088, "num_token_union": 65.4075, "num_word_context": 202.024, "num_word_doc": 49.7956, "num_word_query": 23.7058, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3182.0599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4395, "query_norm": 1.8009, "queue_k_norm": 2.3209, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.088, "sent_len_1": 66.7388, "sent_len_max_0": 127.3738, "sent_len_max_1": 190.2975, "stdk": 0.0522, "stdq": 0.0489, "stdqueue_k": 0.0525, "stdqueue_q": 0.0, "step": 4300 }, { "accuracy": 27.5391, "active_queue_size": 16384.0, "cl_loss": 5.9196, "doc_norm": 2.232, "encoder_q-embeddings": 3243.9692, "encoder_q-layer.0": 3214.4561, "encoder_q-layer.1": 3284.1445, "encoder_q-layer.10": 7417.9771, "encoder_q-layer.11": 6456.0317, "encoder_q-layer.2": 3198.6035, "encoder_q-layer.3": 2943.7549, "encoder_q-layer.4": 3074.3206, "encoder_q-layer.5": 3306.5183, "encoder_q-layer.6": 4213.4604, "encoder_q-layer.7": 6217.0122, "encoder_q-layer.8": 8179.2954, "encoder_q-layer.9": 7443.5991, "epoch": 0.03, "inbatch_neg_score": 0.428, "inbatch_pos_score": 0.8394, "learning_rate": 2.2000000000000003e-05, "loss": 5.9196, "norm_diff": 0.4469, "norm_loss": 0.0, "num_token_doc": 66.8843, "num_token_overlap": 11.6454, "num_token_query": 31.9102, "num_token_union": 65.4141, "num_word_context": 202.6783, "num_word_doc": 49.8376, "num_word_query": 23.5704, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7154.8753, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4241, "query_norm": 1.7851, "queue_k_norm": 2.2388, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9102, "sent_len_1": 66.8843, "sent_len_max_0": 127.6012, "sent_len_max_1": 191.4888, "stdk": 0.052, "stdq": 0.0489, "stdqueue_k": 0.0519, "stdqueue_q": 0.0, "step": 4400 }, { "accuracy": 28.6133, "active_queue_size": 16384.0, "cl_loss": 5.8619, "doc_norm": 2.1629, "encoder_q-embeddings": 1459.4573, "encoder_q-layer.0": 1188.7753, "encoder_q-layer.1": 1159.8557, "encoder_q-layer.10": 2810.5972, "encoder_q-layer.11": 3560.0872, "encoder_q-layer.2": 1340.7809, "encoder_q-layer.3": 1165.6268, "encoder_q-layer.4": 1230.7305, "encoder_q-layer.5": 1491.6013, "encoder_q-layer.6": 1895.881, "encoder_q-layer.7": 2452.0833, "encoder_q-layer.8": 2780.354, "encoder_q-layer.9": 2559.9424, "epoch": 0.03, "inbatch_neg_score": 0.5752, "inbatch_pos_score": 1.001, "learning_rate": 2.25e-05, "loss": 5.8619, "norm_diff": 0.3452, "norm_loss": 0.0, "num_token_doc": 66.6042, "num_token_overlap": 11.6197, "num_token_query": 31.8176, "num_token_union": 65.2266, "num_word_context": 202.4904, "num_word_doc": 49.7161, "num_word_query": 23.4787, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2881.3076, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5659, "query_norm": 1.8177, "queue_k_norm": 2.1646, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8176, "sent_len_1": 66.6042, "sent_len_max_0": 127.55, "sent_len_max_1": 189.585, "stdk": 0.0512, "stdq": 0.0506, "stdqueue_k": 0.0515, "stdqueue_q": 0.0, "step": 4500 }, { "accuracy": 28.8086, "active_queue_size": 16384.0, "cl_loss": 5.7637, "doc_norm": 2.0924, "encoder_q-embeddings": 1942.0938, "encoder_q-layer.0": 1651.6129, "encoder_q-layer.1": 1589.2704, "encoder_q-layer.10": 2398.2749, "encoder_q-layer.11": 3443.1362, "encoder_q-layer.2": 1711.1622, "encoder_q-layer.3": 1490.0366, "encoder_q-layer.4": 1370.1678, "encoder_q-layer.5": 1271.9205, "encoder_q-layer.6": 1387.3403, "encoder_q-layer.7": 1687.2421, "encoder_q-layer.8": 2053.1323, "encoder_q-layer.9": 1812.024, "epoch": 0.03, "inbatch_neg_score": 0.5175, "inbatch_pos_score": 0.9346, "learning_rate": 2.3000000000000003e-05, "loss": 5.7637, "norm_diff": 0.3039, "norm_loss": 0.0, "num_token_doc": 66.7433, "num_token_overlap": 11.6789, "num_token_query": 31.9717, "num_token_union": 65.3713, "num_word_context": 201.9814, "num_word_doc": 49.7896, "num_word_query": 23.6039, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2750.0376, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5137, "query_norm": 1.7885, "queue_k_norm": 2.0969, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9717, "sent_len_1": 66.7433, "sent_len_max_0": 127.5975, "sent_len_max_1": 190.2837, "stdk": 0.0513, "stdq": 0.0477, "stdqueue_k": 0.0511, "stdqueue_q": 0.0, "step": 4600 }, { "accuracy": 28.8086, "active_queue_size": 16384.0, "cl_loss": 5.7287, "doc_norm": 2.0417, "encoder_q-embeddings": 3004.9158, "encoder_q-layer.0": 2608.4119, "encoder_q-layer.1": 2279.6909, "encoder_q-layer.10": 1502.5314, "encoder_q-layer.11": 2617.8152, "encoder_q-layer.2": 2392.8264, "encoder_q-layer.3": 2291.5193, "encoder_q-layer.4": 2153.0652, "encoder_q-layer.5": 2154.9326, "encoder_q-layer.6": 2519.2319, "encoder_q-layer.7": 2155.6965, "encoder_q-layer.8": 1612.9008, "encoder_q-layer.9": 1129.5435, "epoch": 0.03, "inbatch_neg_score": 0.547, "inbatch_pos_score": 0.9419, "learning_rate": 2.35e-05, "loss": 5.7287, "norm_diff": 0.2667, "norm_loss": 0.0, "num_token_doc": 66.626, "num_token_overlap": 11.689, "num_token_query": 31.9445, "num_token_union": 65.2764, "num_word_context": 202.1106, "num_word_doc": 49.7321, "num_word_query": 23.5901, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3416.0873, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5439, "query_norm": 1.7749, "queue_k_norm": 2.0389, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9445, "sent_len_1": 66.626, "sent_len_max_0": 127.5875, "sent_len_max_1": 189.2113, "stdk": 0.0511, "stdq": 0.0486, "stdqueue_k": 0.0508, "stdqueue_q": 0.0, "step": 4700 }, { "accuracy": 30.3711, "active_queue_size": 16384.0, "cl_loss": 5.6451, "doc_norm": 1.9932, "encoder_q-embeddings": 1113.3066, "encoder_q-layer.0": 925.7037, "encoder_q-layer.1": 957.159, "encoder_q-layer.10": 1282.9365, "encoder_q-layer.11": 2306.5063, "encoder_q-layer.2": 1083.827, "encoder_q-layer.3": 972.7651, "encoder_q-layer.4": 853.9071, "encoder_q-layer.5": 878.3228, "encoder_q-layer.6": 879.4494, "encoder_q-layer.7": 903.0487, "encoder_q-layer.8": 1043.5403, "encoder_q-layer.9": 958.9872, "epoch": 0.03, "inbatch_neg_score": 0.6097, "inbatch_pos_score": 1.0508, "learning_rate": 2.4e-05, "loss": 5.6451, "norm_diff": 0.1986, "norm_loss": 0.0, "num_token_doc": 66.9239, "num_token_overlap": 11.6932, "num_token_query": 32.0249, "num_token_union": 65.5281, "num_word_context": 202.4722, "num_word_doc": 49.9391, "num_word_query": 23.6358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1694.5699, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6069, "query_norm": 1.7946, "queue_k_norm": 1.9875, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0249, "sent_len_1": 66.9239, "sent_len_max_0": 127.5575, "sent_len_max_1": 190.5375, "stdk": 0.0507, "stdq": 0.0482, "stdqueue_k": 0.0504, "stdqueue_q": 0.0, "step": 4800 }, { "accuracy": 29.7852, "active_queue_size": 16384.0, "cl_loss": 5.6101, "doc_norm": 1.9364, "encoder_q-embeddings": 1106.3584, "encoder_q-layer.0": 879.665, "encoder_q-layer.1": 899.5497, "encoder_q-layer.10": 2175.2434, "encoder_q-layer.11": 2766.1438, "encoder_q-layer.2": 991.3842, "encoder_q-layer.3": 904.9715, "encoder_q-layer.4": 894.1366, "encoder_q-layer.5": 915.2363, "encoder_q-layer.6": 1002.8583, "encoder_q-layer.7": 1449.1432, "encoder_q-layer.8": 2051.9871, "encoder_q-layer.9": 1998.6666, "epoch": 0.03, "inbatch_neg_score": 0.6149, "inbatch_pos_score": 1.042, "learning_rate": 2.45e-05, "loss": 5.6101, "norm_diff": 0.1588, "norm_loss": 0.0, "num_token_doc": 66.9288, "num_token_overlap": 11.6631, "num_token_query": 31.799, "num_token_union": 65.3978, "num_word_context": 202.8045, "num_word_doc": 49.9633, "num_word_query": 23.5001, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2119.0, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6108, "query_norm": 1.7776, "queue_k_norm": 1.9403, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.799, "sent_len_1": 66.9288, "sent_len_max_0": 127.4537, "sent_len_max_1": 191.7175, "stdk": 0.0498, "stdq": 0.0479, "stdqueue_k": 0.05, "stdqueue_q": 0.0, "step": 4900 }, { "accuracy": 29.8828, "active_queue_size": 16384.0, "cl_loss": 5.5586, "doc_norm": 1.9109, "encoder_q-embeddings": 1697.1666, "encoder_q-layer.0": 1424.22, "encoder_q-layer.1": 1292.9722, "encoder_q-layer.10": 2605.1965, "encoder_q-layer.11": 2863.4985, "encoder_q-layer.2": 1462.2683, "encoder_q-layer.3": 1326.9849, "encoder_q-layer.4": 1222.6007, "encoder_q-layer.5": 1458.3225, "encoder_q-layer.6": 1980.0226, "encoder_q-layer.7": 2787.7341, "encoder_q-layer.8": 3410.0283, "encoder_q-layer.9": 2729.8855, "epoch": 0.03, "inbatch_neg_score": 0.6675, "inbatch_pos_score": 1.1045, "learning_rate": 2.5e-05, "loss": 5.5586, "norm_diff": 0.0987, "norm_loss": 0.0, "num_token_doc": 66.7615, "num_token_overlap": 11.66, "num_token_query": 31.9118, "num_token_union": 65.3759, "num_word_context": 202.0434, "num_word_doc": 49.8119, "num_word_query": 23.5437, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3011.0181, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6646, "query_norm": 1.8122, "queue_k_norm": 1.8986, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9118, "sent_len_1": 66.7615, "sent_len_max_0": 127.4262, "sent_len_max_1": 189.9288, "stdk": 0.05, "stdq": 0.0476, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 5000 }, { "accuracy": 28.5156, "active_queue_size": 16384.0, "cl_loss": 5.5177, "doc_norm": 1.863, "encoder_q-embeddings": 2933.2681, "encoder_q-layer.0": 2520.2344, "encoder_q-layer.1": 2722.5129, "encoder_q-layer.10": 2596.1702, "encoder_q-layer.11": 3660.5349, "encoder_q-layer.2": 3329.0225, "encoder_q-layer.3": 3232.158, "encoder_q-layer.4": 3994.4016, "encoder_q-layer.5": 3455.9128, "encoder_q-layer.6": 4382.1147, "encoder_q-layer.7": 3174.4966, "encoder_q-layer.8": 2311.8928, "encoder_q-layer.9": 1909.028, "epoch": 0.03, "inbatch_neg_score": 0.706, "inbatch_pos_score": 1.1279, "learning_rate": 2.5500000000000003e-05, "loss": 5.5177, "norm_diff": 0.0408, "norm_loss": 0.0, "num_token_doc": 66.5497, "num_token_overlap": 11.6965, "num_token_query": 31.9567, "num_token_union": 65.2395, "num_word_context": 202.1874, "num_word_doc": 49.6335, "num_word_query": 23.5987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4655.812, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6997, "query_norm": 1.8222, "queue_k_norm": 1.8743, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9567, "sent_len_1": 66.5497, "sent_len_max_0": 127.57, "sent_len_max_1": 190.0588, "stdk": 0.0491, "stdq": 0.0489, "stdqueue_k": 0.0495, "stdqueue_q": 0.0, "step": 5100 }, { "accuracy": 31.1523, "active_queue_size": 16384.0, "cl_loss": 5.5136, "doc_norm": 1.8493, "encoder_q-embeddings": 755.8551, "encoder_q-layer.0": 594.8244, "encoder_q-layer.1": 584.5533, "encoder_q-layer.10": 1190.9226, "encoder_q-layer.11": 2033.701, "encoder_q-layer.2": 666.2786, "encoder_q-layer.3": 712.015, "encoder_q-layer.4": 643.1187, "encoder_q-layer.5": 598.1689, "encoder_q-layer.6": 657.9568, "encoder_q-layer.7": 789.6885, "encoder_q-layer.8": 1035.2782, "encoder_q-layer.9": 922.2877, "epoch": 0.03, "inbatch_neg_score": 0.7153, "inbatch_pos_score": 1.1484, "learning_rate": 2.6000000000000002e-05, "loss": 5.5136, "norm_diff": 0.0174, "norm_loss": 0.0, "num_token_doc": 66.9622, "num_token_overlap": 11.6644, "num_token_query": 31.9045, "num_token_union": 65.4723, "num_word_context": 202.6654, "num_word_doc": 49.9826, "num_word_query": 23.5529, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1354.679, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.71, "query_norm": 1.8474, "queue_k_norm": 1.8456, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9045, "sent_len_1": 66.9622, "sent_len_max_0": 127.505, "sent_len_max_1": 190.93, "stdk": 0.0492, "stdq": 0.0484, "stdqueue_k": 0.0492, "stdqueue_q": 0.0, "step": 5200 }, { "accuracy": 30.7617, "active_queue_size": 16384.0, "cl_loss": 5.4648, "doc_norm": 1.8202, "encoder_q-embeddings": 1437.9847, "encoder_q-layer.0": 1278.1046, "encoder_q-layer.1": 1324.6436, "encoder_q-layer.10": 1842.3235, "encoder_q-layer.11": 2807.0669, "encoder_q-layer.2": 1499.672, "encoder_q-layer.3": 1331.1172, "encoder_q-layer.4": 1230.3367, "encoder_q-layer.5": 1284.0828, "encoder_q-layer.6": 1471.5833, "encoder_q-layer.7": 1756.0297, "encoder_q-layer.8": 1844.6895, "encoder_q-layer.9": 1506.0494, "epoch": 0.03, "inbatch_neg_score": 0.7635, "inbatch_pos_score": 1.2012, "learning_rate": 2.6500000000000004e-05, "loss": 5.4648, "norm_diff": 0.0756, "norm_loss": 0.0, "num_token_doc": 66.7224, "num_token_overlap": 11.6676, "num_token_query": 31.7946, "num_token_union": 65.2697, "num_word_context": 202.0284, "num_word_doc": 49.7561, "num_word_query": 23.4659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2385.043, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7544, "query_norm": 1.8957, "queue_k_norm": 1.8247, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7946, "sent_len_1": 66.7224, "sent_len_max_0": 127.2775, "sent_len_max_1": 189.57, "stdk": 0.0486, "stdq": 0.0498, "stdqueue_k": 0.0489, "stdqueue_q": 0.0, "step": 5300 }, { "accuracy": 30.3711, "active_queue_size": 16384.0, "cl_loss": 5.4609, "doc_norm": 1.8049, "encoder_q-embeddings": 1631.858, "encoder_q-layer.0": 1357.4491, "encoder_q-layer.1": 1315.061, "encoder_q-layer.10": 2372.157, "encoder_q-layer.11": 3594.7012, "encoder_q-layer.2": 1339.4983, "encoder_q-layer.3": 1174.0256, "encoder_q-layer.4": 1056.0623, "encoder_q-layer.5": 1106.0283, "encoder_q-layer.6": 1391.7842, "encoder_q-layer.7": 2049.0586, "encoder_q-layer.8": 2532.4504, "encoder_q-layer.9": 2352.5623, "epoch": 0.04, "inbatch_neg_score": 0.8297, "inbatch_pos_score": 1.2627, "learning_rate": 2.7000000000000002e-05, "loss": 5.4609, "norm_diff": 0.0869, "norm_loss": 0.0, "num_token_doc": 66.8363, "num_token_overlap": 11.6371, "num_token_query": 31.8762, "num_token_union": 65.4163, "num_word_context": 202.3214, "num_word_doc": 49.8533, "num_word_query": 23.5153, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2735.0532, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8242, "query_norm": 1.8918, "queue_k_norm": 1.8064, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8762, "sent_len_1": 66.8363, "sent_len_max_0": 127.61, "sent_len_max_1": 189.3988, "stdk": 0.0485, "stdq": 0.0495, "stdqueue_k": 0.0486, "stdqueue_q": 0.0, "step": 5400 }, { "accuracy": 33.3008, "active_queue_size": 16384.0, "cl_loss": 5.4099, "doc_norm": 1.7904, "encoder_q-embeddings": 2192.3738, "encoder_q-layer.0": 2173.1426, "encoder_q-layer.1": 2108.6013, "encoder_q-layer.10": 1303.7084, "encoder_q-layer.11": 2479.905, "encoder_q-layer.2": 2226.4197, "encoder_q-layer.3": 1919.986, "encoder_q-layer.4": 1642.2073, "encoder_q-layer.5": 1288.5839, "encoder_q-layer.6": 1148.8147, "encoder_q-layer.7": 951.5146, "encoder_q-layer.8": 1057.2892, "encoder_q-layer.9": 1009.515, "epoch": 0.04, "inbatch_neg_score": 0.7852, "inbatch_pos_score": 1.2461, "learning_rate": 2.7500000000000004e-05, "loss": 5.4099, "norm_diff": 0.156, "norm_loss": 0.0, "num_token_doc": 66.9039, "num_token_overlap": 11.6144, "num_token_query": 31.6896, "num_token_union": 65.3595, "num_word_context": 202.4736, "num_word_doc": 49.9191, "num_word_query": 23.3902, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2658.5925, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7793, "query_norm": 1.9464, "queue_k_norm": 1.7946, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.6896, "sent_len_1": 66.9039, "sent_len_max_0": 127.4488, "sent_len_max_1": 189.185, "stdk": 0.0483, "stdq": 0.0509, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 5500 }, { "accuracy": 28.7109, "active_queue_size": 16384.0, "cl_loss": 5.3676, "doc_norm": 1.7743, "encoder_q-embeddings": 1130.1307, "encoder_q-layer.0": 941.0566, "encoder_q-layer.1": 993.845, "encoder_q-layer.10": 1051.5953, "encoder_q-layer.11": 2012.1215, "encoder_q-layer.2": 1082.2604, "encoder_q-layer.3": 1093.2894, "encoder_q-layer.4": 1123.9542, "encoder_q-layer.5": 1168.9885, "encoder_q-layer.6": 1003.3269, "encoder_q-layer.7": 782.5601, "encoder_q-layer.8": 795.5253, "encoder_q-layer.9": 769.6371, "epoch": 0.04, "inbatch_neg_score": 0.7832, "inbatch_pos_score": 1.209, "learning_rate": 2.8000000000000003e-05, "loss": 5.3676, "norm_diff": 0.1344, "norm_loss": 0.0, "num_token_doc": 66.6107, "num_token_overlap": 11.6221, "num_token_query": 31.7791, "num_token_union": 65.1766, "num_word_context": 201.9516, "num_word_doc": 49.6906, "num_word_query": 23.4485, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1649.432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7744, "query_norm": 1.9088, "queue_k_norm": 1.7853, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7791, "sent_len_1": 66.6107, "sent_len_max_0": 127.6562, "sent_len_max_1": 189.7988, "stdk": 0.0477, "stdq": 0.049, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 5600 }, { "accuracy": 32.8125, "active_queue_size": 16384.0, "cl_loss": 5.3431, "doc_norm": 1.7739, "encoder_q-embeddings": 3083.1438, "encoder_q-layer.0": 2437.0774, "encoder_q-layer.1": 2478.5513, "encoder_q-layer.10": 1561.0786, "encoder_q-layer.11": 2672.5874, "encoder_q-layer.2": 2540.3455, "encoder_q-layer.3": 2480.9824, "encoder_q-layer.4": 1902.5428, "encoder_q-layer.5": 1464.4288, "encoder_q-layer.6": 1340.4116, "encoder_q-layer.7": 1155.5027, "encoder_q-layer.8": 1380.7654, "encoder_q-layer.9": 1174.7318, "epoch": 0.04, "inbatch_neg_score": 0.8467, "inbatch_pos_score": 1.292, "learning_rate": 2.8499999999999998e-05, "loss": 5.3431, "norm_diff": 0.174, "norm_loss": 0.0, "num_token_doc": 66.64, "num_token_overlap": 11.6588, "num_token_query": 31.8158, "num_token_union": 65.2149, "num_word_context": 202.3895, "num_word_doc": 49.7394, "num_word_query": 23.4872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3198.9603, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8379, "query_norm": 1.9479, "queue_k_norm": 1.7755, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8158, "sent_len_1": 66.64, "sent_len_max_0": 127.5187, "sent_len_max_1": 189.9913, "stdk": 0.048, "stdq": 0.0474, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 5700 }, { "accuracy": 30.8594, "active_queue_size": 16384.0, "cl_loss": 5.2916, "doc_norm": 1.7746, "encoder_q-embeddings": 2069.4497, "encoder_q-layer.0": 1610.1228, "encoder_q-layer.1": 1381.6503, "encoder_q-layer.10": 1399.3085, "encoder_q-layer.11": 2733.8152, "encoder_q-layer.2": 1443.1844, "encoder_q-layer.3": 1225.2889, "encoder_q-layer.4": 1150.4066, "encoder_q-layer.5": 1179.7405, "encoder_q-layer.6": 1250.9999, "encoder_q-layer.7": 1044.5251, "encoder_q-layer.8": 986.4238, "encoder_q-layer.9": 969.7536, "epoch": 0.04, "inbatch_neg_score": 0.8329, "inbatch_pos_score": 1.2686, "learning_rate": 2.9e-05, "loss": 5.2916, "norm_diff": 0.2652, "norm_loss": 0.0, "num_token_doc": 66.7523, "num_token_overlap": 11.6183, "num_token_query": 31.7837, "num_token_union": 65.3307, "num_word_context": 202.4815, "num_word_doc": 49.774, "num_word_query": 23.482, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2281.7228, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8228, "query_norm": 2.0398, "queue_k_norm": 1.7684, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7837, "sent_len_1": 66.7523, "sent_len_max_0": 127.4938, "sent_len_max_1": 189.9613, "stdk": 0.0479, "stdq": 0.0485, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 5800 }, { "accuracy": 32.0312, "active_queue_size": 16384.0, "cl_loss": 5.2616, "doc_norm": 1.7614, "encoder_q-embeddings": 635.0823, "encoder_q-layer.0": 490.9083, "encoder_q-layer.1": 514.2209, "encoder_q-layer.10": 1118.83, "encoder_q-layer.11": 2064.9526, "encoder_q-layer.2": 576.4737, "encoder_q-layer.3": 607.7005, "encoder_q-layer.4": 568.682, "encoder_q-layer.5": 523.4114, "encoder_q-layer.6": 577.48, "encoder_q-layer.7": 576.6963, "encoder_q-layer.8": 709.8079, "encoder_q-layer.9": 782.0535, "epoch": 0.04, "inbatch_neg_score": 0.8534, "inbatch_pos_score": 1.3027, "learning_rate": 2.95e-05, "loss": 5.2616, "norm_diff": 0.3305, "norm_loss": 0.0, "num_token_doc": 66.7018, "num_token_overlap": 11.6285, "num_token_query": 31.8054, "num_token_union": 65.3108, "num_word_context": 202.3216, "num_word_doc": 49.7787, "num_word_query": 23.4921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1235.0429, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8462, "query_norm": 2.0919, "queue_k_norm": 1.764, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8054, "sent_len_1": 66.7018, "sent_len_max_0": 127.5113, "sent_len_max_1": 188.7537, "stdk": 0.0477, "stdq": 0.0495, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 5900 }, { "accuracy": 33.7891, "active_queue_size": 16384.0, "cl_loss": 5.2058, "doc_norm": 1.7552, "encoder_q-embeddings": 1247.741, "encoder_q-layer.0": 1051.5957, "encoder_q-layer.1": 1189.4984, "encoder_q-layer.10": 1035.1969, "encoder_q-layer.11": 2074.5903, "encoder_q-layer.2": 1394.7238, "encoder_q-layer.3": 1614.7966, "encoder_q-layer.4": 1670.4207, "encoder_q-layer.5": 1681.4109, "encoder_q-layer.6": 1444.2866, "encoder_q-layer.7": 1037.5935, "encoder_q-layer.8": 777.2864, "encoder_q-layer.9": 727.7588, "epoch": 0.04, "inbatch_neg_score": 0.8446, "inbatch_pos_score": 1.2793, "learning_rate": 3e-05, "loss": 5.2058, "norm_diff": 0.3587, "norm_loss": 0.0, "num_token_doc": 66.6407, "num_token_overlap": 11.6806, "num_token_query": 31.95, "num_token_union": 65.353, "num_word_context": 202.2341, "num_word_doc": 49.768, "num_word_query": 23.6074, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2027.0341, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8335, "query_norm": 2.114, "queue_k_norm": 1.7615, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.95, "sent_len_1": 66.6407, "sent_len_max_0": 127.5312, "sent_len_max_1": 186.6475, "stdk": 0.0477, "stdq": 0.0468, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 6000 }, { "accuracy": 34.5703, "active_queue_size": 16384.0, "cl_loss": 5.1681, "doc_norm": 1.7482, "encoder_q-embeddings": 927.2411, "encoder_q-layer.0": 742.3511, "encoder_q-layer.1": 727.0202, "encoder_q-layer.10": 1191.146, "encoder_q-layer.11": 2235.354, "encoder_q-layer.2": 820.5278, "encoder_q-layer.3": 807.2703, "encoder_q-layer.4": 750.3071, "encoder_q-layer.5": 730.5679, "encoder_q-layer.6": 771.3978, "encoder_q-layer.7": 673.6693, "encoder_q-layer.8": 765.7922, "encoder_q-layer.9": 745.3066, "epoch": 0.04, "inbatch_neg_score": 0.9018, "inbatch_pos_score": 1.3604, "learning_rate": 3.05e-05, "loss": 5.1681, "norm_diff": 0.457, "norm_loss": 0.0, "num_token_doc": 66.6633, "num_token_overlap": 11.6263, "num_token_query": 31.6654, "num_token_union": 65.1687, "num_word_context": 202.2808, "num_word_doc": 49.7238, "num_word_query": 23.3643, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1464.7709, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8906, "query_norm": 2.2052, "queue_k_norm": 1.757, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.6654, "sent_len_1": 66.6633, "sent_len_max_0": 127.4412, "sent_len_max_1": 190.6587, "stdk": 0.0472, "stdq": 0.0484, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 6100 }, { "accuracy": 34.5703, "active_queue_size": 16384.0, "cl_loss": 5.1281, "doc_norm": 1.7506, "encoder_q-embeddings": 633.824, "encoder_q-layer.0": 485.584, "encoder_q-layer.1": 524.8162, "encoder_q-layer.10": 1047.5555, "encoder_q-layer.11": 1956.8004, "encoder_q-layer.2": 601.2589, "encoder_q-layer.3": 589.4916, "encoder_q-layer.4": 592.1393, "encoder_q-layer.5": 578.0292, "encoder_q-layer.6": 714.3891, "encoder_q-layer.7": 909.9547, "encoder_q-layer.8": 1031.783, "encoder_q-layer.9": 923.5218, "epoch": 0.04, "inbatch_neg_score": 0.854, "inbatch_pos_score": 1.3047, "learning_rate": 3.1e-05, "loss": 5.1281, "norm_diff": 0.4794, "norm_loss": 0.0, "num_token_doc": 66.7638, "num_token_overlap": 11.6581, "num_token_query": 31.8181, "num_token_union": 65.321, "num_word_context": 202.0886, "num_word_doc": 49.809, "num_word_query": 23.4718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1291.8026, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8394, "query_norm": 2.2301, "queue_k_norm": 1.7549, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8181, "sent_len_1": 66.7638, "sent_len_max_0": 127.5713, "sent_len_max_1": 187.5513, "stdk": 0.0474, "stdq": 0.0469, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 6200 }, { "accuracy": 34.668, "active_queue_size": 16384.0, "cl_loss": 5.1144, "doc_norm": 1.7479, "encoder_q-embeddings": 1554.2953, "encoder_q-layer.0": 1279.7065, "encoder_q-layer.1": 1387.522, "encoder_q-layer.10": 2869.0605, "encoder_q-layer.11": 5554.7119, "encoder_q-layer.2": 1542.1715, "encoder_q-layer.3": 1537.87, "encoder_q-layer.4": 1552.4473, "encoder_q-layer.5": 1581.2069, "encoder_q-layer.6": 1507.2874, "encoder_q-layer.7": 1476.6118, "encoder_q-layer.8": 1724.4824, "encoder_q-layer.9": 1642.733, "epoch": 0.04, "inbatch_neg_score": 0.8907, "inbatch_pos_score": 1.3652, "learning_rate": 3.15e-05, "loss": 5.1144, "norm_diff": 0.5873, "norm_loss": 0.0, "num_token_doc": 66.6482, "num_token_overlap": 11.6526, "num_token_query": 31.8791, "num_token_union": 65.2439, "num_word_context": 202.2379, "num_word_doc": 49.7445, "num_word_query": 23.5305, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3233.9338, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.876, "query_norm": 2.3352, "queue_k_norm": 1.7554, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8791, "sent_len_1": 66.6482, "sent_len_max_0": 127.5325, "sent_len_max_1": 190.1037, "stdk": 0.0473, "stdq": 0.0486, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 6300 }, { "accuracy": 34.668, "active_queue_size": 16384.0, "cl_loss": 5.0845, "doc_norm": 1.7514, "encoder_q-embeddings": 1535.1053, "encoder_q-layer.0": 1216.4094, "encoder_q-layer.1": 1235.7433, "encoder_q-layer.10": 2353.3962, "encoder_q-layer.11": 4510.2085, "encoder_q-layer.2": 1375.0023, "encoder_q-layer.3": 1414.3499, "encoder_q-layer.4": 1293.3175, "encoder_q-layer.5": 1227.2529, "encoder_q-layer.6": 1163.1699, "encoder_q-layer.7": 1096.3132, "encoder_q-layer.8": 1256.7229, "encoder_q-layer.9": 1297.4023, "epoch": 0.04, "inbatch_neg_score": 0.8084, "inbatch_pos_score": 1.2568, "learning_rate": 3.2000000000000005e-05, "loss": 5.0845, "norm_diff": 0.5459, "norm_loss": 0.0, "num_token_doc": 66.711, "num_token_overlap": 11.6451, "num_token_query": 31.7985, "num_token_union": 65.249, "num_word_context": 202.5136, "num_word_doc": 49.7914, "num_word_query": 23.5016, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2708.5547, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7949, "query_norm": 2.2974, "queue_k_norm": 1.7478, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7985, "sent_len_1": 66.711, "sent_len_max_0": 127.505, "sent_len_max_1": 191.905, "stdk": 0.0474, "stdq": 0.0473, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 6400 }, { "accuracy": 34.9609, "active_queue_size": 16384.0, "cl_loss": 5.0463, "doc_norm": 1.7467, "encoder_q-embeddings": 1374.4783, "encoder_q-layer.0": 993.5135, "encoder_q-layer.1": 996.4131, "encoder_q-layer.10": 1648.4177, "encoder_q-layer.11": 3371.5249, "encoder_q-layer.2": 1117.0682, "encoder_q-layer.3": 1141.7706, "encoder_q-layer.4": 1152.9871, "encoder_q-layer.5": 1087.4132, "encoder_q-layer.6": 1190.2114, "encoder_q-layer.7": 1208.4648, "encoder_q-layer.8": 1458.8655, "encoder_q-layer.9": 1345.988, "epoch": 0.04, "inbatch_neg_score": 0.8259, "inbatch_pos_score": 1.2676, "learning_rate": 3.2500000000000004e-05, "loss": 5.0463, "norm_diff": 0.5814, "norm_loss": 0.0, "num_token_doc": 66.7518, "num_token_overlap": 11.6618, "num_token_query": 31.9322, "num_token_union": 65.3878, "num_word_context": 202.3212, "num_word_doc": 49.806, "num_word_query": 23.5953, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2230.3791, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8052, "query_norm": 2.328, "queue_k_norm": 1.7477, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9322, "sent_len_1": 66.7518, "sent_len_max_0": 127.5925, "sent_len_max_1": 189.1113, "stdk": 0.0472, "stdq": 0.0455, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 6500 }, { "accuracy": 32.5195, "active_queue_size": 16384.0, "cl_loss": 5.0277, "doc_norm": 1.7419, "encoder_q-embeddings": 1402.2107, "encoder_q-layer.0": 1075.3447, "encoder_q-layer.1": 1067.2645, "encoder_q-layer.10": 3268.4709, "encoder_q-layer.11": 5869.229, "encoder_q-layer.2": 1136.2455, "encoder_q-layer.3": 1114.5406, "encoder_q-layer.4": 1081.2982, "encoder_q-layer.5": 990.6197, "encoder_q-layer.6": 1149.7417, "encoder_q-layer.7": 1242.3838, "encoder_q-layer.8": 1487.2787, "encoder_q-layer.9": 1682.5852, "epoch": 0.04, "inbatch_neg_score": 0.845, "inbatch_pos_score": 1.2734, "learning_rate": 3.3e-05, "loss": 5.0277, "norm_diff": 0.6456, "norm_loss": 0.0, "num_token_doc": 66.9512, "num_token_overlap": 11.6766, "num_token_query": 31.8829, "num_token_union": 65.4555, "num_word_context": 202.6086, "num_word_doc": 49.9357, "num_word_query": 23.5362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3132.0703, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.8281, "query_norm": 2.3875, "queue_k_norm": 1.7441, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8829, "sent_len_1": 66.9512, "sent_len_max_0": 127.6688, "sent_len_max_1": 191.2012, "stdk": 0.0472, "stdq": 0.0473, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 6600 }, { "accuracy": 33.6914, "active_queue_size": 16384.0, "cl_loss": 5.0056, "doc_norm": 1.7391, "encoder_q-embeddings": 2448.6204, "encoder_q-layer.0": 2034.0061, "encoder_q-layer.1": 2192.4597, "encoder_q-layer.10": 2348.9468, "encoder_q-layer.11": 3838.7695, "encoder_q-layer.2": 2355.0728, "encoder_q-layer.3": 2289.0415, "encoder_q-layer.4": 1971.0135, "encoder_q-layer.5": 1506.4377, "encoder_q-layer.6": 1415.697, "encoder_q-layer.7": 1257.7732, "encoder_q-layer.8": 1505.6145, "encoder_q-layer.9": 1579.5531, "epoch": 0.04, "inbatch_neg_score": 0.7651, "inbatch_pos_score": 1.2061, "learning_rate": 3.35e-05, "loss": 5.0056, "norm_diff": 0.5968, "norm_loss": 0.0, "num_token_doc": 66.8507, "num_token_overlap": 11.6957, "num_token_query": 31.9364, "num_token_union": 65.3889, "num_word_context": 202.23, "num_word_doc": 49.8628, "num_word_query": 23.5759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3218.6924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.749, "query_norm": 2.3359, "queue_k_norm": 1.7411, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9364, "sent_len_1": 66.8507, "sent_len_max_0": 127.5725, "sent_len_max_1": 189.0788, "stdk": 0.0471, "stdq": 0.0467, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 6700 }, { "accuracy": 32.5195, "active_queue_size": 16384.0, "cl_loss": 5.0091, "doc_norm": 1.7336, "encoder_q-embeddings": 1305.0874, "encoder_q-layer.0": 967.8143, "encoder_q-layer.1": 1028.9822, "encoder_q-layer.10": 1831.116, "encoder_q-layer.11": 3721.3584, "encoder_q-layer.2": 1085.8569, "encoder_q-layer.3": 1111.4119, "encoder_q-layer.4": 1085.7356, "encoder_q-layer.5": 1010.084, "encoder_q-layer.6": 1125.7878, "encoder_q-layer.7": 1308.4596, "encoder_q-layer.8": 1680.5577, "encoder_q-layer.9": 1500.649, "epoch": 0.04, "inbatch_neg_score": 0.7269, "inbatch_pos_score": 1.1592, "learning_rate": 3.4000000000000007e-05, "loss": 5.0091, "norm_diff": 0.6041, "norm_loss": 0.0, "num_token_doc": 66.9126, "num_token_overlap": 11.6443, "num_token_query": 31.7062, "num_token_union": 65.3267, "num_word_context": 202.4905, "num_word_doc": 49.9723, "num_word_query": 23.4313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2375.2056, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.71, "query_norm": 2.3377, "queue_k_norm": 1.7363, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7062, "sent_len_1": 66.9126, "sent_len_max_0": 127.4013, "sent_len_max_1": 188.615, "stdk": 0.047, "stdq": 0.0472, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 6800 }, { "accuracy": 33.6914, "active_queue_size": 16384.0, "cl_loss": 4.9791, "doc_norm": 1.726, "encoder_q-embeddings": 1583.9011, "encoder_q-layer.0": 1278.8903, "encoder_q-layer.1": 1172.4409, "encoder_q-layer.10": 1769.1807, "encoder_q-layer.11": 3573.4595, "encoder_q-layer.2": 1250.9261, "encoder_q-layer.3": 1195.58, "encoder_q-layer.4": 1250.8046, "encoder_q-layer.5": 1253.0317, "encoder_q-layer.6": 1434.2263, "encoder_q-layer.7": 1670.1329, "encoder_q-layer.8": 1893.597, "encoder_q-layer.9": 1694.8859, "epoch": 0.04, "inbatch_neg_score": 0.6535, "inbatch_pos_score": 1.1133, "learning_rate": 3.45e-05, "loss": 4.9791, "norm_diff": 0.5925, "norm_loss": 0.0, "num_token_doc": 66.9233, "num_token_overlap": 11.7003, "num_token_query": 31.928, "num_token_union": 65.4384, "num_word_context": 202.1102, "num_word_doc": 49.9357, "num_word_query": 23.5957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2517.1141, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6387, "query_norm": 2.3185, "queue_k_norm": 1.7303, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.928, "sent_len_1": 66.9233, "sent_len_max_0": 127.5787, "sent_len_max_1": 190.7725, "stdk": 0.0469, "stdq": 0.0484, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 6900 }, { "accuracy": 33.5938, "active_queue_size": 16384.0, "cl_loss": 5.0137, "doc_norm": 1.7349, "encoder_q-embeddings": 2926.533, "encoder_q-layer.0": 2350.7263, "encoder_q-layer.1": 2273.4983, "encoder_q-layer.10": 2018.8828, "encoder_q-layer.11": 4365.0444, "encoder_q-layer.2": 2429.6274, "encoder_q-layer.3": 2267.8643, "encoder_q-layer.4": 2142.0535, "encoder_q-layer.5": 2163.606, "encoder_q-layer.6": 2239.9329, "encoder_q-layer.7": 1579.4586, "encoder_q-layer.8": 1450.2095, "encoder_q-layer.9": 1417.7637, "epoch": 0.05, "inbatch_neg_score": 0.6454, "inbatch_pos_score": 1.084, "learning_rate": 3.5e-05, "loss": 5.0137, "norm_diff": 0.5513, "norm_loss": 0.0, "num_token_doc": 66.7933, "num_token_overlap": 11.7428, "num_token_query": 32.0599, "num_token_union": 65.4124, "num_word_context": 202.1545, "num_word_doc": 49.8592, "num_word_query": 23.7125, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3612.1233, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6304, "query_norm": 2.2863, "queue_k_norm": 1.7254, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0599, "sent_len_1": 66.7933, "sent_len_max_0": 127.6088, "sent_len_max_1": 188.65, "stdk": 0.0472, "stdq": 0.0457, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 7000 }, { "accuracy": 36.7188, "active_queue_size": 16384.0, "cl_loss": 4.9563, "doc_norm": 1.7149, "encoder_q-embeddings": 1465.5834, "encoder_q-layer.0": 1049.4712, "encoder_q-layer.1": 1009.2972, "encoder_q-layer.10": 1604.29, "encoder_q-layer.11": 3646.9114, "encoder_q-layer.2": 1031.385, "encoder_q-layer.3": 1025.5171, "encoder_q-layer.4": 982.8475, "encoder_q-layer.5": 916.1735, "encoder_q-layer.6": 967.2902, "encoder_q-layer.7": 963.9823, "encoder_q-layer.8": 1111.1989, "encoder_q-layer.9": 1130.4453, "epoch": 0.05, "inbatch_neg_score": 0.5928, "inbatch_pos_score": 1.0508, "learning_rate": 3.55e-05, "loss": 4.9563, "norm_diff": 0.6293, "norm_loss": 0.0, "num_token_doc": 66.6308, "num_token_overlap": 11.7016, "num_token_query": 31.916, "num_token_union": 65.2763, "num_word_context": 202.1514, "num_word_doc": 49.7586, "num_word_query": 23.5574, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2206.9462, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5737, "query_norm": 2.3442, "queue_k_norm": 1.7175, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.916, "sent_len_1": 66.6308, "sent_len_max_0": 127.54, "sent_len_max_1": 189.9212, "stdk": 0.0469, "stdq": 0.0463, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 7100 }, { "accuracy": 37.4023, "active_queue_size": 16384.0, "cl_loss": 4.9468, "doc_norm": 1.7161, "encoder_q-embeddings": 2562.4832, "encoder_q-layer.0": 2099.3706, "encoder_q-layer.1": 2144.7981, "encoder_q-layer.10": 1664.9099, "encoder_q-layer.11": 3393.8069, "encoder_q-layer.2": 2157.1245, "encoder_q-layer.3": 2011.7206, "encoder_q-layer.4": 1789.0649, "encoder_q-layer.5": 1477.7662, "encoder_q-layer.6": 1309.519, "encoder_q-layer.7": 1172.62, "encoder_q-layer.8": 1318.036, "encoder_q-layer.9": 1189.8828, "epoch": 0.05, "inbatch_neg_score": 0.5858, "inbatch_pos_score": 1.0635, "learning_rate": 3.6e-05, "loss": 4.9468, "norm_diff": 0.6554, "norm_loss": 0.0, "num_token_doc": 66.4655, "num_token_overlap": 11.6556, "num_token_query": 31.8545, "num_token_union": 65.182, "num_word_context": 202.2087, "num_word_doc": 49.6139, "num_word_query": 23.5233, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2993.023, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5649, "query_norm": 2.3715, "queue_k_norm": 1.7113, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8545, "sent_len_1": 66.4655, "sent_len_max_0": 127.5738, "sent_len_max_1": 188.265, "stdk": 0.0472, "stdq": 0.0472, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 7200 }, { "accuracy": 36.8164, "active_queue_size": 16384.0, "cl_loss": 4.9106, "doc_norm": 1.7051, "encoder_q-embeddings": 1178.9982, "encoder_q-layer.0": 894.8866, "encoder_q-layer.1": 920.0095, "encoder_q-layer.10": 2646.4988, "encoder_q-layer.11": 5676.4663, "encoder_q-layer.2": 1012.9563, "encoder_q-layer.3": 1012.0856, "encoder_q-layer.4": 1066.108, "encoder_q-layer.5": 1062.667, "encoder_q-layer.6": 1310.5796, "encoder_q-layer.7": 1716.113, "encoder_q-layer.8": 2112.4741, "encoder_q-layer.9": 1999.4799, "epoch": 0.05, "inbatch_neg_score": 0.6137, "inbatch_pos_score": 1.0986, "learning_rate": 3.65e-05, "loss": 4.9106, "norm_diff": 0.7938, "norm_loss": 0.0, "num_token_doc": 67.0539, "num_token_overlap": 11.6793, "num_token_query": 31.8161, "num_token_union": 65.4836, "num_word_context": 202.3901, "num_word_doc": 50.0286, "num_word_query": 23.4938, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3026.3789, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5947, "query_norm": 2.4988, "queue_k_norm": 1.7057, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8161, "sent_len_1": 67.0539, "sent_len_max_0": 127.3462, "sent_len_max_1": 190.7038, "stdk": 0.047, "stdq": 0.0482, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 7300 }, { "accuracy": 35.8398, "active_queue_size": 16384.0, "cl_loss": 4.8743, "doc_norm": 1.7006, "encoder_q-embeddings": 1579.5386, "encoder_q-layer.0": 1194.0081, "encoder_q-layer.1": 1210.1567, "encoder_q-layer.10": 1798.4111, "encoder_q-layer.11": 3549.2349, "encoder_q-layer.2": 1292.459, "encoder_q-layer.3": 1252.9567, "encoder_q-layer.4": 1237.0339, "encoder_q-layer.5": 1177.7159, "encoder_q-layer.6": 1170.7603, "encoder_q-layer.7": 1173.7576, "encoder_q-layer.8": 1289.7374, "encoder_q-layer.9": 1234.7422, "epoch": 0.05, "inbatch_neg_score": 0.6454, "inbatch_pos_score": 1.1123, "learning_rate": 3.7e-05, "loss": 4.8743, "norm_diff": 0.9052, "norm_loss": 0.0, "num_token_doc": 66.7149, "num_token_overlap": 11.6792, "num_token_query": 31.8547, "num_token_union": 65.2501, "num_word_context": 202.2795, "num_word_doc": 49.815, "num_word_query": 23.527, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2396.3261, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.623, "query_norm": 2.6058, "queue_k_norm": 1.699, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8547, "sent_len_1": 66.7149, "sent_len_max_0": 127.6688, "sent_len_max_1": 187.54, "stdk": 0.0471, "stdq": 0.0458, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 7400 }, { "accuracy": 33.2031, "active_queue_size": 16384.0, "cl_loss": 4.8535, "doc_norm": 1.6872, "encoder_q-embeddings": 1880.1088, "encoder_q-layer.0": 1461.9254, "encoder_q-layer.1": 1612.12, "encoder_q-layer.10": 1523.9818, "encoder_q-layer.11": 3099.1125, "encoder_q-layer.2": 1716.6243, "encoder_q-layer.3": 1791.5352, "encoder_q-layer.4": 1995.8104, "encoder_q-layer.5": 1612.8076, "encoder_q-layer.6": 1500.1755, "encoder_q-layer.7": 1294.5093, "encoder_q-layer.8": 1243.4469, "encoder_q-layer.9": 1163.8503, "epoch": 0.05, "inbatch_neg_score": 0.7523, "inbatch_pos_score": 1.1904, "learning_rate": 3.7500000000000003e-05, "loss": 4.8535, "norm_diff": 0.9474, "norm_loss": 0.0, "num_token_doc": 66.6464, "num_token_overlap": 11.6389, "num_token_query": 31.8477, "num_token_union": 65.2701, "num_word_context": 202.2365, "num_word_doc": 49.7543, "num_word_query": 23.5243, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2630.0585, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7271, "query_norm": 2.6346, "queue_k_norm": 1.6974, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8477, "sent_len_1": 66.6464, "sent_len_max_0": 127.6075, "sent_len_max_1": 190.4325, "stdk": 0.0465, "stdq": 0.0457, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 7500 }, { "accuracy": 37.9883, "active_queue_size": 16384.0, "cl_loss": 4.8473, "doc_norm": 1.691, "encoder_q-embeddings": 2266.1929, "encoder_q-layer.0": 1787.6101, "encoder_q-layer.1": 1886.9078, "encoder_q-layer.10": 2363.6538, "encoder_q-layer.11": 4856.4155, "encoder_q-layer.2": 1732.6951, "encoder_q-layer.3": 1728.7069, "encoder_q-layer.4": 1651.709, "encoder_q-layer.5": 1555.7574, "encoder_q-layer.6": 1551.0397, "encoder_q-layer.7": 1259.7911, "encoder_q-layer.8": 1294.6354, "encoder_q-layer.9": 1272.2728, "epoch": 0.05, "inbatch_neg_score": 0.7934, "inbatch_pos_score": 1.249, "learning_rate": 3.8e-05, "loss": 4.8473, "norm_diff": 0.9123, "norm_loss": 0.0, "num_token_doc": 66.6807, "num_token_overlap": 11.6757, "num_token_query": 32.0022, "num_token_union": 65.347, "num_word_context": 202.249, "num_word_doc": 49.7316, "num_word_query": 23.615, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3239.889, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.77, "query_norm": 2.6033, "queue_k_norm": 1.6995, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0022, "sent_len_1": 66.6807, "sent_len_max_0": 127.4613, "sent_len_max_1": 190.0513, "stdk": 0.0466, "stdq": 0.0451, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 7600 }, { "accuracy": 36.2305, "active_queue_size": 16384.0, "cl_loss": 4.8855, "doc_norm": 1.6949, "encoder_q-embeddings": 15533.5107, "encoder_q-layer.0": 11753.0205, "encoder_q-layer.1": 10318.3486, "encoder_q-layer.10": 1456.0681, "encoder_q-layer.11": 3391.8447, "encoder_q-layer.2": 10236.292, "encoder_q-layer.3": 8843.8154, "encoder_q-layer.4": 7286.7383, "encoder_q-layer.5": 4219.6694, "encoder_q-layer.6": 2709.4104, "encoder_q-layer.7": 1498.3052, "encoder_q-layer.8": 1241.6293, "encoder_q-layer.9": 1072.866, "epoch": 0.05, "inbatch_neg_score": 0.8346, "inbatch_pos_score": 1.2979, "learning_rate": 3.85e-05, "loss": 4.8855, "norm_diff": 0.7315, "norm_loss": 0.0, "num_token_doc": 66.8361, "num_token_overlap": 11.6458, "num_token_query": 31.855, "num_token_union": 65.3742, "num_word_context": 202.4358, "num_word_doc": 49.8588, "num_word_query": 23.5048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12549.8981, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.8184, "query_norm": 2.4264, "queue_k_norm": 1.7038, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.855, "sent_len_1": 66.8361, "sent_len_max_0": 127.6287, "sent_len_max_1": 189.9137, "stdk": 0.0467, "stdq": 0.0462, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 7700 }, { "accuracy": 35.2539, "active_queue_size": 16384.0, "cl_loss": 4.8824, "doc_norm": 1.7051, "encoder_q-embeddings": 1520.6139, "encoder_q-layer.0": 1187.7059, "encoder_q-layer.1": 1272.7142, "encoder_q-layer.10": 1503.8232, "encoder_q-layer.11": 3445.2632, "encoder_q-layer.2": 1408.6761, "encoder_q-layer.3": 1190.619, "encoder_q-layer.4": 996.2919, "encoder_q-layer.5": 898.921, "encoder_q-layer.6": 934.1895, "encoder_q-layer.7": 941.316, "encoder_q-layer.8": 1146.3727, "encoder_q-layer.9": 1027.4033, "epoch": 0.05, "inbatch_neg_score": 0.7628, "inbatch_pos_score": 1.2139, "learning_rate": 3.9000000000000006e-05, "loss": 4.8824, "norm_diff": 0.5061, "norm_loss": 0.0, "num_token_doc": 66.7492, "num_token_overlap": 11.6891, "num_token_query": 31.8714, "num_token_union": 65.3072, "num_word_context": 202.0387, "num_word_doc": 49.8095, "num_word_query": 23.5288, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2225.4346, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.749, "query_norm": 2.2113, "queue_k_norm": 1.7058, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8714, "sent_len_1": 66.7492, "sent_len_max_0": 127.5438, "sent_len_max_1": 188.6225, "stdk": 0.0469, "stdq": 0.0463, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 7800 }, { "accuracy": 34.4727, "active_queue_size": 16384.0, "cl_loss": 4.9134, "doc_norm": 1.7016, "encoder_q-embeddings": 1298.2028, "encoder_q-layer.0": 893.9333, "encoder_q-layer.1": 946.0887, "encoder_q-layer.10": 1904.6437, "encoder_q-layer.11": 3877.0408, "encoder_q-layer.2": 1007.377, "encoder_q-layer.3": 953.0177, "encoder_q-layer.4": 892.9269, "encoder_q-layer.5": 821.4377, "encoder_q-layer.6": 883.5958, "encoder_q-layer.7": 1046.0817, "encoder_q-layer.8": 1292.7589, "encoder_q-layer.9": 1295.8959, "epoch": 0.05, "inbatch_neg_score": 0.6139, "inbatch_pos_score": 1.0547, "learning_rate": 3.9500000000000005e-05, "loss": 4.9134, "norm_diff": 0.3757, "norm_loss": 0.0, "num_token_doc": 66.5332, "num_token_overlap": 11.656, "num_token_query": 31.8734, "num_token_union": 65.1729, "num_word_context": 201.9954, "num_word_doc": 49.6417, "num_word_query": 23.5374, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2257.3426, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.603, "query_norm": 2.0773, "queue_k_norm": 1.7037, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8734, "sent_len_1": 66.5332, "sent_len_max_0": 127.5012, "sent_len_max_1": 189.2738, "stdk": 0.0468, "stdq": 0.0447, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 7900 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.865, "doc_norm": 1.7046, "encoder_q-embeddings": 7379.2793, "encoder_q-layer.0": 6350.458, "encoder_q-layer.1": 6188.7876, "encoder_q-layer.10": 1774.6271, "encoder_q-layer.11": 4197.2651, "encoder_q-layer.2": 5661.6475, "encoder_q-layer.3": 3424.5823, "encoder_q-layer.4": 2378.4355, "encoder_q-layer.5": 1378.3629, "encoder_q-layer.6": 1140.8359, "encoder_q-layer.7": 1104.4504, "encoder_q-layer.8": 1184.6077, "encoder_q-layer.9": 1079.2819, "epoch": 0.05, "inbatch_neg_score": 0.4869, "inbatch_pos_score": 0.9448, "learning_rate": 4e-05, "loss": 4.865, "norm_diff": 0.3433, "norm_loss": 0.0, "num_token_doc": 66.7483, "num_token_overlap": 11.6533, "num_token_query": 31.8277, "num_token_union": 65.2833, "num_word_context": 202.3566, "num_word_doc": 49.7981, "num_word_query": 23.4946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6467.3414, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4773, "query_norm": 2.0479, "queue_k_norm": 1.6985, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8277, "sent_len_1": 66.7483, "sent_len_max_0": 127.5212, "sent_len_max_1": 191.0962, "stdk": 0.0469, "stdq": 0.0448, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 8000 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.8481, "doc_norm": 1.6978, "encoder_q-embeddings": 1443.1217, "encoder_q-layer.0": 1168.1232, "encoder_q-layer.1": 1176.9998, "encoder_q-layer.10": 1559.7595, "encoder_q-layer.11": 3486.2234, "encoder_q-layer.2": 1188.9432, "encoder_q-layer.3": 1108.6493, "encoder_q-layer.4": 934.9357, "encoder_q-layer.5": 867.6685, "encoder_q-layer.6": 881.2443, "encoder_q-layer.7": 926.1447, "encoder_q-layer.8": 1087.7177, "encoder_q-layer.9": 1101.5604, "epoch": 0.05, "inbatch_neg_score": 0.3949, "inbatch_pos_score": 0.8682, "learning_rate": 4.05e-05, "loss": 4.8481, "norm_diff": 0.4449, "norm_loss": 0.0, "num_token_doc": 66.8564, "num_token_overlap": 11.6064, "num_token_query": 31.7766, "num_token_union": 65.3798, "num_word_context": 202.4528, "num_word_doc": 49.906, "num_word_query": 23.4582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2158.517, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3823, "query_norm": 2.1427, "queue_k_norm": 1.6934, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7766, "sent_len_1": 66.8564, "sent_len_max_0": 127.5438, "sent_len_max_1": 190.8, "stdk": 0.047, "stdq": 0.0469, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 8100 }, { "accuracy": 37.4023, "active_queue_size": 16384.0, "cl_loss": 4.821, "doc_norm": 1.6822, "encoder_q-embeddings": 1116.8063, "encoder_q-layer.0": 806.8991, "encoder_q-layer.1": 839.9654, "encoder_q-layer.10": 2355.5325, "encoder_q-layer.11": 4932.9751, "encoder_q-layer.2": 916.5682, "encoder_q-layer.3": 900.4954, "encoder_q-layer.4": 912.9101, "encoder_q-layer.5": 895.2377, "encoder_q-layer.6": 1024.8444, "encoder_q-layer.7": 1124.8225, "encoder_q-layer.8": 1370.8455, "encoder_q-layer.9": 1322.4908, "epoch": 0.05, "inbatch_neg_score": 0.3708, "inbatch_pos_score": 0.8335, "learning_rate": 4.1e-05, "loss": 4.821, "norm_diff": 0.5031, "norm_loss": 0.0, "num_token_doc": 66.6525, "num_token_overlap": 11.6538, "num_token_query": 31.8233, "num_token_union": 65.2643, "num_word_context": 202.0522, "num_word_doc": 49.757, "num_word_query": 23.4989, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2606.8202, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3584, "query_norm": 2.1853, "queue_k_norm": 1.6858, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8233, "sent_len_1": 66.6525, "sent_len_max_0": 127.6663, "sent_len_max_1": 189.8063, "stdk": 0.0468, "stdq": 0.0447, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 8200 }, { "accuracy": 36.0352, "active_queue_size": 16384.0, "cl_loss": 4.798, "doc_norm": 1.6793, "encoder_q-embeddings": 3554.8879, "encoder_q-layer.0": 2699.5786, "encoder_q-layer.1": 2735.8926, "encoder_q-layer.10": 2621.374, "encoder_q-layer.11": 6040.4541, "encoder_q-layer.2": 3044.6743, "encoder_q-layer.3": 2888.0884, "encoder_q-layer.4": 2556.4307, "encoder_q-layer.5": 2093.478, "encoder_q-layer.6": 2045.9836, "encoder_q-layer.7": 1977.8157, "encoder_q-layer.8": 2308.8325, "encoder_q-layer.9": 2139.1985, "epoch": 0.05, "inbatch_neg_score": 0.346, "inbatch_pos_score": 0.7993, "learning_rate": 4.15e-05, "loss": 4.798, "norm_diff": 0.6381, "norm_loss": 0.0, "num_token_doc": 66.4194, "num_token_overlap": 11.6098, "num_token_query": 31.9096, "num_token_union": 65.2051, "num_word_context": 202.0932, "num_word_doc": 49.5436, "num_word_query": 23.5588, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4557.0365, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3311, "query_norm": 2.3174, "queue_k_norm": 1.6744, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9096, "sent_len_1": 66.4194, "sent_len_max_0": 127.5738, "sent_len_max_1": 189.0488, "stdk": 0.047, "stdq": 0.0454, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 8300 }, { "accuracy": 37.3047, "active_queue_size": 16384.0, "cl_loss": 4.7927, "doc_norm": 1.6721, "encoder_q-embeddings": 2921.7092, "encoder_q-layer.0": 2279.2241, "encoder_q-layer.1": 1771.3396, "encoder_q-layer.10": 2117.8625, "encoder_q-layer.11": 4648.7979, "encoder_q-layer.2": 1940.1259, "encoder_q-layer.3": 1840.6277, "encoder_q-layer.4": 1848.1104, "encoder_q-layer.5": 1411.9363, "encoder_q-layer.6": 1304.9343, "encoder_q-layer.7": 1107.5968, "encoder_q-layer.8": 1178.1279, "encoder_q-layer.9": 1132.3162, "epoch": 0.05, "inbatch_neg_score": 0.3405, "inbatch_pos_score": 0.8125, "learning_rate": 4.2e-05, "loss": 4.7927, "norm_diff": 0.6966, "norm_loss": 0.0, "num_token_doc": 66.6256, "num_token_overlap": 11.7137, "num_token_query": 32.0492, "num_token_union": 65.3643, "num_word_context": 202.1671, "num_word_doc": 49.7586, "num_word_query": 23.7027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3303.7113, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3259, "query_norm": 2.3686, "queue_k_norm": 1.6645, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0492, "sent_len_1": 66.6256, "sent_len_max_0": 127.4887, "sent_len_max_1": 186.6937, "stdk": 0.047, "stdq": 0.0439, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 8400 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.759, "doc_norm": 1.6577, "encoder_q-embeddings": 1103.7625, "encoder_q-layer.0": 840.9193, "encoder_q-layer.1": 826.4485, "encoder_q-layer.10": 1444.6862, "encoder_q-layer.11": 3476.3337, "encoder_q-layer.2": 955.9866, "encoder_q-layer.3": 923.2128, "encoder_q-layer.4": 917.3812, "encoder_q-layer.5": 761.1045, "encoder_q-layer.6": 799.9033, "encoder_q-layer.7": 878.2432, "encoder_q-layer.8": 1107.3087, "encoder_q-layer.9": 1029.8896, "epoch": 0.06, "inbatch_neg_score": 0.4067, "inbatch_pos_score": 0.896, "learning_rate": 4.25e-05, "loss": 4.759, "norm_diff": 0.6426, "norm_loss": 0.0, "num_token_doc": 66.8151, "num_token_overlap": 11.6852, "num_token_query": 31.876, "num_token_union": 65.3847, "num_word_context": 202.2346, "num_word_doc": 49.865, "num_word_query": 23.5381, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1975.309, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3923, "query_norm": 2.3002, "queue_k_norm": 1.6576, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.876, "sent_len_1": 66.8151, "sent_len_max_0": 127.48, "sent_len_max_1": 189.725, "stdk": 0.0468, "stdq": 0.046, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 8500 }, { "accuracy": 37.0117, "active_queue_size": 16384.0, "cl_loss": 4.7823, "doc_norm": 1.645, "encoder_q-embeddings": 2202.6453, "encoder_q-layer.0": 1775.2826, "encoder_q-layer.1": 1820.6472, "encoder_q-layer.10": 1317.0587, "encoder_q-layer.11": 3025.5247, "encoder_q-layer.2": 1935.0754, "encoder_q-layer.3": 1700.1371, "encoder_q-layer.4": 1496.6281, "encoder_q-layer.5": 1226.0441, "encoder_q-layer.6": 1207.6631, "encoder_q-layer.7": 1197.1212, "encoder_q-layer.8": 1343.4927, "encoder_q-layer.9": 1096.6774, "epoch": 0.06, "inbatch_neg_score": 0.4679, "inbatch_pos_score": 0.9131, "learning_rate": 4.3e-05, "loss": 4.7823, "norm_diff": 0.6018, "norm_loss": 0.0, "num_token_doc": 66.6335, "num_token_overlap": 11.6506, "num_token_query": 31.925, "num_token_union": 65.2415, "num_word_context": 201.9455, "num_word_doc": 49.6515, "num_word_query": 23.5479, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2596.5073, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4509, "query_norm": 2.2467, "queue_k_norm": 1.6489, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.925, "sent_len_1": 66.6335, "sent_len_max_0": 127.5088, "sent_len_max_1": 191.825, "stdk": 0.0466, "stdq": 0.0445, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 8600 }, { "accuracy": 37.9883, "active_queue_size": 16384.0, "cl_loss": 4.7558, "doc_norm": 1.6354, "encoder_q-embeddings": 902.6927, "encoder_q-layer.0": 646.0697, "encoder_q-layer.1": 677.8077, "encoder_q-layer.10": 1500.9315, "encoder_q-layer.11": 3241.5212, "encoder_q-layer.2": 750.8217, "encoder_q-layer.3": 761.4393, "encoder_q-layer.4": 815.8774, "encoder_q-layer.5": 813.28, "encoder_q-layer.6": 929.4854, "encoder_q-layer.7": 980.5338, "encoder_q-layer.8": 1195.6565, "encoder_q-layer.9": 1079.1367, "epoch": 0.06, "inbatch_neg_score": 0.4846, "inbatch_pos_score": 0.9639, "learning_rate": 4.35e-05, "loss": 4.7558, "norm_diff": 0.5245, "norm_loss": 0.0, "num_token_doc": 66.7517, "num_token_overlap": 11.7141, "num_token_query": 31.8816, "num_token_union": 65.3082, "num_word_context": 202.2095, "num_word_doc": 49.7956, "num_word_query": 23.5595, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1891.1533, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4709, "query_norm": 2.1599, "queue_k_norm": 1.6393, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8816, "sent_len_1": 66.7517, "sent_len_max_0": 127.4675, "sent_len_max_1": 190.8525, "stdk": 0.0465, "stdq": 0.0453, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 8700 }, { "accuracy": 35.1562, "active_queue_size": 16384.0, "cl_loss": 4.7365, "doc_norm": 1.6348, "encoder_q-embeddings": 1862.6018, "encoder_q-layer.0": 1552.0903, "encoder_q-layer.1": 1642.4873, "encoder_q-layer.10": 1402.7274, "encoder_q-layer.11": 3116.1487, "encoder_q-layer.2": 1852.7856, "encoder_q-layer.3": 1737.2465, "encoder_q-layer.4": 1542.855, "encoder_q-layer.5": 1350.6398, "encoder_q-layer.6": 1306.2183, "encoder_q-layer.7": 1132.2998, "encoder_q-layer.8": 1119.6261, "encoder_q-layer.9": 968.4334, "epoch": 0.06, "inbatch_neg_score": 0.4906, "inbatch_pos_score": 0.9512, "learning_rate": 4.4000000000000006e-05, "loss": 4.7365, "norm_diff": 0.4968, "norm_loss": 0.0, "num_token_doc": 66.8663, "num_token_overlap": 11.7053, "num_token_query": 31.9661, "num_token_union": 65.4604, "num_word_context": 202.768, "num_word_doc": 49.9157, "num_word_query": 23.6267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2480.5078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4763, "query_norm": 2.1315, "queue_k_norm": 1.6328, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9661, "sent_len_1": 66.8663, "sent_len_max_0": 127.6462, "sent_len_max_1": 188.3688, "stdk": 0.0468, "stdq": 0.0444, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 8800 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.7496, "doc_norm": 1.6256, "encoder_q-embeddings": 1001.2117, "encoder_q-layer.0": 765.0566, "encoder_q-layer.1": 757.4282, "encoder_q-layer.10": 1290.3704, "encoder_q-layer.11": 3091.7078, "encoder_q-layer.2": 801.116, "encoder_q-layer.3": 826.2661, "encoder_q-layer.4": 814.4343, "encoder_q-layer.5": 750.1152, "encoder_q-layer.6": 821.054, "encoder_q-layer.7": 890.2149, "encoder_q-layer.8": 1111.9901, "encoder_q-layer.9": 1001.3934, "epoch": 0.06, "inbatch_neg_score": 0.5035, "inbatch_pos_score": 1.0029, "learning_rate": 4.4500000000000004e-05, "loss": 4.7496, "norm_diff": 0.5141, "norm_loss": 0.0, "num_token_doc": 66.7623, "num_token_overlap": 11.6154, "num_token_query": 31.8019, "num_token_union": 65.3707, "num_word_context": 202.3987, "num_word_doc": 49.8642, "num_word_query": 23.4709, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1822.8075, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4922, "query_norm": 2.1397, "queue_k_norm": 1.6238, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8019, "sent_len_1": 66.7623, "sent_len_max_0": 127.55, "sent_len_max_1": 188.8475, "stdk": 0.0469, "stdq": 0.0459, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 8900 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.7378, "doc_norm": 1.6215, "encoder_q-embeddings": 947.1725, "encoder_q-layer.0": 697.7013, "encoder_q-layer.1": 706.124, "encoder_q-layer.10": 1241.6228, "encoder_q-layer.11": 2792.7529, "encoder_q-layer.2": 784.1431, "encoder_q-layer.3": 810.6326, "encoder_q-layer.4": 848.7004, "encoder_q-layer.5": 861.1785, "encoder_q-layer.6": 958.7896, "encoder_q-layer.7": 1162.0188, "encoder_q-layer.8": 1261.6682, "encoder_q-layer.9": 1097.174, "epoch": 0.06, "inbatch_neg_score": 0.5134, "inbatch_pos_score": 1.0068, "learning_rate": 4.5e-05, "loss": 4.7378, "norm_diff": 0.5625, "norm_loss": 0.0, "num_token_doc": 66.5929, "num_token_overlap": 11.7661, "num_token_query": 32.194, "num_token_union": 65.3327, "num_word_context": 201.8614, "num_word_doc": 49.6647, "num_word_query": 23.7865, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1803.8015, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5, "query_norm": 2.184, "queue_k_norm": 1.6167, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.194, "sent_len_1": 66.5929, "sent_len_max_0": 127.4663, "sent_len_max_1": 190.4975, "stdk": 0.0468, "stdq": 0.0454, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 9000 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.7394, "doc_norm": 1.6168, "encoder_q-embeddings": 1720.0024, "encoder_q-layer.0": 1388.4724, "encoder_q-layer.1": 1305.5029, "encoder_q-layer.10": 1294.4702, "encoder_q-layer.11": 2888.1716, "encoder_q-layer.2": 1465.1685, "encoder_q-layer.3": 1483.5192, "encoder_q-layer.4": 1383.6155, "encoder_q-layer.5": 1392.7532, "encoder_q-layer.6": 1544.296, "encoder_q-layer.7": 1256.3499, "encoder_q-layer.8": 1027.2407, "encoder_q-layer.9": 870.1609, "epoch": 0.06, "inbatch_neg_score": 0.5136, "inbatch_pos_score": 0.9922, "learning_rate": 4.55e-05, "loss": 4.7394, "norm_diff": 0.5591, "norm_loss": 0.0, "num_token_doc": 66.9822, "num_token_overlap": 11.6832, "num_token_query": 31.9656, "num_token_union": 65.53, "num_word_context": 202.5849, "num_word_doc": 49.9489, "num_word_query": 23.6091, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2330.1405, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5, "query_norm": 2.1758, "queue_k_norm": 1.6143, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9656, "sent_len_1": 66.9822, "sent_len_max_0": 127.5463, "sent_len_max_1": 190.61, "stdk": 0.0467, "stdq": 0.0455, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 9100 }, { "accuracy": 37.207, "active_queue_size": 16384.0, "cl_loss": 4.7268, "doc_norm": 1.6172, "encoder_q-embeddings": 7404.6182, "encoder_q-layer.0": 6354.0371, "encoder_q-layer.1": 5349.1333, "encoder_q-layer.10": 1294.5142, "encoder_q-layer.11": 2964.791, "encoder_q-layer.2": 6615.7896, "encoder_q-layer.3": 5884.6694, "encoder_q-layer.4": 4467.1133, "encoder_q-layer.5": 3895.9019, "encoder_q-layer.6": 3145.6733, "encoder_q-layer.7": 2394.7168, "encoder_q-layer.8": 1247.7535, "encoder_q-layer.9": 976.8034, "epoch": 0.06, "inbatch_neg_score": 0.5166, "inbatch_pos_score": 0.979, "learning_rate": 4.600000000000001e-05, "loss": 4.7268, "norm_diff": 0.5135, "norm_loss": 0.0, "num_token_doc": 66.5741, "num_token_overlap": 11.7072, "num_token_query": 31.9584, "num_token_union": 65.2475, "num_word_context": 202.153, "num_word_doc": 49.741, "num_word_query": 23.5967, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7153.824, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5054, "query_norm": 2.1306, "queue_k_norm": 1.611, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9584, "sent_len_1": 66.5741, "sent_len_max_0": 127.515, "sent_len_max_1": 187.605, "stdk": 0.0469, "stdq": 0.0441, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 9200 }, { "accuracy": 37.5, "active_queue_size": 16384.0, "cl_loss": 4.724, "doc_norm": 1.6174, "encoder_q-embeddings": 24139.3516, "encoder_q-layer.0": 21314.5898, "encoder_q-layer.1": 11147.4492, "encoder_q-layer.10": 1244.6208, "encoder_q-layer.11": 2776.4761, "encoder_q-layer.2": 7752.5552, "encoder_q-layer.3": 4838.981, "encoder_q-layer.4": 1229.6594, "encoder_q-layer.5": 1175.0612, "encoder_q-layer.6": 1197.7781, "encoder_q-layer.7": 1140.4227, "encoder_q-layer.8": 1252.2946, "encoder_q-layer.9": 1157.7875, "epoch": 0.06, "inbatch_neg_score": 0.5237, "inbatch_pos_score": 0.9907, "learning_rate": 4.6500000000000005e-05, "loss": 4.724, "norm_diff": 0.5086, "norm_loss": 0.0, "num_token_doc": 66.53, "num_token_overlap": 11.6828, "num_token_query": 31.9341, "num_token_union": 65.233, "num_word_context": 201.9154, "num_word_doc": 49.6622, "num_word_query": 23.5921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17608.8102, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.5117, "query_norm": 2.126, "queue_k_norm": 1.6072, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9341, "sent_len_1": 66.53, "sent_len_max_0": 127.6575, "sent_len_max_1": 190.1175, "stdk": 0.0469, "stdq": 0.046, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 9300 }, { "accuracy": 35.4492, "active_queue_size": 16384.0, "cl_loss": 4.6859, "doc_norm": 1.604, "encoder_q-embeddings": 1123.4955, "encoder_q-layer.0": 817.9218, "encoder_q-layer.1": 825.7344, "encoder_q-layer.10": 1242.3538, "encoder_q-layer.11": 2867.4233, "encoder_q-layer.2": 886.1926, "encoder_q-layer.3": 865.0591, "encoder_q-layer.4": 771.5801, "encoder_q-layer.5": 707.0164, "encoder_q-layer.6": 771.1909, "encoder_q-layer.7": 859.8264, "encoder_q-layer.8": 1006.0908, "encoder_q-layer.9": 1066.2948, "epoch": 0.06, "inbatch_neg_score": 0.4763, "inbatch_pos_score": 0.9316, "learning_rate": 4.7e-05, "loss": 4.6859, "norm_diff": 0.5283, "norm_loss": 0.0, "num_token_doc": 66.7278, "num_token_overlap": 11.6532, "num_token_query": 31.8046, "num_token_union": 65.2464, "num_word_context": 201.8781, "num_word_doc": 49.7618, "num_word_query": 23.4903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1826.9715, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4609, "query_norm": 2.1324, "queue_k_norm": 1.6027, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8046, "sent_len_1": 66.7278, "sent_len_max_0": 127.4912, "sent_len_max_1": 190.0462, "stdk": 0.0466, "stdq": 0.0456, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 9400 }, { "accuracy": 37.8906, "active_queue_size": 16384.0, "cl_loss": 4.661, "doc_norm": 1.6018, "encoder_q-embeddings": 901.2653, "encoder_q-layer.0": 594.9335, "encoder_q-layer.1": 603.9521, "encoder_q-layer.10": 1207.2528, "encoder_q-layer.11": 2945.6448, "encoder_q-layer.2": 660.5385, "encoder_q-layer.3": 705.4806, "encoder_q-layer.4": 736.4579, "encoder_q-layer.5": 717.0191, "encoder_q-layer.6": 848.6494, "encoder_q-layer.7": 905.02, "encoder_q-layer.8": 970.7391, "encoder_q-layer.9": 917.5721, "epoch": 0.06, "inbatch_neg_score": 0.4618, "inbatch_pos_score": 0.9365, "learning_rate": 4.75e-05, "loss": 4.661, "norm_diff": 0.5047, "norm_loss": 0.0, "num_token_doc": 66.7117, "num_token_overlap": 11.6832, "num_token_query": 31.8437, "num_token_union": 65.2299, "num_word_context": 201.9162, "num_word_doc": 49.7718, "num_word_query": 23.498, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1700.5324, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4502, "query_norm": 2.1065, "queue_k_norm": 1.6002, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8437, "sent_len_1": 66.7117, "sent_len_max_0": 127.43, "sent_len_max_1": 189.9975, "stdk": 0.0467, "stdq": 0.0456, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 9500 }, { "accuracy": 39.9414, "active_queue_size": 16384.0, "cl_loss": 4.6441, "doc_norm": 1.5942, "encoder_q-embeddings": 896.3704, "encoder_q-layer.0": 572.7906, "encoder_q-layer.1": 621.4945, "encoder_q-layer.10": 1219.415, "encoder_q-layer.11": 3232.4236, "encoder_q-layer.2": 707.1094, "encoder_q-layer.3": 758.8379, "encoder_q-layer.4": 796.1038, "encoder_q-layer.5": 801.3603, "encoder_q-layer.6": 917.5024, "encoder_q-layer.7": 914.3898, "encoder_q-layer.8": 1104.3827, "encoder_q-layer.9": 979.178, "epoch": 0.06, "inbatch_neg_score": 0.4647, "inbatch_pos_score": 0.9565, "learning_rate": 4.8e-05, "loss": 4.6441, "norm_diff": 0.49, "norm_loss": 0.0, "num_token_doc": 66.6005, "num_token_overlap": 11.6833, "num_token_query": 31.9711, "num_token_union": 65.3086, "num_word_context": 202.1221, "num_word_doc": 49.6843, "num_word_query": 23.6041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1826.3484, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4517, "query_norm": 2.0842, "queue_k_norm": 1.5977, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9711, "sent_len_1": 66.6005, "sent_len_max_0": 127.4475, "sent_len_max_1": 188.7175, "stdk": 0.0466, "stdq": 0.0443, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 9600 }, { "accuracy": 38.5742, "active_queue_size": 16384.0, "cl_loss": 4.6377, "doc_norm": 1.5915, "encoder_q-embeddings": 2280.3762, "encoder_q-layer.0": 1647.7534, "encoder_q-layer.1": 1740.5494, "encoder_q-layer.10": 1123.6849, "encoder_q-layer.11": 2691.4229, "encoder_q-layer.2": 2004.4375, "encoder_q-layer.3": 1955.5959, "encoder_q-layer.4": 1486.9751, "encoder_q-layer.5": 1246.1558, "encoder_q-layer.6": 1069.2991, "encoder_q-layer.7": 973.6614, "encoder_q-layer.8": 1031.5033, "encoder_q-layer.9": 906.6766, "epoch": 0.06, "inbatch_neg_score": 0.4576, "inbatch_pos_score": 0.9497, "learning_rate": 4.85e-05, "loss": 4.6377, "norm_diff": 0.4856, "norm_loss": 0.0, "num_token_doc": 66.7943, "num_token_overlap": 11.6586, "num_token_query": 31.9579, "num_token_union": 65.3875, "num_word_context": 202.1732, "num_word_doc": 49.8527, "num_word_query": 23.6005, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2533.3054, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4453, "query_norm": 2.0771, "queue_k_norm": 1.5916, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9579, "sent_len_1": 66.7943, "sent_len_max_0": 127.35, "sent_len_max_1": 189.5362, "stdk": 0.0467, "stdq": 0.0438, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 9700 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.641, "doc_norm": 1.5868, "encoder_q-embeddings": 831.1284, "encoder_q-layer.0": 554.5148, "encoder_q-layer.1": 595.8465, "encoder_q-layer.10": 1367.2382, "encoder_q-layer.11": 3329.5444, "encoder_q-layer.2": 670.6044, "encoder_q-layer.3": 726.4731, "encoder_q-layer.4": 762.9484, "encoder_q-layer.5": 815.646, "encoder_q-layer.6": 910.3972, "encoder_q-layer.7": 961.6286, "encoder_q-layer.8": 1029.0056, "encoder_q-layer.9": 957.899, "epoch": 0.06, "inbatch_neg_score": 0.4904, "inbatch_pos_score": 0.9624, "learning_rate": 4.9e-05, "loss": 4.641, "norm_diff": 0.4549, "norm_loss": 0.0, "num_token_doc": 67.0356, "num_token_overlap": 11.6827, "num_token_query": 31.7965, "num_token_union": 65.4403, "num_word_context": 202.4703, "num_word_doc": 49.9786, "num_word_query": 23.4735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1844.3885, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4795, "query_norm": 2.0417, "queue_k_norm": 1.5852, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7965, "sent_len_1": 67.0356, "sent_len_max_0": 127.54, "sent_len_max_1": 189.4575, "stdk": 0.0467, "stdq": 0.0426, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 9800 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.6056, "doc_norm": 1.5854, "encoder_q-embeddings": 829.7751, "encoder_q-layer.0": 560.7736, "encoder_q-layer.1": 612.0391, "encoder_q-layer.10": 1064.8943, "encoder_q-layer.11": 2799.4661, "encoder_q-layer.2": 662.0797, "encoder_q-layer.3": 705.828, "encoder_q-layer.4": 731.8336, "encoder_q-layer.5": 735.2805, "encoder_q-layer.6": 863.8995, "encoder_q-layer.7": 810.5585, "encoder_q-layer.8": 912.1046, "encoder_q-layer.9": 788.29, "epoch": 0.06, "inbatch_neg_score": 0.4805, "inbatch_pos_score": 0.9849, "learning_rate": 4.9500000000000004e-05, "loss": 4.6056, "norm_diff": 0.3858, "norm_loss": 0.0, "num_token_doc": 66.7954, "num_token_overlap": 11.6559, "num_token_query": 31.93, "num_token_union": 65.4373, "num_word_context": 202.4812, "num_word_doc": 49.8724, "num_word_query": 23.6017, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1594.8197, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4707, "query_norm": 1.9712, "queue_k_norm": 1.5795, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.93, "sent_len_1": 66.7954, "sent_len_max_0": 127.3, "sent_len_max_1": 189.79, "stdk": 0.0469, "stdq": 0.0431, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 9900 }, { "accuracy": 38.1836, "active_queue_size": 16384.0, "cl_loss": 4.627, "doc_norm": 1.5733, "encoder_q-embeddings": 1326.9031, "encoder_q-layer.0": 962.3494, "encoder_q-layer.1": 1017.9773, "encoder_q-layer.10": 987.8453, "encoder_q-layer.11": 2638.9553, "encoder_q-layer.2": 1138.179, "encoder_q-layer.3": 1194.573, "encoder_q-layer.4": 1253.6871, "encoder_q-layer.5": 1228.0786, "encoder_q-layer.6": 1307.0101, "encoder_q-layer.7": 1015.3419, "encoder_q-layer.8": 964.849, "encoder_q-layer.9": 832.835, "epoch": 0.07, "inbatch_neg_score": 0.468, "inbatch_pos_score": 0.9634, "learning_rate": 5e-05, "loss": 4.627, "norm_diff": 0.2985, "norm_loss": 0.0, "num_token_doc": 66.8058, "num_token_overlap": 11.6669, "num_token_query": 31.8889, "num_token_union": 65.3371, "num_word_context": 202.2333, "num_word_doc": 49.869, "num_word_query": 23.5525, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1925.9752, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4583, "query_norm": 1.8718, "queue_k_norm": 1.573, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8889, "sent_len_1": 66.8058, "sent_len_max_0": 127.6338, "sent_len_max_1": 189.5225, "stdk": 0.0465, "stdq": 0.0433, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 10000 }, { "dev_runtime": 44.8883, "dev_samples_per_second": 1.426, "dev_steps_per_second": 0.022, "epoch": 0.07, "step": 10000, "test_accuracy": 90.966796875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5173982381820679, "test_doc_norm": 1.4974920749664307, "test_inbatch_neg_score": 0.7705318331718445, "test_inbatch_pos_score": 1.4910674095153809, "test_loss": 0.5173982381820679, "test_loss_align": 2.660085439682007, "test_loss_unif": 3.585453510284424, "test_loss_unif_q@queue": 3.5854530334472656, "test_norm_diff": 0.3414408564567566, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.4532324969768524, "test_query_norm": 1.8389328718185425, "test_queue_k_norm": 1.572371482849121, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03819933533668518, "test_stdq": 0.037295740097761154, "test_stdqueue_k": 0.04663398861885071, "test_stdqueue_q": 0.0 }, { "dev_runtime": 44.8883, "dev_samples_per_second": 1.426, "dev_steps_per_second": 0.022, "epoch": 0.07, "eval_beir-arguana_ndcg@10": 0.25734, "eval_beir-arguana_recall@10": 0.45377, "eval_beir-arguana_recall@100": 0.80085, "eval_beir-arguana_recall@20": 0.59033, "eval_beir-avg_ndcg@10": 0.2280921666666667, "eval_beir-avg_recall@10": 0.2885918333333334, "eval_beir-avg_recall@100": 0.48162900000000003, "eval_beir-avg_recall@20": 0.3411066666666666, "eval_beir-cqadupstack_ndcg@10": 0.12453166666666667, "eval_beir-cqadupstack_recall@10": 0.1823783333333333, "eval_beir-cqadupstack_recall@100": 0.37128000000000005, "eval_beir-cqadupstack_recall@20": 0.22920666666666664, "eval_beir-fiqa_ndcg@10": 0.12404, "eval_beir-fiqa_recall@10": 0.16834, "eval_beir-fiqa_recall@100": 0.36236, "eval_beir-fiqa_recall@20": 0.21189, "eval_beir-nfcorpus_ndcg@10": 0.22016, "eval_beir-nfcorpus_recall@10": 0.09755, "eval_beir-nfcorpus_recall@100": 0.22347, "eval_beir-nfcorpus_recall@20": 0.12954, "eval_beir-nq_ndcg@10": 0.16042, "eval_beir-nq_recall@10": 0.26629, "eval_beir-nq_recall@100": 0.56424, "eval_beir-nq_recall@20": 0.35173, "eval_beir-quora_ndcg@10": 0.42581, "eval_beir-quora_recall@10": 0.55062, "eval_beir-quora_recall@100": 0.78972, "eval_beir-quora_recall@20": 0.62871, "eval_beir-scidocs_ndcg@10": 0.10009, "eval_beir-scidocs_recall@10": 0.10758, "eval_beir-scidocs_recall@100": 0.27995, "eval_beir-scidocs_recall@20": 0.15067, "eval_beir-scifact_ndcg@10": 0.5104, "eval_beir-scifact_recall@10": 0.64739, "eval_beir-scifact_recall@100": 0.84367, "eval_beir-scifact_recall@20": 0.715, "eval_beir-trec-covid_ndcg@10": 0.35813, "eval_beir-trec-covid_recall@10": 0.412, "eval_beir-trec-covid_recall@100": 0.2784, "eval_beir-trec-covid_recall@20": 0.368, "eval_beir-webis-touche2020_ndcg@10": 0.0, "eval_beir-webis-touche2020_recall@10": 0.0, "eval_beir-webis-touche2020_recall@100": 0.30235, "eval_beir-webis-touche2020_recall@20": 0.03599, "eval_senteval-avg_sts": 0.6839743048267974, "eval_senteval-sickr_spearman": 0.6390017389201299, "eval_senteval-stsb_spearman": 0.728946870733465, "step": 10000, "test_accuracy": 90.966796875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.5173982381820679, "test_doc_norm": 1.4974920749664307, "test_inbatch_neg_score": 0.7705318331718445, "test_inbatch_pos_score": 1.4910674095153809, "test_loss": 0.5173982381820679, "test_loss_align": 2.660085439682007, "test_loss_unif": 3.585453510284424, "test_loss_unif_q@queue": 3.5854530334472656, "test_norm_diff": 0.3414408564567566, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.4532324969768524, "test_query_norm": 1.8389328718185425, "test_queue_k_norm": 1.572371482849121, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03819933533668518, "test_stdq": 0.037295740097761154, "test_stdqueue_k": 0.04663398861885071, "test_stdqueue_q": 0.0 }, { "accuracy": 37.5, "active_queue_size": 16384.0, "cl_loss": 4.6255, "doc_norm": 1.5664, "encoder_q-embeddings": 932.4318, "encoder_q-layer.0": 650.2582, "encoder_q-layer.1": 697.3156, "encoder_q-layer.10": 1328.6833, "encoder_q-layer.11": 3476.4292, "encoder_q-layer.2": 763.3229, "encoder_q-layer.3": 883.9921, "encoder_q-layer.4": 962.329, "encoder_q-layer.5": 962.8787, "encoder_q-layer.6": 946.6631, "encoder_q-layer.7": 912.7146, "encoder_q-layer.8": 981.4044, "encoder_q-layer.9": 870.7405, "epoch": 0.07, "inbatch_neg_score": 0.4801, "inbatch_pos_score": 0.96, "learning_rate": 4.994444444444445e-05, "loss": 4.6255, "norm_diff": 0.222, "norm_loss": 0.0, "num_token_doc": 66.9555, "num_token_overlap": 11.6661, "num_token_query": 31.8953, "num_token_union": 65.4927, "num_word_context": 202.3216, "num_word_doc": 49.9288, "num_word_query": 23.5536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1936.0714, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4712, "query_norm": 1.7884, "queue_k_norm": 1.5647, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8953, "sent_len_1": 66.9555, "sent_len_max_0": 127.5012, "sent_len_max_1": 189.8225, "stdk": 0.0466, "stdq": 0.0423, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 10100 }, { "accuracy": 39.7461, "active_queue_size": 16384.0, "cl_loss": 4.6, "doc_norm": 1.5643, "encoder_q-embeddings": 1273.7734, "encoder_q-layer.0": 879.8844, "encoder_q-layer.1": 957.1116, "encoder_q-layer.10": 1072.3462, "encoder_q-layer.11": 2954.2605, "encoder_q-layer.2": 1059.1969, "encoder_q-layer.3": 1175.2777, "encoder_q-layer.4": 1175.7966, "encoder_q-layer.5": 1157.3733, "encoder_q-layer.6": 1194.4159, "encoder_q-layer.7": 1114.5223, "encoder_q-layer.8": 1125.7795, "encoder_q-layer.9": 929.3165, "epoch": 0.07, "inbatch_neg_score": 0.4286, "inbatch_pos_score": 0.918, "learning_rate": 4.9888888888888894e-05, "loss": 4.6, "norm_diff": 0.1731, "norm_loss": 0.0, "num_token_doc": 66.7565, "num_token_overlap": 11.7024, "num_token_query": 32.0268, "num_token_union": 65.3845, "num_word_context": 202.1877, "num_word_doc": 49.8322, "num_word_query": 23.665, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2046.5634, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4231, "query_norm": 1.7374, "queue_k_norm": 1.5551, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0268, "sent_len_1": 66.7565, "sent_len_max_0": 127.6762, "sent_len_max_1": 190.52, "stdk": 0.0466, "stdq": 0.0429, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 10200 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.6576, "doc_norm": 1.5467, "encoder_q-embeddings": 976.0228, "encoder_q-layer.0": 735.6951, "encoder_q-layer.1": 799.8289, "encoder_q-layer.10": 1002.8515, "encoder_q-layer.11": 3531.8843, "encoder_q-layer.2": 933.0902, "encoder_q-layer.3": 995.6217, "encoder_q-layer.4": 1060.3333, "encoder_q-layer.5": 1035.4149, "encoder_q-layer.6": 1141.2589, "encoder_q-layer.7": 1151.6115, "encoder_q-layer.8": 1327.3922, "encoder_q-layer.9": 1017.9967, "epoch": 0.07, "inbatch_neg_score": 0.4178, "inbatch_pos_score": 0.9043, "learning_rate": 4.9833333333333336e-05, "loss": 4.6576, "norm_diff": 0.1188, "norm_loss": 0.0, "num_token_doc": 66.8125, "num_token_overlap": 11.6494, "num_token_query": 31.7883, "num_token_union": 65.3482, "num_word_context": 201.8225, "num_word_doc": 49.8675, "num_word_query": 23.4667, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2091.373, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4097, "query_norm": 1.6655, "queue_k_norm": 1.5505, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7883, "sent_len_1": 66.8125, "sent_len_max_0": 127.4875, "sent_len_max_1": 188.89, "stdk": 0.0462, "stdq": 0.0419, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 10300 }, { "accuracy": 39.1602, "active_queue_size": 16384.0, "cl_loss": 4.6669, "doc_norm": 1.5429, "encoder_q-embeddings": 1708.3961, "encoder_q-layer.0": 1265.8396, "encoder_q-layer.1": 1313.8224, "encoder_q-layer.10": 2029.7246, "encoder_q-layer.11": 5968.0776, "encoder_q-layer.2": 1512.275, "encoder_q-layer.3": 1564.7058, "encoder_q-layer.4": 1578.0349, "encoder_q-layer.5": 1634.338, "encoder_q-layer.6": 1945.2097, "encoder_q-layer.7": 1732.1373, "encoder_q-layer.8": 1865.2893, "encoder_q-layer.9": 1692.6929, "epoch": 0.07, "inbatch_neg_score": 0.4107, "inbatch_pos_score": 0.896, "learning_rate": 4.977777777777778e-05, "loss": 4.6669, "norm_diff": 0.1275, "norm_loss": 0.0, "num_token_doc": 66.7363, "num_token_overlap": 11.6739, "num_token_query": 31.9202, "num_token_union": 65.3129, "num_word_context": 202.0896, "num_word_doc": 49.7574, "num_word_query": 23.5619, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3313.1958, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4041, "query_norm": 1.6704, "queue_k_norm": 1.5427, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9202, "sent_len_1": 66.7363, "sent_len_max_0": 127.6275, "sent_len_max_1": 189.945, "stdk": 0.0463, "stdq": 0.042, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 10400 }, { "accuracy": 39.9414, "active_queue_size": 16384.0, "cl_loss": 4.6152, "doc_norm": 1.5389, "encoder_q-embeddings": 10749.0352, "encoder_q-layer.0": 7947.6157, "encoder_q-layer.1": 7758.3838, "encoder_q-layer.10": 1896.7428, "encoder_q-layer.11": 5613.9746, "encoder_q-layer.2": 7502.4517, "encoder_q-layer.3": 7662.877, "encoder_q-layer.4": 7446.437, "encoder_q-layer.5": 6367.0288, "encoder_q-layer.6": 7858.728, "encoder_q-layer.7": 7031.1313, "encoder_q-layer.8": 5257.9844, "encoder_q-layer.9": 2218.085, "epoch": 0.07, "inbatch_neg_score": 0.44, "inbatch_pos_score": 0.9355, "learning_rate": 4.972222222222223e-05, "loss": 4.6152, "norm_diff": 0.1656, "norm_loss": 0.0, "num_token_doc": 66.7608, "num_token_overlap": 11.6327, "num_token_query": 31.7425, "num_token_union": 65.2957, "num_word_context": 202.3621, "num_word_doc": 49.8498, "num_word_query": 23.4376, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10787.0084, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4353, "query_norm": 1.7045, "queue_k_norm": 1.5378, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7425, "sent_len_1": 66.7608, "sent_len_max_0": 127.5962, "sent_len_max_1": 189.3288, "stdk": 0.0464, "stdq": 0.042, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 10500 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.6168, "doc_norm": 1.5326, "encoder_q-embeddings": 1793.8413, "encoder_q-layer.0": 1291.3472, "encoder_q-layer.1": 1223.5239, "encoder_q-layer.10": 1956.6646, "encoder_q-layer.11": 5856.438, "encoder_q-layer.2": 1383.87, "encoder_q-layer.3": 1467.5186, "encoder_q-layer.4": 1476.7557, "encoder_q-layer.5": 1425.8046, "encoder_q-layer.6": 1589.949, "encoder_q-layer.7": 1580.3228, "encoder_q-layer.8": 1791.5095, "encoder_q-layer.9": 1494.4769, "epoch": 0.07, "inbatch_neg_score": 0.4136, "inbatch_pos_score": 0.8936, "learning_rate": 4.966666666666667e-05, "loss": 4.6168, "norm_diff": 0.1254, "norm_loss": 0.0, "num_token_doc": 66.7599, "num_token_overlap": 11.6624, "num_token_query": 31.9194, "num_token_union": 65.383, "num_word_context": 202.6433, "num_word_doc": 49.8502, "num_word_query": 23.555, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3224.9871, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4082, "query_norm": 1.6579, "queue_k_norm": 1.5322, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9194, "sent_len_1": 66.7599, "sent_len_max_0": 127.5537, "sent_len_max_1": 188.7575, "stdk": 0.0465, "stdq": 0.041, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 10600 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.6035, "doc_norm": 1.5281, "encoder_q-embeddings": 2133.6665, "encoder_q-layer.0": 1530.7231, "encoder_q-layer.1": 1608.3341, "encoder_q-layer.10": 2067.3228, "encoder_q-layer.11": 6536.5142, "encoder_q-layer.2": 1842.4158, "encoder_q-layer.3": 1971.4254, "encoder_q-layer.4": 2152.002, "encoder_q-layer.5": 2199.0149, "encoder_q-layer.6": 2211.9104, "encoder_q-layer.7": 2062.8325, "encoder_q-layer.8": 2221.1519, "encoder_q-layer.9": 1743.4182, "epoch": 0.07, "inbatch_neg_score": 0.41, "inbatch_pos_score": 0.8911, "learning_rate": 4.961111111111111e-05, "loss": 4.6035, "norm_diff": 0.1365, "norm_loss": 0.0, "num_token_doc": 66.6464, "num_token_overlap": 11.6134, "num_token_query": 31.764, "num_token_union": 65.2555, "num_word_context": 202.2479, "num_word_doc": 49.7364, "num_word_query": 23.4566, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3893.5741, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4036, "query_norm": 1.6646, "queue_k_norm": 1.529, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.764, "sent_len_1": 66.6464, "sent_len_max_0": 127.3388, "sent_len_max_1": 188.315, "stdk": 0.0462, "stdq": 0.0417, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 10700 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.5577, "doc_norm": 1.5289, "encoder_q-embeddings": 2394.8813, "encoder_q-layer.0": 1805.4344, "encoder_q-layer.1": 1996.182, "encoder_q-layer.10": 1889.1639, "encoder_q-layer.11": 5858.3911, "encoder_q-layer.2": 2261.9846, "encoder_q-layer.3": 2479.8792, "encoder_q-layer.4": 2633.8748, "encoder_q-layer.5": 2634.4712, "encoder_q-layer.6": 2628.5256, "encoder_q-layer.7": 2484.9058, "encoder_q-layer.8": 2319.1714, "encoder_q-layer.9": 1662.47, "epoch": 0.07, "inbatch_neg_score": 0.3756, "inbatch_pos_score": 0.8652, "learning_rate": 4.955555555555556e-05, "loss": 4.5577, "norm_diff": 0.1118, "norm_loss": 0.0, "num_token_doc": 66.7728, "num_token_overlap": 11.711, "num_token_query": 32.0051, "num_token_union": 65.4036, "num_word_context": 202.3116, "num_word_doc": 49.8347, "num_word_query": 23.6635, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4073.2773, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3718, "query_norm": 1.6408, "queue_k_norm": 1.529, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0051, "sent_len_1": 66.7728, "sent_len_max_0": 127.4437, "sent_len_max_1": 190.275, "stdk": 0.0465, "stdq": 0.0414, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 10800 }, { "accuracy": 35.7422, "active_queue_size": 16384.0, "cl_loss": 4.579, "doc_norm": 1.5215, "encoder_q-embeddings": 2054.8875, "encoder_q-layer.0": 1436.1509, "encoder_q-layer.1": 1561.3618, "encoder_q-layer.10": 1934.4282, "encoder_q-layer.11": 4769.2314, "encoder_q-layer.2": 1698.3263, "encoder_q-layer.3": 1842.8436, "encoder_q-layer.4": 2010.6266, "encoder_q-layer.5": 2041.9106, "encoder_q-layer.6": 2206.4795, "encoder_q-layer.7": 2110.126, "encoder_q-layer.8": 1971.176, "encoder_q-layer.9": 1644.6489, "epoch": 0.07, "inbatch_neg_score": 0.4017, "inbatch_pos_score": 0.8564, "learning_rate": 4.9500000000000004e-05, "loss": 4.579, "norm_diff": 0.115, "norm_loss": 0.0, "num_token_doc": 66.6773, "num_token_overlap": 11.702, "num_token_query": 31.8965, "num_token_union": 65.2744, "num_word_context": 202.0258, "num_word_doc": 49.7722, "num_word_query": 23.5682, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3344.3733, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3955, "query_norm": 1.6365, "queue_k_norm": 1.5304, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8965, "sent_len_1": 66.6773, "sent_len_max_0": 127.5288, "sent_len_max_1": 189.3212, "stdk": 0.0461, "stdq": 0.0409, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 10900 }, { "accuracy": 39.9414, "active_queue_size": 16384.0, "cl_loss": 4.5391, "doc_norm": 1.5362, "encoder_q-embeddings": 1898.8622, "encoder_q-layer.0": 1390.0748, "encoder_q-layer.1": 1498.2744, "encoder_q-layer.10": 1854.5128, "encoder_q-layer.11": 4900.5068, "encoder_q-layer.2": 1710.6312, "encoder_q-layer.3": 1932.8344, "encoder_q-layer.4": 2097.1462, "encoder_q-layer.5": 2099.8496, "encoder_q-layer.6": 1987.9231, "encoder_q-layer.7": 1965.4237, "encoder_q-layer.8": 1760.5886, "encoder_q-layer.9": 1501.8464, "epoch": 0.07, "inbatch_neg_score": 0.3781, "inbatch_pos_score": 0.8613, "learning_rate": 4.9444444444444446e-05, "loss": 4.5391, "norm_diff": 0.0838, "norm_loss": 0.0, "num_token_doc": 66.9236, "num_token_overlap": 11.719, "num_token_query": 32.0977, "num_token_union": 65.5183, "num_word_context": 202.267, "num_word_doc": 49.8908, "num_word_query": 23.7135, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3239.7567, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3735, "query_norm": 1.62, "queue_k_norm": 1.5346, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0977, "sent_len_1": 66.9236, "sent_len_max_0": 127.4213, "sent_len_max_1": 192.0775, "stdk": 0.0467, "stdq": 0.041, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 11000 }, { "accuracy": 36.6211, "active_queue_size": 16384.0, "cl_loss": 4.538, "doc_norm": 1.5331, "encoder_q-embeddings": 5980.3906, "encoder_q-layer.0": 4425.4229, "encoder_q-layer.1": 5023.3545, "encoder_q-layer.10": 1995.2179, "encoder_q-layer.11": 4899.6636, "encoder_q-layer.2": 5694.3198, "encoder_q-layer.3": 5790.979, "encoder_q-layer.4": 5725.8047, "encoder_q-layer.5": 5505.0278, "encoder_q-layer.6": 5569.9009, "encoder_q-layer.7": 4265.0532, "encoder_q-layer.8": 3820.394, "encoder_q-layer.9": 2615.5356, "epoch": 0.07, "inbatch_neg_score": 0.3816, "inbatch_pos_score": 0.8525, "learning_rate": 4.938888888888889e-05, "loss": 4.538, "norm_diff": 0.0937, "norm_loss": 0.0, "num_token_doc": 66.903, "num_token_overlap": 11.6793, "num_token_query": 31.9783, "num_token_union": 65.4646, "num_word_context": 202.0134, "num_word_doc": 49.8925, "num_word_query": 23.6198, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7282.6586, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.377, "query_norm": 1.6269, "queue_k_norm": 1.5358, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9783, "sent_len_1": 66.903, "sent_len_max_0": 127.4775, "sent_len_max_1": 189.5838, "stdk": 0.0466, "stdq": 0.0413, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 11100 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.5654, "doc_norm": 1.5405, "encoder_q-embeddings": 8437.7432, "encoder_q-layer.0": 6744.6714, "encoder_q-layer.1": 6752.0474, "encoder_q-layer.10": 1880.2126, "encoder_q-layer.11": 5038.3926, "encoder_q-layer.2": 8067.3726, "encoder_q-layer.3": 9833.4258, "encoder_q-layer.4": 9422.6826, "encoder_q-layer.5": 8737.8457, "encoder_q-layer.6": 8960.4502, "encoder_q-layer.7": 8827.8789, "encoder_q-layer.8": 6755.731, "encoder_q-layer.9": 3060.2422, "epoch": 0.07, "inbatch_neg_score": 0.4095, "inbatch_pos_score": 0.8965, "learning_rate": 4.933333333333334e-05, "loss": 4.5654, "norm_diff": 0.0878, "norm_loss": 0.0, "num_token_doc": 66.786, "num_token_overlap": 11.6324, "num_token_query": 31.8, "num_token_union": 65.345, "num_word_context": 202.3615, "num_word_doc": 49.8572, "num_word_query": 23.497, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11507.8019, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4053, "query_norm": 1.6283, "queue_k_norm": 1.5397, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8, "sent_len_1": 66.786, "sent_len_max_0": 127.585, "sent_len_max_1": 189.3575, "stdk": 0.0467, "stdq": 0.0409, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 11200 }, { "accuracy": 38.6719, "active_queue_size": 16384.0, "cl_loss": 4.5354, "doc_norm": 1.5434, "encoder_q-embeddings": 9124.3008, "encoder_q-layer.0": 7102.1367, "encoder_q-layer.1": 7197.105, "encoder_q-layer.10": 2278.8828, "encoder_q-layer.11": 5636.252, "encoder_q-layer.2": 8250.1807, "encoder_q-layer.3": 10067.0996, "encoder_q-layer.4": 10889.9258, "encoder_q-layer.5": 11541.1914, "encoder_q-layer.6": 12215.3398, "encoder_q-layer.7": 15546.3838, "encoder_q-layer.8": 17370.4902, "encoder_q-layer.9": 9741.123, "epoch": 0.07, "inbatch_neg_score": 0.3239, "inbatch_pos_score": 0.8071, "learning_rate": 4.927777777777778e-05, "loss": 4.5354, "norm_diff": 0.1333, "norm_loss": 0.0, "num_token_doc": 66.8375, "num_token_overlap": 11.7802, "num_token_query": 32.116, "num_token_union": 65.4241, "num_word_context": 202.3322, "num_word_doc": 49.8878, "num_word_query": 23.7235, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15345.3861, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.321, "query_norm": 1.6768, "queue_k_norm": 1.5415, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.116, "sent_len_1": 66.8375, "sent_len_max_0": 127.5138, "sent_len_max_1": 191.9588, "stdk": 0.0467, "stdq": 0.0422, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 11300 }, { "accuracy": 36.9141, "active_queue_size": 16384.0, "cl_loss": 4.6055, "doc_norm": 1.5404, "encoder_q-embeddings": 3992.3557, "encoder_q-layer.0": 3085.9316, "encoder_q-layer.1": 3672.0881, "encoder_q-layer.10": 1779.2122, "encoder_q-layer.11": 4520.6118, "encoder_q-layer.2": 4078.7131, "encoder_q-layer.3": 4365.311, "encoder_q-layer.4": 4952.5298, "encoder_q-layer.5": 4881.5059, "encoder_q-layer.6": 4770.4985, "encoder_q-layer.7": 4378.4678, "encoder_q-layer.8": 3436.9797, "encoder_q-layer.9": 1989.8381, "epoch": 0.07, "inbatch_neg_score": 0.3615, "inbatch_pos_score": 0.8291, "learning_rate": 4.922222222222222e-05, "loss": 4.6055, "norm_diff": 0.0892, "norm_loss": 0.0, "num_token_doc": 66.5913, "num_token_overlap": 11.6526, "num_token_query": 31.7732, "num_token_union": 65.2165, "num_word_context": 202.1398, "num_word_doc": 49.7043, "num_word_query": 23.4458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5741.538, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3582, "query_norm": 1.6297, "queue_k_norm": 1.5393, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7732, "sent_len_1": 66.5913, "sent_len_max_0": 127.4375, "sent_len_max_1": 186.9525, "stdk": 0.0466, "stdq": 0.0413, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 11400 }, { "accuracy": 38.8672, "active_queue_size": 16384.0, "cl_loss": 4.5671, "doc_norm": 1.5332, "encoder_q-embeddings": 6177.2705, "encoder_q-layer.0": 4393.8647, "encoder_q-layer.1": 4899.0415, "encoder_q-layer.10": 4719.708, "encoder_q-layer.11": 17241.4707, "encoder_q-layer.2": 5633.752, "encoder_q-layer.3": 5965.5, "encoder_q-layer.4": 7158.124, "encoder_q-layer.5": 6942.9121, "encoder_q-layer.6": 7115.8096, "encoder_q-layer.7": 7303.6333, "encoder_q-layer.8": 5254.8345, "encoder_q-layer.9": 3235.1768, "epoch": 0.07, "inbatch_neg_score": 0.3206, "inbatch_pos_score": 0.7935, "learning_rate": 4.9166666666666665e-05, "loss": 4.5671, "norm_diff": 0.1066, "norm_loss": 0.0, "num_token_doc": 66.5278, "num_token_overlap": 11.652, "num_token_query": 31.8708, "num_token_union": 65.1926, "num_word_context": 202.1781, "num_word_doc": 49.6132, "num_word_query": 23.5127, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11613.8372, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3159, "query_norm": 1.6397, "queue_k_norm": 1.5396, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8708, "sent_len_1": 66.5278, "sent_len_max_0": 127.3675, "sent_len_max_1": 190.1275, "stdk": 0.0463, "stdq": 0.0404, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 11500 }, { "accuracy": 38.6719, "active_queue_size": 16384.0, "cl_loss": 4.552, "doc_norm": 1.5349, "encoder_q-embeddings": 3892.4688, "encoder_q-layer.0": 2615.77, "encoder_q-layer.1": 2965.0869, "encoder_q-layer.10": 1762.1731, "encoder_q-layer.11": 3918.51, "encoder_q-layer.2": 3482.4629, "encoder_q-layer.3": 3806.4692, "encoder_q-layer.4": 4006.2495, "encoder_q-layer.5": 4277.335, "encoder_q-layer.6": 4654.9692, "encoder_q-layer.7": 4250.0107, "encoder_q-layer.8": 3105.8157, "encoder_q-layer.9": 1665.142, "epoch": 0.08, "inbatch_neg_score": 0.295, "inbatch_pos_score": 0.7861, "learning_rate": 4.9111111111111114e-05, "loss": 4.552, "norm_diff": 0.1208, "norm_loss": 0.0, "num_token_doc": 66.9332, "num_token_overlap": 11.6093, "num_token_query": 31.7116, "num_token_union": 65.3708, "num_word_context": 202.5449, "num_word_doc": 49.897, "num_word_query": 23.4015, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5186.8412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2898, "query_norm": 1.6557, "queue_k_norm": 1.5384, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7116, "sent_len_1": 66.9332, "sent_len_max_0": 127.455, "sent_len_max_1": 191.0412, "stdk": 0.0463, "stdq": 0.0415, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 11600 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.5231, "doc_norm": 1.5418, "encoder_q-embeddings": 1755.2402, "encoder_q-layer.0": 1248.6627, "encoder_q-layer.1": 1407.7889, "encoder_q-layer.10": 884.4219, "encoder_q-layer.11": 2024.6567, "encoder_q-layer.2": 1465.4818, "encoder_q-layer.3": 1511.1245, "encoder_q-layer.4": 1585.5458, "encoder_q-layer.5": 1655.397, "encoder_q-layer.6": 1789.7062, "encoder_q-layer.7": 1864.8859, "encoder_q-layer.8": 1408.366, "encoder_q-layer.9": 846.7049, "epoch": 0.08, "inbatch_neg_score": 0.3022, "inbatch_pos_score": 0.7939, "learning_rate": 4.905555555555556e-05, "loss": 4.5231, "norm_diff": 0.052, "norm_loss": 0.0, "num_token_doc": 66.9014, "num_token_overlap": 11.6348, "num_token_query": 31.7485, "num_token_union": 65.364, "num_word_context": 202.5592, "num_word_doc": 49.9087, "num_word_query": 23.4511, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2233.031, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2969, "query_norm": 1.5938, "queue_k_norm": 1.5384, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7485, "sent_len_1": 66.9014, "sent_len_max_0": 127.5187, "sent_len_max_1": 189.0425, "stdk": 0.0465, "stdq": 0.0408, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 11700 }, { "accuracy": 37.8906, "active_queue_size": 16384.0, "cl_loss": 4.5237, "doc_norm": 1.5374, "encoder_q-embeddings": 1097.1683, "encoder_q-layer.0": 817.5483, "encoder_q-layer.1": 926.6381, "encoder_q-layer.10": 954.5109, "encoder_q-layer.11": 2274.7122, "encoder_q-layer.2": 1118.6661, "encoder_q-layer.3": 1221.1997, "encoder_q-layer.4": 1281.5232, "encoder_q-layer.5": 1374.4628, "encoder_q-layer.6": 1570.926, "encoder_q-layer.7": 1437.1295, "encoder_q-layer.8": 1318.083, "encoder_q-layer.9": 923.6083, "epoch": 0.08, "inbatch_neg_score": 0.3248, "inbatch_pos_score": 0.8149, "learning_rate": 4.9e-05, "loss": 4.5237, "norm_diff": 0.0719, "norm_loss": 0.0, "num_token_doc": 66.6902, "num_token_overlap": 11.7145, "num_token_query": 32.01, "num_token_union": 65.3426, "num_word_context": 202.1398, "num_word_doc": 49.8259, "num_word_query": 23.6609, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1874.0436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3208, "query_norm": 1.6093, "queue_k_norm": 1.5369, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.01, "sent_len_1": 66.6902, "sent_len_max_0": 127.615, "sent_len_max_1": 186.5838, "stdk": 0.0464, "stdq": 0.043, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 11800 }, { "accuracy": 40.5273, "active_queue_size": 16384.0, "cl_loss": 4.546, "doc_norm": 1.5352, "encoder_q-embeddings": 13135.9648, "encoder_q-layer.0": 9617.7852, "encoder_q-layer.1": 10034.9033, "encoder_q-layer.10": 927.171, "encoder_q-layer.11": 2628.094, "encoder_q-layer.2": 11866.5215, "encoder_q-layer.3": 13220.5488, "encoder_q-layer.4": 13214.9023, "encoder_q-layer.5": 14027.8262, "encoder_q-layer.6": 13329.7539, "encoder_q-layer.7": 14196.9365, "encoder_q-layer.8": 12925.8174, "encoder_q-layer.9": 4488.1353, "epoch": 0.08, "inbatch_neg_score": 0.3396, "inbatch_pos_score": 0.8257, "learning_rate": 4.894444444444445e-05, "loss": 4.546, "norm_diff": 0.1556, "norm_loss": 0.0, "num_token_doc": 66.6766, "num_token_overlap": 11.6733, "num_token_query": 31.7813, "num_token_union": 65.2076, "num_word_context": 202.3285, "num_word_doc": 49.7578, "num_word_query": 23.4908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 16954.2457, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.334, "query_norm": 1.6907, "queue_k_norm": 1.5327, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7813, "sent_len_1": 66.6766, "sent_len_max_0": 127.67, "sent_len_max_1": 190.7325, "stdk": 0.0464, "stdq": 0.0422, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 11900 }, { "accuracy": 37.9883, "active_queue_size": 16384.0, "cl_loss": 4.6342, "doc_norm": 1.5253, "encoder_q-embeddings": 7571.9009, "encoder_q-layer.0": 5513.4087, "encoder_q-layer.1": 5344.7046, "encoder_q-layer.10": 1094.2332, "encoder_q-layer.11": 3378.4873, "encoder_q-layer.2": 6324.8232, "encoder_q-layer.3": 6688.6582, "encoder_q-layer.4": 7086.3237, "encoder_q-layer.5": 7541.7036, "encoder_q-layer.6": 7962.9209, "encoder_q-layer.7": 7710.752, "encoder_q-layer.8": 7903.9487, "encoder_q-layer.9": 3799.4358, "epoch": 0.08, "inbatch_neg_score": 0.3867, "inbatch_pos_score": 0.8457, "learning_rate": 4.888888888888889e-05, "loss": 4.6342, "norm_diff": 0.2895, "norm_loss": 0.0, "num_token_doc": 66.8467, "num_token_overlap": 11.7191, "num_token_query": 31.9232, "num_token_union": 65.3865, "num_word_context": 202.5199, "num_word_doc": 49.9067, "num_word_query": 23.5777, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9798.6789, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3811, "query_norm": 1.8148, "queue_k_norm": 1.5284, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9232, "sent_len_1": 66.8467, "sent_len_max_0": 127.6462, "sent_len_max_1": 188.4938, "stdk": 0.0463, "stdq": 0.042, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 12000 }, { "accuracy": 37.8906, "active_queue_size": 16384.0, "cl_loss": 4.6596, "doc_norm": 1.5271, "encoder_q-embeddings": 1296.4919, "encoder_q-layer.0": 902.5809, "encoder_q-layer.1": 1144.0994, "encoder_q-layer.10": 451.8011, "encoder_q-layer.11": 1199.0945, "encoder_q-layer.2": 1395.2324, "encoder_q-layer.3": 1506.5482, "encoder_q-layer.4": 1303.3169, "encoder_q-layer.5": 1211.4417, "encoder_q-layer.6": 1081.4316, "encoder_q-layer.7": 967.123, "encoder_q-layer.8": 696.9552, "encoder_q-layer.9": 423.4694, "epoch": 0.08, "inbatch_neg_score": 0.325, "inbatch_pos_score": 0.7964, "learning_rate": 4.883333333333334e-05, "loss": 4.6596, "norm_diff": 0.1911, "norm_loss": 0.0, "num_token_doc": 66.5998, "num_token_overlap": 11.6873, "num_token_query": 32.0142, "num_token_union": 65.3102, "num_word_context": 202.2339, "num_word_doc": 49.727, "num_word_query": 23.6603, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1639.6253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3208, "query_norm": 1.7182, "queue_k_norm": 1.5272, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0142, "sent_len_1": 66.5998, "sent_len_max_0": 127.6012, "sent_len_max_1": 188.2188, "stdk": 0.0466, "stdq": 0.0413, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 12100 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.5377, "doc_norm": 1.5203, "encoder_q-embeddings": 1841.715, "encoder_q-layer.0": 1401.3073, "encoder_q-layer.1": 1759.8057, "encoder_q-layer.10": 405.9269, "encoder_q-layer.11": 1133.345, "encoder_q-layer.2": 1904.9873, "encoder_q-layer.3": 2055.978, "encoder_q-layer.4": 2376.145, "encoder_q-layer.5": 2170.3228, "encoder_q-layer.6": 2072.1277, "encoder_q-layer.7": 1840.4626, "encoder_q-layer.8": 1474.4208, "encoder_q-layer.9": 501.1758, "epoch": 0.08, "inbatch_neg_score": 0.3178, "inbatch_pos_score": 0.8291, "learning_rate": 4.8777777777777775e-05, "loss": 4.5377, "norm_diff": 0.1811, "norm_loss": 0.0, "num_token_doc": 66.8595, "num_token_overlap": 11.7153, "num_token_query": 31.8863, "num_token_union": 65.3308, "num_word_context": 202.5244, "num_word_doc": 49.8943, "num_word_query": 23.5353, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2515.8007, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3127, "query_norm": 1.7014, "queue_k_norm": 1.5213, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8863, "sent_len_1": 66.8595, "sent_len_max_0": 127.3162, "sent_len_max_1": 190.4963, "stdk": 0.0467, "stdq": 0.0417, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 12200 }, { "accuracy": 39.2578, "active_queue_size": 16384.0, "cl_loss": 4.5458, "doc_norm": 1.5138, "encoder_q-embeddings": 1663.0665, "encoder_q-layer.0": 1243.6696, "encoder_q-layer.1": 1408.4769, "encoder_q-layer.10": 410.9211, "encoder_q-layer.11": 1066.5431, "encoder_q-layer.2": 1669.1514, "encoder_q-layer.3": 1955.5681, "encoder_q-layer.4": 2221.1643, "encoder_q-layer.5": 2050.6423, "encoder_q-layer.6": 2007.5511, "encoder_q-layer.7": 1709.8733, "encoder_q-layer.8": 1447.5208, "encoder_q-layer.9": 809.2731, "epoch": 0.08, "inbatch_neg_score": 0.3519, "inbatch_pos_score": 0.8413, "learning_rate": 4.8722222222222224e-05, "loss": 4.5458, "norm_diff": 0.1763, "norm_loss": 0.0, "num_token_doc": 66.554, "num_token_overlap": 11.6934, "num_token_query": 31.8993, "num_token_union": 65.2448, "num_word_context": 202.0884, "num_word_doc": 49.6798, "num_word_query": 23.5665, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2361.7036, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3442, "query_norm": 1.6901, "queue_k_norm": 1.5177, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8993, "sent_len_1": 66.554, "sent_len_max_0": 127.4875, "sent_len_max_1": 187.4187, "stdk": 0.0465, "stdq": 0.0414, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 12300 }, { "accuracy": 37.0117, "active_queue_size": 16384.0, "cl_loss": 4.5395, "doc_norm": 1.5105, "encoder_q-embeddings": 1152.4176, "encoder_q-layer.0": 833.2993, "encoder_q-layer.1": 973.842, "encoder_q-layer.10": 422.2829, "encoder_q-layer.11": 1155.2834, "encoder_q-layer.2": 1142.3416, "encoder_q-layer.3": 1255.6678, "encoder_q-layer.4": 1382.467, "encoder_q-layer.5": 1513.515, "encoder_q-layer.6": 1336.3149, "encoder_q-layer.7": 1041.8403, "encoder_q-layer.8": 614.9804, "encoder_q-layer.9": 398.799, "epoch": 0.08, "inbatch_neg_score": 0.2685, "inbatch_pos_score": 0.7378, "learning_rate": 4.866666666666667e-05, "loss": 4.5395, "norm_diff": 0.1319, "norm_loss": 0.0, "num_token_doc": 66.9112, "num_token_overlap": 11.6985, "num_token_query": 31.87, "num_token_union": 65.414, "num_word_context": 202.7705, "num_word_doc": 49.9814, "num_word_query": 23.5565, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1566.3375, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2617, "query_norm": 1.6424, "queue_k_norm": 1.5121, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.87, "sent_len_1": 66.9112, "sent_len_max_0": 127.6162, "sent_len_max_1": 188.4775, "stdk": 0.0467, "stdq": 0.0413, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 12400 }, { "accuracy": 38.5742, "active_queue_size": 16384.0, "cl_loss": 4.5566, "doc_norm": 1.5025, "encoder_q-embeddings": 5572.4893, "encoder_q-layer.0": 4077.6125, "encoder_q-layer.1": 4676.1084, "encoder_q-layer.10": 468.0998, "encoder_q-layer.11": 1186.2806, "encoder_q-layer.2": 5798.439, "encoder_q-layer.3": 6300.0117, "encoder_q-layer.4": 6098.52, "encoder_q-layer.5": 6652.0913, "encoder_q-layer.6": 4834.6475, "encoder_q-layer.7": 2305.9316, "encoder_q-layer.8": 1169.1133, "encoder_q-layer.9": 519.1792, "epoch": 0.08, "inbatch_neg_score": 0.2779, "inbatch_pos_score": 0.7593, "learning_rate": 4.8611111111111115e-05, "loss": 4.5566, "norm_diff": 0.1341, "norm_loss": 0.0, "num_token_doc": 66.9944, "num_token_overlap": 11.6329, "num_token_query": 31.761, "num_token_union": 65.4447, "num_word_context": 202.5804, "num_word_doc": 49.9785, "num_word_query": 23.4689, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6726.1042, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2732, "query_norm": 1.6366, "queue_k_norm": 1.506, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.761, "sent_len_1": 66.9944, "sent_len_max_0": 127.4025, "sent_len_max_1": 189.9075, "stdk": 0.0466, "stdq": 0.0407, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 12500 }, { "accuracy": 38.7695, "active_queue_size": 16384.0, "cl_loss": 4.5336, "doc_norm": 1.4945, "encoder_q-embeddings": 773.2667, "encoder_q-layer.0": 581.541, "encoder_q-layer.1": 626.5037, "encoder_q-layer.10": 425.8659, "encoder_q-layer.11": 1124.1919, "encoder_q-layer.2": 736.5098, "encoder_q-layer.3": 871.0461, "encoder_q-layer.4": 737.1706, "encoder_q-layer.5": 694.7272, "encoder_q-layer.6": 620.8758, "encoder_q-layer.7": 552.8037, "encoder_q-layer.8": 498.8638, "encoder_q-layer.9": 371.5535, "epoch": 0.08, "inbatch_neg_score": 0.2361, "inbatch_pos_score": 0.7046, "learning_rate": 4.855555555555556e-05, "loss": 4.5336, "norm_diff": 0.1284, "norm_loss": 0.0, "num_token_doc": 66.8748, "num_token_overlap": 11.6927, "num_token_query": 31.9566, "num_token_union": 65.4552, "num_word_context": 202.3229, "num_word_doc": 49.9162, "num_word_query": 23.5962, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1016.1806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2302, "query_norm": 1.6229, "queue_k_norm": 1.4984, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9566, "sent_len_1": 66.8748, "sent_len_max_0": 127.5375, "sent_len_max_1": 188.895, "stdk": 0.0465, "stdq": 0.041, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 12600 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.5325, "doc_norm": 1.5013, "encoder_q-embeddings": 1818.8217, "encoder_q-layer.0": 1237.1528, "encoder_q-layer.1": 1393.8475, "encoder_q-layer.10": 407.6965, "encoder_q-layer.11": 1077.9205, "encoder_q-layer.2": 1552.7255, "encoder_q-layer.3": 1793.8196, "encoder_q-layer.4": 1896.1018, "encoder_q-layer.5": 1825.5426, "encoder_q-layer.6": 1616.9683, "encoder_q-layer.7": 1211.8567, "encoder_q-layer.8": 780.4249, "encoder_q-layer.9": 418.4759, "epoch": 0.08, "inbatch_neg_score": 0.206, "inbatch_pos_score": 0.6978, "learning_rate": 4.85e-05, "loss": 4.5325, "norm_diff": 0.1267, "norm_loss": 0.0, "num_token_doc": 66.9155, "num_token_overlap": 11.6786, "num_token_query": 32.0442, "num_token_union": 65.5535, "num_word_context": 202.9312, "num_word_doc": 49.9224, "num_word_query": 23.6869, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2081.3933, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2031, "query_norm": 1.628, "queue_k_norm": 1.4917, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0442, "sent_len_1": 66.9155, "sent_len_max_0": 127.58, "sent_len_max_1": 190.2738, "stdk": 0.0471, "stdq": 0.0414, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 12700 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.5228, "doc_norm": 1.4844, "encoder_q-embeddings": 2305.7107, "encoder_q-layer.0": 1658.7166, "encoder_q-layer.1": 1865.194, "encoder_q-layer.10": 418.2487, "encoder_q-layer.11": 1108.601, "encoder_q-layer.2": 2209.1135, "encoder_q-layer.3": 2364.6763, "encoder_q-layer.4": 2446.8069, "encoder_q-layer.5": 2670.7349, "encoder_q-layer.6": 2167.1318, "encoder_q-layer.7": 1199.3282, "encoder_q-layer.8": 821.234, "encoder_q-layer.9": 473.9746, "epoch": 0.08, "inbatch_neg_score": 0.1778, "inbatch_pos_score": 0.6587, "learning_rate": 4.844444444444445e-05, "loss": 4.5228, "norm_diff": 0.1612, "norm_loss": 0.0, "num_token_doc": 66.608, "num_token_overlap": 11.6531, "num_token_query": 31.8928, "num_token_union": 65.2994, "num_word_context": 201.9804, "num_word_doc": 49.6885, "num_word_query": 23.5585, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2712.933, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1736, "query_norm": 1.6456, "queue_k_norm": 1.4855, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8928, "sent_len_1": 66.608, "sent_len_max_0": 127.4725, "sent_len_max_1": 190.195, "stdk": 0.0467, "stdq": 0.0418, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 12800 }, { "accuracy": 38.1836, "active_queue_size": 16384.0, "cl_loss": 4.5318, "doc_norm": 1.4747, "encoder_q-embeddings": 1240.3455, "encoder_q-layer.0": 883.5001, "encoder_q-layer.1": 1027.9352, "encoder_q-layer.10": 412.6982, "encoder_q-layer.11": 1107.2267, "encoder_q-layer.2": 1171.431, "encoder_q-layer.3": 1199.2029, "encoder_q-layer.4": 1220.2972, "encoder_q-layer.5": 1186.1455, "encoder_q-layer.6": 1223.8822, "encoder_q-layer.7": 1253.6881, "encoder_q-layer.8": 809.3984, "encoder_q-layer.9": 424.0208, "epoch": 0.08, "inbatch_neg_score": 0.1797, "inbatch_pos_score": 0.6543, "learning_rate": 4.838888888888889e-05, "loss": 4.5318, "norm_diff": 0.1675, "norm_loss": 0.0, "num_token_doc": 66.7864, "num_token_overlap": 11.6072, "num_token_query": 31.7497, "num_token_union": 65.2874, "num_word_context": 202.2816, "num_word_doc": 49.8429, "num_word_query": 23.4612, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1567.0475, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1752, "query_norm": 1.6422, "queue_k_norm": 1.477, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7497, "sent_len_1": 66.7864, "sent_len_max_0": 127.6312, "sent_len_max_1": 190.5375, "stdk": 0.0466, "stdq": 0.0409, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 12900 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.502, "doc_norm": 1.4746, "encoder_q-embeddings": 838.3874, "encoder_q-layer.0": 607.6268, "encoder_q-layer.1": 666.8454, "encoder_q-layer.10": 426.2654, "encoder_q-layer.11": 1154.5175, "encoder_q-layer.2": 670.1329, "encoder_q-layer.3": 638.0809, "encoder_q-layer.4": 638.8665, "encoder_q-layer.5": 699.7436, "encoder_q-layer.6": 733.1687, "encoder_q-layer.7": 718.9249, "encoder_q-layer.8": 696.4196, "encoder_q-layer.9": 498.418, "epoch": 0.08, "inbatch_neg_score": 0.1886, "inbatch_pos_score": 0.6914, "learning_rate": 4.8333333333333334e-05, "loss": 4.502, "norm_diff": 0.2202, "norm_loss": 0.0, "num_token_doc": 66.8064, "num_token_overlap": 11.7045, "num_token_query": 31.9732, "num_token_union": 65.381, "num_word_context": 202.6229, "num_word_doc": 49.8829, "num_word_query": 23.602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1046.2357, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1829, "query_norm": 1.6948, "queue_k_norm": 1.4679, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9732, "sent_len_1": 66.8064, "sent_len_max_0": 127.5763, "sent_len_max_1": 189.8487, "stdk": 0.0468, "stdq": 0.0422, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 13000 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.5107, "doc_norm": 1.4648, "encoder_q-embeddings": 909.0419, "encoder_q-layer.0": 606.5964, "encoder_q-layer.1": 679.3921, "encoder_q-layer.10": 430.5301, "encoder_q-layer.11": 1142.941, "encoder_q-layer.2": 780.7967, "encoder_q-layer.3": 801.3685, "encoder_q-layer.4": 839.4275, "encoder_q-layer.5": 794.0179, "encoder_q-layer.6": 763.0767, "encoder_q-layer.7": 733.1242, "encoder_q-layer.8": 618.476, "encoder_q-layer.9": 397.0273, "epoch": 0.09, "inbatch_neg_score": 0.1859, "inbatch_pos_score": 0.6807, "learning_rate": 4.8277777777777776e-05, "loss": 4.5107, "norm_diff": 0.245, "norm_loss": 0.0, "num_token_doc": 66.8506, "num_token_overlap": 11.6463, "num_token_query": 31.8693, "num_token_union": 65.3743, "num_word_context": 202.0048, "num_word_doc": 49.8711, "num_word_query": 23.5535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1103.5078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1804, "query_norm": 1.7098, "queue_k_norm": 1.461, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8693, "sent_len_1": 66.8506, "sent_len_max_0": 127.375, "sent_len_max_1": 189.8963, "stdk": 0.0468, "stdq": 0.0414, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 13100 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.4876, "doc_norm": 1.4515, "encoder_q-embeddings": 1624.085, "encoder_q-layer.0": 1127.9838, "encoder_q-layer.1": 1321.6252, "encoder_q-layer.10": 449.1666, "encoder_q-layer.11": 1289.7883, "encoder_q-layer.2": 1354.4445, "encoder_q-layer.3": 1400.5146, "encoder_q-layer.4": 1525.7463, "encoder_q-layer.5": 1776.0457, "encoder_q-layer.6": 1476.5175, "encoder_q-layer.7": 888.3405, "encoder_q-layer.8": 814.7719, "encoder_q-layer.9": 565.5094, "epoch": 0.09, "inbatch_neg_score": 0.1541, "inbatch_pos_score": 0.6353, "learning_rate": 4.8222222222222225e-05, "loss": 4.4876, "norm_diff": 0.2526, "norm_loss": 0.0, "num_token_doc": 67.0525, "num_token_overlap": 11.6606, "num_token_query": 31.8198, "num_token_union": 65.5363, "num_word_context": 202.392, "num_word_doc": 50.086, "num_word_query": 23.5134, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1882.355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1475, "query_norm": 1.7041, "queue_k_norm": 1.4563, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8198, "sent_len_1": 67.0525, "sent_len_max_0": 127.4613, "sent_len_max_1": 186.9988, "stdk": 0.0465, "stdq": 0.0413, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 13200 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.4698, "doc_norm": 1.4507, "encoder_q-embeddings": 2384.0896, "encoder_q-layer.0": 1714.7407, "encoder_q-layer.1": 1827.6184, "encoder_q-layer.10": 216.7137, "encoder_q-layer.11": 527.7662, "encoder_q-layer.2": 2113.1934, "encoder_q-layer.3": 2357.6113, "encoder_q-layer.4": 2735.2898, "encoder_q-layer.5": 2603.8884, "encoder_q-layer.6": 2062.385, "encoder_q-layer.7": 1924.3976, "encoder_q-layer.8": 1163.9937, "encoder_q-layer.9": 411.4643, "epoch": 0.09, "inbatch_neg_score": 0.1797, "inbatch_pos_score": 0.6904, "learning_rate": 4.8166666666666674e-05, "loss": 4.4698, "norm_diff": 0.2219, "norm_loss": 0.0, "num_token_doc": 66.6295, "num_token_overlap": 11.6403, "num_token_query": 31.8786, "num_token_union": 65.3171, "num_word_context": 202.1296, "num_word_doc": 49.7022, "num_word_query": 23.5419, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2862.1754, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1779, "query_norm": 1.6726, "queue_k_norm": 1.4457, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8786, "sent_len_1": 66.6295, "sent_len_max_0": 127.4562, "sent_len_max_1": 189.6362, "stdk": 0.0467, "stdq": 0.0416, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 13300 }, { "accuracy": 36.9141, "active_queue_size": 16384.0, "cl_loss": 4.463, "doc_norm": 1.4358, "encoder_q-embeddings": 509.9568, "encoder_q-layer.0": 352.9512, "encoder_q-layer.1": 377.814, "encoder_q-layer.10": 211.6658, "encoder_q-layer.11": 513.5277, "encoder_q-layer.2": 414.2539, "encoder_q-layer.3": 477.5056, "encoder_q-layer.4": 475.1484, "encoder_q-layer.5": 450.1595, "encoder_q-layer.6": 473.6078, "encoder_q-layer.7": 482.3725, "encoder_q-layer.8": 527.0491, "encoder_q-layer.9": 333.349, "epoch": 0.09, "inbatch_neg_score": 0.1763, "inbatch_pos_score": 0.6758, "learning_rate": 4.811111111111111e-05, "loss": 4.463, "norm_diff": 0.2558, "norm_loss": 0.0, "num_token_doc": 66.836, "num_token_overlap": 11.5989, "num_token_query": 31.6169, "num_token_union": 65.2785, "num_word_context": 202.3133, "num_word_doc": 49.8647, "num_word_query": 23.337, "postclip_grad_norm": 1.0, "preclip_grad_norm": 655.6329, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.172, "query_norm": 1.6916, "queue_k_norm": 1.4362, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.6169, "sent_len_1": 66.836, "sent_len_max_0": 127.4213, "sent_len_max_1": 189.365, "stdk": 0.0464, "stdq": 0.0419, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 13400 }, { "accuracy": 39.7461, "active_queue_size": 16384.0, "cl_loss": 4.4779, "doc_norm": 1.4299, "encoder_q-embeddings": 245.612, "encoder_q-layer.0": 161.0028, "encoder_q-layer.1": 175.0058, "encoder_q-layer.10": 224.1398, "encoder_q-layer.11": 583.4742, "encoder_q-layer.2": 195.5373, "encoder_q-layer.3": 213.4691, "encoder_q-layer.4": 230.8192, "encoder_q-layer.5": 256.5559, "encoder_q-layer.6": 253.4625, "encoder_q-layer.7": 238.2105, "encoder_q-layer.8": 260.0567, "encoder_q-layer.9": 202.7393, "epoch": 0.09, "inbatch_neg_score": 0.1735, "inbatch_pos_score": 0.6709, "learning_rate": 4.805555555555556e-05, "loss": 4.4779, "norm_diff": 0.2956, "norm_loss": 0.0, "num_token_doc": 67.0851, "num_token_overlap": 11.6563, "num_token_query": 31.8953, "num_token_union": 65.5357, "num_word_context": 202.457, "num_word_doc": 49.9936, "num_word_query": 23.5505, "postclip_grad_norm": 1.0, "preclip_grad_norm": 385.4314, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1685, "query_norm": 1.7256, "queue_k_norm": 1.4303, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8953, "sent_len_1": 67.0851, "sent_len_max_0": 127.4838, "sent_len_max_1": 192.9913, "stdk": 0.0464, "stdq": 0.0424, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 13500 }, { "accuracy": 39.5508, "active_queue_size": 16384.0, "cl_loss": 4.4396, "doc_norm": 1.4244, "encoder_q-embeddings": 513.901, "encoder_q-layer.0": 378.5813, "encoder_q-layer.1": 404.2451, "encoder_q-layer.10": 220.3913, "encoder_q-layer.11": 543.0748, "encoder_q-layer.2": 450.9628, "encoder_q-layer.3": 455.5586, "encoder_q-layer.4": 506.3938, "encoder_q-layer.5": 486.7927, "encoder_q-layer.6": 482.1622, "encoder_q-layer.7": 410.7311, "encoder_q-layer.8": 447.1308, "encoder_q-layer.9": 278.5084, "epoch": 0.09, "inbatch_neg_score": 0.175, "inbatch_pos_score": 0.6802, "learning_rate": 4.8e-05, "loss": 4.4396, "norm_diff": 0.3093, "norm_loss": 0.0, "num_token_doc": 66.8048, "num_token_overlap": 11.7278, "num_token_query": 31.8811, "num_token_union": 65.3152, "num_word_context": 201.9258, "num_word_doc": 49.8373, "num_word_query": 23.5155, "postclip_grad_norm": 1.0, "preclip_grad_norm": 654.2941, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1687, "query_norm": 1.7337, "queue_k_norm": 1.4241, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8811, "sent_len_1": 66.8048, "sent_len_max_0": 127.5012, "sent_len_max_1": 189.6488, "stdk": 0.0464, "stdq": 0.0425, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 13600 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.4442, "doc_norm": 1.419, "encoder_q-embeddings": 5532.6904, "encoder_q-layer.0": 4406.3271, "encoder_q-layer.1": 5031.9321, "encoder_q-layer.10": 261.2646, "encoder_q-layer.11": 501.2711, "encoder_q-layer.2": 6024.3188, "encoder_q-layer.3": 7377.3193, "encoder_q-layer.4": 9113.6777, "encoder_q-layer.5": 8372.5498, "encoder_q-layer.6": 5486.082, "encoder_q-layer.7": 3333.8467, "encoder_q-layer.8": 1929.4271, "encoder_q-layer.9": 875.9095, "epoch": 0.09, "inbatch_neg_score": 0.1972, "inbatch_pos_score": 0.689, "learning_rate": 4.794444444444445e-05, "loss": 4.4442, "norm_diff": 0.2818, "norm_loss": 0.0, "num_token_doc": 66.861, "num_token_overlap": 11.7142, "num_token_query": 32.0657, "num_token_union": 65.4955, "num_word_context": 202.5762, "num_word_doc": 49.9114, "num_word_query": 23.6908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7774.9602, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1917, "query_norm": 1.7007, "queue_k_norm": 1.4161, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0657, "sent_len_1": 66.861, "sent_len_max_0": 127.5888, "sent_len_max_1": 190.0825, "stdk": 0.0463, "stdq": 0.041, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 13700 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.4483, "doc_norm": 1.405, "encoder_q-embeddings": 608.5542, "encoder_q-layer.0": 433.5658, "encoder_q-layer.1": 484.5444, "encoder_q-layer.10": 263.4075, "encoder_q-layer.11": 624.4095, "encoder_q-layer.2": 556.6978, "encoder_q-layer.3": 619.433, "encoder_q-layer.4": 662.5253, "encoder_q-layer.5": 620.5768, "encoder_q-layer.6": 624.5515, "encoder_q-layer.7": 558.0115, "encoder_q-layer.8": 511.5938, "encoder_q-layer.9": 334.2031, "epoch": 0.09, "inbatch_neg_score": 0.2238, "inbatch_pos_score": 0.7041, "learning_rate": 4.7888888888888886e-05, "loss": 4.4483, "norm_diff": 0.259, "norm_loss": 0.0, "num_token_doc": 66.9293, "num_token_overlap": 11.7031, "num_token_query": 31.943, "num_token_union": 65.4256, "num_word_context": 202.2866, "num_word_doc": 49.9102, "num_word_query": 23.6099, "postclip_grad_norm": 1.0, "preclip_grad_norm": 791.8025, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2179, "query_norm": 1.664, "queue_k_norm": 1.4106, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.943, "sent_len_1": 66.9293, "sent_len_max_0": 127.3475, "sent_len_max_1": 192.2325, "stdk": 0.046, "stdq": 0.0423, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 13800 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.4527, "doc_norm": 1.4037, "encoder_q-embeddings": 807.3166, "encoder_q-layer.0": 571.8192, "encoder_q-layer.1": 700.9831, "encoder_q-layer.10": 203.6116, "encoder_q-layer.11": 490.4271, "encoder_q-layer.2": 809.5443, "encoder_q-layer.3": 863.6333, "encoder_q-layer.4": 962.8434, "encoder_q-layer.5": 785.3871, "encoder_q-layer.6": 643.9865, "encoder_q-layer.7": 466.5262, "encoder_q-layer.8": 366.5758, "encoder_q-layer.9": 237.9557, "epoch": 0.09, "inbatch_neg_score": 0.2301, "inbatch_pos_score": 0.7241, "learning_rate": 4.7833333333333335e-05, "loss": 4.4527, "norm_diff": 0.2602, "norm_loss": 0.0, "num_token_doc": 66.8563, "num_token_overlap": 11.6455, "num_token_query": 31.741, "num_token_union": 65.3096, "num_word_context": 202.2553, "num_word_doc": 49.8535, "num_word_query": 23.4214, "postclip_grad_norm": 1.0, "preclip_grad_norm": 969.3728, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2258, "query_norm": 1.6639, "queue_k_norm": 1.4031, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.741, "sent_len_1": 66.8563, "sent_len_max_0": 127.6437, "sent_len_max_1": 190.825, "stdk": 0.0461, "stdq": 0.0421, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 13900 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.4733, "doc_norm": 1.4064, "encoder_q-embeddings": 803.0749, "encoder_q-layer.0": 647.4767, "encoder_q-layer.1": 683.3239, "encoder_q-layer.10": 206.3009, "encoder_q-layer.11": 479.6544, "encoder_q-layer.2": 777.3157, "encoder_q-layer.3": 855.4817, "encoder_q-layer.4": 926.3345, "encoder_q-layer.5": 932.4449, "encoder_q-layer.6": 835.9489, "encoder_q-layer.7": 805.8636, "encoder_q-layer.8": 747.0565, "encoder_q-layer.9": 341.5677, "epoch": 0.09, "inbatch_neg_score": 0.2109, "inbatch_pos_score": 0.7271, "learning_rate": 4.7777777777777784e-05, "loss": 4.4733, "norm_diff": 0.3195, "norm_loss": 0.0, "num_token_doc": 66.7613, "num_token_overlap": 11.6679, "num_token_query": 31.8869, "num_token_union": 65.2955, "num_word_context": 202.1215, "num_word_doc": 49.7739, "num_word_query": 23.5472, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1084.3141, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2069, "query_norm": 1.7259, "queue_k_norm": 1.3994, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8869, "sent_len_1": 66.7613, "sent_len_max_0": 127.4737, "sent_len_max_1": 189.0362, "stdk": 0.0462, "stdq": 0.0422, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 14000 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.4383, "doc_norm": 1.3948, "encoder_q-embeddings": 950.6774, "encoder_q-layer.0": 692.1666, "encoder_q-layer.1": 811.1135, "encoder_q-layer.10": 205.9252, "encoder_q-layer.11": 480.874, "encoder_q-layer.2": 1019.3424, "encoder_q-layer.3": 1022.2899, "encoder_q-layer.4": 1119.4473, "encoder_q-layer.5": 1035.6632, "encoder_q-layer.6": 949.1334, "encoder_q-layer.7": 808.4107, "encoder_q-layer.8": 809.3743, "encoder_q-layer.9": 404.0964, "epoch": 0.09, "inbatch_neg_score": 0.2498, "inbatch_pos_score": 0.7256, "learning_rate": 4.7722222222222226e-05, "loss": 4.4383, "norm_diff": 0.3073, "norm_loss": 0.0, "num_token_doc": 66.9426, "num_token_overlap": 11.7009, "num_token_query": 31.9331, "num_token_union": 65.4529, "num_word_context": 202.6115, "num_word_doc": 49.931, "num_word_query": 23.5741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1241.2103, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2452, "query_norm": 1.7021, "queue_k_norm": 1.3962, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9331, "sent_len_1": 66.9426, "sent_len_max_0": 127.4175, "sent_len_max_1": 189.5613, "stdk": 0.0459, "stdq": 0.0414, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 14100 }, { "accuracy": 39.9414, "active_queue_size": 16384.0, "cl_loss": 4.4432, "doc_norm": 1.3946, "encoder_q-embeddings": 713.3699, "encoder_q-layer.0": 505.8028, "encoder_q-layer.1": 545.8732, "encoder_q-layer.10": 197.6663, "encoder_q-layer.11": 502.256, "encoder_q-layer.2": 647.7135, "encoder_q-layer.3": 692.982, "encoder_q-layer.4": 748.4559, "encoder_q-layer.5": 730.311, "encoder_q-layer.6": 630.0895, "encoder_q-layer.7": 480.0732, "encoder_q-layer.8": 364.3885, "encoder_q-layer.9": 194.5699, "epoch": 0.09, "inbatch_neg_score": 0.2713, "inbatch_pos_score": 0.7568, "learning_rate": 4.766666666666667e-05, "loss": 4.4432, "norm_diff": 0.3323, "norm_loss": 0.0, "num_token_doc": 66.818, "num_token_overlap": 11.6906, "num_token_query": 31.9302, "num_token_union": 65.3819, "num_word_context": 202.1791, "num_word_doc": 49.8799, "num_word_query": 23.5754, "postclip_grad_norm": 1.0, "preclip_grad_norm": 842.3211, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2651, "query_norm": 1.7269, "queue_k_norm": 1.3955, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9302, "sent_len_1": 66.818, "sent_len_max_0": 127.5563, "sent_len_max_1": 189.3013, "stdk": 0.0459, "stdq": 0.0419, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 14200 }, { "accuracy": 39.7461, "active_queue_size": 16384.0, "cl_loss": 4.4233, "doc_norm": 1.3885, "encoder_q-embeddings": 853.3259, "encoder_q-layer.0": 674.8759, "encoder_q-layer.1": 779.8987, "encoder_q-layer.10": 203.5423, "encoder_q-layer.11": 525.1608, "encoder_q-layer.2": 922.4954, "encoder_q-layer.3": 957.8289, "encoder_q-layer.4": 969.0365, "encoder_q-layer.5": 922.2821, "encoder_q-layer.6": 951.3212, "encoder_q-layer.7": 798.5712, "encoder_q-layer.8": 681.1932, "encoder_q-layer.9": 313.1821, "epoch": 0.09, "inbatch_neg_score": 0.2861, "inbatch_pos_score": 0.771, "learning_rate": 4.761111111111111e-05, "loss": 4.4233, "norm_diff": 0.3584, "norm_loss": 0.0, "num_token_doc": 67.185, "num_token_overlap": 11.732, "num_token_query": 31.8755, "num_token_union": 65.5262, "num_word_context": 202.597, "num_word_doc": 50.1006, "num_word_query": 23.5325, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1140.9255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2812, "query_norm": 1.7469, "queue_k_norm": 1.3925, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8755, "sent_len_1": 67.185, "sent_len_max_0": 127.2463, "sent_len_max_1": 191.5488, "stdk": 0.0457, "stdq": 0.0417, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 14300 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.4265, "doc_norm": 1.3953, "encoder_q-embeddings": 287.1651, "encoder_q-layer.0": 204.8169, "encoder_q-layer.1": 242.7794, "encoder_q-layer.10": 208.9507, "encoder_q-layer.11": 522.369, "encoder_q-layer.2": 202.8903, "encoder_q-layer.3": 207.9066, "encoder_q-layer.4": 229.2901, "encoder_q-layer.5": 254.6293, "encoder_q-layer.6": 255.7869, "encoder_q-layer.7": 250.502, "encoder_q-layer.8": 271.7008, "encoder_q-layer.9": 196.8047, "epoch": 0.09, "inbatch_neg_score": 0.3118, "inbatch_pos_score": 0.8169, "learning_rate": 4.755555555555556e-05, "loss": 4.4265, "norm_diff": 0.3927, "norm_loss": 0.0, "num_token_doc": 66.5294, "num_token_overlap": 11.6898, "num_token_query": 31.9011, "num_token_union": 65.1965, "num_word_context": 201.8637, "num_word_doc": 49.6249, "num_word_query": 23.552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 393.3174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3059, "query_norm": 1.788, "queue_k_norm": 1.3922, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9011, "sent_len_1": 66.5294, "sent_len_max_0": 127.51, "sent_len_max_1": 190.0, "stdk": 0.0459, "stdq": 0.0423, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 14400 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.4395, "doc_norm": 1.3874, "encoder_q-embeddings": 369.8248, "encoder_q-layer.0": 266.6922, "encoder_q-layer.1": 298.7563, "encoder_q-layer.10": 207.5375, "encoder_q-layer.11": 533.5688, "encoder_q-layer.2": 337.0504, "encoder_q-layer.3": 354.6858, "encoder_q-layer.4": 338.2853, "encoder_q-layer.5": 333.1038, "encoder_q-layer.6": 316.4347, "encoder_q-layer.7": 280.4208, "encoder_q-layer.8": 263.605, "encoder_q-layer.9": 184.9501, "epoch": 0.09, "inbatch_neg_score": 0.3463, "inbatch_pos_score": 0.8379, "learning_rate": 4.75e-05, "loss": 4.4395, "norm_diff": 0.3883, "norm_loss": 0.0, "num_token_doc": 66.5041, "num_token_overlap": 11.5889, "num_token_query": 31.9129, "num_token_union": 65.2581, "num_word_context": 202.0686, "num_word_doc": 49.6177, "num_word_query": 23.546, "postclip_grad_norm": 1.0, "preclip_grad_norm": 479.4622, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3408, "query_norm": 1.7757, "queue_k_norm": 1.3916, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9129, "sent_len_1": 66.5041, "sent_len_max_0": 127.63, "sent_len_max_1": 190.0987, "stdk": 0.0457, "stdq": 0.0419, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 14500 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.4424, "doc_norm": 1.3906, "encoder_q-embeddings": 632.3698, "encoder_q-layer.0": 527.6331, "encoder_q-layer.1": 527.0748, "encoder_q-layer.10": 190.3918, "encoder_q-layer.11": 505.1778, "encoder_q-layer.2": 577.5733, "encoder_q-layer.3": 576.7676, "encoder_q-layer.4": 645.7769, "encoder_q-layer.5": 632.1277, "encoder_q-layer.6": 632.335, "encoder_q-layer.7": 582.4913, "encoder_q-layer.8": 457.7721, "encoder_q-layer.9": 221.7798, "epoch": 0.1, "inbatch_neg_score": 0.3702, "inbatch_pos_score": 0.834, "learning_rate": 4.7444444444444445e-05, "loss": 4.4424, "norm_diff": 0.3642, "norm_loss": 0.0, "num_token_doc": 66.3424, "num_token_overlap": 11.6799, "num_token_query": 31.9378, "num_token_union": 65.1092, "num_word_context": 201.9952, "num_word_doc": 49.5113, "num_word_query": 23.5865, "postclip_grad_norm": 1.0, "preclip_grad_norm": 794.7091, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3638, "query_norm": 1.7549, "queue_k_norm": 1.3923, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9378, "sent_len_1": 66.3424, "sent_len_max_0": 127.42, "sent_len_max_1": 187.4762, "stdk": 0.0457, "stdq": 0.041, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 14600 }, { "accuracy": 39.9414, "active_queue_size": 16384.0, "cl_loss": 4.4445, "doc_norm": 1.392, "encoder_q-embeddings": 1071.6865, "encoder_q-layer.0": 765.6033, "encoder_q-layer.1": 800.5679, "encoder_q-layer.10": 198.5257, "encoder_q-layer.11": 494.0948, "encoder_q-layer.2": 953.6229, "encoder_q-layer.3": 1172.1373, "encoder_q-layer.4": 1250.127, "encoder_q-layer.5": 1260.8304, "encoder_q-layer.6": 1275.3153, "encoder_q-layer.7": 1007.3293, "encoder_q-layer.8": 848.7284, "encoder_q-layer.9": 337.5876, "epoch": 0.1, "inbatch_neg_score": 0.4199, "inbatch_pos_score": 0.8945, "learning_rate": 4.7388888888888894e-05, "loss": 4.4445, "norm_diff": 0.4345, "norm_loss": 0.0, "num_token_doc": 66.9344, "num_token_overlap": 11.6507, "num_token_query": 31.8919, "num_token_union": 65.463, "num_word_context": 202.4492, "num_word_doc": 49.9167, "num_word_query": 23.5417, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1410.3566, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4128, "query_norm": 1.8265, "queue_k_norm": 1.3948, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8919, "sent_len_1": 66.9344, "sent_len_max_0": 127.5763, "sent_len_max_1": 188.7363, "stdk": 0.0456, "stdq": 0.0419, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 14700 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.4658, "doc_norm": 1.3949, "encoder_q-embeddings": 598.2844, "encoder_q-layer.0": 411.6614, "encoder_q-layer.1": 441.6218, "encoder_q-layer.10": 195.1895, "encoder_q-layer.11": 458.7405, "encoder_q-layer.2": 494.2779, "encoder_q-layer.3": 466.3168, "encoder_q-layer.4": 471.4807, "encoder_q-layer.5": 402.4673, "encoder_q-layer.6": 404.6947, "encoder_q-layer.7": 429.9338, "encoder_q-layer.8": 362.4626, "encoder_q-layer.9": 190.2495, "epoch": 0.1, "inbatch_neg_score": 0.4298, "inbatch_pos_score": 0.9209, "learning_rate": 4.7333333333333336e-05, "loss": 4.4658, "norm_diff": 0.3472, "norm_loss": 0.0, "num_token_doc": 66.7031, "num_token_overlap": 11.6414, "num_token_query": 31.8789, "num_token_union": 65.3174, "num_word_context": 202.0926, "num_word_doc": 49.7927, "num_word_query": 23.522, "postclip_grad_norm": 1.0, "preclip_grad_norm": 645.8154, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4241, "query_norm": 1.742, "queue_k_norm": 1.3988, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8789, "sent_len_1": 66.7031, "sent_len_max_0": 127.4912, "sent_len_max_1": 187.6025, "stdk": 0.0456, "stdq": 0.0422, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 14800 }, { "accuracy": 38.7695, "active_queue_size": 16384.0, "cl_loss": 4.4707, "doc_norm": 1.4013, "encoder_q-embeddings": 1096.9658, "encoder_q-layer.0": 786.9394, "encoder_q-layer.1": 853.756, "encoder_q-layer.10": 229.898, "encoder_q-layer.11": 551.396, "encoder_q-layer.2": 919.111, "encoder_q-layer.3": 1003.1147, "encoder_q-layer.4": 1026.7834, "encoder_q-layer.5": 1011.3926, "encoder_q-layer.6": 1037.7087, "encoder_q-layer.7": 857.0402, "encoder_q-layer.8": 689.9415, "encoder_q-layer.9": 311.2373, "epoch": 0.1, "inbatch_neg_score": 0.4925, "inbatch_pos_score": 0.959, "learning_rate": 4.727777777777778e-05, "loss": 4.4707, "norm_diff": 0.3886, "norm_loss": 0.0, "num_token_doc": 66.6802, "num_token_overlap": 11.6262, "num_token_query": 31.8237, "num_token_union": 65.3084, "num_word_context": 202.4441, "num_word_doc": 49.7792, "num_word_query": 23.4945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1256.9465, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4863, "query_norm": 1.7899, "queue_k_norm": 1.4021, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8237, "sent_len_1": 66.6802, "sent_len_max_0": 127.51, "sent_len_max_1": 189.7163, "stdk": 0.0456, "stdq": 0.0416, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 14900 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.5078, "doc_norm": 1.4149, "encoder_q-embeddings": 3708.8767, "encoder_q-layer.0": 2379.5122, "encoder_q-layer.1": 2603.2297, "encoder_q-layer.10": 240.9546, "encoder_q-layer.11": 582.7072, "encoder_q-layer.2": 2924.9565, "encoder_q-layer.3": 3364.0369, "encoder_q-layer.4": 3739.8643, "encoder_q-layer.5": 3866.7522, "encoder_q-layer.6": 4292.2222, "encoder_q-layer.7": 3831.2695, "encoder_q-layer.8": 1798.442, "encoder_q-layer.9": 445.0269, "epoch": 0.1, "inbatch_neg_score": 0.506, "inbatch_pos_score": 1.002, "learning_rate": 4.722222222222222e-05, "loss": 4.5078, "norm_diff": 0.3088, "norm_loss": 0.0, "num_token_doc": 66.7276, "num_token_overlap": 11.6633, "num_token_query": 31.7667, "num_token_union": 65.2428, "num_word_context": 202.1886, "num_word_doc": 49.771, "num_word_query": 23.4429, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4438.576, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.501, "query_norm": 1.7237, "queue_k_norm": 1.4074, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7667, "sent_len_1": 66.7276, "sent_len_max_0": 127.4575, "sent_len_max_1": 188.8487, "stdk": 0.0459, "stdq": 0.0429, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 15000 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.5051, "doc_norm": 1.4174, "encoder_q-embeddings": 2128.8684, "encoder_q-layer.0": 1534.1829, "encoder_q-layer.1": 1669.5516, "encoder_q-layer.10": 203.4733, "encoder_q-layer.11": 569.4159, "encoder_q-layer.2": 2015.0258, "encoder_q-layer.3": 2160.197, "encoder_q-layer.4": 2427.9534, "encoder_q-layer.5": 2462.5352, "encoder_q-layer.6": 2225.4153, "encoder_q-layer.7": 1808.2532, "encoder_q-layer.8": 867.121, "encoder_q-layer.9": 256.643, "epoch": 0.1, "inbatch_neg_score": 0.525, "inbatch_pos_score": 0.9995, "learning_rate": 4.716666666666667e-05, "loss": 4.5051, "norm_diff": 0.2684, "norm_loss": 0.0, "num_token_doc": 66.7905, "num_token_overlap": 11.682, "num_token_query": 31.9537, "num_token_union": 65.3767, "num_word_context": 202.6528, "num_word_doc": 49.799, "num_word_query": 23.5872, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2608.2086, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5186, "query_norm": 1.6858, "queue_k_norm": 1.4132, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9537, "sent_len_1": 66.7905, "sent_len_max_0": 127.5925, "sent_len_max_1": 191.9575, "stdk": 0.0457, "stdq": 0.0423, "stdqueue_k": 0.0456, "stdqueue_q": 0.0, "step": 15100 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.5205, "doc_norm": 1.4247, "encoder_q-embeddings": 2454.4006, "encoder_q-layer.0": 1899.5767, "encoder_q-layer.1": 2020.0977, "encoder_q-layer.10": 224.927, "encoder_q-layer.11": 623.7004, "encoder_q-layer.2": 2468.5583, "encoder_q-layer.3": 2624.3655, "encoder_q-layer.4": 2697.1704, "encoder_q-layer.5": 2805.1082, "encoder_q-layer.6": 2481.1833, "encoder_q-layer.7": 1845.1047, "encoder_q-layer.8": 1066.5391, "encoder_q-layer.9": 298.2577, "epoch": 0.1, "inbatch_neg_score": 0.4673, "inbatch_pos_score": 0.9575, "learning_rate": 4.711111111111111e-05, "loss": 4.5205, "norm_diff": 0.2857, "norm_loss": 0.0, "num_token_doc": 66.8403, "num_token_overlap": 11.6946, "num_token_query": 31.896, "num_token_union": 65.4228, "num_word_context": 202.5227, "num_word_doc": 49.9282, "num_word_query": 23.5511, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2997.7136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4646, "query_norm": 1.7104, "queue_k_norm": 1.4218, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.896, "sent_len_1": 66.8403, "sent_len_max_0": 127.5463, "sent_len_max_1": 187.6425, "stdk": 0.0456, "stdq": 0.0438, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 15200 }, { "accuracy": 38.6719, "active_queue_size": 16384.0, "cl_loss": 4.5675, "doc_norm": 1.4321, "encoder_q-embeddings": 2026.792, "encoder_q-layer.0": 1522.9426, "encoder_q-layer.1": 1912.6174, "encoder_q-layer.10": 396.0818, "encoder_q-layer.11": 1287.2565, "encoder_q-layer.2": 2388.5544, "encoder_q-layer.3": 2691.3713, "encoder_q-layer.4": 2954.6802, "encoder_q-layer.5": 3208.0322, "encoder_q-layer.6": 3286.9858, "encoder_q-layer.7": 2436.7859, "encoder_q-layer.8": 1220.682, "encoder_q-layer.9": 435.8693, "epoch": 0.1, "inbatch_neg_score": 0.5632, "inbatch_pos_score": 1.0371, "learning_rate": 4.7055555555555555e-05, "loss": 4.5675, "norm_diff": 0.2221, "norm_loss": 0.0, "num_token_doc": 66.7174, "num_token_overlap": 11.6255, "num_token_query": 31.8214, "num_token_union": 65.3107, "num_word_context": 202.3601, "num_word_doc": 49.7998, "num_word_query": 23.4843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3242.4881, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.561, "query_norm": 1.6542, "queue_k_norm": 1.4278, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8214, "sent_len_1": 66.7174, "sent_len_max_0": 127.39, "sent_len_max_1": 190.3988, "stdk": 0.0457, "stdq": 0.0419, "stdqueue_k": 0.0456, "stdqueue_q": 0.0, "step": 15300 }, { "accuracy": 39.3555, "active_queue_size": 16384.0, "cl_loss": 4.5339, "doc_norm": 1.4341, "encoder_q-embeddings": 1169.0768, "encoder_q-layer.0": 833.1882, "encoder_q-layer.1": 936.3766, "encoder_q-layer.10": 405.9169, "encoder_q-layer.11": 1299.9958, "encoder_q-layer.2": 1066.9764, "encoder_q-layer.3": 1194.1464, "encoder_q-layer.4": 1272.5927, "encoder_q-layer.5": 1300.1665, "encoder_q-layer.6": 1318.2534, "encoder_q-layer.7": 1022.3852, "encoder_q-layer.8": 675.167, "encoder_q-layer.9": 392.0267, "epoch": 0.1, "inbatch_neg_score": 0.5143, "inbatch_pos_score": 1.0107, "learning_rate": 4.7e-05, "loss": 4.5339, "norm_diff": 0.2189, "norm_loss": 0.0, "num_token_doc": 66.6936, "num_token_overlap": 11.6779, "num_token_query": 31.8387, "num_token_union": 65.2047, "num_word_context": 202.3102, "num_word_doc": 49.7356, "num_word_query": 23.512, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1536.0835, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5122, "query_norm": 1.653, "queue_k_norm": 1.4363, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8387, "sent_len_1": 66.6936, "sent_len_max_0": 127.5875, "sent_len_max_1": 190.4913, "stdk": 0.0455, "stdq": 0.0429, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 15400 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.5336, "doc_norm": 1.4411, "encoder_q-embeddings": 10295.4043, "encoder_q-layer.0": 7344.2793, "encoder_q-layer.1": 8260.7432, "encoder_q-layer.10": 417.2549, "encoder_q-layer.11": 1241.285, "encoder_q-layer.2": 10109.9141, "encoder_q-layer.3": 9407.6182, "encoder_q-layer.4": 8303.2314, "encoder_q-layer.5": 7158.0942, "encoder_q-layer.6": 6787.978, "encoder_q-layer.7": 4473.957, "encoder_q-layer.8": 3057.9348, "encoder_q-layer.9": 991.3997, "epoch": 0.1, "inbatch_neg_score": 0.5401, "inbatch_pos_score": 1.0352, "learning_rate": 4.6944444444444446e-05, "loss": 4.5336, "norm_diff": 0.2162, "norm_loss": 0.0, "num_token_doc": 66.6682, "num_token_overlap": 11.6805, "num_token_query": 31.7878, "num_token_union": 65.1654, "num_word_context": 202.1022, "num_word_doc": 49.6919, "num_word_query": 23.4636, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10584.0509, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5371, "query_norm": 1.6574, "queue_k_norm": 1.4408, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7878, "sent_len_1": 66.6682, "sent_len_max_0": 127.48, "sent_len_max_1": 190.975, "stdk": 0.0456, "stdq": 0.0426, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 15500 }, { "accuracy": 38.2812, "active_queue_size": 16384.0, "cl_loss": 4.4871, "doc_norm": 1.4475, "encoder_q-embeddings": 1558.3749, "encoder_q-layer.0": 1028.7135, "encoder_q-layer.1": 1146.054, "encoder_q-layer.10": 405.646, "encoder_q-layer.11": 1237.156, "encoder_q-layer.2": 1248.8002, "encoder_q-layer.3": 1417.0809, "encoder_q-layer.4": 1442.9691, "encoder_q-layer.5": 1516.6895, "encoder_q-layer.6": 1660.1395, "encoder_q-layer.7": 1249.7911, "encoder_q-layer.8": 825.4587, "encoder_q-layer.9": 376.0083, "epoch": 0.1, "inbatch_neg_score": 0.4761, "inbatch_pos_score": 0.9595, "learning_rate": 4.6888888888888895e-05, "loss": 4.4871, "norm_diff": 0.1727, "norm_loss": 0.0, "num_token_doc": 67.0372, "num_token_overlap": 11.7432, "num_token_query": 31.997, "num_token_union": 65.49, "num_word_context": 202.1689, "num_word_doc": 50.0488, "num_word_query": 23.6193, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1834.5586, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4727, "query_norm": 1.6202, "queue_k_norm": 1.4473, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.997, "sent_len_1": 67.0372, "sent_len_max_0": 127.575, "sent_len_max_1": 188.2275, "stdk": 0.0457, "stdq": 0.0419, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 15600 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.4908, "doc_norm": 1.4436, "encoder_q-embeddings": 5469.7075, "encoder_q-layer.0": 3803.5925, "encoder_q-layer.1": 4104.0884, "encoder_q-layer.10": 430.2186, "encoder_q-layer.11": 1232.3965, "encoder_q-layer.2": 4450.5352, "encoder_q-layer.3": 4861.6748, "encoder_q-layer.4": 5006.3872, "encoder_q-layer.5": 5192.4229, "encoder_q-layer.6": 5338.9561, "encoder_q-layer.7": 4390.7056, "encoder_q-layer.8": 1870.3301, "encoder_q-layer.9": 544.8506, "epoch": 0.1, "inbatch_neg_score": 0.4687, "inbatch_pos_score": 0.9546, "learning_rate": 4.683333333333334e-05, "loss": 4.4908, "norm_diff": 0.1799, "norm_loss": 0.0, "num_token_doc": 66.9729, "num_token_overlap": 11.6543, "num_token_query": 31.8187, "num_token_union": 65.4888, "num_word_context": 202.7364, "num_word_doc": 49.9533, "num_word_query": 23.5008, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6140.3327, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4658, "query_norm": 1.6235, "queue_k_norm": 1.4489, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8187, "sent_len_1": 66.9729, "sent_len_max_0": 127.5062, "sent_len_max_1": 189.8088, "stdk": 0.0454, "stdq": 0.042, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 15700 }, { "accuracy": 38.8672, "active_queue_size": 16384.0, "cl_loss": 4.5076, "doc_norm": 1.4507, "encoder_q-embeddings": 1473.7738, "encoder_q-layer.0": 1023.3738, "encoder_q-layer.1": 1082.4922, "encoder_q-layer.10": 425.4908, "encoder_q-layer.11": 1259.746, "encoder_q-layer.2": 1213.9961, "encoder_q-layer.3": 1223.5734, "encoder_q-layer.4": 1358.2231, "encoder_q-layer.5": 1338.9608, "encoder_q-layer.6": 1627.3553, "encoder_q-layer.7": 1437.8853, "encoder_q-layer.8": 1225.1465, "encoder_q-layer.9": 607.9707, "epoch": 0.1, "inbatch_neg_score": 0.4523, "inbatch_pos_score": 0.9248, "learning_rate": 4.677777777777778e-05, "loss": 4.5076, "norm_diff": 0.1701, "norm_loss": 0.0, "num_token_doc": 66.6718, "num_token_overlap": 11.6657, "num_token_query": 31.9866, "num_token_union": 65.3778, "num_word_context": 202.2983, "num_word_doc": 49.7204, "num_word_query": 23.6301, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1798.0292, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4487, "query_norm": 1.6208, "queue_k_norm": 1.4531, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9866, "sent_len_1": 66.6718, "sent_len_max_0": 127.61, "sent_len_max_1": 189.25, "stdk": 0.0457, "stdq": 0.0419, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 15800 }, { "accuracy": 36.9141, "active_queue_size": 16384.0, "cl_loss": 4.4966, "doc_norm": 1.448, "encoder_q-embeddings": 1896.3043, "encoder_q-layer.0": 1336.5779, "encoder_q-layer.1": 1483.095, "encoder_q-layer.10": 446.8022, "encoder_q-layer.11": 1237.7898, "encoder_q-layer.2": 1555.5123, "encoder_q-layer.3": 1585.324, "encoder_q-layer.4": 1686.1332, "encoder_q-layer.5": 1553.2555, "encoder_q-layer.6": 1661.9449, "encoder_q-layer.7": 1555.2717, "encoder_q-layer.8": 1081.9459, "encoder_q-layer.9": 556.7817, "epoch": 0.1, "inbatch_neg_score": 0.3948, "inbatch_pos_score": 0.8467, "learning_rate": 4.672222222222222e-05, "loss": 4.4966, "norm_diff": 0.1254, "norm_loss": 0.0, "num_token_doc": 66.8776, "num_token_overlap": 11.6929, "num_token_query": 31.9552, "num_token_union": 65.4465, "num_word_context": 202.4643, "num_word_doc": 49.9005, "num_word_query": 23.6147, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2126.8186, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3911, "query_norm": 1.5734, "queue_k_norm": 1.4516, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9552, "sent_len_1": 66.8776, "sent_len_max_0": 127.4, "sent_len_max_1": 190.0, "stdk": 0.0456, "stdq": 0.0411, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 15900 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.4827, "doc_norm": 1.4476, "encoder_q-embeddings": 2848.4316, "encoder_q-layer.0": 2144.2524, "encoder_q-layer.1": 2549.0164, "encoder_q-layer.10": 419.9368, "encoder_q-layer.11": 1153.5392, "encoder_q-layer.2": 2890.8806, "encoder_q-layer.3": 3448.5752, "encoder_q-layer.4": 3614.7952, "encoder_q-layer.5": 3514.5872, "encoder_q-layer.6": 3258.511, "encoder_q-layer.7": 3255.3687, "encoder_q-layer.8": 2539.4771, "encoder_q-layer.9": 920.9232, "epoch": 0.1, "inbatch_neg_score": 0.386, "inbatch_pos_score": 0.8838, "learning_rate": 4.666666666666667e-05, "loss": 4.4827, "norm_diff": 0.1777, "norm_loss": 0.0, "num_token_doc": 66.8042, "num_token_overlap": 11.696, "num_token_query": 31.9681, "num_token_union": 65.3966, "num_word_context": 202.3022, "num_word_doc": 49.8688, "num_word_query": 23.6134, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3995.5831, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3821, "query_norm": 1.6253, "queue_k_norm": 1.4496, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9681, "sent_len_1": 66.8042, "sent_len_max_0": 127.3937, "sent_len_max_1": 189.5075, "stdk": 0.0457, "stdq": 0.0427, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 16000 }, { "accuracy": 38.2812, "active_queue_size": 16384.0, "cl_loss": 4.4948, "doc_norm": 1.4447, "encoder_q-embeddings": 2826.1567, "encoder_q-layer.0": 2116.5605, "encoder_q-layer.1": 2266.9187, "encoder_q-layer.10": 438.1623, "encoder_q-layer.11": 1158.1641, "encoder_q-layer.2": 2510.7024, "encoder_q-layer.3": 2643.6438, "encoder_q-layer.4": 2699.9883, "encoder_q-layer.5": 2491.3254, "encoder_q-layer.6": 2314.7102, "encoder_q-layer.7": 1793.9091, "encoder_q-layer.8": 1135.4832, "encoder_q-layer.9": 474.314, "epoch": 0.1, "inbatch_neg_score": 0.3441, "inbatch_pos_score": 0.8369, "learning_rate": 4.6611111111111114e-05, "loss": 4.4948, "norm_diff": 0.1803, "norm_loss": 0.0, "num_token_doc": 66.7263, "num_token_overlap": 11.6522, "num_token_query": 31.7391, "num_token_union": 65.246, "num_word_context": 202.0516, "num_word_doc": 49.7937, "num_word_query": 23.4342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3106.4023, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3403, "query_norm": 1.625, "queue_k_norm": 1.4474, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7391, "sent_len_1": 66.7263, "sent_len_max_0": 127.53, "sent_len_max_1": 189.1687, "stdk": 0.0457, "stdq": 0.0427, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 16100 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.4865, "doc_norm": 1.4466, "encoder_q-embeddings": 6283.5234, "encoder_q-layer.0": 4269.8535, "encoder_q-layer.1": 4545.1279, "encoder_q-layer.10": 376.5895, "encoder_q-layer.11": 1087.4369, "encoder_q-layer.2": 4724.1938, "encoder_q-layer.3": 5400.6753, "encoder_q-layer.4": 6291.4497, "encoder_q-layer.5": 6040.0098, "encoder_q-layer.6": 4875.604, "encoder_q-layer.7": 3546.9829, "encoder_q-layer.8": 1789.3805, "encoder_q-layer.9": 563.9929, "epoch": 0.11, "inbatch_neg_score": 0.3086, "inbatch_pos_score": 0.8262, "learning_rate": 4.6555555555555556e-05, "loss": 4.4865, "norm_diff": 0.1569, "norm_loss": 0.0, "num_token_doc": 66.6138, "num_token_overlap": 11.6516, "num_token_query": 31.8972, "num_token_union": 65.2602, "num_word_context": 202.2836, "num_word_doc": 49.7131, "num_word_query": 23.5362, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6627.1342, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3035, "query_norm": 1.6035, "queue_k_norm": 1.4406, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8972, "sent_len_1": 66.6138, "sent_len_max_0": 127.5613, "sent_len_max_1": 188.0175, "stdk": 0.0459, "stdq": 0.0426, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 16200 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.4862, "doc_norm": 1.4335, "encoder_q-embeddings": 5457.7471, "encoder_q-layer.0": 4482.9229, "encoder_q-layer.1": 4668.3682, "encoder_q-layer.10": 403.0368, "encoder_q-layer.11": 1114.4695, "encoder_q-layer.2": 5064.6548, "encoder_q-layer.3": 6052.0737, "encoder_q-layer.4": 6469.6436, "encoder_q-layer.5": 7533.1987, "encoder_q-layer.6": 5272.2534, "encoder_q-layer.7": 1874.0287, "encoder_q-layer.8": 1105.0125, "encoder_q-layer.9": 531.9404, "epoch": 0.11, "inbatch_neg_score": 0.2737, "inbatch_pos_score": 0.751, "learning_rate": 4.6500000000000005e-05, "loss": 4.4862, "norm_diff": 0.1828, "norm_loss": 0.0, "num_token_doc": 66.7981, "num_token_overlap": 11.6907, "num_token_query": 31.9367, "num_token_union": 65.3748, "num_word_context": 202.1964, "num_word_doc": 49.8249, "num_word_query": 23.5928, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6826.2828, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2705, "query_norm": 1.6163, "queue_k_norm": 1.4354, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9367, "sent_len_1": 66.7981, "sent_len_max_0": 127.4638, "sent_len_max_1": 189.6475, "stdk": 0.0456, "stdq": 0.0419, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 16300 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.4637, "doc_norm": 1.4306, "encoder_q-embeddings": 1884.5027, "encoder_q-layer.0": 1474.3851, "encoder_q-layer.1": 1501.6118, "encoder_q-layer.10": 494.1226, "encoder_q-layer.11": 1202.988, "encoder_q-layer.2": 1734.1449, "encoder_q-layer.3": 2078.8594, "encoder_q-layer.4": 1917.618, "encoder_q-layer.5": 2119.7947, "encoder_q-layer.6": 2092.8054, "encoder_q-layer.7": 2193.0413, "encoder_q-layer.8": 2065.6001, "encoder_q-layer.9": 888.4716, "epoch": 0.11, "inbatch_neg_score": 0.2771, "inbatch_pos_score": 0.7578, "learning_rate": 4.644444444444445e-05, "loss": 4.4637, "norm_diff": 0.2612, "norm_loss": 0.0, "num_token_doc": 66.8702, "num_token_overlap": 11.7399, "num_token_query": 32.0162, "num_token_union": 65.4466, "num_word_context": 202.552, "num_word_doc": 49.9401, "num_word_query": 23.6637, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2578.3569, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2737, "query_norm": 1.6918, "queue_k_norm": 1.4313, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0162, "sent_len_1": 66.8702, "sent_len_max_0": 127.3738, "sent_len_max_1": 188.2388, "stdk": 0.0458, "stdq": 0.043, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 16400 }, { "accuracy": 39.3555, "active_queue_size": 16384.0, "cl_loss": 4.4799, "doc_norm": 1.4281, "encoder_q-embeddings": 2578.7751, "encoder_q-layer.0": 1864.3729, "encoder_q-layer.1": 2237.6892, "encoder_q-layer.10": 404.6772, "encoder_q-layer.11": 1077.9176, "encoder_q-layer.2": 2414.2795, "encoder_q-layer.3": 2541.6428, "encoder_q-layer.4": 2754.4946, "encoder_q-layer.5": 3034.0554, "encoder_q-layer.6": 2766.5818, "encoder_q-layer.7": 2984.3645, "encoder_q-layer.8": 2747.0735, "encoder_q-layer.9": 1243.6555, "epoch": 0.11, "inbatch_neg_score": 0.3235, "inbatch_pos_score": 0.8027, "learning_rate": 4.638888888888889e-05, "loss": 4.4799, "norm_diff": 0.265, "norm_loss": 0.0, "num_token_doc": 66.7375, "num_token_overlap": 11.705, "num_token_query": 32.019, "num_token_union": 65.3653, "num_word_context": 202.4195, "num_word_doc": 49.784, "num_word_query": 23.6425, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3505.2731, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3188, "query_norm": 1.6931, "queue_k_norm": 1.4268, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.019, "sent_len_1": 66.7375, "sent_len_max_0": 127.5987, "sent_len_max_1": 190.5613, "stdk": 0.0459, "stdq": 0.0423, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 16500 }, { "accuracy": 38.5742, "active_queue_size": 16384.0, "cl_loss": 4.536, "doc_norm": 1.4231, "encoder_q-embeddings": 2972.2817, "encoder_q-layer.0": 2017.7576, "encoder_q-layer.1": 2310.5811, "encoder_q-layer.10": 488.4925, "encoder_q-layer.11": 1408.8281, "encoder_q-layer.2": 2109.6077, "encoder_q-layer.3": 2129.2319, "encoder_q-layer.4": 2389.238, "encoder_q-layer.5": 2329.9956, "encoder_q-layer.6": 2444.614, "encoder_q-layer.7": 2285.4595, "encoder_q-layer.8": 2028.3746, "encoder_q-layer.9": 1080.9055, "epoch": 0.11, "inbatch_neg_score": 0.378, "inbatch_pos_score": 0.8516, "learning_rate": 4.633333333333333e-05, "loss": 4.536, "norm_diff": 0.3449, "norm_loss": 0.0, "num_token_doc": 66.5288, "num_token_overlap": 11.6007, "num_token_query": 31.717, "num_token_union": 65.1821, "num_word_context": 201.6125, "num_word_doc": 49.6333, "num_word_query": 23.4202, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3195.8991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3752, "query_norm": 1.7681, "queue_k_norm": 1.4217, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.717, "sent_len_1": 66.5288, "sent_len_max_0": 127.46, "sent_len_max_1": 188.9038, "stdk": 0.0459, "stdq": 0.0414, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 16600 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.5421, "doc_norm": 1.416, "encoder_q-embeddings": 10748.4111, "encoder_q-layer.0": 8115.8604, "encoder_q-layer.1": 8294.8789, "encoder_q-layer.10": 490.788, "encoder_q-layer.11": 1176.0728, "encoder_q-layer.2": 9214.959, "encoder_q-layer.3": 9315.4932, "encoder_q-layer.4": 9865.2842, "encoder_q-layer.5": 9243.54, "encoder_q-layer.6": 8711.04, "encoder_q-layer.7": 7060.8149, "encoder_q-layer.8": 5891.1177, "encoder_q-layer.9": 2013.6051, "epoch": 0.11, "inbatch_neg_score": 0.3881, "inbatch_pos_score": 0.8989, "learning_rate": 4.627777777777778e-05, "loss": 4.5421, "norm_diff": 0.3345, "norm_loss": 0.0, "num_token_doc": 66.5294, "num_token_overlap": 11.6293, "num_token_query": 31.8278, "num_token_union": 65.2009, "num_word_context": 201.9288, "num_word_doc": 49.676, "num_word_query": 23.4961, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11732.6393, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3823, "query_norm": 1.7505, "queue_k_norm": 1.421, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8278, "sent_len_1": 66.5294, "sent_len_max_0": 127.6137, "sent_len_max_1": 188.3475, "stdk": 0.0456, "stdq": 0.0435, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 16700 }, { "accuracy": 37.0117, "active_queue_size": 16384.0, "cl_loss": 4.5174, "doc_norm": 1.4091, "encoder_q-embeddings": 3616.5188, "encoder_q-layer.0": 2662.6118, "encoder_q-layer.1": 2736.8743, "encoder_q-layer.10": 521.9575, "encoder_q-layer.11": 1236.1189, "encoder_q-layer.2": 3062.72, "encoder_q-layer.3": 2997.5181, "encoder_q-layer.4": 2425.3237, "encoder_q-layer.5": 2129.5811, "encoder_q-layer.6": 1717.6157, "encoder_q-layer.7": 1542.8479, "encoder_q-layer.8": 1035.8024, "encoder_q-layer.9": 539.2803, "epoch": 0.11, "inbatch_neg_score": 0.3911, "inbatch_pos_score": 0.8701, "learning_rate": 4.6222222222222224e-05, "loss": 4.5174, "norm_diff": 0.2467, "norm_loss": 0.0, "num_token_doc": 66.7112, "num_token_overlap": 11.654, "num_token_query": 31.8549, "num_token_union": 65.3106, "num_word_context": 202.0989, "num_word_doc": 49.7946, "num_word_query": 23.525, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3448.8512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.387, "query_norm": 1.6558, "queue_k_norm": 1.4167, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8549, "sent_len_1": 66.7112, "sent_len_max_0": 127.5613, "sent_len_max_1": 188.6575, "stdk": 0.0454, "stdq": 0.042, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 16800 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.522, "doc_norm": 1.4103, "encoder_q-embeddings": 1799.1216, "encoder_q-layer.0": 1335.6497, "encoder_q-layer.1": 1609.6306, "encoder_q-layer.10": 404.9425, "encoder_q-layer.11": 1067.7611, "encoder_q-layer.2": 1601.7087, "encoder_q-layer.3": 1670.3052, "encoder_q-layer.4": 1550.6105, "encoder_q-layer.5": 1474.1759, "encoder_q-layer.6": 1313.2341, "encoder_q-layer.7": 893.7219, "encoder_q-layer.8": 689.5696, "encoder_q-layer.9": 403.9263, "epoch": 0.11, "inbatch_neg_score": 0.3966, "inbatch_pos_score": 0.8765, "learning_rate": 4.6166666666666666e-05, "loss": 4.522, "norm_diff": 0.1697, "norm_loss": 0.0, "num_token_doc": 66.7139, "num_token_overlap": 11.676, "num_token_query": 31.8352, "num_token_union": 65.2681, "num_word_context": 202.1595, "num_word_doc": 49.7799, "num_word_query": 23.509, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1974.1393, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3931, "query_norm": 1.58, "queue_k_norm": 1.4152, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8352, "sent_len_1": 66.7139, "sent_len_max_0": 127.4313, "sent_len_max_1": 189.1087, "stdk": 0.0455, "stdq": 0.0419, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 16900 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.505, "doc_norm": 1.4191, "encoder_q-embeddings": 2605.0488, "encoder_q-layer.0": 1915.9596, "encoder_q-layer.1": 2151.2935, "encoder_q-layer.10": 408.1245, "encoder_q-layer.11": 1010.9601, "encoder_q-layer.2": 2527.637, "encoder_q-layer.3": 2494.6187, "encoder_q-layer.4": 2427.1184, "encoder_q-layer.5": 2232.2043, "encoder_q-layer.6": 1966.2137, "encoder_q-layer.7": 1983.4282, "encoder_q-layer.8": 1299.9569, "encoder_q-layer.9": 609.1456, "epoch": 0.11, "inbatch_neg_score": 0.3816, "inbatch_pos_score": 0.8701, "learning_rate": 4.6111111111111115e-05, "loss": 4.505, "norm_diff": 0.1005, "norm_loss": 0.0, "num_token_doc": 66.8334, "num_token_overlap": 11.6761, "num_token_query": 31.8887, "num_token_union": 65.3715, "num_word_context": 202.3556, "num_word_doc": 49.8349, "num_word_query": 23.5338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2932.3452, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3782, "query_norm": 1.5196, "queue_k_norm": 1.4145, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8887, "sent_len_1": 66.8334, "sent_len_max_0": 127.5613, "sent_len_max_1": 191.3825, "stdk": 0.0459, "stdq": 0.0418, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 17000 }, { "accuracy": 38.5742, "active_queue_size": 16384.0, "cl_loss": 4.5211, "doc_norm": 1.4105, "encoder_q-embeddings": 6361.8735, "encoder_q-layer.0": 4650.1724, "encoder_q-layer.1": 4217.8936, "encoder_q-layer.10": 383.8713, "encoder_q-layer.11": 1021.1117, "encoder_q-layer.2": 3822.104, "encoder_q-layer.3": 3838.9624, "encoder_q-layer.4": 3318.7195, "encoder_q-layer.5": 2690.7488, "encoder_q-layer.6": 2495.8323, "encoder_q-layer.7": 2464.3569, "encoder_q-layer.8": 2116.5977, "encoder_q-layer.9": 824.3245, "epoch": 0.11, "inbatch_neg_score": 0.3582, "inbatch_pos_score": 0.8252, "learning_rate": 4.605555555555556e-05, "loss": 4.5211, "norm_diff": 0.0431, "norm_loss": 0.0, "num_token_doc": 66.7131, "num_token_overlap": 11.6005, "num_token_query": 31.7515, "num_token_union": 65.3032, "num_word_context": 202.0666, "num_word_doc": 49.7428, "num_word_query": 23.4483, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5403.1232, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3542, "query_norm": 1.4536, "queue_k_norm": 1.4138, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7515, "sent_len_1": 66.7131, "sent_len_max_0": 127.385, "sent_len_max_1": 191.8313, "stdk": 0.0456, "stdq": 0.0412, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 17100 }, { "accuracy": 39.4531, "active_queue_size": 16384.0, "cl_loss": 4.4871, "doc_norm": 1.4038, "encoder_q-embeddings": 2619.9385, "encoder_q-layer.0": 1888.6166, "encoder_q-layer.1": 1852.8939, "encoder_q-layer.10": 412.4131, "encoder_q-layer.11": 943.4464, "encoder_q-layer.2": 2167.1152, "encoder_q-layer.3": 2539.0303, "encoder_q-layer.4": 2640.7336, "encoder_q-layer.5": 2702.179, "encoder_q-layer.6": 2290.5073, "encoder_q-layer.7": 2035.7621, "encoder_q-layer.8": 1967.0465, "encoder_q-layer.9": 708.2144, "epoch": 0.11, "inbatch_neg_score": 0.2925, "inbatch_pos_score": 0.7778, "learning_rate": 4.600000000000001e-05, "loss": 4.4871, "norm_diff": 0.0229, "norm_loss": 0.0, "num_token_doc": 66.8812, "num_token_overlap": 11.6739, "num_token_query": 31.9007, "num_token_union": 65.4092, "num_word_context": 202.5438, "num_word_doc": 49.9701, "num_word_query": 23.5531, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3044.3486, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2917, "query_norm": 1.4267, "queue_k_norm": 1.4094, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9007, "sent_len_1": 66.8812, "sent_len_max_0": 127.4675, "sent_len_max_1": 186.4963, "stdk": 0.0454, "stdq": 0.0422, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 17200 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.524, "doc_norm": 1.4057, "encoder_q-embeddings": 1069.3462, "encoder_q-layer.0": 721.7384, "encoder_q-layer.1": 812.1627, "encoder_q-layer.10": 765.9283, "encoder_q-layer.11": 1912.0763, "encoder_q-layer.2": 866.8505, "encoder_q-layer.3": 959.8097, "encoder_q-layer.4": 1019.2163, "encoder_q-layer.5": 978.7189, "encoder_q-layer.6": 1070.6688, "encoder_q-layer.7": 1015.6711, "encoder_q-layer.8": 1032.615, "encoder_q-layer.9": 758.5776, "epoch": 0.11, "inbatch_neg_score": 0.2263, "inbatch_pos_score": 0.7075, "learning_rate": 4.594444444444444e-05, "loss": 4.524, "norm_diff": 0.0134, "norm_loss": 0.0, "num_token_doc": 67.0748, "num_token_overlap": 11.649, "num_token_query": 31.7488, "num_token_union": 65.4304, "num_word_context": 202.1829, "num_word_doc": 50.0136, "num_word_query": 23.4383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1522.6717, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2264, "query_norm": 1.4059, "queue_k_norm": 1.4078, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7488, "sent_len_1": 67.0748, "sent_len_max_0": 127.2725, "sent_len_max_1": 191.035, "stdk": 0.0456, "stdq": 0.042, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 17300 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.5062, "doc_norm": 1.4019, "encoder_q-embeddings": 6486.0996, "encoder_q-layer.0": 4416.4937, "encoder_q-layer.1": 4662.5547, "encoder_q-layer.10": 750.0576, "encoder_q-layer.11": 1901.5487, "encoder_q-layer.2": 5683.4868, "encoder_q-layer.3": 6134.0308, "encoder_q-layer.4": 5947.4614, "encoder_q-layer.5": 5332.7163, "encoder_q-layer.6": 4570.9834, "encoder_q-layer.7": 3649.6597, "encoder_q-layer.8": 3164.3064, "encoder_q-layer.9": 1201.1765, "epoch": 0.11, "inbatch_neg_score": 0.2159, "inbatch_pos_score": 0.707, "learning_rate": 4.588888888888889e-05, "loss": 4.5062, "norm_diff": 0.011, "norm_loss": 0.0, "num_token_doc": 66.8293, "num_token_overlap": 11.6323, "num_token_query": 31.7852, "num_token_union": 65.4037, "num_word_context": 202.5579, "num_word_doc": 49.9033, "num_word_query": 23.4683, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6822.2079, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2148, "query_norm": 1.4014, "queue_k_norm": 1.4049, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7852, "sent_len_1": 66.8293, "sent_len_max_0": 127.6125, "sent_len_max_1": 189.8237, "stdk": 0.0457, "stdq": 0.0422, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 17400 }, { "accuracy": 37.9883, "active_queue_size": 16384.0, "cl_loss": 4.4956, "doc_norm": 1.392, "encoder_q-embeddings": 2402.0623, "encoder_q-layer.0": 1716.2291, "encoder_q-layer.1": 2035.7859, "encoder_q-layer.10": 964.5549, "encoder_q-layer.11": 2300.9753, "encoder_q-layer.2": 2265.4883, "encoder_q-layer.3": 2472.3599, "encoder_q-layer.4": 2423.0796, "encoder_q-layer.5": 2292.5796, "encoder_q-layer.6": 2390.8357, "encoder_q-layer.7": 2302.8904, "encoder_q-layer.8": 1819.2017, "encoder_q-layer.9": 1014.2141, "epoch": 0.11, "inbatch_neg_score": 0.1862, "inbatch_pos_score": 0.6743, "learning_rate": 4.5833333333333334e-05, "loss": 4.4956, "norm_diff": 0.0215, "norm_loss": 0.0, "num_token_doc": 66.8556, "num_token_overlap": 11.7238, "num_token_query": 31.9603, "num_token_union": 65.3935, "num_word_context": 202.3802, "num_word_doc": 49.8938, "num_word_query": 23.6063, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3082.6119, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1846, "query_norm": 1.4086, "queue_k_norm": 1.3983, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9603, "sent_len_1": 66.8556, "sent_len_max_0": 127.4225, "sent_len_max_1": 189.24, "stdk": 0.0455, "stdq": 0.0432, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 17500 }, { "accuracy": 39.5508, "active_queue_size": 16384.0, "cl_loss": 4.493, "doc_norm": 1.391, "encoder_q-embeddings": 1721.9893, "encoder_q-layer.0": 1155.0377, "encoder_q-layer.1": 1213.3729, "encoder_q-layer.10": 865.2296, "encoder_q-layer.11": 2122.075, "encoder_q-layer.2": 1371.7556, "encoder_q-layer.3": 1587.0466, "encoder_q-layer.4": 1602.7336, "encoder_q-layer.5": 1562.2924, "encoder_q-layer.6": 1805.1871, "encoder_q-layer.7": 1632.2866, "encoder_q-layer.8": 1288.4821, "encoder_q-layer.9": 731.1588, "epoch": 0.11, "inbatch_neg_score": 0.1664, "inbatch_pos_score": 0.6494, "learning_rate": 4.577777777777778e-05, "loss": 4.493, "norm_diff": 0.0133, "norm_loss": 0.0, "num_token_doc": 66.7608, "num_token_overlap": 11.6619, "num_token_query": 31.8868, "num_token_union": 65.3368, "num_word_context": 202.2022, "num_word_doc": 49.8138, "num_word_query": 23.5331, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2198.9659, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1643, "query_norm": 1.3952, "queue_k_norm": 1.3924, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8868, "sent_len_1": 66.7608, "sent_len_max_0": 127.5975, "sent_len_max_1": 188.7025, "stdk": 0.0457, "stdq": 0.0421, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 17600 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.4922, "doc_norm": 1.3822, "encoder_q-embeddings": 1597.7655, "encoder_q-layer.0": 1113.9884, "encoder_q-layer.1": 1196.7301, "encoder_q-layer.10": 807.6722, "encoder_q-layer.11": 1933.2034, "encoder_q-layer.2": 1364.5725, "encoder_q-layer.3": 1423.0, "encoder_q-layer.4": 1469.7179, "encoder_q-layer.5": 1276.3008, "encoder_q-layer.6": 1146.5673, "encoder_q-layer.7": 964.087, "encoder_q-layer.8": 1002.7567, "encoder_q-layer.9": 740.4526, "epoch": 0.12, "inbatch_neg_score": 0.1538, "inbatch_pos_score": 0.6382, "learning_rate": 4.572222222222222e-05, "loss": 4.4922, "norm_diff": 0.026, "norm_loss": 0.0, "num_token_doc": 66.8804, "num_token_overlap": 11.6977, "num_token_query": 32.0608, "num_token_union": 65.4977, "num_word_context": 202.6066, "num_word_doc": 49.946, "num_word_query": 23.7, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1892.8701, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1527, "query_norm": 1.4081, "queue_k_norm": 1.3844, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0608, "sent_len_1": 66.8804, "sent_len_max_0": 127.545, "sent_len_max_1": 189.9787, "stdk": 0.0456, "stdq": 0.0429, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 17700 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.4999, "doc_norm": 1.3728, "encoder_q-embeddings": 9042.8379, "encoder_q-layer.0": 6683.915, "encoder_q-layer.1": 8014.0059, "encoder_q-layer.10": 731.7925, "encoder_q-layer.11": 1845.2133, "encoder_q-layer.2": 9626.0596, "encoder_q-layer.3": 9447.3955, "encoder_q-layer.4": 10406.6855, "encoder_q-layer.5": 8890.8672, "encoder_q-layer.6": 8234.2842, "encoder_q-layer.7": 7843.2612, "encoder_q-layer.8": 4799.562, "encoder_q-layer.9": 1873.1039, "epoch": 0.12, "inbatch_neg_score": 0.1593, "inbatch_pos_score": 0.644, "learning_rate": 4.566666666666667e-05, "loss": 4.4999, "norm_diff": 0.0392, "norm_loss": 0.0, "num_token_doc": 66.9671, "num_token_overlap": 11.7133, "num_token_query": 31.8745, "num_token_union": 65.3571, "num_word_context": 202.305, "num_word_doc": 49.9715, "num_word_query": 23.542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11201.4734, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1592, "query_norm": 1.412, "queue_k_norm": 1.3769, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8745, "sent_len_1": 66.9671, "sent_len_max_0": 127.565, "sent_len_max_1": 190.4238, "stdk": 0.0454, "stdq": 0.0417, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 17800 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.52, "doc_norm": 1.3648, "encoder_q-embeddings": 3780.5281, "encoder_q-layer.0": 2707.8521, "encoder_q-layer.1": 2905.6179, "encoder_q-layer.10": 725.52, "encoder_q-layer.11": 1859.0509, "encoder_q-layer.2": 3524.5574, "encoder_q-layer.3": 3584.2424, "encoder_q-layer.4": 3474.3486, "encoder_q-layer.5": 3057.219, "encoder_q-layer.6": 2699.4395, "encoder_q-layer.7": 2021.9255, "encoder_q-layer.8": 1406.5298, "encoder_q-layer.9": 824.9957, "epoch": 0.12, "inbatch_neg_score": 0.1395, "inbatch_pos_score": 0.6304, "learning_rate": 4.561111111111112e-05, "loss": 4.52, "norm_diff": 0.0692, "norm_loss": 0.0, "num_token_doc": 66.6607, "num_token_overlap": 11.633, "num_token_query": 31.8682, "num_token_union": 65.2742, "num_word_context": 202.2443, "num_word_doc": 49.7543, "num_word_query": 23.5541, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4162.3491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1367, "query_norm": 1.434, "queue_k_norm": 1.3716, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8682, "sent_len_1": 66.6607, "sent_len_max_0": 127.6075, "sent_len_max_1": 190.22, "stdk": 0.0453, "stdq": 0.0428, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 17900 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.4833, "doc_norm": 1.3706, "encoder_q-embeddings": 2624.855, "encoder_q-layer.0": 1966.4309, "encoder_q-layer.1": 2113.0029, "encoder_q-layer.10": 838.445, "encoder_q-layer.11": 2269.4683, "encoder_q-layer.2": 2143.2358, "encoder_q-layer.3": 2140.989, "encoder_q-layer.4": 2254.928, "encoder_q-layer.5": 2048.344, "encoder_q-layer.6": 1819.9628, "encoder_q-layer.7": 1335.9904, "encoder_q-layer.8": 1264.8666, "encoder_q-layer.9": 846.8462, "epoch": 0.12, "inbatch_neg_score": 0.1502, "inbatch_pos_score": 0.6553, "learning_rate": 4.555555555555556e-05, "loss": 4.4833, "norm_diff": 0.0976, "norm_loss": 0.0, "num_token_doc": 66.7268, "num_token_overlap": 11.7037, "num_token_query": 31.8861, "num_token_union": 65.288, "num_word_context": 202.1461, "num_word_doc": 49.801, "num_word_query": 23.5571, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2874.3209, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.147, "query_norm": 1.4682, "queue_k_norm": 1.3672, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8861, "sent_len_1": 66.7268, "sent_len_max_0": 127.535, "sent_len_max_1": 189.0625, "stdk": 0.0457, "stdq": 0.0433, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 18000 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.4965, "doc_norm": 1.3647, "encoder_q-embeddings": 1232.1458, "encoder_q-layer.0": 935.9882, "encoder_q-layer.1": 939.0318, "encoder_q-layer.10": 743.5528, "encoder_q-layer.11": 1996.3688, "encoder_q-layer.2": 922.5596, "encoder_q-layer.3": 917.7286, "encoder_q-layer.4": 862.6729, "encoder_q-layer.5": 771.3385, "encoder_q-layer.6": 820.6765, "encoder_q-layer.7": 803.3983, "encoder_q-layer.8": 817.3663, "encoder_q-layer.9": 673.4006, "epoch": 0.12, "inbatch_neg_score": 0.1688, "inbatch_pos_score": 0.667, "learning_rate": 4.55e-05, "loss": 4.4965, "norm_diff": 0.0856, "norm_loss": 0.0, "num_token_doc": 66.681, "num_token_overlap": 11.6794, "num_token_query": 31.9113, "num_token_union": 65.2348, "num_word_context": 202.3736, "num_word_doc": 49.7473, "num_word_query": 23.5556, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1558.7316, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1655, "query_norm": 1.4503, "queue_k_norm": 1.3628, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9113, "sent_len_1": 66.681, "sent_len_max_0": 127.4125, "sent_len_max_1": 188.8325, "stdk": 0.0457, "stdq": 0.0428, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 18100 }, { "accuracy": 39.3555, "active_queue_size": 16384.0, "cl_loss": 4.4969, "doc_norm": 1.3558, "encoder_q-embeddings": 1849.0509, "encoder_q-layer.0": 1226.6414, "encoder_q-layer.1": 1440.4307, "encoder_q-layer.10": 767.5886, "encoder_q-layer.11": 2030.6396, "encoder_q-layer.2": 1675.6097, "encoder_q-layer.3": 1759.6589, "encoder_q-layer.4": 1814.8082, "encoder_q-layer.5": 1548.2875, "encoder_q-layer.6": 1353.1123, "encoder_q-layer.7": 1114.4692, "encoder_q-layer.8": 1008.3705, "encoder_q-layer.9": 741.1815, "epoch": 0.12, "inbatch_neg_score": 0.1569, "inbatch_pos_score": 0.6401, "learning_rate": 4.5444444444444444e-05, "loss": 4.4969, "norm_diff": 0.0984, "norm_loss": 0.0, "num_token_doc": 66.6338, "num_token_overlap": 11.6698, "num_token_query": 31.9397, "num_token_union": 65.2683, "num_word_context": 201.9004, "num_word_doc": 49.7618, "num_word_query": 23.6128, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2202.4406, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1542, "query_norm": 1.4542, "queue_k_norm": 1.3583, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9397, "sent_len_1": 66.6338, "sent_len_max_0": 127.6, "sent_len_max_1": 189.2988, "stdk": 0.0455, "stdq": 0.0425, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 18200 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.4727, "doc_norm": 1.352, "encoder_q-embeddings": 1747.8038, "encoder_q-layer.0": 1229.2906, "encoder_q-layer.1": 1337.2906, "encoder_q-layer.10": 765.3647, "encoder_q-layer.11": 1987.6736, "encoder_q-layer.2": 1516.3616, "encoder_q-layer.3": 1675.6649, "encoder_q-layer.4": 1875.543, "encoder_q-layer.5": 1926.2714, "encoder_q-layer.6": 2035.5546, "encoder_q-layer.7": 2019.4095, "encoder_q-layer.8": 1768.8641, "encoder_q-layer.9": 992.8356, "epoch": 0.12, "inbatch_neg_score": 0.1826, "inbatch_pos_score": 0.6973, "learning_rate": 4.538888888888889e-05, "loss": 4.4727, "norm_diff": 0.1224, "norm_loss": 0.0, "num_token_doc": 66.6796, "num_token_overlap": 11.6767, "num_token_query": 31.9152, "num_token_union": 65.3403, "num_word_context": 202.4449, "num_word_doc": 49.7924, "num_word_query": 23.5993, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2444.412, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1809, "query_norm": 1.4744, "queue_k_norm": 1.3554, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9152, "sent_len_1": 66.6796, "sent_len_max_0": 127.5575, "sent_len_max_1": 189.1213, "stdk": 0.0455, "stdq": 0.0432, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 18300 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.5372, "doc_norm": 1.3566, "encoder_q-embeddings": 9506.0449, "encoder_q-layer.0": 7929.0444, "encoder_q-layer.1": 8655.0869, "encoder_q-layer.10": 786.4609, "encoder_q-layer.11": 2048.9653, "encoder_q-layer.2": 9572.5566, "encoder_q-layer.3": 9780.5586, "encoder_q-layer.4": 9854.6709, "encoder_q-layer.5": 9266.6787, "encoder_q-layer.6": 9010.5479, "encoder_q-layer.7": 7068.3638, "encoder_q-layer.8": 5497.9263, "encoder_q-layer.9": 2174.5686, "epoch": 0.12, "inbatch_neg_score": 0.2161, "inbatch_pos_score": 0.7148, "learning_rate": 4.5333333333333335e-05, "loss": 4.5372, "norm_diff": 0.1306, "norm_loss": 0.0, "num_token_doc": 66.859, "num_token_overlap": 11.703, "num_token_query": 31.9143, "num_token_union": 65.3664, "num_word_context": 202.2267, "num_word_doc": 49.8813, "num_word_query": 23.5505, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11701.3667, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.215, "query_norm": 1.4872, "queue_k_norm": 1.355, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9143, "sent_len_1": 66.859, "sent_len_max_0": 127.4237, "sent_len_max_1": 189.5913, "stdk": 0.0457, "stdq": 0.0422, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 18400 }, { "accuracy": 38.1836, "active_queue_size": 16384.0, "cl_loss": 4.4883, "doc_norm": 1.3565, "encoder_q-embeddings": 11646.6201, "encoder_q-layer.0": 9057.2803, "encoder_q-layer.1": 10251.9385, "encoder_q-layer.10": 775.0156, "encoder_q-layer.11": 2043.6782, "encoder_q-layer.2": 11108.2998, "encoder_q-layer.3": 10540.5918, "encoder_q-layer.4": 10131.5225, "encoder_q-layer.5": 9574.6387, "encoder_q-layer.6": 6084.4629, "encoder_q-layer.7": 2389.3184, "encoder_q-layer.8": 1466.9076, "encoder_q-layer.9": 842.0778, "epoch": 0.12, "inbatch_neg_score": 0.2167, "inbatch_pos_score": 0.6914, "learning_rate": 4.527777777777778e-05, "loss": 4.4883, "norm_diff": 0.106, "norm_loss": 0.0, "num_token_doc": 66.8702, "num_token_overlap": 11.7343, "num_token_query": 31.9987, "num_token_union": 65.4134, "num_word_context": 202.6252, "num_word_doc": 49.8717, "num_word_query": 23.631, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12262.3193, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2147, "query_norm": 1.4625, "queue_k_norm": 1.3548, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9987, "sent_len_1": 66.8702, "sent_len_max_0": 127.6137, "sent_len_max_1": 190.305, "stdk": 0.0457, "stdq": 0.0422, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 18500 }, { "accuracy": 39.5508, "active_queue_size": 16384.0, "cl_loss": 4.4869, "doc_norm": 1.3562, "encoder_q-embeddings": 3429.2463, "encoder_q-layer.0": 2289.1206, "encoder_q-layer.1": 2534.9373, "encoder_q-layer.10": 678.4696, "encoder_q-layer.11": 1813.9926, "encoder_q-layer.2": 2748.4773, "encoder_q-layer.3": 2965.0869, "encoder_q-layer.4": 2943.4863, "encoder_q-layer.5": 2735.9395, "encoder_q-layer.6": 2248.5417, "encoder_q-layer.7": 1627.8177, "encoder_q-layer.8": 1170.7972, "encoder_q-layer.9": 778.7163, "epoch": 0.12, "inbatch_neg_score": 0.2018, "inbatch_pos_score": 0.6807, "learning_rate": 4.522222222222223e-05, "loss": 4.4869, "norm_diff": 0.0418, "norm_loss": 0.0, "num_token_doc": 66.6704, "num_token_overlap": 11.6909, "num_token_query": 31.9349, "num_token_union": 65.3027, "num_word_context": 202.3437, "num_word_doc": 49.8014, "num_word_query": 23.5956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3517.1235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2014, "query_norm": 1.398, "queue_k_norm": 1.3543, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9349, "sent_len_1": 66.6704, "sent_len_max_0": 127.45, "sent_len_max_1": 188.6262, "stdk": 0.0457, "stdq": 0.0411, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 18600 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.4936, "doc_norm": 1.3464, "encoder_q-embeddings": 6498.751, "encoder_q-layer.0": 4956.5034, "encoder_q-layer.1": 5481.668, "encoder_q-layer.10": 822.8583, "encoder_q-layer.11": 2034.5798, "encoder_q-layer.2": 5536.0889, "encoder_q-layer.3": 5714.061, "encoder_q-layer.4": 4829.1523, "encoder_q-layer.5": 3600.1235, "encoder_q-layer.6": 3203.2278, "encoder_q-layer.7": 2807.9509, "encoder_q-layer.8": 1987.7039, "encoder_q-layer.9": 1087.3773, "epoch": 0.12, "inbatch_neg_score": 0.2069, "inbatch_pos_score": 0.7017, "learning_rate": 4.516666666666667e-05, "loss": 4.4936, "norm_diff": 0.0777, "norm_loss": 0.0, "num_token_doc": 67.0183, "num_token_overlap": 11.6928, "num_token_query": 31.9394, "num_token_union": 65.5197, "num_word_context": 202.5614, "num_word_doc": 50.0064, "num_word_query": 23.5759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6339.0402, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2056, "query_norm": 1.4241, "queue_k_norm": 1.3545, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9394, "sent_len_1": 67.0183, "sent_len_max_0": 127.5413, "sent_len_max_1": 191.715, "stdk": 0.0454, "stdq": 0.0424, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 18700 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.5001, "doc_norm": 1.3583, "encoder_q-embeddings": 2734.2976, "encoder_q-layer.0": 1964.0195, "encoder_q-layer.1": 2084.6365, "encoder_q-layer.10": 757.4, "encoder_q-layer.11": 2193.0022, "encoder_q-layer.2": 1901.7074, "encoder_q-layer.3": 1792.7605, "encoder_q-layer.4": 1777.4919, "encoder_q-layer.5": 1534.1069, "encoder_q-layer.6": 1528.4176, "encoder_q-layer.7": 1340.8422, "encoder_q-layer.8": 1384.1234, "encoder_q-layer.9": 840.501, "epoch": 0.12, "inbatch_neg_score": 0.2178, "inbatch_pos_score": 0.7134, "learning_rate": 4.511111111111112e-05, "loss": 4.5001, "norm_diff": 0.0749, "norm_loss": 0.0, "num_token_doc": 66.8847, "num_token_overlap": 11.6956, "num_token_query": 31.9407, "num_token_union": 65.4148, "num_word_context": 202.5396, "num_word_doc": 49.8997, "num_word_query": 23.5831, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2752.5004, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2158, "query_norm": 1.4332, "queue_k_norm": 1.3527, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9407, "sent_len_1": 66.8847, "sent_len_max_0": 127.4638, "sent_len_max_1": 189.96, "stdk": 0.0458, "stdq": 0.0424, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 18800 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.5041, "doc_norm": 1.3565, "encoder_q-embeddings": 4795.0674, "encoder_q-layer.0": 3403.9497, "encoder_q-layer.1": 3380.3594, "encoder_q-layer.10": 751.681, "encoder_q-layer.11": 2152.7061, "encoder_q-layer.2": 3575.0063, "encoder_q-layer.3": 3674.9585, "encoder_q-layer.4": 3830.7654, "encoder_q-layer.5": 3631.1831, "encoder_q-layer.6": 3823.2922, "encoder_q-layer.7": 3291.7688, "encoder_q-layer.8": 3015.5525, "encoder_q-layer.9": 1634.3948, "epoch": 0.12, "inbatch_neg_score": 0.1979, "inbatch_pos_score": 0.707, "learning_rate": 4.5055555555555554e-05, "loss": 4.5041, "norm_diff": 0.0392, "norm_loss": 0.0, "num_token_doc": 66.6839, "num_token_overlap": 11.6384, "num_token_query": 31.7122, "num_token_union": 65.1839, "num_word_context": 202.26, "num_word_doc": 49.7336, "num_word_query": 23.4246, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5061.4799, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.198, "query_norm": 1.3956, "queue_k_norm": 1.3513, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7122, "sent_len_1": 66.6839, "sent_len_max_0": 127.47, "sent_len_max_1": 189.5075, "stdk": 0.0458, "stdq": 0.0425, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 18900 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.5057, "doc_norm": 1.3446, "encoder_q-embeddings": 2559.3586, "encoder_q-layer.0": 1797.6935, "encoder_q-layer.1": 1964.7705, "encoder_q-layer.10": 791.2563, "encoder_q-layer.11": 2258.0532, "encoder_q-layer.2": 2021.2358, "encoder_q-layer.3": 2003.1877, "encoder_q-layer.4": 2024.182, "encoder_q-layer.5": 1797.2375, "encoder_q-layer.6": 2008.5135, "encoder_q-layer.7": 1625.2332, "encoder_q-layer.8": 1419.541, "encoder_q-layer.9": 829.3621, "epoch": 0.12, "inbatch_neg_score": 0.2104, "inbatch_pos_score": 0.6992, "learning_rate": 4.5e-05, "loss": 4.5057, "norm_diff": 0.0635, "norm_loss": 0.0, "num_token_doc": 66.6461, "num_token_overlap": 11.6642, "num_token_query": 31.8975, "num_token_union": 65.2496, "num_word_context": 202.1417, "num_word_doc": 49.7189, "num_word_query": 23.552, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2821.436, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.208, "query_norm": 1.408, "queue_k_norm": 1.3516, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8975, "sent_len_1": 66.6461, "sent_len_max_0": 127.5238, "sent_len_max_1": 188.9187, "stdk": 0.0454, "stdq": 0.0427, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 19000 }, { "accuracy": 40.8203, "active_queue_size": 16384.0, "cl_loss": 4.4935, "doc_norm": 1.3495, "encoder_q-embeddings": 12137.2256, "encoder_q-layer.0": 8761.8643, "encoder_q-layer.1": 8813.3086, "encoder_q-layer.10": 728.5419, "encoder_q-layer.11": 1886.0819, "encoder_q-layer.2": 10567.3164, "encoder_q-layer.3": 8854.6855, "encoder_q-layer.4": 6830.2407, "encoder_q-layer.5": 5202.437, "encoder_q-layer.6": 4702.4702, "encoder_q-layer.7": 3744.0337, "encoder_q-layer.8": 2585.7476, "encoder_q-layer.9": 750.1227, "epoch": 0.12, "inbatch_neg_score": 0.1953, "inbatch_pos_score": 0.6958, "learning_rate": 4.4944444444444445e-05, "loss": 4.4935, "norm_diff": 0.0202, "norm_loss": 0.0, "num_token_doc": 66.6504, "num_token_overlap": 11.6714, "num_token_query": 31.8554, "num_token_union": 65.2556, "num_word_context": 202.2318, "num_word_doc": 49.7631, "num_word_query": 23.5133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11058.0627, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1965, "query_norm": 1.3697, "queue_k_norm": 1.3514, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8554, "sent_len_1": 66.6504, "sent_len_max_0": 127.6225, "sent_len_max_1": 189.2438, "stdk": 0.0456, "stdq": 0.0421, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 19100 }, { "accuracy": 38.8672, "active_queue_size": 16384.0, "cl_loss": 4.4954, "doc_norm": 1.3493, "encoder_q-embeddings": 19081.0215, "encoder_q-layer.0": 13455.9316, "encoder_q-layer.1": 14340.0107, "encoder_q-layer.10": 726.2358, "encoder_q-layer.11": 1905.6897, "encoder_q-layer.2": 15675.6182, "encoder_q-layer.3": 16730.0371, "encoder_q-layer.4": 16180.1914, "encoder_q-layer.5": 16507.5684, "encoder_q-layer.6": 15272.5361, "encoder_q-layer.7": 12148.1221, "encoder_q-layer.8": 6019.4707, "encoder_q-layer.9": 1561.7777, "epoch": 0.12, "inbatch_neg_score": 0.1909, "inbatch_pos_score": 0.6763, "learning_rate": 4.4888888888888894e-05, "loss": 4.4954, "norm_diff": 0.0154, "norm_loss": 0.0, "num_token_doc": 66.955, "num_token_overlap": 11.6423, "num_token_query": 31.7797, "num_token_union": 65.4147, "num_word_context": 202.4638, "num_word_doc": 49.9707, "num_word_query": 23.4714, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19880.8688, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1907, "query_norm": 1.355, "queue_k_norm": 1.3519, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7797, "sent_len_1": 66.955, "sent_len_max_0": 127.6375, "sent_len_max_1": 189.0012, "stdk": 0.0456, "stdq": 0.0423, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 19200 }, { "accuracy": 37.793, "active_queue_size": 16384.0, "cl_loss": 4.5197, "doc_norm": 1.352, "encoder_q-embeddings": 2784.0618, "encoder_q-layer.0": 2040.7002, "encoder_q-layer.1": 2306.7871, "encoder_q-layer.10": 915.5797, "encoder_q-layer.11": 2339.1353, "encoder_q-layer.2": 2473.9214, "encoder_q-layer.3": 2735.6721, "encoder_q-layer.4": 2823.3052, "encoder_q-layer.5": 2652.437, "encoder_q-layer.6": 2867.3303, "encoder_q-layer.7": 2350.4866, "encoder_q-layer.8": 1518.9915, "encoder_q-layer.9": 926.1472, "epoch": 0.13, "inbatch_neg_score": 0.1927, "inbatch_pos_score": 0.668, "learning_rate": 4.483333333333333e-05, "loss": 4.5197, "norm_diff": 0.0195, "norm_loss": 0.0, "num_token_doc": 66.7837, "num_token_overlap": 11.655, "num_token_query": 31.857, "num_token_union": 65.3641, "num_word_context": 202.3831, "num_word_doc": 49.8168, "num_word_query": 23.5164, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3434.7977, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1909, "query_norm": 1.3667, "queue_k_norm": 1.3491, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.857, "sent_len_1": 66.7837, "sent_len_max_0": 127.6287, "sent_len_max_1": 188.055, "stdk": 0.0457, "stdq": 0.0424, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 19300 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.5135, "doc_norm": 1.344, "encoder_q-embeddings": 30282.2324, "encoder_q-layer.0": 22549.2383, "encoder_q-layer.1": 23383.3242, "encoder_q-layer.10": 806.7704, "encoder_q-layer.11": 1971.4963, "encoder_q-layer.2": 26790.0039, "encoder_q-layer.3": 27068.873, "encoder_q-layer.4": 28943.6973, "encoder_q-layer.5": 26145.3867, "encoder_q-layer.6": 24009.084, "encoder_q-layer.7": 18078.5918, "encoder_q-layer.8": 13295.4883, "encoder_q-layer.9": 3856.9102, "epoch": 0.13, "inbatch_neg_score": 0.1861, "inbatch_pos_score": 0.6709, "learning_rate": 4.477777777777778e-05, "loss": 4.5135, "norm_diff": 0.0343, "norm_loss": 0.0, "num_token_doc": 66.6013, "num_token_overlap": 11.6513, "num_token_query": 31.776, "num_token_union": 65.1954, "num_word_context": 202.138, "num_word_doc": 49.6821, "num_word_query": 23.4753, "postclip_grad_norm": 1.0, "preclip_grad_norm": 32830.6761, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1855, "query_norm": 1.3097, "queue_k_norm": 1.3487, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.776, "sent_len_1": 66.6013, "sent_len_max_0": 127.5913, "sent_len_max_1": 190.2212, "stdk": 0.0455, "stdq": 0.0415, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 19400 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.4571, "doc_norm": 1.3494, "encoder_q-embeddings": 1299.7633, "encoder_q-layer.0": 943.5549, "encoder_q-layer.1": 1071.214, "encoder_q-layer.10": 381.3082, "encoder_q-layer.11": 936.9185, "encoder_q-layer.2": 1105.2433, "encoder_q-layer.3": 1087.1611, "encoder_q-layer.4": 1070.0231, "encoder_q-layer.5": 957.1652, "encoder_q-layer.6": 1028.9509, "encoder_q-layer.7": 807.8464, "encoder_q-layer.8": 595.9142, "encoder_q-layer.9": 427.4354, "epoch": 0.13, "inbatch_neg_score": 0.1848, "inbatch_pos_score": 0.687, "learning_rate": 4.472222222222223e-05, "loss": 4.4571, "norm_diff": 0.0152, "norm_loss": 0.0, "num_token_doc": 66.8593, "num_token_overlap": 11.7065, "num_token_query": 31.9591, "num_token_union": 65.3829, "num_word_context": 202.1852, "num_word_doc": 49.8965, "num_word_query": 23.6179, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1422.1334, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1836, "query_norm": 1.3512, "queue_k_norm": 1.3481, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9591, "sent_len_1": 66.8593, "sent_len_max_0": 127.5075, "sent_len_max_1": 190.9038, "stdk": 0.0456, "stdq": 0.0429, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 19500 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.4969, "doc_norm": 1.345, "encoder_q-embeddings": 3609.4534, "encoder_q-layer.0": 2546.0369, "encoder_q-layer.1": 2244.3477, "encoder_q-layer.10": 389.2709, "encoder_q-layer.11": 960.6298, "encoder_q-layer.2": 1574.6659, "encoder_q-layer.3": 1505.9523, "encoder_q-layer.4": 1460.5654, "encoder_q-layer.5": 1274.7924, "encoder_q-layer.6": 1200.3324, "encoder_q-layer.7": 935.0948, "encoder_q-layer.8": 677.1836, "encoder_q-layer.9": 404.1089, "epoch": 0.13, "inbatch_neg_score": 0.1825, "inbatch_pos_score": 0.6611, "learning_rate": 4.466666666666667e-05, "loss": 4.4969, "norm_diff": 0.0194, "norm_loss": 0.0, "num_token_doc": 67.1469, "num_token_overlap": 11.6949, "num_token_query": 31.8552, "num_token_union": 65.5053, "num_word_context": 202.4813, "num_word_doc": 50.0898, "num_word_query": 23.5102, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2765.5958, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1825, "query_norm": 1.3365, "queue_k_norm": 1.3489, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8552, "sent_len_1": 67.1469, "sent_len_max_0": 127.4, "sent_len_max_1": 192.1312, "stdk": 0.0456, "stdq": 0.0417, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 19600 }, { "accuracy": 40.5273, "active_queue_size": 16384.0, "cl_loss": 4.4896, "doc_norm": 1.3464, "encoder_q-embeddings": 2127.1367, "encoder_q-layer.0": 1628.814, "encoder_q-layer.1": 1798.8467, "encoder_q-layer.10": 373.2895, "encoder_q-layer.11": 958.2648, "encoder_q-layer.2": 2048.0547, "encoder_q-layer.3": 2024.7089, "encoder_q-layer.4": 1973.4907, "encoder_q-layer.5": 1794.2225, "encoder_q-layer.6": 1856.0237, "encoder_q-layer.7": 1250.3818, "encoder_q-layer.8": 827.947, "encoder_q-layer.9": 448.9944, "epoch": 0.13, "inbatch_neg_score": 0.1812, "inbatch_pos_score": 0.6709, "learning_rate": 4.461111111111111e-05, "loss": 4.4896, "norm_diff": 0.0095, "norm_loss": 0.0, "num_token_doc": 66.6812, "num_token_overlap": 11.6785, "num_token_query": 31.8776, "num_token_union": 65.2592, "num_word_context": 201.9255, "num_word_doc": 49.7763, "num_word_query": 23.5394, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2402.4669, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1781, "query_norm": 1.3539, "queue_k_norm": 1.3468, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8776, "sent_len_1": 66.6812, "sent_len_max_0": 127.575, "sent_len_max_1": 189.8462, "stdk": 0.0456, "stdq": 0.042, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 19700 }, { "accuracy": 39.9414, "active_queue_size": 16384.0, "cl_loss": 4.5049, "doc_norm": 1.3442, "encoder_q-embeddings": 1912.9825, "encoder_q-layer.0": 1528.9458, "encoder_q-layer.1": 1693.8617, "encoder_q-layer.10": 375.3207, "encoder_q-layer.11": 1020.4975, "encoder_q-layer.2": 1891.2919, "encoder_q-layer.3": 1978.777, "encoder_q-layer.4": 1873.8875, "encoder_q-layer.5": 1352.0375, "encoder_q-layer.6": 1547.0997, "encoder_q-layer.7": 1130.1716, "encoder_q-layer.8": 781.9293, "encoder_q-layer.9": 507.9963, "epoch": 0.13, "inbatch_neg_score": 0.1937, "inbatch_pos_score": 0.6802, "learning_rate": 4.4555555555555555e-05, "loss": 4.5049, "norm_diff": 0.0169, "norm_loss": 0.0, "num_token_doc": 66.7473, "num_token_overlap": 11.732, "num_token_query": 32.0918, "num_token_union": 65.3919, "num_word_context": 201.9256, "num_word_doc": 49.8328, "num_word_query": 23.6962, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2179.2005, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1919, "query_norm": 1.3457, "queue_k_norm": 1.3467, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0918, "sent_len_1": 66.7473, "sent_len_max_0": 127.6188, "sent_len_max_1": 189.1687, "stdk": 0.0456, "stdq": 0.042, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 19800 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.5091, "doc_norm": 1.3526, "encoder_q-embeddings": 1528.0518, "encoder_q-layer.0": 1079.4216, "encoder_q-layer.1": 1234.3766, "encoder_q-layer.10": 480.7881, "encoder_q-layer.11": 1202.4082, "encoder_q-layer.2": 1492.7816, "encoder_q-layer.3": 1666.2369, "encoder_q-layer.4": 1631.7732, "encoder_q-layer.5": 1597.8285, "encoder_q-layer.6": 1805.4358, "encoder_q-layer.7": 1439.9097, "encoder_q-layer.8": 943.8522, "encoder_q-layer.9": 585.3702, "epoch": 0.13, "inbatch_neg_score": 0.183, "inbatch_pos_score": 0.6758, "learning_rate": 4.4500000000000004e-05, "loss": 4.5091, "norm_diff": 0.0127, "norm_loss": 0.0, "num_token_doc": 66.7289, "num_token_overlap": 11.6518, "num_token_query": 31.9083, "num_token_union": 65.3341, "num_word_context": 202.2573, "num_word_doc": 49.7615, "num_word_query": 23.549, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2011.0619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1824, "query_norm": 1.3648, "queue_k_norm": 1.3443, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9083, "sent_len_1": 66.7289, "sent_len_max_0": 127.565, "sent_len_max_1": 190.565, "stdk": 0.0459, "stdq": 0.0426, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 19900 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.5076, "doc_norm": 1.3489, "encoder_q-embeddings": 910.9697, "encoder_q-layer.0": 664.9462, "encoder_q-layer.1": 744.6733, "encoder_q-layer.10": 446.172, "encoder_q-layer.11": 1149.9137, "encoder_q-layer.2": 795.8492, "encoder_q-layer.3": 871.2124, "encoder_q-layer.4": 863.0801, "encoder_q-layer.5": 815.0632, "encoder_q-layer.6": 843.4731, "encoder_q-layer.7": 695.6791, "encoder_q-layer.8": 566.0085, "encoder_q-layer.9": 409.3418, "epoch": 0.13, "inbatch_neg_score": 0.1975, "inbatch_pos_score": 0.6997, "learning_rate": 4.4444444444444447e-05, "loss": 4.5076, "norm_diff": 0.0231, "norm_loss": 0.0, "num_token_doc": 66.9719, "num_token_overlap": 11.6869, "num_token_query": 31.9248, "num_token_union": 65.4785, "num_word_context": 202.4163, "num_word_doc": 49.9275, "num_word_query": 23.5924, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1183.2788, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1967, "query_norm": 1.3405, "queue_k_norm": 1.3456, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9248, "sent_len_1": 66.9719, "sent_len_max_0": 127.4287, "sent_len_max_1": 191.7413, "stdk": 0.0458, "stdq": 0.042, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 20000 }, { "dev_runtime": 42.9207, "dev_samples_per_second": 1.491, "dev_steps_per_second": 0.023, "epoch": 0.13, "step": 20000, "test_accuracy": 91.796875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.49861958622932434, "test_doc_norm": 1.2972036600112915, "test_inbatch_neg_score": 0.5114925503730774, "test_inbatch_pos_score": 1.2878828048706055, "test_loss": 0.49861958622932434, "test_loss_align": 1.1605675220489502, "test_loss_unif": 3.909008502960205, "test_loss_unif_q@queue": 3.909008502960205, "test_norm_diff": 0.12646043300628662, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.19032904505729675, "test_query_norm": 1.4236640930175781, "test_queue_k_norm": 1.3460538387298584, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03858362138271332, "test_stdq": 0.03983572497963905, "test_stdqueue_k": 0.045752182602882385, "test_stdqueue_q": 0.0 }, { "dev_runtime": 42.9207, "dev_samples_per_second": 1.491, "dev_steps_per_second": 0.023, "epoch": 0.13, "eval_beir-arguana_ndcg@10": 0.25632, "eval_beir-arguana_recall@10": 0.45164, "eval_beir-arguana_recall@100": 0.76956, "eval_beir-arguana_recall@20": 0.57681, "eval_beir-avg_ndcg@10": 0.3093259166666667, "eval_beir-avg_recall@10": 0.36913325, "eval_beir-avg_recall@100": 0.5544868333333334, "eval_beir-avg_recall@20": 0.43047825000000006, "eval_beir-cqadupstack_ndcg@10": 0.18338916666666663, "eval_beir-cqadupstack_recall@10": 0.26152250000000005, "eval_beir-cqadupstack_recall@100": 0.47936833333333334, "eval_beir-cqadupstack_recall@20": 0.3209425, "eval_beir-fiqa_ndcg@10": 0.14962, "eval_beir-fiqa_recall@10": 0.19901, "eval_beir-fiqa_recall@100": 0.44794, "eval_beir-fiqa_recall@20": 0.27796, "eval_beir-nfcorpus_ndcg@10": 0.25528, "eval_beir-nfcorpus_recall@10": 0.117, "eval_beir-nfcorpus_recall@100": 0.24849, "eval_beir-nfcorpus_recall@20": 0.15687, "eval_beir-nq_ndcg@10": 0.21021, "eval_beir-nq_recall@10": 0.34912, "eval_beir-nq_recall@100": 0.68854, "eval_beir-nq_recall@20": 0.46331, "eval_beir-quora_ndcg@10": 0.71056, "eval_beir-quora_recall@10": 0.83068, "eval_beir-quora_recall@100": 0.95642, "eval_beir-quora_recall@20": 0.88322, "eval_beir-scidocs_ndcg@10": 0.12141, "eval_beir-scidocs_recall@10": 0.12678, "eval_beir-scidocs_recall@100": 0.30952, "eval_beir-scidocs_recall@20": 0.16947, "eval_beir-scifact_ndcg@10": 0.55798, "eval_beir-scifact_recall@10": 0.72017, "eval_beir-scifact_recall@100": 0.89189, "eval_beir-scifact_recall@20": 0.778, "eval_beir-trec-covid_ndcg@10": 0.43583, "eval_beir-trec-covid_recall@10": 0.484, "eval_beir-trec-covid_recall@100": 0.348, "eval_beir-trec-covid_recall@20": 0.474, "eval_beir-webis-touche2020_ndcg@10": 0.21266, "eval_beir-webis-touche2020_recall@10": 0.15141, "eval_beir-webis-touche2020_recall@100": 0.40514, "eval_beir-webis-touche2020_recall@20": 0.2042, "eval_senteval-avg_sts": 0.7225956864276957, "eval_senteval-sickr_spearman": 0.6788391667328135, "eval_senteval-stsb_spearman": 0.7663522061225779, "step": 20000, "test_accuracy": 91.796875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.49861958622932434, "test_doc_norm": 1.2972036600112915, "test_inbatch_neg_score": 0.5114925503730774, "test_inbatch_pos_score": 1.2878828048706055, "test_loss": 0.49861958622932434, "test_loss_align": 1.1605675220489502, "test_loss_unif": 3.909008502960205, "test_loss_unif_q@queue": 3.909008502960205, "test_norm_diff": 0.12646043300628662, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.19032904505729675, "test_query_norm": 1.4236640930175781, "test_queue_k_norm": 1.3460538387298584, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03858362138271332, "test_stdq": 0.03983572497963905, "test_stdqueue_k": 0.045752182602882385, "test_stdqueue_q": 0.0 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.4713, "doc_norm": 1.3462, "encoder_q-embeddings": 2098.6348, "encoder_q-layer.0": 1478.3558, "encoder_q-layer.1": 1699.2078, "encoder_q-layer.10": 383.918, "encoder_q-layer.11": 953.4269, "encoder_q-layer.2": 1996.4346, "encoder_q-layer.3": 2105.4131, "encoder_q-layer.4": 2062.7573, "encoder_q-layer.5": 2008.0031, "encoder_q-layer.6": 1765.8159, "encoder_q-layer.7": 1727.9891, "encoder_q-layer.8": 1414.1135, "encoder_q-layer.9": 734.0267, "epoch": 0.13, "inbatch_neg_score": 0.187, "inbatch_pos_score": 0.6753, "learning_rate": 4.438888888888889e-05, "loss": 4.4713, "norm_diff": 0.014, "norm_loss": 0.0, "num_token_doc": 67.1091, "num_token_overlap": 11.7392, "num_token_query": 31.9936, "num_token_union": 65.5553, "num_word_context": 202.3224, "num_word_doc": 50.0687, "num_word_query": 23.6405, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2467.5957, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1875, "query_norm": 1.3431, "queue_k_norm": 1.3456, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9936, "sent_len_1": 67.1091, "sent_len_max_0": 127.5987, "sent_len_max_1": 190.405, "stdk": 0.0457, "stdq": 0.0426, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 20100 }, { "accuracy": 38.2812, "active_queue_size": 16384.0, "cl_loss": 4.5168, "doc_norm": 1.3466, "encoder_q-embeddings": 18037.5332, "encoder_q-layer.0": 13479.7764, "encoder_q-layer.1": 12623.7773, "encoder_q-layer.10": 503.342, "encoder_q-layer.11": 1193.4817, "encoder_q-layer.2": 13611.1602, "encoder_q-layer.3": 11784.9326, "encoder_q-layer.4": 11886.5156, "encoder_q-layer.5": 9851.8125, "encoder_q-layer.6": 9499.8604, "encoder_q-layer.7": 6767.9775, "encoder_q-layer.8": 4760.0845, "encoder_q-layer.9": 1893.1871, "epoch": 0.13, "inbatch_neg_score": 0.1908, "inbatch_pos_score": 0.6689, "learning_rate": 4.433333333333334e-05, "loss": 4.5168, "norm_diff": 0.0073, "norm_loss": 0.0, "num_token_doc": 66.8057, "num_token_overlap": 11.7063, "num_token_query": 31.9772, "num_token_union": 65.3692, "num_word_context": 202.2911, "num_word_doc": 49.8523, "num_word_query": 23.6178, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15868.9609, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1891, "query_norm": 1.3449, "queue_k_norm": 1.3462, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9772, "sent_len_1": 66.8057, "sent_len_max_0": 127.6425, "sent_len_max_1": 187.9938, "stdk": 0.0457, "stdq": 0.0424, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 20200 }, { "accuracy": 37.3047, "active_queue_size": 16384.0, "cl_loss": 4.494, "doc_norm": 1.3419, "encoder_q-embeddings": 5529.894, "encoder_q-layer.0": 4076.877, "encoder_q-layer.1": 3903.9055, "encoder_q-layer.10": 405.011, "encoder_q-layer.11": 1138.9802, "encoder_q-layer.2": 4347.4429, "encoder_q-layer.3": 3568.6858, "encoder_q-layer.4": 3180.1687, "encoder_q-layer.5": 2314.8267, "encoder_q-layer.6": 1924.8118, "encoder_q-layer.7": 1466.572, "encoder_q-layer.8": 1014.2775, "encoder_q-layer.9": 551.7928, "epoch": 0.13, "inbatch_neg_score": 0.1927, "inbatch_pos_score": 0.6577, "learning_rate": 4.427777777777778e-05, "loss": 4.494, "norm_diff": 0.0192, "norm_loss": 0.0, "num_token_doc": 66.891, "num_token_overlap": 11.6314, "num_token_query": 31.7884, "num_token_union": 65.3677, "num_word_context": 202.3806, "num_word_doc": 49.9125, "num_word_query": 23.4673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4689.9862, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1925, "query_norm": 1.323, "queue_k_norm": 1.3467, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7884, "sent_len_1": 66.891, "sent_len_max_0": 127.365, "sent_len_max_1": 189.215, "stdk": 0.0456, "stdq": 0.0413, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 20300 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.4914, "doc_norm": 1.3431, "encoder_q-embeddings": 3736.8748, "encoder_q-layer.0": 2699.9758, "encoder_q-layer.1": 2974.7188, "encoder_q-layer.10": 389.1844, "encoder_q-layer.11": 1066.3014, "encoder_q-layer.2": 2953.1963, "encoder_q-layer.3": 3075.0229, "encoder_q-layer.4": 2620.9761, "encoder_q-layer.5": 2825.7009, "encoder_q-layer.6": 2547.8462, "encoder_q-layer.7": 1963.1321, "encoder_q-layer.8": 1706.6517, "encoder_q-layer.9": 918.5349, "epoch": 0.13, "inbatch_neg_score": 0.186, "inbatch_pos_score": 0.6606, "learning_rate": 4.422222222222222e-05, "loss": 4.4914, "norm_diff": 0.0451, "norm_loss": 0.0, "num_token_doc": 66.873, "num_token_overlap": 11.6893, "num_token_query": 31.9061, "num_token_union": 65.4408, "num_word_context": 202.5714, "num_word_doc": 49.9226, "num_word_query": 23.5693, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3752.0908, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1857, "query_norm": 1.2979, "queue_k_norm": 1.3456, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9061, "sent_len_1": 66.873, "sent_len_max_0": 127.5238, "sent_len_max_1": 189.1488, "stdk": 0.0456, "stdq": 0.0409, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 20400 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.4892, "doc_norm": 1.3414, "encoder_q-embeddings": 1204.9325, "encoder_q-layer.0": 864.109, "encoder_q-layer.1": 1021.7819, "encoder_q-layer.10": 372.6447, "encoder_q-layer.11": 1023.7612, "encoder_q-layer.2": 1077.9401, "encoder_q-layer.3": 946.3239, "encoder_q-layer.4": 793.8325, "encoder_q-layer.5": 809.1516, "encoder_q-layer.6": 776.715, "encoder_q-layer.7": 757.3099, "encoder_q-layer.8": 563.9902, "encoder_q-layer.9": 378.7634, "epoch": 0.13, "inbatch_neg_score": 0.1769, "inbatch_pos_score": 0.6436, "learning_rate": 4.4166666666666665e-05, "loss": 4.4892, "norm_diff": 0.0308, "norm_loss": 0.0, "num_token_doc": 66.8346, "num_token_overlap": 11.6904, "num_token_query": 31.9533, "num_token_union": 65.4245, "num_word_context": 202.353, "num_word_doc": 49.9106, "num_word_query": 23.5942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1324.3728, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1763, "query_norm": 1.3106, "queue_k_norm": 1.3441, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9533, "sent_len_1": 66.8346, "sent_len_max_0": 127.5938, "sent_len_max_1": 189.8613, "stdk": 0.0456, "stdq": 0.0411, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 20500 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.4815, "doc_norm": 1.3422, "encoder_q-embeddings": 1198.983, "encoder_q-layer.0": 878.3004, "encoder_q-layer.1": 902.4221, "encoder_q-layer.10": 453.3053, "encoder_q-layer.11": 1105.6483, "encoder_q-layer.2": 1056.4741, "encoder_q-layer.3": 1015.0253, "encoder_q-layer.4": 961.2715, "encoder_q-layer.5": 889.1341, "encoder_q-layer.6": 981.5656, "encoder_q-layer.7": 872.9807, "encoder_q-layer.8": 651.4993, "encoder_q-layer.9": 422.7257, "epoch": 0.13, "inbatch_neg_score": 0.1604, "inbatch_pos_score": 0.6724, "learning_rate": 4.4111111111111114e-05, "loss": 4.4815, "norm_diff": 0.0219, "norm_loss": 0.0, "num_token_doc": 66.8078, "num_token_overlap": 11.7798, "num_token_query": 32.1593, "num_token_union": 65.4976, "num_word_context": 202.0762, "num_word_doc": 49.8699, "num_word_query": 23.7461, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1364.6785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1597, "query_norm": 1.3631, "queue_k_norm": 1.3453, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.1593, "sent_len_1": 66.8078, "sent_len_max_0": 127.52, "sent_len_max_1": 190.0275, "stdk": 0.0456, "stdq": 0.0435, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 20600 }, { "accuracy": 38.5742, "active_queue_size": 16384.0, "cl_loss": 4.4927, "doc_norm": 1.3425, "encoder_q-embeddings": 1066.4694, "encoder_q-layer.0": 796.2223, "encoder_q-layer.1": 892.8753, "encoder_q-layer.10": 392.3762, "encoder_q-layer.11": 1056.173, "encoder_q-layer.2": 992.7626, "encoder_q-layer.3": 976.7967, "encoder_q-layer.4": 902.1714, "encoder_q-layer.5": 981.5134, "encoder_q-layer.6": 835.9537, "encoder_q-layer.7": 675.4587, "encoder_q-layer.8": 597.6069, "encoder_q-layer.9": 407.3857, "epoch": 0.13, "inbatch_neg_score": 0.1573, "inbatch_pos_score": 0.6401, "learning_rate": 4.4055555555555557e-05, "loss": 4.4927, "norm_diff": 0.0117, "norm_loss": 0.0, "num_token_doc": 66.742, "num_token_overlap": 11.6769, "num_token_query": 31.8272, "num_token_union": 65.3017, "num_word_context": 201.9098, "num_word_doc": 49.7944, "num_word_query": 23.4907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1292.9358, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1562, "query_norm": 1.3439, "queue_k_norm": 1.3437, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8272, "sent_len_1": 66.742, "sent_len_max_0": 127.5337, "sent_len_max_1": 190.3, "stdk": 0.0457, "stdq": 0.0424, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 20700 }, { "accuracy": 38.0859, "active_queue_size": 16384.0, "cl_loss": 4.4936, "doc_norm": 1.3378, "encoder_q-embeddings": 890.6385, "encoder_q-layer.0": 649.6392, "encoder_q-layer.1": 706.5017, "encoder_q-layer.10": 400.8892, "encoder_q-layer.11": 1061.8108, "encoder_q-layer.2": 805.2793, "encoder_q-layer.3": 812.3587, "encoder_q-layer.4": 723.1882, "encoder_q-layer.5": 702.3318, "encoder_q-layer.6": 670.8225, "encoder_q-layer.7": 534.3181, "encoder_q-layer.8": 480.9577, "encoder_q-layer.9": 374.8943, "epoch": 0.14, "inbatch_neg_score": 0.1644, "inbatch_pos_score": 0.6509, "learning_rate": 4.4000000000000006e-05, "loss": 4.4936, "norm_diff": 0.0146, "norm_loss": 0.0, "num_token_doc": 66.657, "num_token_overlap": 11.6738, "num_token_query": 31.92, "num_token_union": 65.2877, "num_word_context": 201.981, "num_word_doc": 49.7207, "num_word_query": 23.5744, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1084.8503, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1628, "query_norm": 1.3287, "queue_k_norm": 1.3406, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.92, "sent_len_1": 66.657, "sent_len_max_0": 127.58, "sent_len_max_1": 189.8862, "stdk": 0.0456, "stdq": 0.0421, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 20800 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.5084, "doc_norm": 1.3338, "encoder_q-embeddings": 2116.9136, "encoder_q-layer.0": 1545.3734, "encoder_q-layer.1": 1781.5834, "encoder_q-layer.10": 379.7639, "encoder_q-layer.11": 1034.2043, "encoder_q-layer.2": 1965.9589, "encoder_q-layer.3": 1891.7073, "encoder_q-layer.4": 1846.1262, "encoder_q-layer.5": 1500.1904, "encoder_q-layer.6": 1304.5513, "encoder_q-layer.7": 1204.7581, "encoder_q-layer.8": 1025.1787, "encoder_q-layer.9": 558.0435, "epoch": 0.14, "inbatch_neg_score": 0.1598, "inbatch_pos_score": 0.6787, "learning_rate": 4.394444444444445e-05, "loss": 4.5084, "norm_diff": 0.0051, "norm_loss": 0.0, "num_token_doc": 66.7, "num_token_overlap": 11.683, "num_token_query": 31.9166, "num_token_union": 65.2813, "num_word_context": 202.1118, "num_word_doc": 49.7704, "num_word_query": 23.5585, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2283.5684, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1598, "query_norm": 1.334, "queue_k_norm": 1.3387, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9166, "sent_len_1": 66.7, "sent_len_max_0": 127.6012, "sent_len_max_1": 188.9425, "stdk": 0.0455, "stdq": 0.0422, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 20900 }, { "accuracy": 39.1602, "active_queue_size": 16384.0, "cl_loss": 4.5039, "doc_norm": 1.3393, "encoder_q-embeddings": 2159.3662, "encoder_q-layer.0": 1420.4332, "encoder_q-layer.1": 1517.0957, "encoder_q-layer.10": 388.5914, "encoder_q-layer.11": 1097.8118, "encoder_q-layer.2": 1755.322, "encoder_q-layer.3": 1799.4615, "encoder_q-layer.4": 1823.6611, "encoder_q-layer.5": 1695.6782, "encoder_q-layer.6": 1546.1991, "encoder_q-layer.7": 1450.2936, "encoder_q-layer.8": 1305.0142, "encoder_q-layer.9": 673.4405, "epoch": 0.14, "inbatch_neg_score": 0.1668, "inbatch_pos_score": 0.6343, "learning_rate": 4.388888888888889e-05, "loss": 4.5039, "norm_diff": 0.021, "norm_loss": 0.0, "num_token_doc": 66.5512, "num_token_overlap": 11.6329, "num_token_query": 31.7545, "num_token_union": 65.1552, "num_word_context": 202.3002, "num_word_doc": 49.6217, "num_word_query": 23.4375, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2295.6318, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1672, "query_norm": 1.3182, "queue_k_norm": 1.3374, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7545, "sent_len_1": 66.5512, "sent_len_max_0": 127.4625, "sent_len_max_1": 190.0737, "stdk": 0.0457, "stdq": 0.0414, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 21000 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.4886, "doc_norm": 1.3363, "encoder_q-embeddings": 429.145, "encoder_q-layer.0": 306.6984, "encoder_q-layer.1": 348.496, "encoder_q-layer.10": 216.1935, "encoder_q-layer.11": 562.7649, "encoder_q-layer.2": 381.6291, "encoder_q-layer.3": 380.0662, "encoder_q-layer.4": 389.4863, "encoder_q-layer.5": 336.9896, "encoder_q-layer.6": 292.6036, "encoder_q-layer.7": 270.7203, "encoder_q-layer.8": 251.7501, "encoder_q-layer.9": 196.6976, "epoch": 0.14, "inbatch_neg_score": 0.1632, "inbatch_pos_score": 0.6553, "learning_rate": 4.383333333333334e-05, "loss": 4.4886, "norm_diff": 0.0167, "norm_loss": 0.0, "num_token_doc": 66.7017, "num_token_overlap": 11.6811, "num_token_query": 31.9236, "num_token_union": 65.2733, "num_word_context": 202.5185, "num_word_doc": 49.7217, "num_word_query": 23.5777, "postclip_grad_norm": 1.0, "preclip_grad_norm": 527.1058, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1624, "query_norm": 1.3203, "queue_k_norm": 1.3375, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9236, "sent_len_1": 66.7017, "sent_len_max_0": 127.57, "sent_len_max_1": 192.7975, "stdk": 0.0456, "stdq": 0.0417, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 21100 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.4627, "doc_norm": 1.3296, "encoder_q-embeddings": 537.2772, "encoder_q-layer.0": 412.2778, "encoder_q-layer.1": 444.4453, "encoder_q-layer.10": 197.4297, "encoder_q-layer.11": 542.5471, "encoder_q-layer.2": 501.4665, "encoder_q-layer.3": 465.2497, "encoder_q-layer.4": 480.569, "encoder_q-layer.5": 457.0233, "encoder_q-layer.6": 459.1529, "encoder_q-layer.7": 367.4666, "encoder_q-layer.8": 340.0575, "encoder_q-layer.9": 230.1053, "epoch": 0.14, "inbatch_neg_score": 0.168, "inbatch_pos_score": 0.6401, "learning_rate": 4.377777777777778e-05, "loss": 4.4627, "norm_diff": 0.0219, "norm_loss": 0.0, "num_token_doc": 66.8854, "num_token_overlap": 11.6968, "num_token_query": 31.85, "num_token_union": 65.4006, "num_word_context": 202.6537, "num_word_doc": 49.9548, "num_word_query": 23.5238, "postclip_grad_norm": 1.0, "preclip_grad_norm": 657.421, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.166, "query_norm": 1.3077, "queue_k_norm": 1.3363, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.85, "sent_len_1": 66.8854, "sent_len_max_0": 127.6375, "sent_len_max_1": 188.5525, "stdk": 0.0454, "stdq": 0.0407, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 21200 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.4953, "doc_norm": 1.3335, "encoder_q-embeddings": 530.9879, "encoder_q-layer.0": 379.3385, "encoder_q-layer.1": 402.3618, "encoder_q-layer.10": 184.0228, "encoder_q-layer.11": 493.3565, "encoder_q-layer.2": 409.476, "encoder_q-layer.3": 395.4963, "encoder_q-layer.4": 428.7256, "encoder_q-layer.5": 372.2406, "encoder_q-layer.6": 351.3476, "encoder_q-layer.7": 298.2889, "encoder_q-layer.8": 259.0308, "encoder_q-layer.9": 195.2354, "epoch": 0.14, "inbatch_neg_score": 0.1522, "inbatch_pos_score": 0.6431, "learning_rate": 4.3722222222222224e-05, "loss": 4.4953, "norm_diff": 0.0225, "norm_loss": 0.0, "num_token_doc": 66.9475, "num_token_overlap": 11.6677, "num_token_query": 31.8806, "num_token_union": 65.4122, "num_word_context": 202.4582, "num_word_doc": 49.9302, "num_word_query": 23.5425, "postclip_grad_norm": 1.0, "preclip_grad_norm": 574.3734, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1516, "query_norm": 1.3111, "queue_k_norm": 1.3357, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8806, "sent_len_1": 66.9475, "sent_len_max_0": 127.3275, "sent_len_max_1": 190.3963, "stdk": 0.0456, "stdq": 0.0416, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 21300 }, { "accuracy": 40.8203, "active_queue_size": 16384.0, "cl_loss": 4.4991, "doc_norm": 1.3362, "encoder_q-embeddings": 2181.2693, "encoder_q-layer.0": 1576.6588, "encoder_q-layer.1": 1842.1105, "encoder_q-layer.10": 198.179, "encoder_q-layer.11": 528.5616, "encoder_q-layer.2": 2195.813, "encoder_q-layer.3": 2489.7727, "encoder_q-layer.4": 2166.4319, "encoder_q-layer.5": 1739.6553, "encoder_q-layer.6": 1131.217, "encoder_q-layer.7": 612.2812, "encoder_q-layer.8": 424.5633, "encoder_q-layer.9": 220.9871, "epoch": 0.14, "inbatch_neg_score": 0.1477, "inbatch_pos_score": 0.6357, "learning_rate": 4.3666666666666666e-05, "loss": 4.4991, "norm_diff": 0.0121, "norm_loss": 0.0, "num_token_doc": 66.8353, "num_token_overlap": 11.6762, "num_token_query": 31.8716, "num_token_union": 65.3939, "num_word_context": 202.6397, "num_word_doc": 49.8979, "num_word_query": 23.554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2399.2092, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1479, "query_norm": 1.3397, "queue_k_norm": 1.3346, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8716, "sent_len_1": 66.8353, "sent_len_max_0": 127.545, "sent_len_max_1": 189.0863, "stdk": 0.0457, "stdq": 0.0421, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 21400 }, { "accuracy": 39.1602, "active_queue_size": 16384.0, "cl_loss": 4.4825, "doc_norm": 1.3283, "encoder_q-embeddings": 487.6038, "encoder_q-layer.0": 356.1452, "encoder_q-layer.1": 395.5602, "encoder_q-layer.10": 180.2246, "encoder_q-layer.11": 463.9524, "encoder_q-layer.2": 429.0854, "encoder_q-layer.3": 478.2032, "encoder_q-layer.4": 501.2536, "encoder_q-layer.5": 402.4408, "encoder_q-layer.6": 431.1661, "encoder_q-layer.7": 339.9914, "encoder_q-layer.8": 252.6181, "encoder_q-layer.9": 181.706, "epoch": 0.14, "inbatch_neg_score": 0.1462, "inbatch_pos_score": 0.644, "learning_rate": 4.3611111111111116e-05, "loss": 4.4825, "norm_diff": 0.0174, "norm_loss": 0.0, "num_token_doc": 66.6954, "num_token_overlap": 11.681, "num_token_query": 31.8008, "num_token_union": 65.274, "num_word_context": 202.3033, "num_word_doc": 49.7993, "num_word_query": 23.5072, "postclip_grad_norm": 1.0, "preclip_grad_norm": 592.7423, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.144, "query_norm": 1.3457, "queue_k_norm": 1.3332, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8008, "sent_len_1": 66.6954, "sent_len_max_0": 127.4537, "sent_len_max_1": 188.82, "stdk": 0.0453, "stdq": 0.0422, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 21500 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.4647, "doc_norm": 1.3247, "encoder_q-embeddings": 529.437, "encoder_q-layer.0": 387.3592, "encoder_q-layer.1": 410.1592, "encoder_q-layer.10": 182.8137, "encoder_q-layer.11": 483.5677, "encoder_q-layer.2": 440.3929, "encoder_q-layer.3": 460.9088, "encoder_q-layer.4": 480.7289, "encoder_q-layer.5": 421.6036, "encoder_q-layer.6": 445.2092, "encoder_q-layer.7": 386.454, "encoder_q-layer.8": 303.171, "encoder_q-layer.9": 205.8634, "epoch": 0.14, "inbatch_neg_score": 0.1768, "inbatch_pos_score": 0.6606, "learning_rate": 4.355555555555556e-05, "loss": 4.4647, "norm_diff": 0.0444, "norm_loss": 0.0, "num_token_doc": 67.0525, "num_token_overlap": 11.6884, "num_token_query": 31.8024, "num_token_union": 65.4758, "num_word_context": 202.8086, "num_word_doc": 50.0462, "num_word_query": 23.4918, "postclip_grad_norm": 1.0, "preclip_grad_norm": 618.9232, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1758, "query_norm": 1.369, "queue_k_norm": 1.3343, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8024, "sent_len_1": 67.0525, "sent_len_max_0": 127.5438, "sent_len_max_1": 188.69, "stdk": 0.0453, "stdq": 0.0427, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 21600 }, { "accuracy": 37.9883, "active_queue_size": 16384.0, "cl_loss": 4.4703, "doc_norm": 1.3371, "encoder_q-embeddings": 1224.8745, "encoder_q-layer.0": 941.8134, "encoder_q-layer.1": 970.517, "encoder_q-layer.10": 186.0219, "encoder_q-layer.11": 495.8085, "encoder_q-layer.2": 928.2578, "encoder_q-layer.3": 903.2776, "encoder_q-layer.4": 807.9918, "encoder_q-layer.5": 661.8643, "encoder_q-layer.6": 502.9306, "encoder_q-layer.7": 418.5988, "encoder_q-layer.8": 297.1966, "encoder_q-layer.9": 199.0126, "epoch": 0.14, "inbatch_neg_score": 0.1721, "inbatch_pos_score": 0.6548, "learning_rate": 4.35e-05, "loss": 4.4703, "norm_diff": 0.0112, "norm_loss": 0.0, "num_token_doc": 66.8124, "num_token_overlap": 11.6001, "num_token_query": 31.7762, "num_token_union": 65.3683, "num_word_context": 202.0443, "num_word_doc": 49.8228, "num_word_query": 23.4726, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1129.6393, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1721, "query_norm": 1.3357, "queue_k_norm": 1.3334, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7762, "sent_len_1": 66.8124, "sent_len_max_0": 127.5775, "sent_len_max_1": 190.2812, "stdk": 0.0458, "stdq": 0.0421, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 21700 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.4656, "doc_norm": 1.326, "encoder_q-embeddings": 1303.1501, "encoder_q-layer.0": 988.9907, "encoder_q-layer.1": 958.6537, "encoder_q-layer.10": 183.8916, "encoder_q-layer.11": 536.9089, "encoder_q-layer.2": 989.6652, "encoder_q-layer.3": 888.9484, "encoder_q-layer.4": 837.1909, "encoder_q-layer.5": 732.2968, "encoder_q-layer.6": 685.2018, "encoder_q-layer.7": 533.3314, "encoder_q-layer.8": 360.3159, "encoder_q-layer.9": 201.6869, "epoch": 0.14, "inbatch_neg_score": 0.1851, "inbatch_pos_score": 0.6826, "learning_rate": 4.344444444444445e-05, "loss": 4.4656, "norm_diff": 0.0198, "norm_loss": 0.0, "num_token_doc": 66.678, "num_token_overlap": 11.6824, "num_token_query": 31.9206, "num_token_union": 65.3498, "num_word_context": 202.3469, "num_word_doc": 49.7958, "num_word_query": 23.5665, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1196.8203, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1843, "query_norm": 1.3439, "queue_k_norm": 1.3335, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9206, "sent_len_1": 66.678, "sent_len_max_0": 127.5575, "sent_len_max_1": 188.1312, "stdk": 0.0454, "stdq": 0.0423, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 21800 }, { "accuracy": 40.8203, "active_queue_size": 16384.0, "cl_loss": 4.4596, "doc_norm": 1.3366, "encoder_q-embeddings": 1528.916, "encoder_q-layer.0": 1197.769, "encoder_q-layer.1": 1411.5327, "encoder_q-layer.10": 179.8439, "encoder_q-layer.11": 479.8439, "encoder_q-layer.2": 1493.3562, "encoder_q-layer.3": 1427.4788, "encoder_q-layer.4": 1360.5212, "encoder_q-layer.5": 1113.2765, "encoder_q-layer.6": 1144.2769, "encoder_q-layer.7": 1084.4091, "encoder_q-layer.8": 481.1969, "encoder_q-layer.9": 215.8343, "epoch": 0.14, "inbatch_neg_score": 0.1825, "inbatch_pos_score": 0.6777, "learning_rate": 4.338888888888889e-05, "loss": 4.4596, "norm_diff": 0.0098, "norm_loss": 0.0, "num_token_doc": 66.6963, "num_token_overlap": 11.7351, "num_token_query": 32.0618, "num_token_union": 65.3473, "num_word_context": 202.0504, "num_word_doc": 49.7873, "num_word_query": 23.6896, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1709.1835, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1821, "query_norm": 1.3375, "queue_k_norm": 1.3318, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0618, "sent_len_1": 66.6963, "sent_len_max_0": 127.43, "sent_len_max_1": 189.7375, "stdk": 0.0457, "stdq": 0.042, "stdqueue_k": 0.0456, "stdqueue_q": 0.0, "step": 21900 }, { "accuracy": 38.1836, "active_queue_size": 16384.0, "cl_loss": 4.475, "doc_norm": 1.3355, "encoder_q-embeddings": 2091.1951, "encoder_q-layer.0": 1467.4495, "encoder_q-layer.1": 1655.5642, "encoder_q-layer.10": 197.5626, "encoder_q-layer.11": 542.0865, "encoder_q-layer.2": 1827.3368, "encoder_q-layer.3": 1868.8383, "encoder_q-layer.4": 1925.6569, "encoder_q-layer.5": 1660.3911, "encoder_q-layer.6": 1742.9136, "encoder_q-layer.7": 1465.3766, "encoder_q-layer.8": 891.5388, "encoder_q-layer.9": 295.7991, "epoch": 0.14, "inbatch_neg_score": 0.1904, "inbatch_pos_score": 0.6753, "learning_rate": 4.3333333333333334e-05, "loss": 4.475, "norm_diff": 0.0164, "norm_loss": 0.0, "num_token_doc": 66.8243, "num_token_overlap": 11.7086, "num_token_query": 32.0025, "num_token_union": 65.3514, "num_word_context": 202.1439, "num_word_doc": 49.8266, "num_word_query": 23.635, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2267.7225, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1885, "query_norm": 1.3471, "queue_k_norm": 1.3346, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0025, "sent_len_1": 66.8243, "sent_len_max_0": 127.5062, "sent_len_max_1": 190.705, "stdk": 0.0457, "stdq": 0.0423, "stdqueue_k": 0.0457, "stdqueue_q": 0.0, "step": 22000 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.4879, "doc_norm": 1.3379, "encoder_q-embeddings": 690.6536, "encoder_q-layer.0": 523.4812, "encoder_q-layer.1": 579.3364, "encoder_q-layer.10": 193.4956, "encoder_q-layer.11": 530.2991, "encoder_q-layer.2": 613.6019, "encoder_q-layer.3": 610.0903, "encoder_q-layer.4": 647.2848, "encoder_q-layer.5": 547.7438, "encoder_q-layer.6": 556.3356, "encoder_q-layer.7": 486.686, "encoder_q-layer.8": 335.9757, "encoder_q-layer.9": 194.4687, "epoch": 0.14, "inbatch_neg_score": 0.1816, "inbatch_pos_score": 0.7021, "learning_rate": 4.3277777777777776e-05, "loss": 4.4879, "norm_diff": 0.0235, "norm_loss": 0.0, "num_token_doc": 66.7947, "num_token_overlap": 11.6973, "num_token_query": 31.937, "num_token_union": 65.3518, "num_word_context": 202.1998, "num_word_doc": 49.867, "num_word_query": 23.593, "postclip_grad_norm": 1.0, "preclip_grad_norm": 794.6768, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1809, "query_norm": 1.3601, "queue_k_norm": 1.337, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.937, "sent_len_1": 66.7947, "sent_len_max_0": 127.4412, "sent_len_max_1": 189.62, "stdk": 0.0458, "stdq": 0.0431, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 22100 }, { "accuracy": 36.3281, "active_queue_size": 16384.0, "cl_loss": 4.4849, "doc_norm": 1.3366, "encoder_q-embeddings": 743.9832, "encoder_q-layer.0": 552.7338, "encoder_q-layer.1": 587.6577, "encoder_q-layer.10": 223.9528, "encoder_q-layer.11": 540.8817, "encoder_q-layer.2": 715.2553, "encoder_q-layer.3": 695.463, "encoder_q-layer.4": 706.6072, "encoder_q-layer.5": 684.0652, "encoder_q-layer.6": 582.7522, "encoder_q-layer.7": 520.7937, "encoder_q-layer.8": 322.6621, "encoder_q-layer.9": 203.1115, "epoch": 0.14, "inbatch_neg_score": 0.1742, "inbatch_pos_score": 0.6396, "learning_rate": 4.3222222222222226e-05, "loss": 4.4849, "norm_diff": 0.0143, "norm_loss": 0.0, "num_token_doc": 66.8233, "num_token_overlap": 11.666, "num_token_query": 31.8869, "num_token_union": 65.3462, "num_word_context": 202.3121, "num_word_doc": 49.8723, "num_word_query": 23.5418, "postclip_grad_norm": 1.0, "preclip_grad_norm": 857.3056, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1726, "query_norm": 1.3277, "queue_k_norm": 1.3366, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8869, "sent_len_1": 66.8233, "sent_len_max_0": 127.505, "sent_len_max_1": 189.065, "stdk": 0.0457, "stdq": 0.042, "stdqueue_k": 0.0458, "stdqueue_q": 0.0, "step": 22200 }, { "accuracy": 37.5, "active_queue_size": 16384.0, "cl_loss": 4.4706, "doc_norm": 1.339, "encoder_q-embeddings": 589.5955, "encoder_q-layer.0": 427.9029, "encoder_q-layer.1": 502.3592, "encoder_q-layer.10": 198.5033, "encoder_q-layer.11": 500.1961, "encoder_q-layer.2": 570.8281, "encoder_q-layer.3": 576.6826, "encoder_q-layer.4": 554.6289, "encoder_q-layer.5": 529.327, "encoder_q-layer.6": 521.0823, "encoder_q-layer.7": 423.4987, "encoder_q-layer.8": 278.8934, "encoder_q-layer.9": 191.4295, "epoch": 0.15, "inbatch_neg_score": 0.1714, "inbatch_pos_score": 0.6333, "learning_rate": 4.316666666666667e-05, "loss": 4.4706, "norm_diff": 0.0276, "norm_loss": 0.0, "num_token_doc": 67.0043, "num_token_overlap": 11.685, "num_token_query": 31.8823, "num_token_union": 65.4762, "num_word_context": 202.8345, "num_word_doc": 50.0121, "num_word_query": 23.5357, "postclip_grad_norm": 1.0, "preclip_grad_norm": 711.1873, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.171, "query_norm": 1.3114, "queue_k_norm": 1.3401, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8823, "sent_len_1": 67.0043, "sent_len_max_0": 127.5113, "sent_len_max_1": 189.3187, "stdk": 0.0458, "stdq": 0.041, "stdqueue_k": 0.0459, "stdqueue_q": 0.0, "step": 22300 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.4713, "doc_norm": 1.3471, "encoder_q-embeddings": 335.2913, "encoder_q-layer.0": 243.3107, "encoder_q-layer.1": 281.067, "encoder_q-layer.10": 225.977, "encoder_q-layer.11": 521.5149, "encoder_q-layer.2": 249.7683, "encoder_q-layer.3": 235.6426, "encoder_q-layer.4": 227.0425, "encoder_q-layer.5": 209.7652, "encoder_q-layer.6": 212.0583, "encoder_q-layer.7": 211.0689, "encoder_q-layer.8": 222.4425, "encoder_q-layer.9": 189.2542, "epoch": 0.15, "inbatch_neg_score": 0.1633, "inbatch_pos_score": 0.6631, "learning_rate": 4.311111111111111e-05, "loss": 4.4713, "norm_diff": 0.028, "norm_loss": 0.0, "num_token_doc": 66.9006, "num_token_overlap": 11.6809, "num_token_query": 31.8978, "num_token_union": 65.44, "num_word_context": 202.387, "num_word_doc": 49.9102, "num_word_query": 23.5603, "postclip_grad_norm": 1.0, "preclip_grad_norm": 419.4399, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1644, "query_norm": 1.319, "queue_k_norm": 1.34, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8978, "sent_len_1": 66.9006, "sent_len_max_0": 127.605, "sent_len_max_1": 188.6362, "stdk": 0.0461, "stdq": 0.0413, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 22400 }, { "accuracy": 39.2578, "active_queue_size": 16384.0, "cl_loss": 4.4796, "doc_norm": 1.3376, "encoder_q-embeddings": 3011.5073, "encoder_q-layer.0": 2166.033, "encoder_q-layer.1": 2411.8391, "encoder_q-layer.10": 182.5553, "encoder_q-layer.11": 463.9932, "encoder_q-layer.2": 2598.2415, "encoder_q-layer.3": 2271.3794, "encoder_q-layer.4": 2107.3655, "encoder_q-layer.5": 1578.2378, "encoder_q-layer.6": 1283.5016, "encoder_q-layer.7": 1249.3646, "encoder_q-layer.8": 650.8995, "encoder_q-layer.9": 286.4484, "epoch": 0.15, "inbatch_neg_score": 0.1626, "inbatch_pos_score": 0.6392, "learning_rate": 4.305555555555556e-05, "loss": 4.4796, "norm_diff": 0.0176, "norm_loss": 0.0, "num_token_doc": 66.8881, "num_token_overlap": 11.6651, "num_token_query": 31.9435, "num_token_union": 65.4315, "num_word_context": 202.2118, "num_word_doc": 49.9479, "num_word_query": 23.5837, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2850.5831, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1616, "query_norm": 1.3211, "queue_k_norm": 1.3419, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9435, "sent_len_1": 66.8881, "sent_len_max_0": 127.4775, "sent_len_max_1": 189.2088, "stdk": 0.0458, "stdq": 0.0415, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 22500 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.4637, "doc_norm": 1.3417, "encoder_q-embeddings": 1954.0211, "encoder_q-layer.0": 1369.5161, "encoder_q-layer.1": 1582.8041, "encoder_q-layer.10": 208.7482, "encoder_q-layer.11": 488.9493, "encoder_q-layer.2": 1836.9923, "encoder_q-layer.3": 1884.6708, "encoder_q-layer.4": 1752.4674, "encoder_q-layer.5": 1487.3862, "encoder_q-layer.6": 1267.0211, "encoder_q-layer.7": 953.6858, "encoder_q-layer.8": 643.2366, "encoder_q-layer.9": 324.7628, "epoch": 0.15, "inbatch_neg_score": 0.1695, "inbatch_pos_score": 0.6641, "learning_rate": 4.3e-05, "loss": 4.4637, "norm_diff": 0.0431, "norm_loss": 0.0, "num_token_doc": 66.7611, "num_token_overlap": 11.6834, "num_token_query": 31.9435, "num_token_union": 65.3542, "num_word_context": 202.2998, "num_word_doc": 49.8054, "num_word_query": 23.6001, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2097.5719, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1672, "query_norm": 1.3848, "queue_k_norm": 1.3421, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9435, "sent_len_1": 66.7611, "sent_len_max_0": 127.67, "sent_len_max_1": 187.0175, "stdk": 0.0459, "stdq": 0.0433, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 22600 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.4746, "doc_norm": 1.33, "encoder_q-embeddings": 386.0448, "encoder_q-layer.0": 270.9549, "encoder_q-layer.1": 282.4733, "encoder_q-layer.10": 174.9947, "encoder_q-layer.11": 462.496, "encoder_q-layer.2": 276.9734, "encoder_q-layer.3": 253.6833, "encoder_q-layer.4": 258.6289, "encoder_q-layer.5": 220.7903, "encoder_q-layer.6": 245.6097, "encoder_q-layer.7": 227.8987, "encoder_q-layer.8": 206.7332, "encoder_q-layer.9": 167.5009, "epoch": 0.15, "inbatch_neg_score": 0.1619, "inbatch_pos_score": 0.6431, "learning_rate": 4.294444444444445e-05, "loss": 4.4746, "norm_diff": 0.0182, "norm_loss": 0.0, "num_token_doc": 66.9219, "num_token_overlap": 11.7112, "num_token_query": 31.9332, "num_token_union": 65.4333, "num_word_context": 202.0603, "num_word_doc": 49.9215, "num_word_query": 23.5885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 426.2215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1603, "query_norm": 1.3478, "queue_k_norm": 1.3414, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9332, "sent_len_1": 66.9219, "sent_len_max_0": 127.4137, "sent_len_max_1": 188.3825, "stdk": 0.0456, "stdq": 0.0423, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 22700 }, { "accuracy": 38.0859, "active_queue_size": 16384.0, "cl_loss": 4.4539, "doc_norm": 1.3409, "encoder_q-embeddings": 704.2505, "encoder_q-layer.0": 485.2723, "encoder_q-layer.1": 563.6843, "encoder_q-layer.10": 201.9103, "encoder_q-layer.11": 495.1724, "encoder_q-layer.2": 676.7744, "encoder_q-layer.3": 737.5107, "encoder_q-layer.4": 788.5009, "encoder_q-layer.5": 781.1985, "encoder_q-layer.6": 613.8134, "encoder_q-layer.7": 458.9636, "encoder_q-layer.8": 376.7085, "encoder_q-layer.9": 205.1983, "epoch": 0.15, "inbatch_neg_score": 0.1555, "inbatch_pos_score": 0.644, "learning_rate": 4.2888888888888886e-05, "loss": 4.4539, "norm_diff": 0.0118, "norm_loss": 0.0, "num_token_doc": 66.8727, "num_token_overlap": 11.6783, "num_token_query": 31.8176, "num_token_union": 65.4298, "num_word_context": 202.5271, "num_word_doc": 49.9344, "num_word_query": 23.5092, "postclip_grad_norm": 1.0, "preclip_grad_norm": 863.8946, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1555, "query_norm": 1.3523, "queue_k_norm": 1.3416, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8176, "sent_len_1": 66.8727, "sent_len_max_0": 127.4762, "sent_len_max_1": 186.6937, "stdk": 0.046, "stdq": 0.0425, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 22800 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.4614, "doc_norm": 1.3397, "encoder_q-embeddings": 362.6513, "encoder_q-layer.0": 288.2294, "encoder_q-layer.1": 299.5884, "encoder_q-layer.10": 177.9419, "encoder_q-layer.11": 471.8198, "encoder_q-layer.2": 293.9479, "encoder_q-layer.3": 303.9859, "encoder_q-layer.4": 312.2076, "encoder_q-layer.5": 291.7318, "encoder_q-layer.6": 310.5285, "encoder_q-layer.7": 236.7051, "encoder_q-layer.8": 227.0393, "encoder_q-layer.9": 175.6948, "epoch": 0.15, "inbatch_neg_score": 0.1615, "inbatch_pos_score": 0.6504, "learning_rate": 4.2833333333333335e-05, "loss": 4.4614, "norm_diff": 0.0216, "norm_loss": 0.0, "num_token_doc": 66.6707, "num_token_overlap": 11.6616, "num_token_query": 31.8713, "num_token_union": 65.2292, "num_word_context": 202.1319, "num_word_doc": 49.6945, "num_word_query": 23.5186, "postclip_grad_norm": 1.0, "preclip_grad_norm": 457.9881, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.161, "query_norm": 1.3308, "queue_k_norm": 1.3415, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8713, "sent_len_1": 66.6707, "sent_len_max_0": 127.5537, "sent_len_max_1": 192.2512, "stdk": 0.046, "stdq": 0.042, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 22900 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.4474, "doc_norm": 1.3416, "encoder_q-embeddings": 1166.5479, "encoder_q-layer.0": 863.2001, "encoder_q-layer.1": 908.2233, "encoder_q-layer.10": 183.2308, "encoder_q-layer.11": 492.1051, "encoder_q-layer.2": 1127.5615, "encoder_q-layer.3": 1081.0123, "encoder_q-layer.4": 1110.9137, "encoder_q-layer.5": 975.1854, "encoder_q-layer.6": 793.9055, "encoder_q-layer.7": 536.1813, "encoder_q-layer.8": 366.0487, "encoder_q-layer.9": 202.1714, "epoch": 0.15, "inbatch_neg_score": 0.1602, "inbatch_pos_score": 0.6499, "learning_rate": 4.277777777777778e-05, "loss": 4.4474, "norm_diff": 0.0213, "norm_loss": 0.0, "num_token_doc": 66.8285, "num_token_overlap": 11.7239, "num_token_query": 31.9394, "num_token_union": 65.3744, "num_word_context": 201.8521, "num_word_doc": 49.8619, "num_word_query": 23.6116, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1262.0114, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1602, "query_norm": 1.3209, "queue_k_norm": 1.3422, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9394, "sent_len_1": 66.8285, "sent_len_max_0": 127.66, "sent_len_max_1": 189.4112, "stdk": 0.0461, "stdq": 0.0418, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 23000 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.4722, "doc_norm": 1.3338, "encoder_q-embeddings": 1439.1301, "encoder_q-layer.0": 1028.5643, "encoder_q-layer.1": 1138.7424, "encoder_q-layer.10": 345.386, "encoder_q-layer.11": 940.4011, "encoder_q-layer.2": 1310.1234, "encoder_q-layer.3": 1326.441, "encoder_q-layer.4": 1385.1724, "encoder_q-layer.5": 1127.2773, "encoder_q-layer.6": 847.8629, "encoder_q-layer.7": 634.4769, "encoder_q-layer.8": 524.0278, "encoder_q-layer.9": 349.5336, "epoch": 0.15, "inbatch_neg_score": 0.1539, "inbatch_pos_score": 0.6465, "learning_rate": 4.272222222222223e-05, "loss": 4.4722, "norm_diff": 0.0083, "norm_loss": 0.0, "num_token_doc": 66.6576, "num_token_overlap": 11.7051, "num_token_query": 32.1281, "num_token_union": 65.3921, "num_word_context": 202.4521, "num_word_doc": 49.7808, "num_word_query": 23.7526, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1577.1613, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1538, "query_norm": 1.3298, "queue_k_norm": 1.3399, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.1281, "sent_len_1": 66.6576, "sent_len_max_0": 127.5913, "sent_len_max_1": 189.535, "stdk": 0.0458, "stdq": 0.0421, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 23100 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.4378, "doc_norm": 1.3454, "encoder_q-embeddings": 1799.6375, "encoder_q-layer.0": 1382.439, "encoder_q-layer.1": 1596.0555, "encoder_q-layer.10": 355.7217, "encoder_q-layer.11": 938.5627, "encoder_q-layer.2": 1939.4778, "encoder_q-layer.3": 1699.9144, "encoder_q-layer.4": 1623.2877, "encoder_q-layer.5": 1162.4862, "encoder_q-layer.6": 1031.064, "encoder_q-layer.7": 790.0034, "encoder_q-layer.8": 603.9675, "encoder_q-layer.9": 370.1521, "epoch": 0.15, "inbatch_neg_score": 0.156, "inbatch_pos_score": 0.6558, "learning_rate": 4.266666666666667e-05, "loss": 4.4378, "norm_diff": 0.0116, "norm_loss": 0.0, "num_token_doc": 66.7841, "num_token_overlap": 11.6767, "num_token_query": 31.874, "num_token_union": 65.3852, "num_word_context": 202.3722, "num_word_doc": 49.8674, "num_word_query": 23.5366, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1954.1558, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1544, "query_norm": 1.345, "queue_k_norm": 1.3441, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.874, "sent_len_1": 66.7841, "sent_len_max_0": 127.5687, "sent_len_max_1": 189.9675, "stdk": 0.0462, "stdq": 0.0424, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 23200 }, { "accuracy": 39.8438, "active_queue_size": 16384.0, "cl_loss": 4.4757, "doc_norm": 1.3447, "encoder_q-embeddings": 26342.291, "encoder_q-layer.0": 20742.3906, "encoder_q-layer.1": 22671.0645, "encoder_q-layer.10": 520.6323, "encoder_q-layer.11": 1118.5033, "encoder_q-layer.2": 28268.6934, "encoder_q-layer.3": 23286.0703, "encoder_q-layer.4": 19893.1406, "encoder_q-layer.5": 18139.0605, "encoder_q-layer.6": 14674.417, "encoder_q-layer.7": 13665.1641, "encoder_q-layer.8": 8032.1709, "encoder_q-layer.9": 3244.2588, "epoch": 0.15, "inbatch_neg_score": 0.1704, "inbatch_pos_score": 0.6587, "learning_rate": 4.261111111111111e-05, "loss": 4.4757, "norm_diff": 0.0235, "norm_loss": 0.0, "num_token_doc": 66.7505, "num_token_overlap": 11.6851, "num_token_query": 31.9482, "num_token_union": 65.4047, "num_word_context": 202.3722, "num_word_doc": 49.8604, "num_word_query": 23.6115, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28158.7721, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1699, "query_norm": 1.3656, "queue_k_norm": 1.3401, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9482, "sent_len_1": 66.7505, "sent_len_max_0": 127.4137, "sent_len_max_1": 189.4125, "stdk": 0.0462, "stdq": 0.0419, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 23300 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.4847, "doc_norm": 1.3478, "encoder_q-embeddings": 2395.6904, "encoder_q-layer.0": 1811.1411, "encoder_q-layer.1": 1875.2844, "encoder_q-layer.10": 381.1633, "encoder_q-layer.11": 1043.5272, "encoder_q-layer.2": 2069.9221, "encoder_q-layer.3": 2213.8503, "encoder_q-layer.4": 2126.6672, "encoder_q-layer.5": 1983.0613, "encoder_q-layer.6": 1844.764, "encoder_q-layer.7": 1572.4481, "encoder_q-layer.8": 1091.593, "encoder_q-layer.9": 491.949, "epoch": 0.15, "inbatch_neg_score": 0.1664, "inbatch_pos_score": 0.6426, "learning_rate": 4.255555555555556e-05, "loss": 4.4847, "norm_diff": 0.0228, "norm_loss": 0.0, "num_token_doc": 66.603, "num_token_overlap": 11.6746, "num_token_query": 31.9724, "num_token_union": 65.2762, "num_word_context": 201.9817, "num_word_doc": 49.7031, "num_word_query": 23.6262, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2630.913, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1664, "query_norm": 1.3271, "queue_k_norm": 1.3412, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9724, "sent_len_1": 66.603, "sent_len_max_0": 127.4163, "sent_len_max_1": 191.8075, "stdk": 0.0463, "stdq": 0.0411, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 23400 }, { "accuracy": 40.0391, "active_queue_size": 16384.0, "cl_loss": 4.4461, "doc_norm": 1.3402, "encoder_q-embeddings": 635.3502, "encoder_q-layer.0": 443.7859, "encoder_q-layer.1": 488.5583, "encoder_q-layer.10": 355.688, "encoder_q-layer.11": 976.4153, "encoder_q-layer.2": 544.747, "encoder_q-layer.3": 524.7565, "encoder_q-layer.4": 543.1645, "encoder_q-layer.5": 511.4908, "encoder_q-layer.6": 508.4286, "encoder_q-layer.7": 426.38, "encoder_q-layer.8": 470.7757, "encoder_q-layer.9": 343.9818, "epoch": 0.15, "inbatch_neg_score": 0.1617, "inbatch_pos_score": 0.6328, "learning_rate": 4.25e-05, "loss": 4.4461, "norm_diff": 0.0472, "norm_loss": 0.0, "num_token_doc": 66.729, "num_token_overlap": 11.6752, "num_token_query": 31.9125, "num_token_union": 65.3418, "num_word_context": 202.4069, "num_word_doc": 49.7845, "num_word_query": 23.5779, "postclip_grad_norm": 1.0, "preclip_grad_norm": 833.738, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1595, "query_norm": 1.2931, "queue_k_norm": 1.3422, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9125, "sent_len_1": 66.729, "sent_len_max_0": 127.4112, "sent_len_max_1": 190.055, "stdk": 0.046, "stdq": 0.0408, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 23500 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.4269, "doc_norm": 1.3387, "encoder_q-embeddings": 769.6287, "encoder_q-layer.0": 527.8018, "encoder_q-layer.1": 626.4318, "encoder_q-layer.10": 363.6526, "encoder_q-layer.11": 971.6888, "encoder_q-layer.2": 686.8844, "encoder_q-layer.3": 670.2164, "encoder_q-layer.4": 714.0781, "encoder_q-layer.5": 692.1204, "encoder_q-layer.6": 615.6566, "encoder_q-layer.7": 495.6332, "encoder_q-layer.8": 469.1042, "encoder_q-layer.9": 322.2926, "epoch": 0.15, "inbatch_neg_score": 0.1573, "inbatch_pos_score": 0.6558, "learning_rate": 4.2444444444444445e-05, "loss": 4.4269, "norm_diff": 0.0214, "norm_loss": 0.0, "num_token_doc": 66.958, "num_token_overlap": 11.7297, "num_token_query": 32.0953, "num_token_union": 65.5257, "num_word_context": 202.4331, "num_word_doc": 49.9738, "num_word_query": 23.7021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 969.1289, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.156, "query_norm": 1.3173, "queue_k_norm": 1.3397, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0953, "sent_len_1": 66.958, "sent_len_max_0": 127.66, "sent_len_max_1": 190.39, "stdk": 0.046, "stdq": 0.0418, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 23600 }, { "accuracy": 40.8203, "active_queue_size": 16384.0, "cl_loss": 4.4555, "doc_norm": 1.3371, "encoder_q-embeddings": 2030.5654, "encoder_q-layer.0": 1615.0232, "encoder_q-layer.1": 1861.3413, "encoder_q-layer.10": 379.6226, "encoder_q-layer.11": 988.3532, "encoder_q-layer.2": 2241.9656, "encoder_q-layer.3": 2412.9517, "encoder_q-layer.4": 2553.0942, "encoder_q-layer.5": 2441.0986, "encoder_q-layer.6": 2203.3489, "encoder_q-layer.7": 1788.5103, "encoder_q-layer.8": 1688.5173, "encoder_q-layer.9": 454.0223, "epoch": 0.15, "inbatch_neg_score": 0.1548, "inbatch_pos_score": 0.625, "learning_rate": 4.238888888888889e-05, "loss": 4.4555, "norm_diff": 0.0494, "norm_loss": 0.0, "num_token_doc": 66.8042, "num_token_overlap": 11.6605, "num_token_query": 31.8658, "num_token_union": 65.3863, "num_word_context": 202.253, "num_word_doc": 49.8396, "num_word_query": 23.5267, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2805.3362, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1545, "query_norm": 1.2877, "queue_k_norm": 1.3384, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8658, "sent_len_1": 66.8042, "sent_len_max_0": 127.4663, "sent_len_max_1": 188.5513, "stdk": 0.046, "stdq": 0.041, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 23700 }, { "accuracy": 38.5742, "active_queue_size": 16384.0, "cl_loss": 4.4182, "doc_norm": 1.3447, "encoder_q-embeddings": 1031.6815, "encoder_q-layer.0": 784.2057, "encoder_q-layer.1": 826.6844, "encoder_q-layer.10": 189.5384, "encoder_q-layer.11": 474.6008, "encoder_q-layer.2": 592.3408, "encoder_q-layer.3": 522.3268, "encoder_q-layer.4": 485.3893, "encoder_q-layer.5": 473.0649, "encoder_q-layer.6": 430.6864, "encoder_q-layer.7": 306.8642, "encoder_q-layer.8": 265.6743, "encoder_q-layer.9": 179.7079, "epoch": 0.15, "inbatch_neg_score": 0.1474, "inbatch_pos_score": 0.6318, "learning_rate": 4.233333333333334e-05, "loss": 4.4182, "norm_diff": 0.0497, "norm_loss": 0.0, "num_token_doc": 66.9666, "num_token_overlap": 11.6911, "num_token_query": 31.9192, "num_token_union": 65.4404, "num_word_context": 202.5907, "num_word_doc": 49.9131, "num_word_query": 23.5656, "postclip_grad_norm": 1.0, "preclip_grad_norm": 898.0151, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1473, "query_norm": 1.2951, "queue_k_norm": 1.3376, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9192, "sent_len_1": 66.9666, "sent_len_max_0": 127.4488, "sent_len_max_1": 192.0475, "stdk": 0.0463, "stdq": 0.0418, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 23800 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.4352, "doc_norm": 1.3337, "encoder_q-embeddings": 13200.707, "encoder_q-layer.0": 10786.3604, "encoder_q-layer.1": 13680.2734, "encoder_q-layer.10": 194.2023, "encoder_q-layer.11": 477.4244, "encoder_q-layer.2": 10033.7383, "encoder_q-layer.3": 6896.5161, "encoder_q-layer.4": 3948.0049, "encoder_q-layer.5": 3144.8369, "encoder_q-layer.6": 2069.6499, "encoder_q-layer.7": 2159.7915, "encoder_q-layer.8": 1300.587, "encoder_q-layer.9": 344.5764, "epoch": 0.16, "inbatch_neg_score": 0.1394, "inbatch_pos_score": 0.6348, "learning_rate": 4.227777777777778e-05, "loss": 4.4352, "norm_diff": 0.0276, "norm_loss": 0.0, "num_token_doc": 66.5423, "num_token_overlap": 11.6816, "num_token_query": 31.9591, "num_token_union": 65.2452, "num_word_context": 201.9923, "num_word_doc": 49.6746, "num_word_query": 23.6194, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11555.0158, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1381, "query_norm": 1.3126, "queue_k_norm": 1.3375, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9591, "sent_len_1": 66.5423, "sent_len_max_0": 127.5375, "sent_len_max_1": 188.125, "stdk": 0.0459, "stdq": 0.0425, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 23900 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.453, "doc_norm": 1.3394, "encoder_q-embeddings": 870.2314, "encoder_q-layer.0": 604.6301, "encoder_q-layer.1": 721.0061, "encoder_q-layer.10": 193.8605, "encoder_q-layer.11": 498.0234, "encoder_q-layer.2": 779.0353, "encoder_q-layer.3": 860.5425, "encoder_q-layer.4": 1008.16, "encoder_q-layer.5": 1000.7205, "encoder_q-layer.6": 904.6501, "encoder_q-layer.7": 617.8063, "encoder_q-layer.8": 424.733, "encoder_q-layer.9": 231.1241, "epoch": 0.16, "inbatch_neg_score": 0.1438, "inbatch_pos_score": 0.6411, "learning_rate": 4.222222222222222e-05, "loss": 4.453, "norm_diff": 0.0228, "norm_loss": 0.0, "num_token_doc": 66.8031, "num_token_overlap": 11.6397, "num_token_query": 31.9207, "num_token_union": 65.4126, "num_word_context": 202.2721, "num_word_doc": 49.8359, "num_word_query": 23.5737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1059.6079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1433, "query_norm": 1.3166, "queue_k_norm": 1.3376, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9207, "sent_len_1": 66.8031, "sent_len_max_0": 127.3988, "sent_len_max_1": 188.8625, "stdk": 0.0461, "stdq": 0.0425, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 24000 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.4534, "doc_norm": 1.3391, "encoder_q-embeddings": 1247.9629, "encoder_q-layer.0": 820.0999, "encoder_q-layer.1": 899.7753, "encoder_q-layer.10": 177.7448, "encoder_q-layer.11": 475.1311, "encoder_q-layer.2": 868.2661, "encoder_q-layer.3": 783.72, "encoder_q-layer.4": 692.0386, "encoder_q-layer.5": 598.6022, "encoder_q-layer.6": 549.2291, "encoder_q-layer.7": 342.828, "encoder_q-layer.8": 264.0601, "encoder_q-layer.9": 161.9231, "epoch": 0.16, "inbatch_neg_score": 0.1561, "inbatch_pos_score": 0.6558, "learning_rate": 4.216666666666667e-05, "loss": 4.4534, "norm_diff": 0.0511, "norm_loss": 0.0, "num_token_doc": 66.8156, "num_token_overlap": 11.687, "num_token_query": 31.9908, "num_token_union": 65.4788, "num_word_context": 202.6799, "num_word_doc": 49.8788, "num_word_query": 23.6555, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1080.3409, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1561, "query_norm": 1.2881, "queue_k_norm": 1.3371, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9908, "sent_len_1": 66.8156, "sent_len_max_0": 127.4575, "sent_len_max_1": 189.4863, "stdk": 0.0461, "stdq": 0.0413, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 24100 }, { "accuracy": 37.0117, "active_queue_size": 16384.0, "cl_loss": 4.4705, "doc_norm": 1.3348, "encoder_q-embeddings": 356.9401, "encoder_q-layer.0": 257.3736, "encoder_q-layer.1": 293.7075, "encoder_q-layer.10": 173.9061, "encoder_q-layer.11": 499.6986, "encoder_q-layer.2": 289.7965, "encoder_q-layer.3": 279.7806, "encoder_q-layer.4": 273.6304, "encoder_q-layer.5": 276.7941, "encoder_q-layer.6": 302.0551, "encoder_q-layer.7": 276.3857, "encoder_q-layer.8": 244.7154, "encoder_q-layer.9": 158.0966, "epoch": 0.16, "inbatch_neg_score": 0.1586, "inbatch_pos_score": 0.6279, "learning_rate": 4.211111111111111e-05, "loss": 4.4705, "norm_diff": 0.0356, "norm_loss": 0.0, "num_token_doc": 66.6701, "num_token_overlap": 11.6549, "num_token_query": 31.9307, "num_token_union": 65.317, "num_word_context": 202.2852, "num_word_doc": 49.7185, "num_word_query": 23.5805, "postclip_grad_norm": 1.0, "preclip_grad_norm": 462.7791, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1598, "query_norm": 1.2992, "queue_k_norm": 1.3373, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9307, "sent_len_1": 66.6701, "sent_len_max_0": 127.5413, "sent_len_max_1": 190.48, "stdk": 0.046, "stdq": 0.0416, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 24200 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.4314, "doc_norm": 1.3379, "encoder_q-embeddings": 794.8365, "encoder_q-layer.0": 594.9608, "encoder_q-layer.1": 618.7563, "encoder_q-layer.10": 207.9982, "encoder_q-layer.11": 531.052, "encoder_q-layer.2": 710.394, "encoder_q-layer.3": 617.4498, "encoder_q-layer.4": 611.4649, "encoder_q-layer.5": 499.3925, "encoder_q-layer.6": 388.6699, "encoder_q-layer.7": 272.1651, "encoder_q-layer.8": 248.4683, "encoder_q-layer.9": 172.0631, "epoch": 0.16, "inbatch_neg_score": 0.1678, "inbatch_pos_score": 0.6797, "learning_rate": 4.205555555555556e-05, "loss": 4.4314, "norm_diff": 0.0093, "norm_loss": 0.0, "num_token_doc": 66.8462, "num_token_overlap": 11.7026, "num_token_query": 32.0403, "num_token_union": 65.4514, "num_word_context": 202.8212, "num_word_doc": 49.9071, "num_word_query": 23.6459, "postclip_grad_norm": 1.0, "preclip_grad_norm": 802.9601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1654, "query_norm": 1.3389, "queue_k_norm": 1.3399, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0403, "sent_len_1": 66.8462, "sent_len_max_0": 127.5125, "sent_len_max_1": 188.8088, "stdk": 0.046, "stdq": 0.0425, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 24300 }, { "accuracy": 39.0625, "active_queue_size": 16384.0, "cl_loss": 4.4788, "doc_norm": 1.3309, "encoder_q-embeddings": 2182.376, "encoder_q-layer.0": 1493.3201, "encoder_q-layer.1": 1826.7944, "encoder_q-layer.10": 192.6072, "encoder_q-layer.11": 496.3945, "encoder_q-layer.2": 1927.7957, "encoder_q-layer.3": 2130.1914, "encoder_q-layer.4": 2316.2202, "encoder_q-layer.5": 2335.126, "encoder_q-layer.6": 1906.6766, "encoder_q-layer.7": 1284.0754, "encoder_q-layer.8": 797.3526, "encoder_q-layer.9": 371.352, "epoch": 0.16, "inbatch_neg_score": 0.1687, "inbatch_pos_score": 0.6401, "learning_rate": 4.2e-05, "loss": 4.4788, "norm_diff": 0.0172, "norm_loss": 0.0, "num_token_doc": 66.773, "num_token_overlap": 11.6136, "num_token_query": 31.7646, "num_token_union": 65.2586, "num_word_context": 202.7342, "num_word_doc": 49.8058, "num_word_query": 23.4569, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2489.0658, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.167, "query_norm": 1.3195, "queue_k_norm": 1.3366, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7646, "sent_len_1": 66.773, "sent_len_max_0": 127.5012, "sent_len_max_1": 191.9375, "stdk": 0.0458, "stdq": 0.0413, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 24400 }, { "accuracy": 36.8164, "active_queue_size": 16384.0, "cl_loss": 4.4298, "doc_norm": 1.3355, "encoder_q-embeddings": 1351.7062, "encoder_q-layer.0": 1007.8474, "encoder_q-layer.1": 1131.3881, "encoder_q-layer.10": 179.7721, "encoder_q-layer.11": 515.7203, "encoder_q-layer.2": 1163.4863, "encoder_q-layer.3": 1187.2605, "encoder_q-layer.4": 1192.3281, "encoder_q-layer.5": 1156.1833, "encoder_q-layer.6": 1014.8638, "encoder_q-layer.7": 591.8943, "encoder_q-layer.8": 363.3826, "encoder_q-layer.9": 199.9048, "epoch": 0.16, "inbatch_neg_score": 0.1694, "inbatch_pos_score": 0.6338, "learning_rate": 4.194444444444445e-05, "loss": 4.4298, "norm_diff": 0.024, "norm_loss": 0.0, "num_token_doc": 66.8628, "num_token_overlap": 11.7285, "num_token_query": 31.9624, "num_token_union": 65.3827, "num_word_context": 202.5958, "num_word_doc": 49.9233, "num_word_query": 23.6308, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1421.5411, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1686, "query_norm": 1.3163, "queue_k_norm": 1.3363, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9624, "sent_len_1": 66.8628, "sent_len_max_0": 127.5062, "sent_len_max_1": 190.2212, "stdk": 0.046, "stdq": 0.0413, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 24500 }, { "accuracy": 39.2578, "active_queue_size": 16384.0, "cl_loss": 4.4433, "doc_norm": 1.3365, "encoder_q-embeddings": 1327.7582, "encoder_q-layer.0": 943.0918, "encoder_q-layer.1": 1003.85, "encoder_q-layer.10": 171.8669, "encoder_q-layer.11": 485.9075, "encoder_q-layer.2": 1178.3673, "encoder_q-layer.3": 1198.7816, "encoder_q-layer.4": 1143.4672, "encoder_q-layer.5": 895.9602, "encoder_q-layer.6": 859.8477, "encoder_q-layer.7": 532.593, "encoder_q-layer.8": 312.4591, "encoder_q-layer.9": 173.1264, "epoch": 0.16, "inbatch_neg_score": 0.1795, "inbatch_pos_score": 0.6523, "learning_rate": 4.188888888888889e-05, "loss": 4.4433, "norm_diff": 0.0178, "norm_loss": 0.0, "num_token_doc": 66.7275, "num_token_overlap": 11.6469, "num_token_query": 31.8558, "num_token_union": 65.3576, "num_word_context": 202.291, "num_word_doc": 49.8025, "num_word_query": 23.5299, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1344.824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1783, "query_norm": 1.3232, "queue_k_norm": 1.336, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8558, "sent_len_1": 66.7275, "sent_len_max_0": 127.4125, "sent_len_max_1": 188.9512, "stdk": 0.046, "stdq": 0.0411, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 24600 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.4589, "doc_norm": 1.3373, "encoder_q-embeddings": 583.7432, "encoder_q-layer.0": 403.9385, "encoder_q-layer.1": 449.1687, "encoder_q-layer.10": 211.6648, "encoder_q-layer.11": 527.8582, "encoder_q-layer.2": 480.6054, "encoder_q-layer.3": 512.2752, "encoder_q-layer.4": 499.9823, "encoder_q-layer.5": 483.9261, "encoder_q-layer.6": 478.0604, "encoder_q-layer.7": 339.1552, "encoder_q-layer.8": 263.1988, "encoder_q-layer.9": 183.6932, "epoch": 0.16, "inbatch_neg_score": 0.1748, "inbatch_pos_score": 0.6606, "learning_rate": 4.183333333333334e-05, "loss": 4.4589, "norm_diff": 0.0367, "norm_loss": 0.0, "num_token_doc": 66.8873, "num_token_overlap": 11.6842, "num_token_query": 31.8871, "num_token_union": 65.3808, "num_word_context": 202.1148, "num_word_doc": 49.8639, "num_word_query": 23.5355, "postclip_grad_norm": 1.0, "preclip_grad_norm": 674.4739, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.174, "query_norm": 1.3005, "queue_k_norm": 1.3375, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8871, "sent_len_1": 66.8873, "sent_len_max_0": 127.5525, "sent_len_max_1": 189.3913, "stdk": 0.046, "stdq": 0.0409, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 24700 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.4333, "doc_norm": 1.3395, "encoder_q-embeddings": 800.4089, "encoder_q-layer.0": 646.2689, "encoder_q-layer.1": 733.2798, "encoder_q-layer.10": 198.5182, "encoder_q-layer.11": 489.4855, "encoder_q-layer.2": 377.687, "encoder_q-layer.3": 299.8315, "encoder_q-layer.4": 285.5776, "encoder_q-layer.5": 266.921, "encoder_q-layer.6": 244.7386, "encoder_q-layer.7": 217.8454, "encoder_q-layer.8": 230.9711, "encoder_q-layer.9": 184.0396, "epoch": 0.16, "inbatch_neg_score": 0.1769, "inbatch_pos_score": 0.6885, "learning_rate": 4.177777777777778e-05, "loss": 4.4333, "norm_diff": 0.0282, "norm_loss": 0.0, "num_token_doc": 66.6287, "num_token_overlap": 11.7008, "num_token_query": 32.0182, "num_token_union": 65.2979, "num_word_context": 202.5154, "num_word_doc": 49.71, "num_word_query": 23.6663, "postclip_grad_norm": 1.0, "preclip_grad_norm": 697.5299, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1766, "query_norm": 1.3672, "queue_k_norm": 1.3395, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0182, "sent_len_1": 66.6287, "sent_len_max_0": 127.5763, "sent_len_max_1": 188.735, "stdk": 0.046, "stdq": 0.0432, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 24800 }, { "accuracy": 37.4023, "active_queue_size": 16384.0, "cl_loss": 4.4163, "doc_norm": 1.3383, "encoder_q-embeddings": 389.715, "encoder_q-layer.0": 283.6752, "encoder_q-layer.1": 328.9637, "encoder_q-layer.10": 195.3256, "encoder_q-layer.11": 474.002, "encoder_q-layer.2": 342.3502, "encoder_q-layer.3": 354.8914, "encoder_q-layer.4": 311.751, "encoder_q-layer.5": 305.3976, "encoder_q-layer.6": 268.7512, "encoder_q-layer.7": 236.8104, "encoder_q-layer.8": 234.8498, "encoder_q-layer.9": 183.3792, "epoch": 0.16, "inbatch_neg_score": 0.1724, "inbatch_pos_score": 0.6548, "learning_rate": 4.172222222222222e-05, "loss": 4.4163, "norm_diff": 0.0146, "norm_loss": 0.0, "num_token_doc": 66.8783, "num_token_overlap": 11.6315, "num_token_query": 31.8505, "num_token_union": 65.421, "num_word_context": 202.7281, "num_word_doc": 49.9042, "num_word_query": 23.5258, "postclip_grad_norm": 1.0, "preclip_grad_norm": 481.1221, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1707, "query_norm": 1.3453, "queue_k_norm": 1.3386, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8505, "sent_len_1": 66.8783, "sent_len_max_0": 127.5825, "sent_len_max_1": 190.4187, "stdk": 0.046, "stdq": 0.0426, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 24900 }, { "accuracy": 38.7695, "active_queue_size": 16384.0, "cl_loss": 4.452, "doc_norm": 1.3441, "encoder_q-embeddings": 2434.6614, "encoder_q-layer.0": 1681.4393, "encoder_q-layer.1": 1988.1495, "encoder_q-layer.10": 181.4061, "encoder_q-layer.11": 495.7877, "encoder_q-layer.2": 2122.7515, "encoder_q-layer.3": 1950.4709, "encoder_q-layer.4": 1776.9862, "encoder_q-layer.5": 1675.9305, "encoder_q-layer.6": 1357.9519, "encoder_q-layer.7": 952.3671, "encoder_q-layer.8": 616.901, "encoder_q-layer.9": 280.692, "epoch": 0.16, "inbatch_neg_score": 0.1663, "inbatch_pos_score": 0.6533, "learning_rate": 4.166666666666667e-05, "loss": 4.452, "norm_diff": 0.0244, "norm_loss": 0.0, "num_token_doc": 66.803, "num_token_overlap": 11.6615, "num_token_query": 31.8284, "num_token_union": 65.3419, "num_word_context": 202.5764, "num_word_doc": 49.8526, "num_word_query": 23.5074, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2417.9172, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1664, "query_norm": 1.3255, "queue_k_norm": 1.3398, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8284, "sent_len_1": 66.803, "sent_len_max_0": 127.5463, "sent_len_max_1": 189.0675, "stdk": 0.0462, "stdq": 0.0421, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25000 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.4288, "doc_norm": 1.3355, "encoder_q-embeddings": 1309.6469, "encoder_q-layer.0": 1021.1263, "encoder_q-layer.1": 1348.0923, "encoder_q-layer.10": 173.4315, "encoder_q-layer.11": 466.3159, "encoder_q-layer.2": 1576.5509, "encoder_q-layer.3": 1254.7777, "encoder_q-layer.4": 804.0762, "encoder_q-layer.5": 717.6318, "encoder_q-layer.6": 564.7186, "encoder_q-layer.7": 472.0868, "encoder_q-layer.8": 345.2174, "encoder_q-layer.9": 181.0982, "epoch": 0.16, "inbatch_neg_score": 0.1596, "inbatch_pos_score": 0.6567, "learning_rate": 4.1611111111111114e-05, "loss": 4.4288, "norm_diff": 0.0186, "norm_loss": 0.0, "num_token_doc": 66.6952, "num_token_overlap": 11.6657, "num_token_query": 31.8586, "num_token_union": 65.2473, "num_word_context": 202.1868, "num_word_doc": 49.7339, "num_word_query": 23.5432, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1418.7212, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1594, "query_norm": 1.322, "queue_k_norm": 1.3374, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8586, "sent_len_1": 66.6952, "sent_len_max_0": 127.41, "sent_len_max_1": 190.0613, "stdk": 0.0459, "stdq": 0.0421, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 25100 }, { "accuracy": 39.3555, "active_queue_size": 16384.0, "cl_loss": 4.4284, "doc_norm": 1.3446, "encoder_q-embeddings": 460.7732, "encoder_q-layer.0": 338.0968, "encoder_q-layer.1": 357.7734, "encoder_q-layer.10": 184.5188, "encoder_q-layer.11": 487.5558, "encoder_q-layer.2": 377.3792, "encoder_q-layer.3": 353.45, "encoder_q-layer.4": 355.6951, "encoder_q-layer.5": 361.5615, "encoder_q-layer.6": 379.249, "encoder_q-layer.7": 283.0919, "encoder_q-layer.8": 233.1464, "encoder_q-layer.9": 170.0399, "epoch": 0.16, "inbatch_neg_score": 0.1546, "inbatch_pos_score": 0.6392, "learning_rate": 4.155555555555556e-05, "loss": 4.4284, "norm_diff": 0.0518, "norm_loss": 0.0, "num_token_doc": 66.8193, "num_token_overlap": 11.7014, "num_token_query": 31.8953, "num_token_union": 65.3757, "num_word_context": 202.5597, "num_word_doc": 49.9131, "num_word_query": 23.5316, "postclip_grad_norm": 1.0, "preclip_grad_norm": 528.7215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1547, "query_norm": 1.2928, "queue_k_norm": 1.3371, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8953, "sent_len_1": 66.8193, "sent_len_max_0": 127.42, "sent_len_max_1": 190.2663, "stdk": 0.0463, "stdq": 0.0415, "stdqueue_k": 0.046, "stdqueue_q": 0.0, "step": 25200 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.441, "doc_norm": 1.3445, "encoder_q-embeddings": 626.4313, "encoder_q-layer.0": 464.0182, "encoder_q-layer.1": 549.8607, "encoder_q-layer.10": 164.5128, "encoder_q-layer.11": 443.4879, "encoder_q-layer.2": 577.7957, "encoder_q-layer.3": 643.0212, "encoder_q-layer.4": 652.6345, "encoder_q-layer.5": 573.0179, "encoder_q-layer.6": 431.6505, "encoder_q-layer.7": 319.7242, "encoder_q-layer.8": 237.2083, "encoder_q-layer.9": 154.2597, "epoch": 0.16, "inbatch_neg_score": 0.157, "inbatch_pos_score": 0.6558, "learning_rate": 4.15e-05, "loss": 4.441, "norm_diff": 0.0367, "norm_loss": 0.0, "num_token_doc": 66.8878, "num_token_overlap": 11.6638, "num_token_query": 31.8415, "num_token_union": 65.3752, "num_word_context": 202.408, "num_word_doc": 49.8651, "num_word_query": 23.5263, "postclip_grad_norm": 1.0, "preclip_grad_norm": 730.0196, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.156, "query_norm": 1.3078, "queue_k_norm": 1.3383, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8415, "sent_len_1": 66.8878, "sent_len_max_0": 127.465, "sent_len_max_1": 191.1525, "stdk": 0.0462, "stdq": 0.042, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25300 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.4027, "doc_norm": 1.3362, "encoder_q-embeddings": 244.2261, "encoder_q-layer.0": 168.832, "encoder_q-layer.1": 190.0656, "encoder_q-layer.10": 188.328, "encoder_q-layer.11": 503.3083, "encoder_q-layer.2": 200.291, "encoder_q-layer.3": 206.2868, "encoder_q-layer.4": 209.7338, "encoder_q-layer.5": 190.953, "encoder_q-layer.6": 179.7231, "encoder_q-layer.7": 164.6828, "encoder_q-layer.8": 187.6605, "encoder_q-layer.9": 165.8255, "epoch": 0.17, "inbatch_neg_score": 0.1452, "inbatch_pos_score": 0.6367, "learning_rate": 4.144444444444445e-05, "loss": 4.4027, "norm_diff": 0.0278, "norm_loss": 0.0, "num_token_doc": 66.7993, "num_token_overlap": 11.7405, "num_token_query": 32.027, "num_token_union": 65.4056, "num_word_context": 202.3573, "num_word_doc": 49.8528, "num_word_query": 23.6761, "postclip_grad_norm": 1.0, "preclip_grad_norm": 357.6112, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1448, "query_norm": 1.3095, "queue_k_norm": 1.338, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.027, "sent_len_1": 66.7993, "sent_len_max_0": 127.5687, "sent_len_max_1": 189.0938, "stdk": 0.0459, "stdq": 0.0425, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25400 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.4008, "doc_norm": 1.3384, "encoder_q-embeddings": 812.1422, "encoder_q-layer.0": 603.6174, "encoder_q-layer.1": 588.3362, "encoder_q-layer.10": 187.106, "encoder_q-layer.11": 456.8617, "encoder_q-layer.2": 386.3921, "encoder_q-layer.3": 327.0026, "encoder_q-layer.4": 286.1378, "encoder_q-layer.5": 251.2705, "encoder_q-layer.6": 227.9181, "encoder_q-layer.7": 227.6834, "encoder_q-layer.8": 209.091, "encoder_q-layer.9": 162.242, "epoch": 0.17, "inbatch_neg_score": 0.1356, "inbatch_pos_score": 0.6348, "learning_rate": 4.138888888888889e-05, "loss": 4.4008, "norm_diff": 0.0397, "norm_loss": 0.0, "num_token_doc": 66.9313, "num_token_overlap": 11.6733, "num_token_query": 31.8784, "num_token_union": 65.4598, "num_word_context": 202.4801, "num_word_doc": 49.9701, "num_word_query": 23.544, "postclip_grad_norm": 1.0, "preclip_grad_norm": 671.387, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1362, "query_norm": 1.2987, "queue_k_norm": 1.3376, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8784, "sent_len_1": 66.9313, "sent_len_max_0": 127.5837, "sent_len_max_1": 190.6738, "stdk": 0.0461, "stdq": 0.0421, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25500 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.4363, "doc_norm": 1.3462, "encoder_q-embeddings": 412.7804, "encoder_q-layer.0": 303.7703, "encoder_q-layer.1": 319.4423, "encoder_q-layer.10": 178.453, "encoder_q-layer.11": 467.601, "encoder_q-layer.2": 330.6123, "encoder_q-layer.3": 299.4453, "encoder_q-layer.4": 289.7698, "encoder_q-layer.5": 257.3474, "encoder_q-layer.6": 233.3878, "encoder_q-layer.7": 196.068, "encoder_q-layer.8": 185.7608, "encoder_q-layer.9": 160.9766, "epoch": 0.17, "inbatch_neg_score": 0.1312, "inbatch_pos_score": 0.644, "learning_rate": 4.133333333333333e-05, "loss": 4.4363, "norm_diff": 0.0304, "norm_loss": 0.0, "num_token_doc": 66.8224, "num_token_overlap": 11.6475, "num_token_query": 31.862, "num_token_union": 65.3683, "num_word_context": 202.6382, "num_word_doc": 49.8159, "num_word_query": 23.5251, "postclip_grad_norm": 1.0, "preclip_grad_norm": 453.7198, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1305, "query_norm": 1.3158, "queue_k_norm": 1.3366, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.862, "sent_len_1": 66.8224, "sent_len_max_0": 127.5438, "sent_len_max_1": 191.0888, "stdk": 0.0464, "stdq": 0.0426, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25600 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.4171, "doc_norm": 1.3409, "encoder_q-embeddings": 1273.512, "encoder_q-layer.0": 905.4546, "encoder_q-layer.1": 890.5992, "encoder_q-layer.10": 193.6232, "encoder_q-layer.11": 500.266, "encoder_q-layer.2": 727.3232, "encoder_q-layer.3": 622.5176, "encoder_q-layer.4": 643.4575, "encoder_q-layer.5": 482.6777, "encoder_q-layer.6": 371.3572, "encoder_q-layer.7": 288.0705, "encoder_q-layer.8": 245.3616, "encoder_q-layer.9": 180.5265, "epoch": 0.17, "inbatch_neg_score": 0.118, "inbatch_pos_score": 0.6055, "learning_rate": 4.127777777777778e-05, "loss": 4.4171, "norm_diff": 0.0517, "norm_loss": 0.0, "num_token_doc": 66.7515, "num_token_overlap": 11.7073, "num_token_query": 31.8974, "num_token_union": 65.3037, "num_word_context": 202.153, "num_word_doc": 49.8138, "num_word_query": 23.5568, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1042.9167, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1177, "query_norm": 1.2893, "queue_k_norm": 1.3359, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8974, "sent_len_1": 66.7515, "sent_len_max_0": 127.4513, "sent_len_max_1": 189.4187, "stdk": 0.0462, "stdq": 0.0417, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25700 }, { "accuracy": 39.1602, "active_queue_size": 16384.0, "cl_loss": 4.3907, "doc_norm": 1.3372, "encoder_q-embeddings": 1831.2036, "encoder_q-layer.0": 1385.4528, "encoder_q-layer.1": 1604.7463, "encoder_q-layer.10": 395.467, "encoder_q-layer.11": 1062.8428, "encoder_q-layer.2": 1239.4917, "encoder_q-layer.3": 1261.2655, "encoder_q-layer.4": 1239.1091, "encoder_q-layer.5": 1052.4106, "encoder_q-layer.6": 763.702, "encoder_q-layer.7": 581.8166, "encoder_q-layer.8": 518.0935, "encoder_q-layer.9": 352.0264, "epoch": 0.17, "inbatch_neg_score": 0.1395, "inbatch_pos_score": 0.6206, "learning_rate": 4.1222222222222224e-05, "loss": 4.3907, "norm_diff": 0.0125, "norm_loss": 0.0, "num_token_doc": 66.8339, "num_token_overlap": 11.6791, "num_token_query": 31.94, "num_token_union": 65.4137, "num_word_context": 201.9831, "num_word_doc": 49.8487, "num_word_query": 23.5877, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1743.7589, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1377, "query_norm": 1.3282, "queue_k_norm": 1.3353, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.94, "sent_len_1": 66.8339, "sent_len_max_0": 127.5563, "sent_len_max_1": 191.545, "stdk": 0.0461, "stdq": 0.0425, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25800 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.4139, "doc_norm": 1.3362, "encoder_q-embeddings": 2161.6643, "encoder_q-layer.0": 1493.1049, "encoder_q-layer.1": 1584.1464, "encoder_q-layer.10": 395.0257, "encoder_q-layer.11": 964.8029, "encoder_q-layer.2": 1772.6027, "encoder_q-layer.3": 1789.9471, "encoder_q-layer.4": 1820.8064, "encoder_q-layer.5": 1571.4221, "encoder_q-layer.6": 1258.2053, "encoder_q-layer.7": 982.7791, "encoder_q-layer.8": 763.7466, "encoder_q-layer.9": 431.953, "epoch": 0.17, "inbatch_neg_score": 0.1432, "inbatch_pos_score": 0.6509, "learning_rate": 4.116666666666667e-05, "loss": 4.4139, "norm_diff": 0.0121, "norm_loss": 0.0, "num_token_doc": 66.9347, "num_token_overlap": 11.7075, "num_token_query": 31.9798, "num_token_union": 65.4793, "num_word_context": 202.8036, "num_word_doc": 49.9775, "num_word_query": 23.6293, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2156.1158, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1431, "query_norm": 1.3363, "queue_k_norm": 1.334, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9798, "sent_len_1": 66.9347, "sent_len_max_0": 127.5625, "sent_len_max_1": 189.315, "stdk": 0.0461, "stdq": 0.043, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 25900 }, { "accuracy": 39.7461, "active_queue_size": 16384.0, "cl_loss": 4.4101, "doc_norm": 1.3413, "encoder_q-embeddings": 1615.6793, "encoder_q-layer.0": 1096.6484, "encoder_q-layer.1": 1284.83, "encoder_q-layer.10": 407.6357, "encoder_q-layer.11": 1063.406, "encoder_q-layer.2": 1409.1859, "encoder_q-layer.3": 1443.4541, "encoder_q-layer.4": 1640.0363, "encoder_q-layer.5": 1535.7367, "encoder_q-layer.6": 1283.3865, "encoder_q-layer.7": 915.1854, "encoder_q-layer.8": 698.6, "encoder_q-layer.9": 390.7987, "epoch": 0.17, "inbatch_neg_score": 0.1301, "inbatch_pos_score": 0.6338, "learning_rate": 4.111111111111111e-05, "loss": 4.4101, "norm_diff": 0.0244, "norm_loss": 0.0, "num_token_doc": 66.8203, "num_token_overlap": 11.6548, "num_token_query": 31.754, "num_token_union": 65.2733, "num_word_context": 201.8762, "num_word_doc": 49.8927, "num_word_query": 23.4448, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1809.8645, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1293, "query_norm": 1.3189, "queue_k_norm": 1.3338, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.754, "sent_len_1": 66.8203, "sent_len_max_0": 127.695, "sent_len_max_1": 189.1262, "stdk": 0.0463, "stdq": 0.0431, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 26000 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.42, "doc_norm": 1.3319, "encoder_q-embeddings": 2877.6978, "encoder_q-layer.0": 1999.5088, "encoder_q-layer.1": 2294.0046, "encoder_q-layer.10": 440.6547, "encoder_q-layer.11": 988.3563, "encoder_q-layer.2": 2480.0549, "encoder_q-layer.3": 2301.7168, "encoder_q-layer.4": 2281.5027, "encoder_q-layer.5": 1666.0939, "encoder_q-layer.6": 1007.8181, "encoder_q-layer.7": 590.7666, "encoder_q-layer.8": 522.8379, "encoder_q-layer.9": 392.0247, "epoch": 0.17, "inbatch_neg_score": 0.1294, "inbatch_pos_score": 0.6323, "learning_rate": 4.105555555555556e-05, "loss": 4.42, "norm_diff": 0.0181, "norm_loss": 0.0, "num_token_doc": 66.5824, "num_token_overlap": 11.6644, "num_token_query": 31.7949, "num_token_union": 65.1858, "num_word_context": 202.0716, "num_word_doc": 49.7204, "num_word_query": 23.4957, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2736.5044, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1296, "query_norm": 1.3146, "queue_k_norm": 1.3331, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7949, "sent_len_1": 66.5824, "sent_len_max_0": 127.3625, "sent_len_max_1": 187.5325, "stdk": 0.046, "stdq": 0.0431, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 26100 }, { "accuracy": 38.8672, "active_queue_size": 16384.0, "cl_loss": 4.4347, "doc_norm": 1.3259, "encoder_q-embeddings": 2629.7705, "encoder_q-layer.0": 2076.4268, "encoder_q-layer.1": 2354.3745, "encoder_q-layer.10": 481.83, "encoder_q-layer.11": 1081.8973, "encoder_q-layer.2": 2586.7349, "encoder_q-layer.3": 1947.9199, "encoder_q-layer.4": 1460.1012, "encoder_q-layer.5": 1047.6008, "encoder_q-layer.6": 941.3817, "encoder_q-layer.7": 670.7659, "encoder_q-layer.8": 616.9934, "encoder_q-layer.9": 443.0421, "epoch": 0.17, "inbatch_neg_score": 0.1308, "inbatch_pos_score": 0.6157, "learning_rate": 4.1e-05, "loss": 4.4347, "norm_diff": 0.027, "norm_loss": 0.0, "num_token_doc": 66.7808, "num_token_overlap": 11.6792, "num_token_query": 31.8578, "num_token_union": 65.3746, "num_word_context": 202.5772, "num_word_doc": 49.8336, "num_word_query": 23.5262, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2563.3146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.13, "query_norm": 1.2989, "queue_k_norm": 1.333, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8578, "sent_len_1": 66.7808, "sent_len_max_0": 127.3738, "sent_len_max_1": 189.7163, "stdk": 0.0458, "stdq": 0.0428, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 26200 }, { "accuracy": 38.4766, "active_queue_size": 16384.0, "cl_loss": 4.4193, "doc_norm": 1.3173, "encoder_q-embeddings": 1296.9111, "encoder_q-layer.0": 915.9423, "encoder_q-layer.1": 1011.9166, "encoder_q-layer.10": 368.5836, "encoder_q-layer.11": 959.6346, "encoder_q-layer.2": 1140.8267, "encoder_q-layer.3": 1433.9044, "encoder_q-layer.4": 1546.6899, "encoder_q-layer.5": 1382.7935, "encoder_q-layer.6": 1070.0203, "encoder_q-layer.7": 1041.3722, "encoder_q-layer.8": 790.4096, "encoder_q-layer.9": 433.6979, "epoch": 0.17, "inbatch_neg_score": 0.13, "inbatch_pos_score": 0.6006, "learning_rate": 4.094444444444445e-05, "loss": 4.4193, "norm_diff": 0.0295, "norm_loss": 0.0, "num_token_doc": 66.6797, "num_token_overlap": 11.6638, "num_token_query": 31.9179, "num_token_union": 65.2902, "num_word_context": 201.9921, "num_word_doc": 49.7585, "num_word_query": 23.583, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1590.4417, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1299, "query_norm": 1.2907, "queue_k_norm": 1.331, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9179, "sent_len_1": 66.6797, "sent_len_max_0": 127.71, "sent_len_max_1": 189.305, "stdk": 0.0455, "stdq": 0.0422, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 26300 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.4086, "doc_norm": 1.3316, "encoder_q-embeddings": 1537.6438, "encoder_q-layer.0": 1098.7433, "encoder_q-layer.1": 1092.7485, "encoder_q-layer.10": 392.8643, "encoder_q-layer.11": 910.154, "encoder_q-layer.2": 1265.6315, "encoder_q-layer.3": 1320.6879, "encoder_q-layer.4": 1313.754, "encoder_q-layer.5": 1192.5698, "encoder_q-layer.6": 1055.6906, "encoder_q-layer.7": 931.9545, "encoder_q-layer.8": 875.6584, "encoder_q-layer.9": 464.0248, "epoch": 0.17, "inbatch_neg_score": 0.1343, "inbatch_pos_score": 0.6299, "learning_rate": 4.088888888888889e-05, "loss": 4.4086, "norm_diff": 0.0391, "norm_loss": 0.0, "num_token_doc": 66.9432, "num_token_overlap": 11.7777, "num_token_query": 32.1263, "num_token_union": 65.5319, "num_word_context": 202.3345, "num_word_doc": 49.9793, "num_word_query": 23.7491, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1656.6196, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1333, "query_norm": 1.2925, "queue_k_norm": 1.3316, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.1263, "sent_len_1": 66.9432, "sent_len_max_0": 127.5925, "sent_len_max_1": 189.7912, "stdk": 0.0461, "stdq": 0.0421, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 26400 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.3922, "doc_norm": 1.3326, "encoder_q-embeddings": 2023.7444, "encoder_q-layer.0": 1424.8314, "encoder_q-layer.1": 1626.2689, "encoder_q-layer.10": 370.0829, "encoder_q-layer.11": 912.0249, "encoder_q-layer.2": 1850.3073, "encoder_q-layer.3": 1618.1899, "encoder_q-layer.4": 1660.4215, "encoder_q-layer.5": 1515.3762, "encoder_q-layer.6": 1357.9376, "encoder_q-layer.7": 1037.3883, "encoder_q-layer.8": 781.9818, "encoder_q-layer.9": 389.8038, "epoch": 0.17, "inbatch_neg_score": 0.1443, "inbatch_pos_score": 0.647, "learning_rate": 4.0833333333333334e-05, "loss": 4.3922, "norm_diff": 0.0204, "norm_loss": 0.0, "num_token_doc": 66.7727, "num_token_overlap": 11.72, "num_token_query": 32.0267, "num_token_union": 65.4005, "num_word_context": 202.3835, "num_word_doc": 49.8148, "num_word_query": 23.6731, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2079.8405, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1438, "query_norm": 1.3122, "queue_k_norm": 1.3324, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0267, "sent_len_1": 66.7727, "sent_len_max_0": 127.6425, "sent_len_max_1": 188.9512, "stdk": 0.0462, "stdq": 0.0423, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 26500 }, { "accuracy": 39.3555, "active_queue_size": 16384.0, "cl_loss": 4.4089, "doc_norm": 1.3352, "encoder_q-embeddings": 1452.3855, "encoder_q-layer.0": 1053.1212, "encoder_q-layer.1": 1190.3115, "encoder_q-layer.10": 342.3587, "encoder_q-layer.11": 881.3047, "encoder_q-layer.2": 1338.9082, "encoder_q-layer.3": 1283.4596, "encoder_q-layer.4": 1378.027, "encoder_q-layer.5": 1353.9114, "encoder_q-layer.6": 1339.5239, "encoder_q-layer.7": 1193.3214, "encoder_q-layer.8": 716.8932, "encoder_q-layer.9": 421.9559, "epoch": 0.17, "inbatch_neg_score": 0.1514, "inbatch_pos_score": 0.6348, "learning_rate": 4.0777777777777783e-05, "loss": 4.4089, "norm_diff": 0.0171, "norm_loss": 0.0, "num_token_doc": 66.5576, "num_token_overlap": 11.6856, "num_token_query": 32.018, "num_token_union": 65.2582, "num_word_context": 202.0387, "num_word_doc": 49.697, "num_word_query": 23.6614, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1681.4592, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.152, "query_norm": 1.3199, "queue_k_norm": 1.3308, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.018, "sent_len_1": 66.5576, "sent_len_max_0": 127.5037, "sent_len_max_1": 189.26, "stdk": 0.0462, "stdq": 0.0421, "stdqueue_k": 0.0461, "stdqueue_q": 0.0, "step": 26600 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.4125, "doc_norm": 1.3365, "encoder_q-embeddings": 813.4149, "encoder_q-layer.0": 567.0361, "encoder_q-layer.1": 603.2216, "encoder_q-layer.10": 362.2899, "encoder_q-layer.11": 929.0835, "encoder_q-layer.2": 662.5128, "encoder_q-layer.3": 664.7961, "encoder_q-layer.4": 735.145, "encoder_q-layer.5": 778.0126, "encoder_q-layer.6": 779.7131, "encoder_q-layer.7": 912.0308, "encoder_q-layer.8": 719.1766, "encoder_q-layer.9": 417.8403, "epoch": 0.17, "inbatch_neg_score": 0.1575, "inbatch_pos_score": 0.6514, "learning_rate": 4.0722222222222226e-05, "loss": 4.4125, "norm_diff": 0.0171, "norm_loss": 0.0, "num_token_doc": 66.7942, "num_token_overlap": 11.6811, "num_token_query": 31.8942, "num_token_union": 65.3712, "num_word_context": 202.0105, "num_word_doc": 49.8035, "num_word_query": 23.5528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1052.9737, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1556, "query_norm": 1.3194, "queue_k_norm": 1.3331, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8942, "sent_len_1": 66.7942, "sent_len_max_0": 127.4237, "sent_len_max_1": 188.9212, "stdk": 0.0462, "stdq": 0.0422, "stdqueue_k": 0.0462, "stdqueue_q": 0.0, "step": 26700 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.3899, "doc_norm": 1.3324, "encoder_q-embeddings": 2103.219, "encoder_q-layer.0": 1417.8816, "encoder_q-layer.1": 1624.0802, "encoder_q-layer.10": 388.2193, "encoder_q-layer.11": 928.5538, "encoder_q-layer.2": 1762.1235, "encoder_q-layer.3": 1735.1212, "encoder_q-layer.4": 1337.865, "encoder_q-layer.5": 1149.4291, "encoder_q-layer.6": 1131.1393, "encoder_q-layer.7": 968.3185, "encoder_q-layer.8": 726.9176, "encoder_q-layer.9": 411.0622, "epoch": 0.17, "inbatch_neg_score": 0.1537, "inbatch_pos_score": 0.6509, "learning_rate": 4.066666666666667e-05, "loss": 4.3899, "norm_diff": 0.029, "norm_loss": 0.0, "num_token_doc": 66.9845, "num_token_overlap": 11.6916, "num_token_query": 31.9273, "num_token_union": 65.4881, "num_word_context": 202.4435, "num_word_doc": 49.9822, "num_word_query": 23.5969, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2052.6949, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1531, "query_norm": 1.3034, "queue_k_norm": 1.3344, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9273, "sent_len_1": 66.9845, "sent_len_max_0": 127.5113, "sent_len_max_1": 187.4038, "stdk": 0.0461, "stdq": 0.0416, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 26800 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.38, "doc_norm": 1.3327, "encoder_q-embeddings": 749.5754, "encoder_q-layer.0": 499.7037, "encoder_q-layer.1": 598.8021, "encoder_q-layer.10": 334.9335, "encoder_q-layer.11": 921.2088, "encoder_q-layer.2": 647.3256, "encoder_q-layer.3": 688.4249, "encoder_q-layer.4": 737.2276, "encoder_q-layer.5": 796.5293, "encoder_q-layer.6": 888.7683, "encoder_q-layer.7": 810.9237, "encoder_q-layer.8": 593.2555, "encoder_q-layer.9": 373.4026, "epoch": 0.18, "inbatch_neg_score": 0.1581, "inbatch_pos_score": 0.6348, "learning_rate": 4.061111111111111e-05, "loss": 4.38, "norm_diff": 0.0379, "norm_loss": 0.0, "num_token_doc": 66.9165, "num_token_overlap": 11.7726, "num_token_query": 32.1281, "num_token_union": 65.4857, "num_word_context": 202.4184, "num_word_doc": 49.912, "num_word_query": 23.7336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1031.1479, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1583, "query_norm": 1.2948, "queue_k_norm": 1.3377, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.1281, "sent_len_1": 66.9165, "sent_len_max_0": 127.455, "sent_len_max_1": 188.6475, "stdk": 0.0461, "stdq": 0.0413, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 26900 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.401, "doc_norm": 1.3351, "encoder_q-embeddings": 1177.7831, "encoder_q-layer.0": 820.1949, "encoder_q-layer.1": 872.7925, "encoder_q-layer.10": 334.3777, "encoder_q-layer.11": 909.4063, "encoder_q-layer.2": 979.009, "encoder_q-layer.3": 1037.4485, "encoder_q-layer.4": 1149.3025, "encoder_q-layer.5": 1044.0819, "encoder_q-layer.6": 1198.701, "encoder_q-layer.7": 900.3474, "encoder_q-layer.8": 701.3435, "encoder_q-layer.9": 374.4312, "epoch": 0.18, "inbatch_neg_score": 0.156, "inbatch_pos_score": 0.6768, "learning_rate": 4.055555555555556e-05, "loss": 4.401, "norm_diff": 0.0222, "norm_loss": 0.0, "num_token_doc": 66.9383, "num_token_overlap": 11.7026, "num_token_query": 31.9076, "num_token_union": 65.4172, "num_word_context": 202.6018, "num_word_doc": 49.9468, "num_word_query": 23.5557, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1388.0869, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1549, "query_norm": 1.33, "queue_k_norm": 1.3385, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9076, "sent_len_1": 66.9383, "sent_len_max_0": 127.4775, "sent_len_max_1": 190.0613, "stdk": 0.0462, "stdq": 0.0428, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 27000 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.3882, "doc_norm": 1.3362, "encoder_q-embeddings": 1696.6842, "encoder_q-layer.0": 1145.629, "encoder_q-layer.1": 1342.3088, "encoder_q-layer.10": 373.6667, "encoder_q-layer.11": 955.4197, "encoder_q-layer.2": 1424.5237, "encoder_q-layer.3": 1566.9452, "encoder_q-layer.4": 1457.3291, "encoder_q-layer.5": 1517.0881, "encoder_q-layer.6": 1648.5686, "encoder_q-layer.7": 1489.5128, "encoder_q-layer.8": 929.1933, "encoder_q-layer.9": 399.0504, "epoch": 0.18, "inbatch_neg_score": 0.1567, "inbatch_pos_score": 0.668, "learning_rate": 4.05e-05, "loss": 4.3882, "norm_diff": 0.0194, "norm_loss": 0.0, "num_token_doc": 66.5989, "num_token_overlap": 11.6885, "num_token_query": 31.9772, "num_token_union": 65.2797, "num_word_context": 202.3846, "num_word_doc": 49.7041, "num_word_query": 23.634, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1949.619, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1565, "query_norm": 1.3242, "queue_k_norm": 1.3378, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9772, "sent_len_1": 66.5989, "sent_len_max_0": 127.5738, "sent_len_max_1": 189.1325, "stdk": 0.0462, "stdq": 0.0424, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 27100 }, { "accuracy": 36.7188, "active_queue_size": 16384.0, "cl_loss": 4.4199, "doc_norm": 1.3356, "encoder_q-embeddings": 1711.8378, "encoder_q-layer.0": 1241.666, "encoder_q-layer.1": 1353.6927, "encoder_q-layer.10": 332.9179, "encoder_q-layer.11": 909.3831, "encoder_q-layer.2": 1340.4159, "encoder_q-layer.3": 1472.0945, "encoder_q-layer.4": 1463.7513, "encoder_q-layer.5": 1300.5669, "encoder_q-layer.6": 1418.8119, "encoder_q-layer.7": 1251.2587, "encoder_q-layer.8": 1074.2295, "encoder_q-layer.9": 547.451, "epoch": 0.18, "inbatch_neg_score": 0.1513, "inbatch_pos_score": 0.6143, "learning_rate": 4.0444444444444444e-05, "loss": 4.4199, "norm_diff": 0.0325, "norm_loss": 0.0, "num_token_doc": 66.6882, "num_token_overlap": 11.6624, "num_token_query": 31.8424, "num_token_union": 65.2233, "num_word_context": 202.0983, "num_word_doc": 49.7531, "num_word_query": 23.5002, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1868.5054, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1511, "query_norm": 1.3032, "queue_k_norm": 1.3386, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8424, "sent_len_1": 66.6882, "sent_len_max_0": 127.4875, "sent_len_max_1": 189.9688, "stdk": 0.0462, "stdq": 0.0413, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 27200 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.4186, "doc_norm": 1.329, "encoder_q-embeddings": 3057.7192, "encoder_q-layer.0": 2180.6482, "encoder_q-layer.1": 2550.042, "encoder_q-layer.10": 325.2839, "encoder_q-layer.11": 907.2687, "encoder_q-layer.2": 2811.001, "encoder_q-layer.3": 2464.9373, "encoder_q-layer.4": 2165.3123, "encoder_q-layer.5": 1808.9182, "encoder_q-layer.6": 1674.4554, "encoder_q-layer.7": 1464.2747, "encoder_q-layer.8": 1228.3735, "encoder_q-layer.9": 630.2496, "epoch": 0.18, "inbatch_neg_score": 0.1556, "inbatch_pos_score": 0.647, "learning_rate": 4.038888888888889e-05, "loss": 4.4186, "norm_diff": 0.0298, "norm_loss": 0.0, "num_token_doc": 66.8031, "num_token_overlap": 11.7186, "num_token_query": 32.0323, "num_token_union": 65.4342, "num_word_context": 202.6596, "num_word_doc": 49.8344, "num_word_query": 23.6477, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3053.4432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1538, "query_norm": 1.3036, "queue_k_norm": 1.3376, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0323, "sent_len_1": 66.8031, "sent_len_max_0": 127.5025, "sent_len_max_1": 189.75, "stdk": 0.0459, "stdq": 0.0419, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 27300 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.3854, "doc_norm": 1.3337, "encoder_q-embeddings": 1021.5201, "encoder_q-layer.0": 754.2284, "encoder_q-layer.1": 800.7887, "encoder_q-layer.10": 331.4781, "encoder_q-layer.11": 927.2682, "encoder_q-layer.2": 912.9567, "encoder_q-layer.3": 970.8771, "encoder_q-layer.4": 1038.3049, "encoder_q-layer.5": 1127.6047, "encoder_q-layer.6": 1121.9611, "encoder_q-layer.7": 898.9802, "encoder_q-layer.8": 650.2173, "encoder_q-layer.9": 373.9082, "epoch": 0.18, "inbatch_neg_score": 0.1566, "inbatch_pos_score": 0.666, "learning_rate": 4.0333333333333336e-05, "loss": 4.3854, "norm_diff": 0.0144, "norm_loss": 0.0, "num_token_doc": 66.8271, "num_token_overlap": 11.6991, "num_token_query": 31.9505, "num_token_union": 65.3966, "num_word_context": 202.5993, "num_word_doc": 49.8669, "num_word_query": 23.5934, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1310.7538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1548, "query_norm": 1.321, "queue_k_norm": 1.3377, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9505, "sent_len_1": 66.8271, "sent_len_max_0": 127.5962, "sent_len_max_1": 189.88, "stdk": 0.0461, "stdq": 0.0421, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 27400 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.3784, "doc_norm": 1.3389, "encoder_q-embeddings": 1011.2205, "encoder_q-layer.0": 691.7206, "encoder_q-layer.1": 786.5356, "encoder_q-layer.10": 342.5161, "encoder_q-layer.11": 928.3193, "encoder_q-layer.2": 907.5623, "encoder_q-layer.3": 944.1732, "encoder_q-layer.4": 963.4959, "encoder_q-layer.5": 848.0593, "encoder_q-layer.6": 880.6261, "encoder_q-layer.7": 796.5034, "encoder_q-layer.8": 629.3749, "encoder_q-layer.9": 376.3977, "epoch": 0.18, "inbatch_neg_score": 0.1506, "inbatch_pos_score": 0.665, "learning_rate": 4.027777777777778e-05, "loss": 4.3784, "norm_diff": 0.0242, "norm_loss": 0.0, "num_token_doc": 66.9444, "num_token_overlap": 11.6886, "num_token_query": 31.9871, "num_token_union": 65.521, "num_word_context": 202.3498, "num_word_doc": 49.9454, "num_word_query": 23.6259, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1214.9535, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1506, "query_norm": 1.3147, "queue_k_norm": 1.3408, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9871, "sent_len_1": 66.9444, "sent_len_max_0": 127.4975, "sent_len_max_1": 189.4938, "stdk": 0.0463, "stdq": 0.042, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 27500 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.3929, "doc_norm": 1.3412, "encoder_q-embeddings": 1781.6672, "encoder_q-layer.0": 1297.4077, "encoder_q-layer.1": 1475.9186, "encoder_q-layer.10": 407.9903, "encoder_q-layer.11": 901.5829, "encoder_q-layer.2": 1768.8212, "encoder_q-layer.3": 1676.3322, "encoder_q-layer.4": 1794.5889, "encoder_q-layer.5": 1533.7574, "encoder_q-layer.6": 1522.1824, "encoder_q-layer.7": 1234.1028, "encoder_q-layer.8": 1131.798, "encoder_q-layer.9": 608.5446, "epoch": 0.18, "inbatch_neg_score": 0.14, "inbatch_pos_score": 0.6235, "learning_rate": 4.022222222222222e-05, "loss": 4.3929, "norm_diff": 0.0436, "norm_loss": 0.0, "num_token_doc": 67.0161, "num_token_overlap": 11.7129, "num_token_query": 31.9133, "num_token_union": 65.4898, "num_word_context": 202.3268, "num_word_doc": 50.0674, "num_word_query": 23.5971, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2100.0936, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1395, "query_norm": 1.2976, "queue_k_norm": 1.3389, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9133, "sent_len_1": 67.0161, "sent_len_max_0": 127.5037, "sent_len_max_1": 186.9013, "stdk": 0.0463, "stdq": 0.0417, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 27600 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.4075, "doc_norm": 1.3443, "encoder_q-embeddings": 1004.6859, "encoder_q-layer.0": 695.6033, "encoder_q-layer.1": 781.9873, "encoder_q-layer.10": 349.9341, "encoder_q-layer.11": 915.4876, "encoder_q-layer.2": 894.0247, "encoder_q-layer.3": 947.0562, "encoder_q-layer.4": 909.1183, "encoder_q-layer.5": 1013.8792, "encoder_q-layer.6": 1131.8412, "encoder_q-layer.7": 1242.6438, "encoder_q-layer.8": 891.3198, "encoder_q-layer.9": 547.1475, "epoch": 0.18, "inbatch_neg_score": 0.1308, "inbatch_pos_score": 0.6396, "learning_rate": 4.016666666666667e-05, "loss": 4.4075, "norm_diff": 0.0102, "norm_loss": 0.0, "num_token_doc": 66.8542, "num_token_overlap": 11.6952, "num_token_query": 31.9243, "num_token_union": 65.4183, "num_word_context": 202.198, "num_word_doc": 49.8921, "num_word_query": 23.5731, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1336.789, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1299, "query_norm": 1.3453, "queue_k_norm": 1.3383, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9243, "sent_len_1": 66.8542, "sent_len_max_0": 127.6925, "sent_len_max_1": 189.0513, "stdk": 0.0464, "stdq": 0.0434, "stdqueue_k": 0.0463, "stdqueue_q": 0.0, "step": 27700 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.3621, "doc_norm": 1.3412, "encoder_q-embeddings": 3538.1514, "encoder_q-layer.0": 2572.5115, "encoder_q-layer.1": 2640.4302, "encoder_q-layer.10": 660.1265, "encoder_q-layer.11": 1758.9948, "encoder_q-layer.2": 2776.2542, "encoder_q-layer.3": 2623.4226, "encoder_q-layer.4": 2281.8799, "encoder_q-layer.5": 2054.8662, "encoder_q-layer.6": 2065.0508, "encoder_q-layer.7": 1787.1665, "encoder_q-layer.8": 1363.9657, "encoder_q-layer.9": 836.1664, "epoch": 0.18, "inbatch_neg_score": 0.1311, "inbatch_pos_score": 0.6201, "learning_rate": 4.011111111111111e-05, "loss": 4.3621, "norm_diff": 0.019, "norm_loss": 0.0, "num_token_doc": 67.1377, "num_token_overlap": 11.7907, "num_token_query": 32.1634, "num_token_union": 65.6326, "num_word_context": 202.6066, "num_word_doc": 50.0181, "num_word_query": 23.7512, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3414.3215, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1311, "query_norm": 1.3237, "queue_k_norm": 1.3397, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.1634, "sent_len_1": 67.1377, "sent_len_max_0": 127.6137, "sent_len_max_1": 193.0462, "stdk": 0.0463, "stdq": 0.0425, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 27800 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.3935, "doc_norm": 1.3369, "encoder_q-embeddings": 3258.4446, "encoder_q-layer.0": 2219.3035, "encoder_q-layer.1": 2345.9536, "encoder_q-layer.10": 672.4504, "encoder_q-layer.11": 1774.3005, "encoder_q-layer.2": 2064.6409, "encoder_q-layer.3": 1998.546, "encoder_q-layer.4": 2002.7803, "encoder_q-layer.5": 2147.5105, "encoder_q-layer.6": 2068.2773, "encoder_q-layer.7": 1534.4431, "encoder_q-layer.8": 1299.5177, "encoder_q-layer.9": 745.899, "epoch": 0.18, "inbatch_neg_score": 0.1295, "inbatch_pos_score": 0.6323, "learning_rate": 4.0055555555555554e-05, "loss": 4.3935, "norm_diff": 0.0247, "norm_loss": 0.0, "num_token_doc": 66.7819, "num_token_overlap": 11.6995, "num_token_query": 31.9653, "num_token_union": 65.3621, "num_word_context": 202.5595, "num_word_doc": 49.7969, "num_word_query": 23.6266, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3062.5263, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1302, "query_norm": 1.3216, "queue_k_norm": 1.3393, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9653, "sent_len_1": 66.7819, "sent_len_max_0": 127.5763, "sent_len_max_1": 191.495, "stdk": 0.0462, "stdq": 0.0425, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 27900 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.4042, "doc_norm": 1.3442, "encoder_q-embeddings": 2140.4912, "encoder_q-layer.0": 1647.5331, "encoder_q-layer.1": 2128.3069, "encoder_q-layer.10": 699.4918, "encoder_q-layer.11": 1839.4602, "encoder_q-layer.2": 1632.9279, "encoder_q-layer.3": 824.2332, "encoder_q-layer.4": 810.1917, "encoder_q-layer.5": 694.8981, "encoder_q-layer.6": 747.1025, "encoder_q-layer.7": 717.2946, "encoder_q-layer.8": 754.9637, "encoder_q-layer.9": 631.559, "epoch": 0.18, "inbatch_neg_score": 0.1265, "inbatch_pos_score": 0.6455, "learning_rate": 4e-05, "loss": 4.4042, "norm_diff": 0.0104, "norm_loss": 0.0, "num_token_doc": 66.6883, "num_token_overlap": 11.6995, "num_token_query": 31.9568, "num_token_union": 65.2947, "num_word_context": 202.0766, "num_word_doc": 49.7562, "num_word_query": 23.5881, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2078.3034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1268, "query_norm": 1.3371, "queue_k_norm": 1.3377, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9568, "sent_len_1": 66.6883, "sent_len_max_0": 127.4387, "sent_len_max_1": 190.14, "stdk": 0.0465, "stdq": 0.043, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 28000 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.3805, "doc_norm": 1.3394, "encoder_q-embeddings": 2224.0864, "encoder_q-layer.0": 1594.5963, "encoder_q-layer.1": 1793.3259, "encoder_q-layer.10": 657.6335, "encoder_q-layer.11": 1803.0721, "encoder_q-layer.2": 1789.3933, "encoder_q-layer.3": 1615.6012, "encoder_q-layer.4": 1532.1907, "encoder_q-layer.5": 1479.3481, "encoder_q-layer.6": 1479.1422, "encoder_q-layer.7": 1304.25, "encoder_q-layer.8": 1137.515, "encoder_q-layer.9": 717.2214, "epoch": 0.18, "inbatch_neg_score": 0.1419, "inbatch_pos_score": 0.6318, "learning_rate": 3.9944444444444446e-05, "loss": 4.3805, "norm_diff": 0.0489, "norm_loss": 0.0, "num_token_doc": 66.5739, "num_token_overlap": 11.7152, "num_token_query": 31.8966, "num_token_union": 65.2282, "num_word_context": 202.1474, "num_word_doc": 49.6574, "num_word_query": 23.5491, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2346.883, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.141, "query_norm": 1.2905, "queue_k_norm": 1.3394, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8966, "sent_len_1": 66.5739, "sent_len_max_0": 127.43, "sent_len_max_1": 189.7537, "stdk": 0.0463, "stdq": 0.0418, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 28100 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.3735, "doc_norm": 1.3479, "encoder_q-embeddings": 2632.1814, "encoder_q-layer.0": 2041.7579, "encoder_q-layer.1": 2220.8232, "encoder_q-layer.10": 848.2041, "encoder_q-layer.11": 1928.4536, "encoder_q-layer.2": 2231.0317, "encoder_q-layer.3": 2184.5222, "encoder_q-layer.4": 2179.3142, "encoder_q-layer.5": 1872.2996, "encoder_q-layer.6": 1883.3668, "encoder_q-layer.7": 1588.5568, "encoder_q-layer.8": 1488.6991, "encoder_q-layer.9": 952.0801, "epoch": 0.18, "inbatch_neg_score": 0.153, "inbatch_pos_score": 0.6934, "learning_rate": 3.9888888888888895e-05, "loss": 4.3735, "norm_diff": 0.0113, "norm_loss": 0.0, "num_token_doc": 66.7807, "num_token_overlap": 11.6666, "num_token_query": 31.966, "num_token_union": 65.422, "num_word_context": 202.3453, "num_word_doc": 49.8367, "num_word_query": 23.6041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2888.1305, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1542, "query_norm": 1.3508, "queue_k_norm": 1.3407, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.966, "sent_len_1": 66.7807, "sent_len_max_0": 127.6713, "sent_len_max_1": 189.1525, "stdk": 0.0467, "stdq": 0.0438, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 28200 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.3868, "doc_norm": 1.3378, "encoder_q-embeddings": 3865.8564, "encoder_q-layer.0": 2798.1143, "encoder_q-layer.1": 2787.9641, "encoder_q-layer.10": 634.3708, "encoder_q-layer.11": 1755.3759, "encoder_q-layer.2": 2843.5378, "encoder_q-layer.3": 2875.7629, "encoder_q-layer.4": 2701.2385, "encoder_q-layer.5": 3292.0183, "encoder_q-layer.6": 3254.9949, "encoder_q-layer.7": 3146.9055, "encoder_q-layer.8": 2501.5486, "encoder_q-layer.9": 998.0812, "epoch": 0.18, "inbatch_neg_score": 0.1487, "inbatch_pos_score": 0.6382, "learning_rate": 3.983333333333333e-05, "loss": 4.3868, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 66.559, "num_token_overlap": 11.6036, "num_token_query": 31.6953, "num_token_union": 65.1991, "num_word_context": 201.91, "num_word_doc": 49.6571, "num_word_query": 23.3966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4271.3747, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1494, "query_norm": 1.2893, "queue_k_norm": 1.3394, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.6953, "sent_len_1": 66.559, "sent_len_max_0": 127.5162, "sent_len_max_1": 188.68, "stdk": 0.0463, "stdq": 0.0416, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 28300 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.3799, "doc_norm": 1.3452, "encoder_q-embeddings": 1729.3525, "encoder_q-layer.0": 1161.1399, "encoder_q-layer.1": 1244.9822, "encoder_q-layer.10": 694.6596, "encoder_q-layer.11": 1852.2635, "encoder_q-layer.2": 1355.9033, "encoder_q-layer.3": 1455.2063, "encoder_q-layer.4": 1463.4965, "encoder_q-layer.5": 1452.9327, "encoder_q-layer.6": 1605.8245, "encoder_q-layer.7": 1508.3633, "encoder_q-layer.8": 1190.8663, "encoder_q-layer.9": 668.0327, "epoch": 0.18, "inbatch_neg_score": 0.1496, "inbatch_pos_score": 0.6597, "learning_rate": 3.977777777777778e-05, "loss": 4.3799, "norm_diff": 0.0477, "norm_loss": 0.0, "num_token_doc": 66.8899, "num_token_overlap": 11.6473, "num_token_query": 31.8425, "num_token_union": 65.478, "num_word_context": 202.4902, "num_word_doc": 49.9463, "num_word_query": 23.5383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2089.6853, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1494, "query_norm": 1.2975, "queue_k_norm": 1.3413, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8425, "sent_len_1": 66.8899, "sent_len_max_0": 127.5588, "sent_len_max_1": 186.3963, "stdk": 0.0465, "stdq": 0.0419, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 28400 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.3653, "doc_norm": 1.3395, "encoder_q-embeddings": 1195.6895, "encoder_q-layer.0": 829.6791, "encoder_q-layer.1": 866.2873, "encoder_q-layer.10": 752.5388, "encoder_q-layer.11": 1902.2313, "encoder_q-layer.2": 969.4139, "encoder_q-layer.3": 1012.0239, "encoder_q-layer.4": 1051.5668, "encoder_q-layer.5": 1102.9495, "encoder_q-layer.6": 1120.7484, "encoder_q-layer.7": 1028.3647, "encoder_q-layer.8": 994.9702, "encoder_q-layer.9": 686.4772, "epoch": 0.19, "inbatch_neg_score": 0.1522, "inbatch_pos_score": 0.6665, "learning_rate": 3.972222222222222e-05, "loss": 4.3653, "norm_diff": 0.0268, "norm_loss": 0.0, "num_token_doc": 66.6943, "num_token_overlap": 11.7074, "num_token_query": 31.9457, "num_token_union": 65.3615, "num_word_context": 202.2835, "num_word_doc": 49.7883, "num_word_query": 23.6019, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1651.3602, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1533, "query_norm": 1.3179, "queue_k_norm": 1.3405, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9457, "sent_len_1": 66.6943, "sent_len_max_0": 127.5375, "sent_len_max_1": 187.6037, "stdk": 0.0463, "stdq": 0.0426, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 28500 }, { "accuracy": 38.8672, "active_queue_size": 16384.0, "cl_loss": 4.3815, "doc_norm": 1.3424, "encoder_q-embeddings": 2104.4954, "encoder_q-layer.0": 1475.052, "encoder_q-layer.1": 1533.416, "encoder_q-layer.10": 675.6888, "encoder_q-layer.11": 1920.8069, "encoder_q-layer.2": 1528.597, "encoder_q-layer.3": 1596.2758, "encoder_q-layer.4": 1523.5361, "encoder_q-layer.5": 1393.9644, "encoder_q-layer.6": 1488.5923, "encoder_q-layer.7": 1326.9679, "encoder_q-layer.8": 1177.8007, "encoder_q-layer.9": 764.4667, "epoch": 0.19, "inbatch_neg_score": 0.155, "inbatch_pos_score": 0.627, "learning_rate": 3.966666666666667e-05, "loss": 4.3815, "norm_diff": 0.0659, "norm_loss": 0.0, "num_token_doc": 66.9135, "num_token_overlap": 11.674, "num_token_query": 31.8868, "num_token_union": 65.4378, "num_word_context": 202.3665, "num_word_doc": 49.9183, "num_word_query": 23.5532, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2299.2, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1549, "query_norm": 1.2765, "queue_k_norm": 1.3389, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8868, "sent_len_1": 66.9135, "sent_len_max_0": 127.6912, "sent_len_max_1": 190.3988, "stdk": 0.0464, "stdq": 0.041, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 28600 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.4137, "doc_norm": 1.3338, "encoder_q-embeddings": 1157.4071, "encoder_q-layer.0": 806.5478, "encoder_q-layer.1": 875.8633, "encoder_q-layer.10": 712.2332, "encoder_q-layer.11": 1839.962, "encoder_q-layer.2": 925.9893, "encoder_q-layer.3": 952.8656, "encoder_q-layer.4": 1069.7052, "encoder_q-layer.5": 879.5442, "encoder_q-layer.6": 918.9317, "encoder_q-layer.7": 889.8787, "encoder_q-layer.8": 788.882, "encoder_q-layer.9": 676.2667, "epoch": 0.19, "inbatch_neg_score": 0.1545, "inbatch_pos_score": 0.6616, "learning_rate": 3.961111111111111e-05, "loss": 4.4137, "norm_diff": 0.0226, "norm_loss": 0.0, "num_token_doc": 66.6098, "num_token_overlap": 11.6151, "num_token_query": 31.8908, "num_token_union": 65.286, "num_word_context": 202.2368, "num_word_doc": 49.6967, "num_word_query": 23.5586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1545.1844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1548, "query_norm": 1.3112, "queue_k_norm": 1.342, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8908, "sent_len_1": 66.6098, "sent_len_max_0": 127.4587, "sent_len_max_1": 188.1675, "stdk": 0.0461, "stdq": 0.0427, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 28700 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.3438, "doc_norm": 1.3457, "encoder_q-embeddings": 12754.2422, "encoder_q-layer.0": 8702.6191, "encoder_q-layer.1": 8711.3672, "encoder_q-layer.10": 687.1881, "encoder_q-layer.11": 1977.3516, "encoder_q-layer.2": 8329.3105, "encoder_q-layer.3": 7417.8042, "encoder_q-layer.4": 6022.2603, "encoder_q-layer.5": 5949.3535, "encoder_q-layer.6": 3582.8232, "encoder_q-layer.7": 1528.4475, "encoder_q-layer.8": 1224.6572, "encoder_q-layer.9": 728.1784, "epoch": 0.19, "inbatch_neg_score": 0.1523, "inbatch_pos_score": 0.6445, "learning_rate": 3.9555555555555556e-05, "loss": 4.3438, "norm_diff": 0.0644, "norm_loss": 0.0, "num_token_doc": 66.995, "num_token_overlap": 11.7264, "num_token_query": 31.9503, "num_token_union": 65.4735, "num_word_context": 202.6661, "num_word_doc": 50.0079, "num_word_query": 23.6057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10319.6765, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1521, "query_norm": 1.2813, "queue_k_norm": 1.3403, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9503, "sent_len_1": 66.995, "sent_len_max_0": 127.4887, "sent_len_max_1": 189.8825, "stdk": 0.0466, "stdq": 0.0417, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 28800 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.4051, "doc_norm": 1.3408, "encoder_q-embeddings": 1417.1908, "encoder_q-layer.0": 1014.2875, "encoder_q-layer.1": 1132.7146, "encoder_q-layer.10": 685.6538, "encoder_q-layer.11": 1834.5922, "encoder_q-layer.2": 1300.175, "encoder_q-layer.3": 1175.3038, "encoder_q-layer.4": 1106.3871, "encoder_q-layer.5": 992.738, "encoder_q-layer.6": 1131.3765, "encoder_q-layer.7": 1059.9161, "encoder_q-layer.8": 964.8831, "encoder_q-layer.9": 687.9575, "epoch": 0.19, "inbatch_neg_score": 0.152, "inbatch_pos_score": 0.6509, "learning_rate": 3.9500000000000005e-05, "loss": 4.4051, "norm_diff": 0.0528, "norm_loss": 0.0, "num_token_doc": 66.8121, "num_token_overlap": 11.6672, "num_token_query": 31.8373, "num_token_union": 65.2797, "num_word_context": 202.4612, "num_word_doc": 49.8325, "num_word_query": 23.4879, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1760.7621, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.15, "query_norm": 1.288, "queue_k_norm": 1.34, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8373, "sent_len_1": 66.8121, "sent_len_max_0": 127.6975, "sent_len_max_1": 190.8013, "stdk": 0.0463, "stdq": 0.0424, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 28900 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.3823, "doc_norm": 1.3468, "encoder_q-embeddings": 4178.835, "encoder_q-layer.0": 3228.261, "encoder_q-layer.1": 2896.7534, "encoder_q-layer.10": 719.4551, "encoder_q-layer.11": 1899.6455, "encoder_q-layer.2": 2130.6201, "encoder_q-layer.3": 1575.9351, "encoder_q-layer.4": 1582.5077, "encoder_q-layer.5": 1262.6382, "encoder_q-layer.6": 1311.1102, "encoder_q-layer.7": 1266.8567, "encoder_q-layer.8": 1112.146, "encoder_q-layer.9": 769.4945, "epoch": 0.19, "inbatch_neg_score": 0.1473, "inbatch_pos_score": 0.6465, "learning_rate": 3.944444444444445e-05, "loss": 4.3823, "norm_diff": 0.0532, "norm_loss": 0.0, "num_token_doc": 66.87, "num_token_overlap": 11.6777, "num_token_query": 31.8783, "num_token_union": 65.41, "num_word_context": 202.5046, "num_word_doc": 49.9053, "num_word_query": 23.5546, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3360.3627, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1479, "query_norm": 1.2935, "queue_k_norm": 1.3402, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8783, "sent_len_1": 66.87, "sent_len_max_0": 127.6775, "sent_len_max_1": 190.845, "stdk": 0.0466, "stdq": 0.0426, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29000 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.348, "doc_norm": 1.333, "encoder_q-embeddings": 3605.7842, "encoder_q-layer.0": 2557.5964, "encoder_q-layer.1": 2973.9922, "encoder_q-layer.10": 860.9509, "encoder_q-layer.11": 1891.3108, "encoder_q-layer.2": 3284.8306, "encoder_q-layer.3": 3266.5962, "encoder_q-layer.4": 2978.5344, "encoder_q-layer.5": 2573.6968, "encoder_q-layer.6": 2694.4751, "encoder_q-layer.7": 1815.8077, "encoder_q-layer.8": 1398.5469, "encoder_q-layer.9": 850.0291, "epoch": 0.19, "inbatch_neg_score": 0.1457, "inbatch_pos_score": 0.6372, "learning_rate": 3.938888888888889e-05, "loss": 4.348, "norm_diff": 0.061, "norm_loss": 0.0, "num_token_doc": 66.6901, "num_token_overlap": 11.6509, "num_token_query": 31.8289, "num_token_union": 65.2893, "num_word_context": 202.3747, "num_word_doc": 49.7869, "num_word_query": 23.5066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3880.9331, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1454, "query_norm": 1.272, "queue_k_norm": 1.3415, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8289, "sent_len_1": 66.6901, "sent_len_max_0": 127.555, "sent_len_max_1": 189.42, "stdk": 0.046, "stdq": 0.0414, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 29100 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.3538, "doc_norm": 1.3462, "encoder_q-embeddings": 2091.2158, "encoder_q-layer.0": 1555.2408, "encoder_q-layer.1": 1589.8142, "encoder_q-layer.10": 761.5371, "encoder_q-layer.11": 1847.3978, "encoder_q-layer.2": 1811.5739, "encoder_q-layer.3": 1947.8342, "encoder_q-layer.4": 1930.7916, "encoder_q-layer.5": 1999.0308, "encoder_q-layer.6": 1730.0466, "encoder_q-layer.7": 1052.5717, "encoder_q-layer.8": 860.316, "encoder_q-layer.9": 680.7625, "epoch": 0.19, "inbatch_neg_score": 0.1434, "inbatch_pos_score": 0.6328, "learning_rate": 3.933333333333333e-05, "loss": 4.3538, "norm_diff": 0.0603, "norm_loss": 0.0, "num_token_doc": 66.79, "num_token_overlap": 11.7461, "num_token_query": 32.0511, "num_token_union": 65.3481, "num_word_context": 202.2513, "num_word_doc": 49.8316, "num_word_query": 23.6658, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2424.3632, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1448, "query_norm": 1.2859, "queue_k_norm": 1.3407, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0511, "sent_len_1": 66.79, "sent_len_max_0": 127.6725, "sent_len_max_1": 189.36, "stdk": 0.0465, "stdq": 0.0421, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29200 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.3441, "doc_norm": 1.3362, "encoder_q-embeddings": 2218.0796, "encoder_q-layer.0": 1518.6478, "encoder_q-layer.1": 1576.9661, "encoder_q-layer.10": 686.3143, "encoder_q-layer.11": 1855.1178, "encoder_q-layer.2": 1594.8798, "encoder_q-layer.3": 1570.4543, "encoder_q-layer.4": 1774.084, "encoder_q-layer.5": 1599.6953, "encoder_q-layer.6": 1601.7615, "encoder_q-layer.7": 1255.948, "encoder_q-layer.8": 1059.3333, "encoder_q-layer.9": 693.0089, "epoch": 0.19, "inbatch_neg_score": 0.1425, "inbatch_pos_score": 0.6289, "learning_rate": 3.927777777777778e-05, "loss": 4.3441, "norm_diff": 0.0591, "norm_loss": 0.0, "num_token_doc": 66.5604, "num_token_overlap": 11.6433, "num_token_query": 31.8571, "num_token_union": 65.2378, "num_word_context": 202.2509, "num_word_doc": 49.6439, "num_word_query": 23.508, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2380.6483, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1409, "query_norm": 1.277, "queue_k_norm": 1.3405, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8571, "sent_len_1": 66.5604, "sent_len_max_0": 127.4788, "sent_len_max_1": 189.5762, "stdk": 0.0461, "stdq": 0.0416, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29300 }, { "accuracy": 40.5273, "active_queue_size": 16384.0, "cl_loss": 4.3654, "doc_norm": 1.3434, "encoder_q-embeddings": 2106.7878, "encoder_q-layer.0": 1332.5585, "encoder_q-layer.1": 1411.1621, "encoder_q-layer.10": 727.9183, "encoder_q-layer.11": 1907.0531, "encoder_q-layer.2": 1794.3864, "encoder_q-layer.3": 1845.803, "encoder_q-layer.4": 1806.6604, "encoder_q-layer.5": 1848.0883, "encoder_q-layer.6": 1239.9232, "encoder_q-layer.7": 817.3046, "encoder_q-layer.8": 800.9794, "encoder_q-layer.9": 657.958, "epoch": 0.19, "inbatch_neg_score": 0.1355, "inbatch_pos_score": 0.6304, "learning_rate": 3.922222222222223e-05, "loss": 4.3654, "norm_diff": 0.0683, "norm_loss": 0.0, "num_token_doc": 66.9183, "num_token_overlap": 11.6346, "num_token_query": 31.8484, "num_token_union": 65.4692, "num_word_context": 202.3997, "num_word_doc": 49.9278, "num_word_query": 23.5127, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2305.4551, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1349, "query_norm": 1.2751, "queue_k_norm": 1.3401, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8484, "sent_len_1": 66.9183, "sent_len_max_0": 127.5438, "sent_len_max_1": 189.6562, "stdk": 0.0464, "stdq": 0.042, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29400 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.3843, "doc_norm": 1.3365, "encoder_q-embeddings": 956.4939, "encoder_q-layer.0": 631.4055, "encoder_q-layer.1": 672.8179, "encoder_q-layer.10": 695.3057, "encoder_q-layer.11": 1872.7712, "encoder_q-layer.2": 717.8494, "encoder_q-layer.3": 766.9049, "encoder_q-layer.4": 757.9039, "encoder_q-layer.5": 731.2654, "encoder_q-layer.6": 712.2363, "encoder_q-layer.7": 648.2852, "encoder_q-layer.8": 686.2042, "encoder_q-layer.9": 613.3742, "epoch": 0.19, "inbatch_neg_score": 0.1337, "inbatch_pos_score": 0.6436, "learning_rate": 3.9166666666666665e-05, "loss": 4.3843, "norm_diff": 0.0639, "norm_loss": 0.0, "num_token_doc": 66.9305, "num_token_overlap": 11.6753, "num_token_query": 31.8758, "num_token_union": 65.4067, "num_word_context": 202.5577, "num_word_doc": 49.8858, "num_word_query": 23.5542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1368.4731, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1331, "query_norm": 1.2726, "queue_k_norm": 1.3381, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8758, "sent_len_1": 66.9305, "sent_len_max_0": 127.5387, "sent_len_max_1": 192.565, "stdk": 0.0462, "stdq": 0.0421, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29500 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.357, "doc_norm": 1.3407, "encoder_q-embeddings": 942.2646, "encoder_q-layer.0": 641.9293, "encoder_q-layer.1": 720.6721, "encoder_q-layer.10": 728.211, "encoder_q-layer.11": 1923.3727, "encoder_q-layer.2": 835.2994, "encoder_q-layer.3": 931.5044, "encoder_q-layer.4": 899.2582, "encoder_q-layer.5": 930.06, "encoder_q-layer.6": 901.3869, "encoder_q-layer.7": 829.3459, "encoder_q-layer.8": 809.1642, "encoder_q-layer.9": 681.9945, "epoch": 0.19, "inbatch_neg_score": 0.1291, "inbatch_pos_score": 0.6255, "learning_rate": 3.9111111111111115e-05, "loss": 4.357, "norm_diff": 0.0783, "norm_loss": 0.0, "num_token_doc": 66.8871, "num_token_overlap": 11.6731, "num_token_query": 31.9081, "num_token_union": 65.4426, "num_word_context": 202.492, "num_word_doc": 49.9158, "num_word_query": 23.5666, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1490.7512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1276, "query_norm": 1.2624, "queue_k_norm": 1.339, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9081, "sent_len_1": 66.8871, "sent_len_max_0": 127.355, "sent_len_max_1": 191.0175, "stdk": 0.0464, "stdq": 0.042, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29600 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.3374, "doc_norm": 1.3365, "encoder_q-embeddings": 1278.5983, "encoder_q-layer.0": 916.5861, "encoder_q-layer.1": 988.2568, "encoder_q-layer.10": 796.6678, "encoder_q-layer.11": 2053.8811, "encoder_q-layer.2": 1116.6012, "encoder_q-layer.3": 1220.3865, "encoder_q-layer.4": 1372.6426, "encoder_q-layer.5": 1045.5452, "encoder_q-layer.6": 897.1166, "encoder_q-layer.7": 777.7869, "encoder_q-layer.8": 799.7847, "encoder_q-layer.9": 681.4089, "epoch": 0.19, "inbatch_neg_score": 0.1248, "inbatch_pos_score": 0.6299, "learning_rate": 3.905555555555556e-05, "loss": 4.3374, "norm_diff": 0.0712, "norm_loss": 0.0, "num_token_doc": 66.7807, "num_token_overlap": 11.7854, "num_token_query": 32.1929, "num_token_union": 65.426, "num_word_context": 202.6253, "num_word_doc": 49.8806, "num_word_query": 23.7978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1728.0351, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1242, "query_norm": 1.2653, "queue_k_norm": 1.3364, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.1929, "sent_len_1": 66.7807, "sent_len_max_0": 127.6213, "sent_len_max_1": 188.5925, "stdk": 0.0462, "stdq": 0.0423, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29700 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.376, "doc_norm": 1.3392, "encoder_q-embeddings": 2638.9895, "encoder_q-layer.0": 1901.5348, "encoder_q-layer.1": 1994.526, "encoder_q-layer.10": 1403.5432, "encoder_q-layer.11": 3604.1426, "encoder_q-layer.2": 2203.5969, "encoder_q-layer.3": 2380.1201, "encoder_q-layer.4": 2377.3928, "encoder_q-layer.5": 2425.2634, "encoder_q-layer.6": 2342.6907, "encoder_q-layer.7": 2302.0427, "encoder_q-layer.8": 2002.934, "encoder_q-layer.9": 1265.6093, "epoch": 0.19, "inbatch_neg_score": 0.1197, "inbatch_pos_score": 0.6035, "learning_rate": 3.9000000000000006e-05, "loss": 4.376, "norm_diff": 0.0873, "norm_loss": 0.0, "num_token_doc": 66.6523, "num_token_overlap": 11.6386, "num_token_query": 31.8099, "num_token_union": 65.2694, "num_word_context": 202.0317, "num_word_doc": 49.72, "num_word_query": 23.4845, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3490.8014, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1205, "query_norm": 1.2519, "queue_k_norm": 1.3372, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8099, "sent_len_1": 66.6523, "sent_len_max_0": 127.4712, "sent_len_max_1": 192.065, "stdk": 0.0465, "stdq": 0.0417, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29800 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.3439, "doc_norm": 1.3334, "encoder_q-embeddings": 4858.8311, "encoder_q-layer.0": 3643.8696, "encoder_q-layer.1": 3913.0774, "encoder_q-layer.10": 1362.8779, "encoder_q-layer.11": 3527.156, "encoder_q-layer.2": 4658.4707, "encoder_q-layer.3": 4965.6182, "encoder_q-layer.4": 5043.729, "encoder_q-layer.5": 4300.3975, "encoder_q-layer.6": 4860.8027, "encoder_q-layer.7": 3350.5173, "encoder_q-layer.8": 2651.3467, "encoder_q-layer.9": 1560.9109, "epoch": 0.19, "inbatch_neg_score": 0.1187, "inbatch_pos_score": 0.6094, "learning_rate": 3.894444444444444e-05, "loss": 4.3439, "norm_diff": 0.0827, "norm_loss": 0.0, "num_token_doc": 66.8323, "num_token_overlap": 11.6841, "num_token_query": 31.9144, "num_token_union": 65.3961, "num_word_context": 202.4212, "num_word_doc": 49.894, "num_word_query": 23.5752, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5875.4738, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1187, "query_norm": 1.2507, "queue_k_norm": 1.3349, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9144, "sent_len_1": 66.8323, "sent_len_max_0": 127.4237, "sent_len_max_1": 189.6488, "stdk": 0.0462, "stdq": 0.0416, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 29900 }, { "accuracy": 40.1367, "active_queue_size": 16384.0, "cl_loss": 4.3413, "doc_norm": 1.333, "encoder_q-embeddings": 6573.1011, "encoder_q-layer.0": 5259.4062, "encoder_q-layer.1": 5506.5747, "encoder_q-layer.10": 1423.3726, "encoder_q-layer.11": 3933.6787, "encoder_q-layer.2": 5023.4019, "encoder_q-layer.3": 4618.9512, "encoder_q-layer.4": 4945.9712, "encoder_q-layer.5": 5280.5181, "encoder_q-layer.6": 5650.0122, "encoder_q-layer.7": 4351.9365, "encoder_q-layer.8": 2626.2856, "encoder_q-layer.9": 1518.8296, "epoch": 0.2, "inbatch_neg_score": 0.13, "inbatch_pos_score": 0.6191, "learning_rate": 3.888888888888889e-05, "loss": 4.3413, "norm_diff": 0.0626, "norm_loss": 0.0, "num_token_doc": 66.8749, "num_token_overlap": 11.6691, "num_token_query": 31.8914, "num_token_union": 65.4206, "num_word_context": 202.4554, "num_word_doc": 49.9071, "num_word_query": 23.5418, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7077.0596, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1284, "query_norm": 1.2704, "queue_k_norm": 1.3351, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8914, "sent_len_1": 66.8749, "sent_len_max_0": 127.3875, "sent_len_max_1": 188.6362, "stdk": 0.0462, "stdq": 0.0419, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 30000 }, { "dev_runtime": 44.9544, "dev_samples_per_second": 1.424, "dev_steps_per_second": 0.022, "epoch": 0.2, "step": 30000, "test_accuracy": 92.08984375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.47372889518737793, "test_doc_norm": 1.2958104610443115, "test_inbatch_neg_score": 0.43992698192596436, "test_inbatch_pos_score": 1.2811636924743652, "test_loss": 0.47372889518737793, "test_loss_align": 1.108154296875, "test_loss_unif": 3.9510884284973145, "test_loss_unif_q@queue": 3.9510886669158936, "test_norm_diff": 0.1030317023396492, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.12065408378839493, "test_query_norm": 1.3988420963287354, "test_queue_k_norm": 1.335240364074707, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03963874280452728, "test_stdq": 0.0416945144534111, "test_stdqueue_k": 0.04643170163035393, "test_stdqueue_q": 0.0 }, { "dev_runtime": 44.9544, "dev_samples_per_second": 1.424, "dev_steps_per_second": 0.022, "epoch": 0.2, "eval_beir-arguana_ndcg@10": 0.32026, "eval_beir-arguana_recall@10": 0.53414, "eval_beir-arguana_recall@100": 0.81366, "eval_beir-arguana_recall@20": 0.63158, "eval_beir-avg_ndcg@10": 0.34665191666666667, "eval_beir-avg_recall@10": 0.41059583333333327, "eval_beir-avg_recall@100": 0.5860673333333333, "eval_beir-avg_recall@20": 0.46442275000000005, "eval_beir-cqadupstack_ndcg@10": 0.21332916666666668, "eval_beir-cqadupstack_recall@10": 0.3016183333333333, "eval_beir-cqadupstack_recall@100": 0.5221233333333333, "eval_beir-cqadupstack_recall@20": 0.3657575, "eval_beir-fiqa_ndcg@10": 0.20668, "eval_beir-fiqa_recall@10": 0.25864, "eval_beir-fiqa_recall@100": 0.50176, "eval_beir-fiqa_recall@20": 0.32109, "eval_beir-nfcorpus_ndcg@10": 0.26871, "eval_beir-nfcorpus_recall@10": 0.12944, "eval_beir-nfcorpus_recall@100": 0.26954, "eval_beir-nfcorpus_recall@20": 0.16721, "eval_beir-nq_ndcg@10": 0.22713, "eval_beir-nq_recall@10": 0.3835, "eval_beir-nq_recall@100": 0.72598, "eval_beir-nq_recall@20": 0.49322, "eval_beir-quora_ndcg@10": 0.74675, "eval_beir-quora_recall@10": 0.86078, "eval_beir-quora_recall@100": 0.9682, "eval_beir-quora_recall@20": 0.90902, "eval_beir-scidocs_ndcg@10": 0.13106, "eval_beir-scidocs_recall@10": 0.14018, "eval_beir-scidocs_recall@100": 0.33527, "eval_beir-scidocs_recall@20": 0.19118, "eval_beir-scifact_ndcg@10": 0.59531, "eval_beir-scifact_recall@10": 0.7615, "eval_beir-scifact_recall@100": 0.90089, "eval_beir-scifact_recall@20": 0.82844, "eval_beir-trec-covid_ndcg@10": 0.54135, "eval_beir-trec-covid_recall@10": 0.6, "eval_beir-trec-covid_recall@100": 0.3954, "eval_beir-trec-covid_recall@20": 0.535, "eval_beir-webis-touche2020_ndcg@10": 0.21594, "eval_beir-webis-touche2020_recall@10": 0.13616, "eval_beir-webis-touche2020_recall@100": 0.42785, "eval_beir-webis-touche2020_recall@20": 0.20173, "eval_senteval-avg_sts": 0.7519988091142531, "eval_senteval-sickr_spearman": 0.7159581782974043, "eval_senteval-stsb_spearman": 0.7880394399311018, "step": 30000, "test_accuracy": 92.08984375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.47372889518737793, "test_doc_norm": 1.2958104610443115, "test_inbatch_neg_score": 0.43992698192596436, "test_inbatch_pos_score": 1.2811636924743652, "test_loss": 0.47372889518737793, "test_loss_align": 1.108154296875, "test_loss_unif": 3.9510884284973145, "test_loss_unif_q@queue": 3.9510886669158936, "test_norm_diff": 0.1030317023396492, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.12065408378839493, "test_query_norm": 1.3988420963287354, "test_queue_k_norm": 1.335240364074707, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.03963874280452728, "test_stdq": 0.0416945144534111, "test_stdqueue_k": 0.04643170163035393, "test_stdqueue_q": 0.0 }, { "accuracy": 41.4062, "active_queue_size": 16384.0, "cl_loss": 4.3326, "doc_norm": 1.3365, "encoder_q-embeddings": 2335.6233, "encoder_q-layer.0": 1579.3923, "encoder_q-layer.1": 1770.4689, "encoder_q-layer.10": 1508.2626, "encoder_q-layer.11": 3636.0649, "encoder_q-layer.2": 2053.4934, "encoder_q-layer.3": 2123.0515, "encoder_q-layer.4": 2134.7856, "encoder_q-layer.5": 2247.6182, "encoder_q-layer.6": 2190.7417, "encoder_q-layer.7": 1988.1372, "encoder_q-layer.8": 1795.2501, "encoder_q-layer.9": 1381.1296, "epoch": 0.2, "inbatch_neg_score": 0.1294, "inbatch_pos_score": 0.6416, "learning_rate": 3.883333333333333e-05, "loss": 4.3326, "norm_diff": 0.04, "norm_loss": 0.0, "num_token_doc": 66.6679, "num_token_overlap": 11.6862, "num_token_query": 32.0652, "num_token_union": 65.4013, "num_word_context": 202.3693, "num_word_doc": 49.7641, "num_word_query": 23.7026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3244.4558, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1306, "query_norm": 1.2965, "queue_k_norm": 1.3337, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0652, "sent_len_1": 66.6679, "sent_len_max_0": 127.57, "sent_len_max_1": 189.0475, "stdk": 0.0464, "stdq": 0.043, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 30100 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.3299, "doc_norm": 1.3363, "encoder_q-embeddings": 2467.7522, "encoder_q-layer.0": 1665.8107, "encoder_q-layer.1": 1736.7939, "encoder_q-layer.10": 1281.5168, "encoder_q-layer.11": 3564.2185, "encoder_q-layer.2": 1880.65, "encoder_q-layer.3": 1977.1676, "encoder_q-layer.4": 2082.094, "encoder_q-layer.5": 1834.1207, "encoder_q-layer.6": 1844.5596, "encoder_q-layer.7": 1667.1036, "encoder_q-layer.8": 1534.9994, "encoder_q-layer.9": 1254.5507, "epoch": 0.2, "inbatch_neg_score": 0.1265, "inbatch_pos_score": 0.6113, "learning_rate": 3.877777777777778e-05, "loss": 4.3299, "norm_diff": 0.0702, "norm_loss": 0.0, "num_token_doc": 66.9757, "num_token_overlap": 11.7047, "num_token_query": 31.9801, "num_token_union": 65.4995, "num_word_context": 202.2945, "num_word_doc": 49.9748, "num_word_query": 23.6088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3095.8599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1261, "query_norm": 1.2662, "queue_k_norm": 1.3336, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9801, "sent_len_1": 66.9757, "sent_len_max_0": 127.445, "sent_len_max_1": 191.025, "stdk": 0.0464, "stdq": 0.0417, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 30200 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.3514, "doc_norm": 1.3305, "encoder_q-embeddings": 2288.875, "encoder_q-layer.0": 1568.1351, "encoder_q-layer.1": 1900.7074, "encoder_q-layer.10": 1323.2087, "encoder_q-layer.11": 3626.6174, "encoder_q-layer.2": 2088.3171, "encoder_q-layer.3": 2455.0637, "encoder_q-layer.4": 2695.7988, "encoder_q-layer.5": 2732.0483, "encoder_q-layer.6": 2531.3528, "encoder_q-layer.7": 1777.0863, "encoder_q-layer.8": 1443.0481, "encoder_q-layer.9": 1210.6598, "epoch": 0.2, "inbatch_neg_score": 0.117, "inbatch_pos_score": 0.6162, "learning_rate": 3.8722222222222225e-05, "loss": 4.3514, "norm_diff": 0.0447, "norm_loss": 0.0, "num_token_doc": 66.5813, "num_token_overlap": 11.6678, "num_token_query": 31.8734, "num_token_union": 65.2187, "num_word_context": 201.777, "num_word_doc": 49.6962, "num_word_query": 23.5234, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3416.1888, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.118, "query_norm": 1.2858, "queue_k_norm": 1.3327, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8734, "sent_len_1": 66.5813, "sent_len_max_0": 127.6388, "sent_len_max_1": 187.8088, "stdk": 0.0462, "stdq": 0.0425, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 30300 }, { "accuracy": 40.0391, "active_queue_size": 16384.0, "cl_loss": 4.3372, "doc_norm": 1.339, "encoder_q-embeddings": 3219.6704, "encoder_q-layer.0": 2532.2954, "encoder_q-layer.1": 2558.1372, "encoder_q-layer.10": 1422.83, "encoder_q-layer.11": 3848.364, "encoder_q-layer.2": 2206.375, "encoder_q-layer.3": 2145.5369, "encoder_q-layer.4": 1935.5586, "encoder_q-layer.5": 1745.7087, "encoder_q-layer.6": 1787.2465, "encoder_q-layer.7": 1615.5759, "encoder_q-layer.8": 1674.3385, "encoder_q-layer.9": 1334.1329, "epoch": 0.2, "inbatch_neg_score": 0.1256, "inbatch_pos_score": 0.6309, "learning_rate": 3.866666666666667e-05, "loss": 4.3372, "norm_diff": 0.0412, "norm_loss": 0.0, "num_token_doc": 66.8161, "num_token_overlap": 11.6957, "num_token_query": 31.8787, "num_token_union": 65.3813, "num_word_context": 202.2437, "num_word_doc": 49.8732, "num_word_query": 23.5562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3537.0612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1249, "query_norm": 1.2978, "queue_k_norm": 1.3339, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8787, "sent_len_1": 66.8161, "sent_len_max_0": 127.435, "sent_len_max_1": 187.8313, "stdk": 0.0465, "stdq": 0.0425, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 30400 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.3397, "doc_norm": 1.3303, "encoder_q-embeddings": 8095.668, "encoder_q-layer.0": 5779.3325, "encoder_q-layer.1": 6050.0156, "encoder_q-layer.10": 1420.2848, "encoder_q-layer.11": 3869.9358, "encoder_q-layer.2": 6821.4126, "encoder_q-layer.3": 6646.5332, "encoder_q-layer.4": 6871.3169, "encoder_q-layer.5": 6069.5176, "encoder_q-layer.6": 4325.6411, "encoder_q-layer.7": 3630.6736, "encoder_q-layer.8": 2977.8423, "encoder_q-layer.9": 1781.8966, "epoch": 0.2, "inbatch_neg_score": 0.1155, "inbatch_pos_score": 0.6099, "learning_rate": 3.8611111111111116e-05, "loss": 4.3397, "norm_diff": 0.0449, "norm_loss": 0.0, "num_token_doc": 66.7779, "num_token_overlap": 11.7126, "num_token_query": 31.9607, "num_token_union": 65.3388, "num_word_context": 202.396, "num_word_doc": 49.8352, "num_word_query": 23.5867, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8156.2745, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1156, "query_norm": 1.2854, "queue_k_norm": 1.3322, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9607, "sent_len_1": 66.7779, "sent_len_max_0": 127.3738, "sent_len_max_1": 189.5012, "stdk": 0.0463, "stdq": 0.0422, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 30500 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.3599, "doc_norm": 1.3316, "encoder_q-embeddings": 5641.8711, "encoder_q-layer.0": 4700.9282, "encoder_q-layer.1": 4477.8174, "encoder_q-layer.10": 707.8176, "encoder_q-layer.11": 1805.3177, "encoder_q-layer.2": 4834.9346, "encoder_q-layer.3": 4654.8472, "encoder_q-layer.4": 4139.5444, "encoder_q-layer.5": 3570.0481, "encoder_q-layer.6": 2633.761, "encoder_q-layer.7": 1276.4762, "encoder_q-layer.8": 1125.9763, "encoder_q-layer.9": 766.6653, "epoch": 0.2, "inbatch_neg_score": 0.1159, "inbatch_pos_score": 0.6162, "learning_rate": 3.855555555555556e-05, "loss": 4.3599, "norm_diff": 0.0484, "norm_loss": 0.0, "num_token_doc": 66.7125, "num_token_overlap": 11.6947, "num_token_query": 31.9816, "num_token_union": 65.3769, "num_word_context": 202.3806, "num_word_doc": 49.8204, "num_word_query": 23.6318, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5571.5078, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1147, "query_norm": 1.2831, "queue_k_norm": 1.333, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9816, "sent_len_1": 66.7125, "sent_len_max_0": 127.3375, "sent_len_max_1": 188.8663, "stdk": 0.0463, "stdq": 0.0423, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 30600 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.3369, "doc_norm": 1.3321, "encoder_q-embeddings": 1451.3625, "encoder_q-layer.0": 1029.0773, "encoder_q-layer.1": 1099.78, "encoder_q-layer.10": 677.5107, "encoder_q-layer.11": 1750.7194, "encoder_q-layer.2": 1257.7271, "encoder_q-layer.3": 1382.9585, "encoder_q-layer.4": 1534.6965, "encoder_q-layer.5": 1542.6747, "encoder_q-layer.6": 1604.5994, "encoder_q-layer.7": 1723.3654, "encoder_q-layer.8": 1353.4434, "encoder_q-layer.9": 685.7113, "epoch": 0.2, "inbatch_neg_score": 0.1124, "inbatch_pos_score": 0.624, "learning_rate": 3.85e-05, "loss": 4.3369, "norm_diff": 0.0344, "norm_loss": 0.0, "num_token_doc": 66.6437, "num_token_overlap": 11.6678, "num_token_query": 31.8809, "num_token_union": 65.2727, "num_word_context": 202.2123, "num_word_doc": 49.751, "num_word_query": 23.5402, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2055.7601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1129, "query_norm": 1.2977, "queue_k_norm": 1.3317, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8809, "sent_len_1": 66.6437, "sent_len_max_0": 127.5413, "sent_len_max_1": 187.78, "stdk": 0.0464, "stdq": 0.0428, "stdqueue_k": 0.0464, "stdqueue_q": 0.0, "step": 30700 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.3143, "doc_norm": 1.3348, "encoder_q-embeddings": 1136.1956, "encoder_q-layer.0": 771.3885, "encoder_q-layer.1": 835.2955, "encoder_q-layer.10": 718.4609, "encoder_q-layer.11": 1849.8877, "encoder_q-layer.2": 892.7875, "encoder_q-layer.3": 894.7737, "encoder_q-layer.4": 933.6226, "encoder_q-layer.5": 910.0377, "encoder_q-layer.6": 873.5419, "encoder_q-layer.7": 893.1044, "encoder_q-layer.8": 851.9902, "encoder_q-layer.9": 670.528, "epoch": 0.2, "inbatch_neg_score": 0.1165, "inbatch_pos_score": 0.6157, "learning_rate": 3.844444444444444e-05, "loss": 4.3143, "norm_diff": 0.063, "norm_loss": 0.0, "num_token_doc": 66.6134, "num_token_overlap": 11.7225, "num_token_query": 32.0448, "num_token_union": 65.2838, "num_word_context": 202.293, "num_word_doc": 49.7152, "num_word_query": 23.6815, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1523.1082, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1162, "query_norm": 1.2718, "queue_k_norm": 1.3318, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0448, "sent_len_1": 66.6134, "sent_len_max_0": 127.66, "sent_len_max_1": 191.215, "stdk": 0.0465, "stdq": 0.0421, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 30800 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.331, "doc_norm": 1.3318, "encoder_q-embeddings": 1534.7408, "encoder_q-layer.0": 1071.1259, "encoder_q-layer.1": 1130.7437, "encoder_q-layer.10": 707.1914, "encoder_q-layer.11": 1787.0166, "encoder_q-layer.2": 1266.144, "encoder_q-layer.3": 1348.5825, "encoder_q-layer.4": 1477.9913, "encoder_q-layer.5": 1490.4374, "encoder_q-layer.6": 1407.7238, "encoder_q-layer.7": 1160.4213, "encoder_q-layer.8": 1050.4124, "encoder_q-layer.9": 776.2769, "epoch": 0.2, "inbatch_neg_score": 0.1242, "inbatch_pos_score": 0.6221, "learning_rate": 3.838888888888889e-05, "loss": 4.331, "norm_diff": 0.0668, "norm_loss": 0.0, "num_token_doc": 66.7894, "num_token_overlap": 11.6711, "num_token_query": 31.9314, "num_token_union": 65.3941, "num_word_context": 202.6068, "num_word_doc": 49.8682, "num_word_query": 23.5786, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1962.6444, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1244, "query_norm": 1.265, "queue_k_norm": 1.335, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9314, "sent_len_1": 66.7894, "sent_len_max_0": 127.5088, "sent_len_max_1": 188.5888, "stdk": 0.0464, "stdq": 0.0417, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 30900 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.3153, "doc_norm": 1.3371, "encoder_q-embeddings": 871.3578, "encoder_q-layer.0": 586.0591, "encoder_q-layer.1": 608.413, "encoder_q-layer.10": 686.8463, "encoder_q-layer.11": 1831.3752, "encoder_q-layer.2": 645.4668, "encoder_q-layer.3": 649.4682, "encoder_q-layer.4": 686.6304, "encoder_q-layer.5": 663.1475, "encoder_q-layer.6": 684.3163, "encoder_q-layer.7": 669.1756, "encoder_q-layer.8": 758.0397, "encoder_q-layer.9": 609.6854, "epoch": 0.2, "inbatch_neg_score": 0.1177, "inbatch_pos_score": 0.6309, "learning_rate": 3.8333333333333334e-05, "loss": 4.3153, "norm_diff": 0.0771, "norm_loss": 0.0, "num_token_doc": 66.8936, "num_token_overlap": 11.7058, "num_token_query": 31.9486, "num_token_union": 65.4672, "num_word_context": 202.4652, "num_word_doc": 49.9225, "num_word_query": 23.6013, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1274.4332, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1176, "query_norm": 1.2601, "queue_k_norm": 1.3329, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9486, "sent_len_1": 66.8936, "sent_len_max_0": 127.5525, "sent_len_max_1": 189.0112, "stdk": 0.0466, "stdq": 0.0418, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 31000 }, { "accuracy": 40.625, "active_queue_size": 16384.0, "cl_loss": 4.3165, "doc_norm": 1.3334, "encoder_q-embeddings": 2026.1598, "encoder_q-layer.0": 1483.4099, "encoder_q-layer.1": 1649.3701, "encoder_q-layer.10": 751.8855, "encoder_q-layer.11": 1924.678, "encoder_q-layer.2": 1866.2863, "encoder_q-layer.3": 1777.9264, "encoder_q-layer.4": 1795.5558, "encoder_q-layer.5": 1732.2695, "encoder_q-layer.6": 1373.1392, "encoder_q-layer.7": 1192.7599, "encoder_q-layer.8": 970.6688, "encoder_q-layer.9": 749.1065, "epoch": 0.2, "inbatch_neg_score": 0.1176, "inbatch_pos_score": 0.6274, "learning_rate": 3.827777777777778e-05, "loss": 4.3165, "norm_diff": 0.0495, "norm_loss": 0.0, "num_token_doc": 66.6652, "num_token_overlap": 11.6608, "num_token_query": 31.911, "num_token_union": 65.2632, "num_word_context": 202.113, "num_word_doc": 49.7428, "num_word_query": 23.5518, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2381.0894, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1179, "query_norm": 1.2857, "queue_k_norm": 1.3322, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.911, "sent_len_1": 66.6652, "sent_len_max_0": 127.6125, "sent_len_max_1": 190.425, "stdk": 0.0465, "stdq": 0.0429, "stdqueue_k": 0.0465, "stdqueue_q": 0.0, "step": 31100 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.3115, "doc_norm": 1.3292, "encoder_q-embeddings": 929.4324, "encoder_q-layer.0": 657.0641, "encoder_q-layer.1": 690.0122, "encoder_q-layer.10": 663.1832, "encoder_q-layer.11": 1819.3683, "encoder_q-layer.2": 767.421, "encoder_q-layer.3": 829.5049, "encoder_q-layer.4": 830.8632, "encoder_q-layer.5": 875.2277, "encoder_q-layer.6": 848.5989, "encoder_q-layer.7": 731.1819, "encoder_q-layer.8": 837.671, "encoder_q-layer.9": 656.1633, "epoch": 0.2, "inbatch_neg_score": 0.1163, "inbatch_pos_score": 0.6196, "learning_rate": 3.8222222222222226e-05, "loss": 4.3115, "norm_diff": 0.0675, "norm_loss": 0.0, "num_token_doc": 66.9182, "num_token_overlap": 11.7411, "num_token_query": 32.1071, "num_token_union": 65.5337, "num_word_context": 202.3732, "num_word_doc": 49.9505, "num_word_query": 23.7338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1387.5291, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1183, "query_norm": 1.2617, "queue_k_norm": 1.3333, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.1071, "sent_len_1": 66.9182, "sent_len_max_0": 127.6213, "sent_len_max_1": 189.9975, "stdk": 0.0463, "stdq": 0.042, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 31200 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.3127, "doc_norm": 1.3282, "encoder_q-embeddings": 756.3219, "encoder_q-layer.0": 551.3809, "encoder_q-layer.1": 557.9938, "encoder_q-layer.10": 631.3623, "encoder_q-layer.11": 1668.6744, "encoder_q-layer.2": 612.0245, "encoder_q-layer.3": 661.2241, "encoder_q-layer.4": 703.3037, "encoder_q-layer.5": 677.9633, "encoder_q-layer.6": 690.337, "encoder_q-layer.7": 734.9005, "encoder_q-layer.8": 756.0764, "encoder_q-layer.9": 641.0982, "epoch": 0.2, "inbatch_neg_score": 0.1197, "inbatch_pos_score": 0.6045, "learning_rate": 3.816666666666667e-05, "loss": 4.3127, "norm_diff": 0.0809, "norm_loss": 0.0, "num_token_doc": 66.8191, "num_token_overlap": 11.6919, "num_token_query": 31.9363, "num_token_union": 65.3709, "num_word_context": 202.3144, "num_word_doc": 49.8076, "num_word_query": 23.5949, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1205.6021, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1183, "query_norm": 1.2473, "queue_k_norm": 1.3335, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9363, "sent_len_1": 66.8191, "sent_len_max_0": 127.3637, "sent_len_max_1": 191.5725, "stdk": 0.0462, "stdq": 0.0413, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 31300 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.2645, "doc_norm": 1.3283, "encoder_q-embeddings": 1681.2029, "encoder_q-layer.0": 1286.9619, "encoder_q-layer.1": 1430.7147, "encoder_q-layer.10": 705.1423, "encoder_q-layer.11": 1703.3474, "encoder_q-layer.2": 1533.3726, "encoder_q-layer.3": 1518.4938, "encoder_q-layer.4": 1584.3765, "encoder_q-layer.5": 1737.2421, "encoder_q-layer.6": 1643.251, "encoder_q-layer.7": 1291.0559, "encoder_q-layer.8": 1182.6388, "encoder_q-layer.9": 732.1802, "epoch": 0.2, "inbatch_neg_score": 0.1146, "inbatch_pos_score": 0.6123, "learning_rate": 3.811111111111112e-05, "loss": 4.2645, "norm_diff": 0.0641, "norm_loss": 0.0, "num_token_doc": 66.7876, "num_token_overlap": 11.715, "num_token_query": 31.8428, "num_token_union": 65.3066, "num_word_context": 202.2589, "num_word_doc": 49.8906, "num_word_query": 23.5318, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2140.3481, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1149, "query_norm": 1.2642, "queue_k_norm": 1.333, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8428, "sent_len_1": 66.7876, "sent_len_max_0": 127.5938, "sent_len_max_1": 188.6188, "stdk": 0.0463, "stdq": 0.0418, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 31400 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.3227, "doc_norm": 1.333, "encoder_q-embeddings": 2072.1038, "encoder_q-layer.0": 1711.4104, "encoder_q-layer.1": 1734.179, "encoder_q-layer.10": 701.1655, "encoder_q-layer.11": 1784.3293, "encoder_q-layer.2": 1944.8478, "encoder_q-layer.3": 1892.123, "encoder_q-layer.4": 1861.8524, "encoder_q-layer.5": 1957.3447, "encoder_q-layer.6": 1727.511, "encoder_q-layer.7": 1370.4705, "encoder_q-layer.8": 1127.5121, "encoder_q-layer.9": 769.8012, "epoch": 0.21, "inbatch_neg_score": 0.1158, "inbatch_pos_score": 0.6118, "learning_rate": 3.805555555555555e-05, "loss": 4.3227, "norm_diff": 0.0694, "norm_loss": 0.0, "num_token_doc": 66.8865, "num_token_overlap": 11.6333, "num_token_query": 31.8788, "num_token_union": 65.4254, "num_word_context": 202.6218, "num_word_doc": 49.8664, "num_word_query": 23.5313, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2460.2322, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1154, "query_norm": 1.2636, "queue_k_norm": 1.3346, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8788, "sent_len_1": 66.8865, "sent_len_max_0": 127.5075, "sent_len_max_1": 190.4837, "stdk": 0.0465, "stdq": 0.0415, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 31500 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.3083, "doc_norm": 1.3324, "encoder_q-embeddings": 1634.5221, "encoder_q-layer.0": 1142.6451, "encoder_q-layer.1": 1143.2268, "encoder_q-layer.10": 640.4377, "encoder_q-layer.11": 1655.7759, "encoder_q-layer.2": 1258.6354, "encoder_q-layer.3": 1241.7189, "encoder_q-layer.4": 1308.6718, "encoder_q-layer.5": 1080.6089, "encoder_q-layer.6": 947.5276, "encoder_q-layer.7": 822.199, "encoder_q-layer.8": 796.6402, "encoder_q-layer.9": 604.4239, "epoch": 0.21, "inbatch_neg_score": 0.1171, "inbatch_pos_score": 0.6235, "learning_rate": 3.8e-05, "loss": 4.3083, "norm_diff": 0.0431, "norm_loss": 0.0, "num_token_doc": 66.6664, "num_token_overlap": 11.6747, "num_token_query": 31.9142, "num_token_union": 65.351, "num_word_context": 202.3972, "num_word_doc": 49.8007, "num_word_query": 23.5795, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1782.5061, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1171, "query_norm": 1.2893, "queue_k_norm": 1.332, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9142, "sent_len_1": 66.6664, "sent_len_max_0": 127.3337, "sent_len_max_1": 187.6163, "stdk": 0.0465, "stdq": 0.0422, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 31600 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.3156, "doc_norm": 1.3356, "encoder_q-embeddings": 2851.7678, "encoder_q-layer.0": 2000.6367, "encoder_q-layer.1": 2250.4009, "encoder_q-layer.10": 686.1399, "encoder_q-layer.11": 1647.8558, "encoder_q-layer.2": 2584.519, "encoder_q-layer.3": 2644.355, "encoder_q-layer.4": 2481.0664, "encoder_q-layer.5": 2412.1982, "encoder_q-layer.6": 1853.4589, "encoder_q-layer.7": 1287.5648, "encoder_q-layer.8": 1036.3477, "encoder_q-layer.9": 705.9865, "epoch": 0.21, "inbatch_neg_score": 0.1183, "inbatch_pos_score": 0.6221, "learning_rate": 3.7944444444444444e-05, "loss": 4.3156, "norm_diff": 0.041, "norm_loss": 0.0, "num_token_doc": 66.6451, "num_token_overlap": 11.6382, "num_token_query": 31.8421, "num_token_union": 65.2484, "num_word_context": 202.1194, "num_word_doc": 49.7344, "num_word_query": 23.5347, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3089.3514, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1173, "query_norm": 1.2946, "queue_k_norm": 1.3339, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8421, "sent_len_1": 66.6451, "sent_len_max_0": 127.3375, "sent_len_max_1": 188.0737, "stdk": 0.0466, "stdq": 0.0423, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 31700 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.3134, "doc_norm": 1.3294, "encoder_q-embeddings": 723.5902, "encoder_q-layer.0": 473.1109, "encoder_q-layer.1": 496.5215, "encoder_q-layer.10": 697.9465, "encoder_q-layer.11": 1717.8182, "encoder_q-layer.2": 543.0474, "encoder_q-layer.3": 546.7551, "encoder_q-layer.4": 557.0995, "encoder_q-layer.5": 564.1658, "encoder_q-layer.6": 604.233, "encoder_q-layer.7": 618.5774, "encoder_q-layer.8": 749.989, "encoder_q-layer.9": 621.6603, "epoch": 0.21, "inbatch_neg_score": 0.1227, "inbatch_pos_score": 0.6235, "learning_rate": 3.7888888888888894e-05, "loss": 4.3134, "norm_diff": 0.0267, "norm_loss": 0.0, "num_token_doc": 66.8013, "num_token_overlap": 11.6787, "num_token_query": 31.9574, "num_token_union": 65.3983, "num_word_context": 202.4394, "num_word_doc": 49.8391, "num_word_query": 23.5935, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1143.2928, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1224, "query_norm": 1.3027, "queue_k_norm": 1.3342, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9574, "sent_len_1": 66.8013, "sent_len_max_0": 127.5662, "sent_len_max_1": 188.8762, "stdk": 0.0464, "stdq": 0.042, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 31800 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.3188, "doc_norm": 1.3397, "encoder_q-embeddings": 763.2213, "encoder_q-layer.0": 538.7764, "encoder_q-layer.1": 561.301, "encoder_q-layer.10": 750.046, "encoder_q-layer.11": 1797.3124, "encoder_q-layer.2": 618.2783, "encoder_q-layer.3": 629.0075, "encoder_q-layer.4": 633.731, "encoder_q-layer.5": 579.4846, "encoder_q-layer.6": 616.5648, "encoder_q-layer.7": 663.8322, "encoder_q-layer.8": 796.6367, "encoder_q-layer.9": 689.2385, "epoch": 0.21, "inbatch_neg_score": 0.1228, "inbatch_pos_score": 0.6343, "learning_rate": 3.7833333333333336e-05, "loss": 4.3188, "norm_diff": 0.0266, "norm_loss": 0.0, "num_token_doc": 66.6823, "num_token_overlap": 11.6327, "num_token_query": 31.9057, "num_token_union": 65.3237, "num_word_context": 202.5223, "num_word_doc": 49.7714, "num_word_query": 23.5733, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1201.7321, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1221, "query_norm": 1.3131, "queue_k_norm": 1.3337, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9057, "sent_len_1": 66.6823, "sent_len_max_0": 127.62, "sent_len_max_1": 190.41, "stdk": 0.0468, "stdq": 0.0429, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 31900 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.3117, "doc_norm": 1.3406, "encoder_q-embeddings": 1075.1019, "encoder_q-layer.0": 704.3367, "encoder_q-layer.1": 789.8727, "encoder_q-layer.10": 707.6517, "encoder_q-layer.11": 1735.3234, "encoder_q-layer.2": 782.139, "encoder_q-layer.3": 761.9372, "encoder_q-layer.4": 810.0781, "encoder_q-layer.5": 702.7669, "encoder_q-layer.6": 719.9804, "encoder_q-layer.7": 691.4351, "encoder_q-layer.8": 748.6674, "encoder_q-layer.9": 644.0598, "epoch": 0.21, "inbatch_neg_score": 0.1262, "inbatch_pos_score": 0.6265, "learning_rate": 3.777777777777778e-05, "loss": 4.3117, "norm_diff": 0.0701, "norm_loss": 0.0, "num_token_doc": 66.8056, "num_token_overlap": 11.6241, "num_token_query": 31.8556, "num_token_union": 65.3989, "num_word_context": 202.3542, "num_word_doc": 49.8573, "num_word_query": 23.53, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1351.4241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1254, "query_norm": 1.2705, "queue_k_norm": 1.3325, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8556, "sent_len_1": 66.8056, "sent_len_max_0": 127.455, "sent_len_max_1": 188.8663, "stdk": 0.0468, "stdq": 0.0417, "stdqueue_k": 0.0466, "stdqueue_q": 0.0, "step": 32000 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.2861, "doc_norm": 1.3415, "encoder_q-embeddings": 1372.8328, "encoder_q-layer.0": 968.4745, "encoder_q-layer.1": 1141.9293, "encoder_q-layer.10": 691.9738, "encoder_q-layer.11": 1703.3688, "encoder_q-layer.2": 1311.4827, "encoder_q-layer.3": 1483.2662, "encoder_q-layer.4": 1679.8036, "encoder_q-layer.5": 1359.9548, "encoder_q-layer.6": 1044.0122, "encoder_q-layer.7": 779.0826, "encoder_q-layer.8": 767.9491, "encoder_q-layer.9": 615.5536, "epoch": 0.21, "inbatch_neg_score": 0.1192, "inbatch_pos_score": 0.6196, "learning_rate": 3.772222222222223e-05, "loss": 4.2861, "norm_diff": 0.0639, "norm_loss": 0.0, "num_token_doc": 66.8062, "num_token_overlap": 11.6583, "num_token_query": 31.8751, "num_token_union": 65.3408, "num_word_context": 202.1631, "num_word_doc": 49.8668, "num_word_query": 23.5403, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1799.6407, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1183, "query_norm": 1.2776, "queue_k_norm": 1.3352, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8751, "sent_len_1": 66.8062, "sent_len_max_0": 127.3937, "sent_len_max_1": 189.4787, "stdk": 0.0468, "stdq": 0.0424, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 32100 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.2718, "doc_norm": 1.3297, "encoder_q-embeddings": 1244.8822, "encoder_q-layer.0": 840.6086, "encoder_q-layer.1": 908.6096, "encoder_q-layer.10": 692.7382, "encoder_q-layer.11": 1726.0164, "encoder_q-layer.2": 976.6132, "encoder_q-layer.3": 1052.4066, "encoder_q-layer.4": 1056.3345, "encoder_q-layer.5": 893.3962, "encoder_q-layer.6": 856.3376, "encoder_q-layer.7": 767.5609, "encoder_q-layer.8": 845.8845, "encoder_q-layer.9": 675.1525, "epoch": 0.21, "inbatch_neg_score": 0.1189, "inbatch_pos_score": 0.6235, "learning_rate": 3.766666666666667e-05, "loss": 4.2718, "norm_diff": 0.0649, "norm_loss": 0.0, "num_token_doc": 66.8868, "num_token_overlap": 11.7064, "num_token_query": 32.0179, "num_token_union": 65.4682, "num_word_context": 202.4973, "num_word_doc": 49.9084, "num_word_query": 23.6326, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1538.3112, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1171, "query_norm": 1.2648, "queue_k_norm": 1.3364, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0179, "sent_len_1": 66.8868, "sent_len_max_0": 127.5375, "sent_len_max_1": 188.9462, "stdk": 0.0464, "stdq": 0.042, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 32200 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.2854, "doc_norm": 1.3354, "encoder_q-embeddings": 1001.0406, "encoder_q-layer.0": 701.1011, "encoder_q-layer.1": 787.488, "encoder_q-layer.10": 633.1292, "encoder_q-layer.11": 1634.3745, "encoder_q-layer.2": 844.2122, "encoder_q-layer.3": 840.1972, "encoder_q-layer.4": 860.2222, "encoder_q-layer.5": 815.6446, "encoder_q-layer.6": 756.8572, "encoder_q-layer.7": 693.1887, "encoder_q-layer.8": 760.7075, "encoder_q-layer.9": 625.2637, "epoch": 0.21, "inbatch_neg_score": 0.1163, "inbatch_pos_score": 0.6377, "learning_rate": 3.761111111111111e-05, "loss": 4.2854, "norm_diff": 0.062, "norm_loss": 0.0, "num_token_doc": 66.7654, "num_token_overlap": 11.6171, "num_token_query": 31.8394, "num_token_union": 65.3182, "num_word_context": 202.4258, "num_word_doc": 49.7876, "num_word_query": 23.5003, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1336.1275, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1146, "query_norm": 1.2734, "queue_k_norm": 1.3346, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8394, "sent_len_1": 66.7654, "sent_len_max_0": 127.5613, "sent_len_max_1": 189.88, "stdk": 0.0466, "stdq": 0.0425, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 32300 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.287, "doc_norm": 1.3376, "encoder_q-embeddings": 1051.2074, "encoder_q-layer.0": 749.3475, "encoder_q-layer.1": 816.5165, "encoder_q-layer.10": 661.6453, "encoder_q-layer.11": 1660.6169, "encoder_q-layer.2": 930.6106, "encoder_q-layer.3": 922.3546, "encoder_q-layer.4": 893.4784, "encoder_q-layer.5": 827.8774, "encoder_q-layer.6": 783.5752, "encoder_q-layer.7": 791.0231, "encoder_q-layer.8": 787.9965, "encoder_q-layer.9": 619.4978, "epoch": 0.21, "inbatch_neg_score": 0.1099, "inbatch_pos_score": 0.6313, "learning_rate": 3.7555555555555554e-05, "loss": 4.287, "norm_diff": 0.0943, "norm_loss": 0.0, "num_token_doc": 67.0458, "num_token_overlap": 11.7283, "num_token_query": 32.0072, "num_token_union": 65.5014, "num_word_context": 202.7537, "num_word_doc": 49.9756, "num_word_query": 23.6271, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1393.7464, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1088, "query_norm": 1.2433, "queue_k_norm": 1.3347, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0072, "sent_len_1": 67.0458, "sent_len_max_0": 127.435, "sent_len_max_1": 191.39, "stdk": 0.0467, "stdq": 0.0416, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 32400 }, { "accuracy": 41.3086, "active_queue_size": 16384.0, "cl_loss": 4.2912, "doc_norm": 1.3409, "encoder_q-embeddings": 1072.4575, "encoder_q-layer.0": 716.2352, "encoder_q-layer.1": 734.5449, "encoder_q-layer.10": 715.1099, "encoder_q-layer.11": 1623.1581, "encoder_q-layer.2": 789.0363, "encoder_q-layer.3": 812.6609, "encoder_q-layer.4": 878.357, "encoder_q-layer.5": 849.8046, "encoder_q-layer.6": 799.0329, "encoder_q-layer.7": 756.4128, "encoder_q-layer.8": 797.8799, "encoder_q-layer.9": 688.3184, "epoch": 0.21, "inbatch_neg_score": 0.1062, "inbatch_pos_score": 0.6055, "learning_rate": 3.7500000000000003e-05, "loss": 4.2912, "norm_diff": 0.1107, "norm_loss": 0.0, "num_token_doc": 66.7492, "num_token_overlap": 11.6801, "num_token_query": 31.8866, "num_token_union": 65.3586, "num_word_context": 202.08, "num_word_doc": 49.8011, "num_word_query": 23.5453, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1360.3769, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1052, "query_norm": 1.2302, "queue_k_norm": 1.334, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8866, "sent_len_1": 66.7492, "sent_len_max_0": 127.525, "sent_len_max_1": 188.3338, "stdk": 0.0469, "stdq": 0.041, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 32500 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.2875, "doc_norm": 1.3339, "encoder_q-embeddings": 2228.6846, "encoder_q-layer.0": 1499.6902, "encoder_q-layer.1": 1665.897, "encoder_q-layer.10": 1261.5609, "encoder_q-layer.11": 3249.0029, "encoder_q-layer.2": 1794.209, "encoder_q-layer.3": 1745.4952, "encoder_q-layer.4": 1652.5621, "encoder_q-layer.5": 1558.0125, "encoder_q-layer.6": 1665.1761, "encoder_q-layer.7": 1571.4216, "encoder_q-layer.8": 1495.5828, "encoder_q-layer.9": 1187.5554, "epoch": 0.21, "inbatch_neg_score": 0.0981, "inbatch_pos_score": 0.6157, "learning_rate": 3.7444444444444446e-05, "loss": 4.2875, "norm_diff": 0.0725, "norm_loss": 0.0, "num_token_doc": 66.6402, "num_token_overlap": 11.6734, "num_token_query": 31.9344, "num_token_union": 65.2534, "num_word_context": 202.2777, "num_word_doc": 49.7392, "num_word_query": 23.5788, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2762.8289, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0988, "query_norm": 1.2614, "queue_k_norm": 1.333, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9344, "sent_len_1": 66.6402, "sent_len_max_0": 127.48, "sent_len_max_1": 190.435, "stdk": 0.0466, "stdq": 0.0421, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 32600 }, { "accuracy": 40.7227, "active_queue_size": 16384.0, "cl_loss": 4.2707, "doc_norm": 1.3338, "encoder_q-embeddings": 1651.7896, "encoder_q-layer.0": 1155.87, "encoder_q-layer.1": 1269.1899, "encoder_q-layer.10": 1361.5671, "encoder_q-layer.11": 3234.4607, "encoder_q-layer.2": 1394.7692, "encoder_q-layer.3": 1339.8047, "encoder_q-layer.4": 1329.384, "encoder_q-layer.5": 1337.3654, "encoder_q-layer.6": 1403.0426, "encoder_q-layer.7": 1394.1138, "encoder_q-layer.8": 1528.4071, "encoder_q-layer.9": 1273.2804, "epoch": 0.21, "inbatch_neg_score": 0.0898, "inbatch_pos_score": 0.5977, "learning_rate": 3.738888888888889e-05, "loss": 4.2707, "norm_diff": 0.0723, "norm_loss": 0.0, "num_token_doc": 66.627, "num_token_overlap": 11.6515, "num_token_query": 31.7201, "num_token_union": 65.1574, "num_word_context": 202.0793, "num_word_doc": 49.7325, "num_word_query": 23.4383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2391.9153, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0896, "query_norm": 1.2616, "queue_k_norm": 1.3327, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7201, "sent_len_1": 66.627, "sent_len_max_0": 127.5212, "sent_len_max_1": 189.3288, "stdk": 0.0466, "stdq": 0.0421, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 32700 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.2639, "doc_norm": 1.3292, "encoder_q-embeddings": 2210.2041, "encoder_q-layer.0": 1478.2314, "encoder_q-layer.1": 1737.6909, "encoder_q-layer.10": 1445.0017, "encoder_q-layer.11": 3322.2898, "encoder_q-layer.2": 1980.6154, "encoder_q-layer.3": 2026.7231, "encoder_q-layer.4": 2192.9624, "encoder_q-layer.5": 2113.2349, "encoder_q-layer.6": 2585.8811, "encoder_q-layer.7": 2247.5156, "encoder_q-layer.8": 2096.8687, "encoder_q-layer.9": 1301.7253, "epoch": 0.21, "inbatch_neg_score": 0.0899, "inbatch_pos_score": 0.5913, "learning_rate": 3.733333333333334e-05, "loss": 4.2639, "norm_diff": 0.0558, "norm_loss": 0.0, "num_token_doc": 66.795, "num_token_overlap": 11.6348, "num_token_query": 31.7604, "num_token_union": 65.3305, "num_word_context": 201.8723, "num_word_doc": 49.8552, "num_word_query": 23.4601, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3134.2293, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0908, "query_norm": 1.2734, "queue_k_norm": 1.3311, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7604, "sent_len_1": 66.795, "sent_len_max_0": 127.62, "sent_len_max_1": 190.7763, "stdk": 0.0465, "stdq": 0.0424, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 32800 }, { "accuracy": 41.1133, "active_queue_size": 16384.0, "cl_loss": 4.2803, "doc_norm": 1.3346, "encoder_q-embeddings": 1733.6942, "encoder_q-layer.0": 1199.9984, "encoder_q-layer.1": 1260.7001, "encoder_q-layer.10": 1317.6002, "encoder_q-layer.11": 3184.8877, "encoder_q-layer.2": 1381.7223, "encoder_q-layer.3": 1455.2637, "encoder_q-layer.4": 1525.6489, "encoder_q-layer.5": 1550.142, "encoder_q-layer.6": 1515.3828, "encoder_q-layer.7": 1427.9858, "encoder_q-layer.8": 1564.3926, "encoder_q-layer.9": 1218.1237, "epoch": 0.21, "inbatch_neg_score": 0.0936, "inbatch_pos_score": 0.6104, "learning_rate": 3.727777777777778e-05, "loss": 4.2803, "norm_diff": 0.052, "norm_loss": 0.0, "num_token_doc": 66.6403, "num_token_overlap": 11.6971, "num_token_query": 31.9541, "num_token_union": 65.2362, "num_word_context": 201.9796, "num_word_doc": 49.7475, "num_word_query": 23.6033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2478.5255, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0941, "query_norm": 1.2826, "queue_k_norm": 1.3309, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9541, "sent_len_1": 66.6403, "sent_len_max_0": 127.53, "sent_len_max_1": 188.755, "stdk": 0.0467, "stdq": 0.0423, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 32900 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 4.2668, "doc_norm": 1.3362, "encoder_q-embeddings": 2542.4863, "encoder_q-layer.0": 1782.2477, "encoder_q-layer.1": 1874.8004, "encoder_q-layer.10": 1395.6941, "encoder_q-layer.11": 3156.7153, "encoder_q-layer.2": 2146.0884, "encoder_q-layer.3": 2237.1538, "encoder_q-layer.4": 2208.6721, "encoder_q-layer.5": 2000.158, "encoder_q-layer.6": 1950.6763, "encoder_q-layer.7": 1786.6453, "encoder_q-layer.8": 1671.6475, "encoder_q-layer.9": 1249.5425, "epoch": 0.21, "inbatch_neg_score": 0.0997, "inbatch_pos_score": 0.6216, "learning_rate": 3.722222222222222e-05, "loss": 4.2668, "norm_diff": 0.0614, "norm_loss": 0.0, "num_token_doc": 66.8149, "num_token_overlap": 11.7431, "num_token_query": 32.2259, "num_token_union": 65.5671, "num_word_context": 202.8317, "num_word_doc": 49.8708, "num_word_query": 23.8195, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3120.8359, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0986, "query_norm": 1.2748, "queue_k_norm": 1.3311, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.2259, "sent_len_1": 66.8149, "sent_len_max_0": 127.56, "sent_len_max_1": 189.14, "stdk": 0.0468, "stdq": 0.0419, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 33000 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.2482, "doc_norm": 1.3359, "encoder_q-embeddings": 1380.0261, "encoder_q-layer.0": 965.8669, "encoder_q-layer.1": 1021.1174, "encoder_q-layer.10": 1203.5493, "encoder_q-layer.11": 3075.929, "encoder_q-layer.2": 1119.2194, "encoder_q-layer.3": 1125.785, "encoder_q-layer.4": 1116.5524, "encoder_q-layer.5": 1097.0885, "encoder_q-layer.6": 1275.6433, "encoder_q-layer.7": 1310.3008, "encoder_q-layer.8": 1488.772, "encoder_q-layer.9": 1224.5967, "epoch": 0.22, "inbatch_neg_score": 0.0999, "inbatch_pos_score": 0.6006, "learning_rate": 3.7166666666666664e-05, "loss": 4.2482, "norm_diff": 0.0643, "norm_loss": 0.0, "num_token_doc": 66.9447, "num_token_overlap": 11.7262, "num_token_query": 32.0807, "num_token_union": 65.5441, "num_word_context": 202.752, "num_word_doc": 49.8882, "num_word_query": 23.7221, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2213.7416, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0989, "query_norm": 1.2716, "queue_k_norm": 1.3303, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0807, "sent_len_1": 66.9447, "sent_len_max_0": 127.5575, "sent_len_max_1": 192.6987, "stdk": 0.0468, "stdq": 0.042, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 33100 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.3074, "doc_norm": 1.3228, "encoder_q-embeddings": 4647.168, "encoder_q-layer.0": 3183.6111, "encoder_q-layer.1": 3310.4998, "encoder_q-layer.10": 1351.7625, "encoder_q-layer.11": 3398.6658, "encoder_q-layer.2": 3806.8831, "encoder_q-layer.3": 3888.0894, "encoder_q-layer.4": 3935.531, "encoder_q-layer.5": 3819.9731, "encoder_q-layer.6": 3592.0488, "encoder_q-layer.7": 2898.1394, "encoder_q-layer.8": 2271.5286, "encoder_q-layer.9": 1502.0564, "epoch": 0.22, "inbatch_neg_score": 0.1085, "inbatch_pos_score": 0.604, "learning_rate": 3.7111111111111113e-05, "loss": 4.3074, "norm_diff": 0.0426, "norm_loss": 0.0, "num_token_doc": 66.703, "num_token_overlap": 11.6255, "num_token_query": 31.7862, "num_token_union": 65.2934, "num_word_context": 201.9261, "num_word_doc": 49.762, "num_word_query": 23.4423, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5107.2079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1082, "query_norm": 1.2802, "queue_k_norm": 1.3309, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7862, "sent_len_1": 66.703, "sent_len_max_0": 127.4613, "sent_len_max_1": 189.6612, "stdk": 0.0463, "stdq": 0.042, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 33200 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.2672, "doc_norm": 1.3292, "encoder_q-embeddings": 3058.2173, "encoder_q-layer.0": 2130.6265, "encoder_q-layer.1": 2333.5298, "encoder_q-layer.10": 1416.0308, "encoder_q-layer.11": 3515.5796, "encoder_q-layer.2": 2401.3357, "encoder_q-layer.3": 2265.3972, "encoder_q-layer.4": 1833.8771, "encoder_q-layer.5": 1510.1161, "encoder_q-layer.6": 1627.4456, "encoder_q-layer.7": 1415.9214, "encoder_q-layer.8": 1585.041, "encoder_q-layer.9": 1341.5081, "epoch": 0.22, "inbatch_neg_score": 0.1094, "inbatch_pos_score": 0.6143, "learning_rate": 3.705555555555556e-05, "loss": 4.2672, "norm_diff": 0.0413, "norm_loss": 0.0, "num_token_doc": 66.73, "num_token_overlap": 11.6797, "num_token_query": 31.8661, "num_token_union": 65.3151, "num_word_context": 201.9967, "num_word_doc": 49.8177, "num_word_query": 23.5416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3316.0066, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1084, "query_norm": 1.2879, "queue_k_norm": 1.3316, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8661, "sent_len_1": 66.73, "sent_len_max_0": 127.2763, "sent_len_max_1": 189.4925, "stdk": 0.0465, "stdq": 0.0425, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 33300 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.2706, "doc_norm": 1.3286, "encoder_q-embeddings": 2083.7334, "encoder_q-layer.0": 1482.8771, "encoder_q-layer.1": 1606.4331, "encoder_q-layer.10": 1304.259, "encoder_q-layer.11": 3078.0234, "encoder_q-layer.2": 1916.6404, "encoder_q-layer.3": 1917.8831, "encoder_q-layer.4": 1889.9918, "encoder_q-layer.5": 2045.5149, "encoder_q-layer.6": 1658.1052, "encoder_q-layer.7": 1517.1901, "encoder_q-layer.8": 1516.0293, "encoder_q-layer.9": 1256.6488, "epoch": 0.22, "inbatch_neg_score": 0.1123, "inbatch_pos_score": 0.6055, "learning_rate": 3.7e-05, "loss": 4.2706, "norm_diff": 0.0617, "norm_loss": 0.0, "num_token_doc": 66.6652, "num_token_overlap": 11.6112, "num_token_query": 31.7109, "num_token_union": 65.2564, "num_word_context": 202.1371, "num_word_doc": 49.7888, "num_word_query": 23.454, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2783.7587, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1129, "query_norm": 1.2669, "queue_k_norm": 1.3296, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7109, "sent_len_1": 66.6652, "sent_len_max_0": 127.275, "sent_len_max_1": 188.0087, "stdk": 0.0466, "stdq": 0.0421, "stdqueue_k": 0.0467, "stdqueue_q": 0.0, "step": 33400 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 4.2563, "doc_norm": 1.3331, "encoder_q-embeddings": 1459.4707, "encoder_q-layer.0": 991.287, "encoder_q-layer.1": 1053.0006, "encoder_q-layer.10": 1236.5902, "encoder_q-layer.11": 2960.2405, "encoder_q-layer.2": 1165.1537, "encoder_q-layer.3": 1168.1499, "encoder_q-layer.4": 1204.9143, "encoder_q-layer.5": 1183.7031, "encoder_q-layer.6": 1246.1504, "encoder_q-layer.7": 1227.202, "encoder_q-layer.8": 1409.2336, "encoder_q-layer.9": 1195.8871, "epoch": 0.22, "inbatch_neg_score": 0.1154, "inbatch_pos_score": 0.6362, "learning_rate": 3.694444444444445e-05, "loss": 4.2563, "norm_diff": 0.0508, "norm_loss": 0.0, "num_token_doc": 66.5129, "num_token_overlap": 11.7232, "num_token_query": 32.0525, "num_token_union": 65.2049, "num_word_context": 202.1087, "num_word_doc": 49.6053, "num_word_query": 23.6784, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2141.2885, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1146, "query_norm": 1.2822, "queue_k_norm": 1.3335, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0525, "sent_len_1": 66.5129, "sent_len_max_0": 127.475, "sent_len_max_1": 191.105, "stdk": 0.0467, "stdq": 0.0426, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 33500 }, { "accuracy": 39.6484, "active_queue_size": 16384.0, "cl_loss": 4.2819, "doc_norm": 1.333, "encoder_q-embeddings": 2222.0393, "encoder_q-layer.0": 1674.8658, "encoder_q-layer.1": 1731.1299, "encoder_q-layer.10": 1355.9788, "encoder_q-layer.11": 3342.3643, "encoder_q-layer.2": 2029.5558, "encoder_q-layer.3": 2064.075, "encoder_q-layer.4": 1925.7952, "encoder_q-layer.5": 2027.9691, "encoder_q-layer.6": 2030.2371, "encoder_q-layer.7": 2028.491, "encoder_q-layer.8": 1906.3313, "encoder_q-layer.9": 1503.7465, "epoch": 0.22, "inbatch_neg_score": 0.1138, "inbatch_pos_score": 0.5908, "learning_rate": 3.688888888888889e-05, "loss": 4.2819, "norm_diff": 0.1036, "norm_loss": 0.0, "num_token_doc": 66.7179, "num_token_overlap": 11.6289, "num_token_query": 31.8115, "num_token_union": 65.2929, "num_word_context": 202.061, "num_word_doc": 49.7802, "num_word_query": 23.5033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3051.6353, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.113, "query_norm": 1.2295, "queue_k_norm": 1.3328, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8115, "sent_len_1": 66.7179, "sent_len_max_0": 127.4775, "sent_len_max_1": 189.2375, "stdk": 0.0467, "stdq": 0.0408, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 33600 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.2459, "doc_norm": 1.3342, "encoder_q-embeddings": 2280.3035, "encoder_q-layer.0": 1542.0431, "encoder_q-layer.1": 1530.9702, "encoder_q-layer.10": 1322.2036, "encoder_q-layer.11": 3238.8813, "encoder_q-layer.2": 1751.5305, "encoder_q-layer.3": 1804.5731, "encoder_q-layer.4": 1886.796, "encoder_q-layer.5": 1764.5568, "encoder_q-layer.6": 1812.5253, "encoder_q-layer.7": 1742.0199, "encoder_q-layer.8": 1589.1719, "encoder_q-layer.9": 1240.5475, "epoch": 0.22, "inbatch_neg_score": 0.1155, "inbatch_pos_score": 0.6089, "learning_rate": 3.683333333333334e-05, "loss": 4.2459, "norm_diff": 0.0807, "norm_loss": 0.0, "num_token_doc": 66.665, "num_token_overlap": 11.6887, "num_token_query": 31.9827, "num_token_union": 65.2848, "num_word_context": 202.1816, "num_word_doc": 49.6944, "num_word_query": 23.6141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2825.7298, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1136, "query_norm": 1.2535, "queue_k_norm": 1.3342, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9827, "sent_len_1": 66.665, "sent_len_max_0": 127.5125, "sent_len_max_1": 190.9387, "stdk": 0.0467, "stdq": 0.0419, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 33700 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.2467, "doc_norm": 1.3321, "encoder_q-embeddings": 1420.5255, "encoder_q-layer.0": 967.5457, "encoder_q-layer.1": 1050.8104, "encoder_q-layer.10": 1308.8497, "encoder_q-layer.11": 3042.2666, "encoder_q-layer.2": 1137.6707, "encoder_q-layer.3": 1188.0702, "encoder_q-layer.4": 1230.0544, "encoder_q-layer.5": 1185.5432, "encoder_q-layer.6": 1275.2855, "encoder_q-layer.7": 1302.0763, "encoder_q-layer.8": 1491.3729, "encoder_q-layer.9": 1264.63, "epoch": 0.22, "inbatch_neg_score": 0.1092, "inbatch_pos_score": 0.6309, "learning_rate": 3.677777777777778e-05, "loss": 4.2467, "norm_diff": 0.0687, "norm_loss": 0.0, "num_token_doc": 66.7652, "num_token_overlap": 11.7015, "num_token_query": 31.9385, "num_token_union": 65.3667, "num_word_context": 202.084, "num_word_doc": 49.84, "num_word_query": 23.6079, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2183.4931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1104, "query_norm": 1.2634, "queue_k_norm": 1.335, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9385, "sent_len_1": 66.7652, "sent_len_max_0": 127.5713, "sent_len_max_1": 188.98, "stdk": 0.0467, "stdq": 0.0425, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 33800 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.2793, "doc_norm": 1.3362, "encoder_q-embeddings": 1650.4961, "encoder_q-layer.0": 1076.0123, "encoder_q-layer.1": 1190.4282, "encoder_q-layer.10": 1350.9845, "encoder_q-layer.11": 3316.228, "encoder_q-layer.2": 1337.1184, "encoder_q-layer.3": 1380.5168, "encoder_q-layer.4": 1373.0767, "encoder_q-layer.5": 1393.7391, "encoder_q-layer.6": 1421.2869, "encoder_q-layer.7": 1364.6703, "encoder_q-layer.8": 1480.1825, "encoder_q-layer.9": 1267.8818, "epoch": 0.22, "inbatch_neg_score": 0.1087, "inbatch_pos_score": 0.6206, "learning_rate": 3.672222222222222e-05, "loss": 4.2793, "norm_diff": 0.0809, "norm_loss": 0.0, "num_token_doc": 66.6512, "num_token_overlap": 11.6698, "num_token_query": 31.9241, "num_token_union": 65.3068, "num_word_context": 202.3136, "num_word_doc": 49.7438, "num_word_query": 23.5641, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2385.2718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1076, "query_norm": 1.2554, "queue_k_norm": 1.333, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9241, "sent_len_1": 66.6512, "sent_len_max_0": 127.4988, "sent_len_max_1": 188.07, "stdk": 0.0468, "stdq": 0.0422, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 33900 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 4.2557, "doc_norm": 1.3383, "encoder_q-embeddings": 1634.4125, "encoder_q-layer.0": 1115.4434, "encoder_q-layer.1": 1226.7009, "encoder_q-layer.10": 1189.1052, "encoder_q-layer.11": 2960.8083, "encoder_q-layer.2": 1428.443, "encoder_q-layer.3": 1570.0808, "encoder_q-layer.4": 1682.0491, "encoder_q-layer.5": 1744.9199, "encoder_q-layer.6": 1644.307, "encoder_q-layer.7": 1402.7347, "encoder_q-layer.8": 1532.0375, "encoder_q-layer.9": 1134.8711, "epoch": 0.22, "inbatch_neg_score": 0.109, "inbatch_pos_score": 0.6431, "learning_rate": 3.6666666666666666e-05, "loss": 4.2557, "norm_diff": 0.0848, "norm_loss": 0.0, "num_token_doc": 66.825, "num_token_overlap": 11.6574, "num_token_query": 31.8407, "num_token_union": 65.3778, "num_word_context": 202.1215, "num_word_doc": 49.8407, "num_word_query": 23.5088, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2442.5653, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1096, "query_norm": 1.2535, "queue_k_norm": 1.3353, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8407, "sent_len_1": 66.825, "sent_len_max_0": 127.6137, "sent_len_max_1": 191.5238, "stdk": 0.0469, "stdq": 0.0421, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 34000 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.2536, "doc_norm": 1.3387, "encoder_q-embeddings": 2459.802, "encoder_q-layer.0": 1726.6621, "encoder_q-layer.1": 1933.8934, "encoder_q-layer.10": 1344.3657, "encoder_q-layer.11": 3103.3032, "encoder_q-layer.2": 2189.8904, "encoder_q-layer.3": 2301.1521, "encoder_q-layer.4": 2316.2253, "encoder_q-layer.5": 2096.1079, "encoder_q-layer.6": 2305.9851, "encoder_q-layer.7": 1911.9065, "encoder_q-layer.8": 1671.0696, "encoder_q-layer.9": 1246.5317, "epoch": 0.22, "inbatch_neg_score": 0.1126, "inbatch_pos_score": 0.6382, "learning_rate": 3.6611111111111115e-05, "loss": 4.2536, "norm_diff": 0.0741, "norm_loss": 0.0, "num_token_doc": 66.6825, "num_token_overlap": 11.689, "num_token_query": 31.9525, "num_token_union": 65.3257, "num_word_context": 202.3155, "num_word_doc": 49.7442, "num_word_query": 23.6086, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3131.033, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.112, "query_norm": 1.2646, "queue_k_norm": 1.3364, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9525, "sent_len_1": 66.6825, "sent_len_max_0": 127.4175, "sent_len_max_1": 190.7637, "stdk": 0.0468, "stdq": 0.0424, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 34100 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.2316, "doc_norm": 1.3361, "encoder_q-embeddings": 1781.0945, "encoder_q-layer.0": 1182.4518, "encoder_q-layer.1": 1271.8696, "encoder_q-layer.10": 1348.7836, "encoder_q-layer.11": 3067.7913, "encoder_q-layer.2": 1463.7727, "encoder_q-layer.3": 1566.6998, "encoder_q-layer.4": 1508.7574, "encoder_q-layer.5": 1446.7545, "encoder_q-layer.6": 1556.8892, "encoder_q-layer.7": 1554.5465, "encoder_q-layer.8": 1601.0455, "encoder_q-layer.9": 1309.7458, "epoch": 0.22, "inbatch_neg_score": 0.1101, "inbatch_pos_score": 0.6055, "learning_rate": 3.655555555555556e-05, "loss": 4.2316, "norm_diff": 0.09, "norm_loss": 0.0, "num_token_doc": 66.8762, "num_token_overlap": 11.7363, "num_token_query": 32.1571, "num_token_union": 65.4902, "num_word_context": 202.6269, "num_word_doc": 49.9205, "num_word_query": 23.7528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2520.8416, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.111, "query_norm": 1.2461, "queue_k_norm": 1.3344, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.1571, "sent_len_1": 66.8762, "sent_len_max_0": 127.58, "sent_len_max_1": 189.6275, "stdk": 0.0468, "stdq": 0.0418, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 34200 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 4.2362, "doc_norm": 1.3346, "encoder_q-embeddings": 1731.5808, "encoder_q-layer.0": 1121.7905, "encoder_q-layer.1": 1236.1118, "encoder_q-layer.10": 1411.3031, "encoder_q-layer.11": 3239.8687, "encoder_q-layer.2": 1371.4001, "encoder_q-layer.3": 1394.5227, "encoder_q-layer.4": 1405.0173, "encoder_q-layer.5": 1326.0168, "encoder_q-layer.6": 1483.8516, "encoder_q-layer.7": 1530.6909, "encoder_q-layer.8": 1714.8325, "encoder_q-layer.9": 1281.7562, "epoch": 0.22, "inbatch_neg_score": 0.1136, "inbatch_pos_score": 0.6299, "learning_rate": 3.65e-05, "loss": 4.2362, "norm_diff": 0.1004, "norm_loss": 0.0, "num_token_doc": 66.8283, "num_token_overlap": 11.6769, "num_token_query": 31.8076, "num_token_union": 65.3138, "num_word_context": 202.1702, "num_word_doc": 49.8144, "num_word_query": 23.4703, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2458.0612, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1144, "query_norm": 1.2341, "queue_k_norm": 1.3352, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8076, "sent_len_1": 66.8283, "sent_len_max_0": 127.4762, "sent_len_max_1": 190.95, "stdk": 0.0467, "stdq": 0.0414, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 34300 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.2236, "doc_norm": 1.3312, "encoder_q-embeddings": 1480.1775, "encoder_q-layer.0": 974.9924, "encoder_q-layer.1": 1053.2936, "encoder_q-layer.10": 1261.5521, "encoder_q-layer.11": 3015.446, "encoder_q-layer.2": 1163.7531, "encoder_q-layer.3": 1158.4364, "encoder_q-layer.4": 1169.6145, "encoder_q-layer.5": 1164.2982, "encoder_q-layer.6": 1229.0698, "encoder_q-layer.7": 1194.6901, "encoder_q-layer.8": 1293.9908, "encoder_q-layer.9": 1177.3469, "epoch": 0.22, "inbatch_neg_score": 0.1107, "inbatch_pos_score": 0.6104, "learning_rate": 3.644444444444445e-05, "loss": 4.2236, "norm_diff": 0.1006, "norm_loss": 0.0, "num_token_doc": 66.8986, "num_token_overlap": 11.7031, "num_token_query": 31.9821, "num_token_union": 65.4871, "num_word_context": 202.4209, "num_word_doc": 49.925, "num_word_query": 23.591, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2166.6152, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.111, "query_norm": 1.2305, "queue_k_norm": 1.3361, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9821, "sent_len_1": 66.8986, "sent_len_max_0": 127.615, "sent_len_max_1": 187.7212, "stdk": 0.0466, "stdq": 0.0413, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 34400 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.2406, "doc_norm": 1.3385, "encoder_q-embeddings": 2373.8276, "encoder_q-layer.0": 1585.8606, "encoder_q-layer.1": 1715.5198, "encoder_q-layer.10": 1374.9597, "encoder_q-layer.11": 3169.9265, "encoder_q-layer.2": 1897.7087, "encoder_q-layer.3": 1982.8542, "encoder_q-layer.4": 1957.7764, "encoder_q-layer.5": 1631.0355, "encoder_q-layer.6": 1643.6042, "encoder_q-layer.7": 1523.825, "encoder_q-layer.8": 1492.245, "encoder_q-layer.9": 1273.8738, "epoch": 0.22, "inbatch_neg_score": 0.1072, "inbatch_pos_score": 0.6177, "learning_rate": 3.638888888888889e-05, "loss": 4.2406, "norm_diff": 0.1269, "norm_loss": 0.0, "num_token_doc": 66.8509, "num_token_overlap": 11.6669, "num_token_query": 31.8984, "num_token_union": 65.3943, "num_word_context": 202.0466, "num_word_doc": 49.8816, "num_word_query": 23.5613, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2872.7564, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1073, "query_norm": 1.2116, "queue_k_norm": 1.3362, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8984, "sent_len_1": 66.8509, "sent_len_max_0": 127.6275, "sent_len_max_1": 189.3938, "stdk": 0.0469, "stdq": 0.0408, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 34500 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.247, "doc_norm": 1.3305, "encoder_q-embeddings": 3086.616, "encoder_q-layer.0": 2075.656, "encoder_q-layer.1": 2187.5503, "encoder_q-layer.10": 2477.2319, "encoder_q-layer.11": 6261.2734, "encoder_q-layer.2": 2347.854, "encoder_q-layer.3": 2361.2673, "encoder_q-layer.4": 2628.3896, "encoder_q-layer.5": 2537.7217, "encoder_q-layer.6": 2606.6211, "encoder_q-layer.7": 2588.2334, "encoder_q-layer.8": 2831.7156, "encoder_q-layer.9": 2422.5083, "epoch": 0.23, "inbatch_neg_score": 0.1072, "inbatch_pos_score": 0.6074, "learning_rate": 3.633333333333333e-05, "loss": 4.247, "norm_diff": 0.0977, "norm_loss": 0.0, "num_token_doc": 66.628, "num_token_overlap": 11.728, "num_token_query": 31.9919, "num_token_union": 65.2062, "num_word_context": 202.284, "num_word_doc": 49.6972, "num_word_query": 23.616, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4596.2299, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1088, "query_norm": 1.2328, "queue_k_norm": 1.336, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9919, "sent_len_1": 66.628, "sent_len_max_0": 127.4075, "sent_len_max_1": 190.8425, "stdk": 0.0466, "stdq": 0.0414, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 34600 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.2514, "doc_norm": 1.3284, "encoder_q-embeddings": 7415.2402, "encoder_q-layer.0": 5123.2329, "encoder_q-layer.1": 6231.7842, "encoder_q-layer.10": 2786.075, "encoder_q-layer.11": 6458.6768, "encoder_q-layer.2": 7399.1377, "encoder_q-layer.3": 7523.6035, "encoder_q-layer.4": 7532.8086, "encoder_q-layer.5": 5944.3008, "encoder_q-layer.6": 4712.7905, "encoder_q-layer.7": 4520.5645, "encoder_q-layer.8": 4211.5103, "encoder_q-layer.9": 2822.2454, "epoch": 0.23, "inbatch_neg_score": 0.1028, "inbatch_pos_score": 0.5908, "learning_rate": 3.6277777777777776e-05, "loss": 4.2514, "norm_diff": 0.0948, "norm_loss": 0.0, "num_token_doc": 67.0541, "num_token_overlap": 11.7416, "num_token_query": 32.0823, "num_token_union": 65.5729, "num_word_context": 202.5943, "num_word_doc": 50.0703, "num_word_query": 23.7026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8892.5657, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1025, "query_norm": 1.2337, "queue_k_norm": 1.3351, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0823, "sent_len_1": 67.0541, "sent_len_max_0": 127.525, "sent_len_max_1": 191.2175, "stdk": 0.0465, "stdq": 0.0417, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 34700 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.2489, "doc_norm": 1.3331, "encoder_q-embeddings": 3338.5742, "encoder_q-layer.0": 2201.1118, "encoder_q-layer.1": 2273.7861, "encoder_q-layer.10": 2599.4839, "encoder_q-layer.11": 6216.4404, "encoder_q-layer.2": 2525.2915, "encoder_q-layer.3": 2690.2439, "encoder_q-layer.4": 2862.7805, "encoder_q-layer.5": 2863.8145, "encoder_q-layer.6": 2733.4866, "encoder_q-layer.7": 2813.5049, "encoder_q-layer.8": 2862.9709, "encoder_q-layer.9": 2473.3699, "epoch": 0.23, "inbatch_neg_score": 0.103, "inbatch_pos_score": 0.5986, "learning_rate": 3.6222222222222225e-05, "loss": 4.2489, "norm_diff": 0.0968, "norm_loss": 0.0, "num_token_doc": 66.7825, "num_token_overlap": 11.6646, "num_token_query": 31.979, "num_token_union": 65.4179, "num_word_context": 202.1116, "num_word_doc": 49.8009, "num_word_query": 23.6141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4735.1905, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1025, "query_norm": 1.2363, "queue_k_norm": 1.3349, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.979, "sent_len_1": 66.7825, "sent_len_max_0": 127.57, "sent_len_max_1": 190.3487, "stdk": 0.0467, "stdq": 0.0417, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 34800 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.2318, "doc_norm": 1.3359, "encoder_q-embeddings": 1758.8772, "encoder_q-layer.0": 1167.1866, "encoder_q-layer.1": 1202.9269, "encoder_q-layer.10": 1382.6337, "encoder_q-layer.11": 3021.4592, "encoder_q-layer.2": 1353.3365, "encoder_q-layer.3": 1351.3121, "encoder_q-layer.4": 1351.7396, "encoder_q-layer.5": 1330.4258, "encoder_q-layer.6": 1354.1176, "encoder_q-layer.7": 1268.1763, "encoder_q-layer.8": 1401.511, "encoder_q-layer.9": 1214.1427, "epoch": 0.23, "inbatch_neg_score": 0.095, "inbatch_pos_score": 0.6201, "learning_rate": 3.6166666666666674e-05, "loss": 4.2318, "norm_diff": 0.1039, "norm_loss": 0.0, "num_token_doc": 66.6834, "num_token_overlap": 11.6512, "num_token_query": 31.7919, "num_token_union": 65.2541, "num_word_context": 202.3502, "num_word_doc": 49.7462, "num_word_query": 23.4807, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2330.3301, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0952, "query_norm": 1.232, "queue_k_norm": 1.3343, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7919, "sent_len_1": 66.6834, "sent_len_max_0": 127.3637, "sent_len_max_1": 187.69, "stdk": 0.0468, "stdq": 0.0417, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 34900 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.2264, "doc_norm": 1.3354, "encoder_q-embeddings": 1873.8461, "encoder_q-layer.0": 1221.5498, "encoder_q-layer.1": 1352.2885, "encoder_q-layer.10": 1333.4514, "encoder_q-layer.11": 3262.3989, "encoder_q-layer.2": 1513.0465, "encoder_q-layer.3": 1525.1422, "encoder_q-layer.4": 1558.3148, "encoder_q-layer.5": 1491.6096, "encoder_q-layer.6": 1486.8372, "encoder_q-layer.7": 1445.6232, "encoder_q-layer.8": 1541.5796, "encoder_q-layer.9": 1283.7787, "epoch": 0.23, "inbatch_neg_score": 0.0954, "inbatch_pos_score": 0.6069, "learning_rate": 3.611111111111111e-05, "loss": 4.2264, "norm_diff": 0.1044, "norm_loss": 0.0, "num_token_doc": 66.6625, "num_token_overlap": 11.6351, "num_token_query": 31.8326, "num_token_union": 65.3153, "num_word_context": 202.2722, "num_word_doc": 49.8267, "num_word_query": 23.5094, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2579.8367, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0961, "query_norm": 1.231, "queue_k_norm": 1.3316, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8326, "sent_len_1": 66.6625, "sent_len_max_0": 127.5337, "sent_len_max_1": 186.8963, "stdk": 0.0468, "stdq": 0.0415, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 35000 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 4.2297, "doc_norm": 1.3332, "encoder_q-embeddings": 1674.6638, "encoder_q-layer.0": 1163.8525, "encoder_q-layer.1": 1210.8104, "encoder_q-layer.10": 1190.1526, "encoder_q-layer.11": 2935.3352, "encoder_q-layer.2": 1345.118, "encoder_q-layer.3": 1457.7328, "encoder_q-layer.4": 1544.7173, "encoder_q-layer.5": 1524.7961, "encoder_q-layer.6": 1592.6833, "encoder_q-layer.7": 1482.1705, "encoder_q-layer.8": 1441.1798, "encoder_q-layer.9": 1212.0125, "epoch": 0.23, "inbatch_neg_score": 0.0921, "inbatch_pos_score": 0.6113, "learning_rate": 3.605555555555556e-05, "loss": 4.2297, "norm_diff": 0.0925, "norm_loss": 0.0, "num_token_doc": 66.5971, "num_token_overlap": 11.6569, "num_token_query": 31.8648, "num_token_union": 65.2453, "num_word_context": 201.974, "num_word_doc": 49.6816, "num_word_query": 23.5371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2387.3276, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0926, "query_norm": 1.2408, "queue_k_norm": 1.3336, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8648, "sent_len_1": 66.5971, "sent_len_max_0": 127.305, "sent_len_max_1": 189.5762, "stdk": 0.0468, "stdq": 0.0419, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 35100 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.2157, "doc_norm": 1.3388, "encoder_q-embeddings": 2281.4019, "encoder_q-layer.0": 1462.2594, "encoder_q-layer.1": 1681.9443, "encoder_q-layer.10": 1275.5195, "encoder_q-layer.11": 3017.3582, "encoder_q-layer.2": 1819.8146, "encoder_q-layer.3": 1932.3223, "encoder_q-layer.4": 1831.5355, "encoder_q-layer.5": 1726.2482, "encoder_q-layer.6": 1607.6501, "encoder_q-layer.7": 1675.0925, "encoder_q-layer.8": 1643.121, "encoder_q-layer.9": 1327.5543, "epoch": 0.23, "inbatch_neg_score": 0.0929, "inbatch_pos_score": 0.6104, "learning_rate": 3.6e-05, "loss": 4.2157, "norm_diff": 0.0866, "norm_loss": 0.0, "num_token_doc": 66.7356, "num_token_overlap": 11.7172, "num_token_query": 31.8833, "num_token_union": 65.2802, "num_word_context": 201.8991, "num_word_doc": 49.7336, "num_word_query": 23.5479, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2814.2329, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.092, "query_norm": 1.2523, "queue_k_norm": 1.335, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8833, "sent_len_1": 66.7356, "sent_len_max_0": 127.69, "sent_len_max_1": 189.8525, "stdk": 0.0469, "stdq": 0.042, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 35200 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.2063, "doc_norm": 1.3318, "encoder_q-embeddings": 1952.4745, "encoder_q-layer.0": 1400.5143, "encoder_q-layer.1": 1373.6996, "encoder_q-layer.10": 1280.2697, "encoder_q-layer.11": 3160.1863, "encoder_q-layer.2": 1636.4688, "encoder_q-layer.3": 1715.012, "encoder_q-layer.4": 1843.2531, "encoder_q-layer.5": 1590.7241, "encoder_q-layer.6": 1739.0428, "encoder_q-layer.7": 1463.6234, "encoder_q-layer.8": 1651.2236, "encoder_q-layer.9": 1253.912, "epoch": 0.23, "inbatch_neg_score": 0.0919, "inbatch_pos_score": 0.6025, "learning_rate": 3.594444444444445e-05, "loss": 4.2063, "norm_diff": 0.0824, "norm_loss": 0.0, "num_token_doc": 66.886, "num_token_overlap": 11.6978, "num_token_query": 31.9378, "num_token_union": 65.4563, "num_word_context": 202.5251, "num_word_doc": 49.8837, "num_word_query": 23.6066, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2678.3396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0921, "query_norm": 1.2494, "queue_k_norm": 1.3314, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9378, "sent_len_1": 66.886, "sent_len_max_0": 127.4363, "sent_len_max_1": 189.69, "stdk": 0.0467, "stdq": 0.0418, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 35300 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.2375, "doc_norm": 1.3406, "encoder_q-embeddings": 1699.5146, "encoder_q-layer.0": 1157.3398, "encoder_q-layer.1": 1264.9698, "encoder_q-layer.10": 1249.7345, "encoder_q-layer.11": 3138.2144, "encoder_q-layer.2": 1407.4495, "encoder_q-layer.3": 1553.6233, "encoder_q-layer.4": 1640.7806, "encoder_q-layer.5": 1548.3745, "encoder_q-layer.6": 1682.863, "encoder_q-layer.7": 1640.9275, "encoder_q-layer.8": 1477.4229, "encoder_q-layer.9": 1212.1719, "epoch": 0.23, "inbatch_neg_score": 0.0947, "inbatch_pos_score": 0.5996, "learning_rate": 3.5888888888888886e-05, "loss": 4.2375, "norm_diff": 0.0856, "norm_loss": 0.0, "num_token_doc": 66.5968, "num_token_overlap": 11.6835, "num_token_query": 31.969, "num_token_union": 65.2722, "num_word_context": 201.9995, "num_word_doc": 49.6797, "num_word_query": 23.5926, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2502.0119, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0947, "query_norm": 1.255, "queue_k_norm": 1.3314, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.969, "sent_len_1": 66.5968, "sent_len_max_0": 127.6375, "sent_len_max_1": 189.1062, "stdk": 0.047, "stdq": 0.0417, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 35400 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.24, "doc_norm": 1.3308, "encoder_q-embeddings": 1293.8484, "encoder_q-layer.0": 854.6813, "encoder_q-layer.1": 903.2352, "encoder_q-layer.10": 1311.6791, "encoder_q-layer.11": 3144.9626, "encoder_q-layer.2": 960.3447, "encoder_q-layer.3": 985.7769, "encoder_q-layer.4": 1083.8597, "encoder_q-layer.5": 1032.8184, "encoder_q-layer.6": 1206.0858, "encoder_q-layer.7": 1304.9886, "encoder_q-layer.8": 1488.3585, "encoder_q-layer.9": 1305.8607, "epoch": 0.23, "inbatch_neg_score": 0.0966, "inbatch_pos_score": 0.6118, "learning_rate": 3.5833333333333335e-05, "loss": 4.24, "norm_diff": 0.0403, "norm_loss": 0.0, "num_token_doc": 66.8737, "num_token_overlap": 11.6163, "num_token_query": 31.7584, "num_token_union": 65.3473, "num_word_context": 202.4344, "num_word_doc": 49.8944, "num_word_query": 23.4409, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2143.1611, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0972, "query_norm": 1.2904, "queue_k_norm": 1.3338, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7584, "sent_len_1": 66.8737, "sent_len_max_0": 127.4762, "sent_len_max_1": 189.45, "stdk": 0.0467, "stdq": 0.0425, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 35500 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.2456, "doc_norm": 1.3378, "encoder_q-embeddings": 1329.0149, "encoder_q-layer.0": 907.871, "encoder_q-layer.1": 938.1167, "encoder_q-layer.10": 1210.0569, "encoder_q-layer.11": 2913.0173, "encoder_q-layer.2": 990.1823, "encoder_q-layer.3": 976.2853, "encoder_q-layer.4": 1049.7559, "encoder_q-layer.5": 983.4369, "encoder_q-layer.6": 1084.6407, "encoder_q-layer.7": 1137.908, "encoder_q-layer.8": 1286.9796, "encoder_q-layer.9": 1177.369, "epoch": 0.23, "inbatch_neg_score": 0.1025, "inbatch_pos_score": 0.6147, "learning_rate": 3.577777777777778e-05, "loss": 4.2456, "norm_diff": 0.0435, "norm_loss": 0.0, "num_token_doc": 66.5846, "num_token_overlap": 11.6514, "num_token_query": 31.9105, "num_token_union": 65.2978, "num_word_context": 202.3076, "num_word_doc": 49.6666, "num_word_query": 23.5741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2011.4929, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1016, "query_norm": 1.2942, "queue_k_norm": 1.3302, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9105, "sent_len_1": 66.5846, "sent_len_max_0": 127.5687, "sent_len_max_1": 189.5838, "stdk": 0.047, "stdq": 0.0421, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 35600 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.2378, "doc_norm": 1.3331, "encoder_q-embeddings": 1646.9084, "encoder_q-layer.0": 1115.9178, "encoder_q-layer.1": 1234.7423, "encoder_q-layer.10": 1290.1095, "encoder_q-layer.11": 3279.405, "encoder_q-layer.2": 1445.35, "encoder_q-layer.3": 1438.0759, "encoder_q-layer.4": 1565.2217, "encoder_q-layer.5": 1449.9623, "encoder_q-layer.6": 1479.6549, "encoder_q-layer.7": 1584.4629, "encoder_q-layer.8": 1566.0891, "encoder_q-layer.9": 1209.7672, "epoch": 0.23, "inbatch_neg_score": 0.1059, "inbatch_pos_score": 0.6274, "learning_rate": 3.5722222222222226e-05, "loss": 4.2378, "norm_diff": 0.0534, "norm_loss": 0.0, "num_token_doc": 66.7722, "num_token_overlap": 11.6614, "num_token_query": 31.8116, "num_token_union": 65.3294, "num_word_context": 202.4627, "num_word_doc": 49.8385, "num_word_query": 23.5343, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2451.7601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1068, "query_norm": 1.2796, "queue_k_norm": 1.3297, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8116, "sent_len_1": 66.7722, "sent_len_max_0": 127.675, "sent_len_max_1": 188.9425, "stdk": 0.0468, "stdq": 0.0422, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 35700 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.2312, "doc_norm": 1.3331, "encoder_q-embeddings": 11774.1777, "encoder_q-layer.0": 8313.0781, "encoder_q-layer.1": 8869.2773, "encoder_q-layer.10": 1296.6447, "encoder_q-layer.11": 3174.77, "encoder_q-layer.2": 9741.0674, "encoder_q-layer.3": 9755.1641, "encoder_q-layer.4": 10764.666, "encoder_q-layer.5": 10654.7646, "encoder_q-layer.6": 9662.5439, "encoder_q-layer.7": 7297.3149, "encoder_q-layer.8": 4003.0198, "encoder_q-layer.9": 1554.6355, "epoch": 0.23, "inbatch_neg_score": 0.109, "inbatch_pos_score": 0.6226, "learning_rate": 3.566666666666667e-05, "loss": 4.2312, "norm_diff": 0.0792, "norm_loss": 0.0, "num_token_doc": 66.8623, "num_token_overlap": 11.6997, "num_token_query": 31.9577, "num_token_union": 65.4506, "num_word_context": 202.3601, "num_word_doc": 49.9056, "num_word_query": 23.599, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12542.459, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1083, "query_norm": 1.2539, "queue_k_norm": 1.3333, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9577, "sent_len_1": 66.8623, "sent_len_max_0": 127.565, "sent_len_max_1": 187.1362, "stdk": 0.0468, "stdq": 0.0418, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 35800 }, { "accuracy": 40.5273, "active_queue_size": 16384.0, "cl_loss": 4.2273, "doc_norm": 1.3322, "encoder_q-embeddings": 1912.7688, "encoder_q-layer.0": 1290.5492, "encoder_q-layer.1": 1373.7126, "encoder_q-layer.10": 1297.3083, "encoder_q-layer.11": 3169.3755, "encoder_q-layer.2": 1583.7194, "encoder_q-layer.3": 1711.6819, "encoder_q-layer.4": 1816.7526, "encoder_q-layer.5": 1644.4044, "encoder_q-layer.6": 1685.0304, "encoder_q-layer.7": 1711.3746, "encoder_q-layer.8": 1738.5178, "encoder_q-layer.9": 1311.5485, "epoch": 0.23, "inbatch_neg_score": 0.1071, "inbatch_pos_score": 0.6128, "learning_rate": 3.561111111111111e-05, "loss": 4.2273, "norm_diff": 0.0998, "norm_loss": 0.0, "num_token_doc": 66.8041, "num_token_overlap": 11.6568, "num_token_query": 31.9446, "num_token_union": 65.4055, "num_word_context": 202.1088, "num_word_doc": 49.8188, "num_word_query": 23.5989, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2711.8842, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1075, "query_norm": 1.2324, "queue_k_norm": 1.3335, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9446, "sent_len_1": 66.8041, "sent_len_max_0": 127.655, "sent_len_max_1": 191.6775, "stdk": 0.0468, "stdq": 0.0414, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 35900 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.215, "doc_norm": 1.3335, "encoder_q-embeddings": 9242.6094, "encoder_q-layer.0": 6522.3281, "encoder_q-layer.1": 7145.6846, "encoder_q-layer.10": 1303.7258, "encoder_q-layer.11": 3059.4771, "encoder_q-layer.2": 7954.2471, "encoder_q-layer.3": 7691.8584, "encoder_q-layer.4": 7996.291, "encoder_q-layer.5": 6813.1191, "encoder_q-layer.6": 4483.9082, "encoder_q-layer.7": 3742.2937, "encoder_q-layer.8": 2682.3445, "encoder_q-layer.9": 1562.0295, "epoch": 0.23, "inbatch_neg_score": 0.1015, "inbatch_pos_score": 0.6143, "learning_rate": 3.555555555555556e-05, "loss": 4.215, "norm_diff": 0.0915, "norm_loss": 0.0, "num_token_doc": 66.9559, "num_token_overlap": 11.6286, "num_token_query": 31.6645, "num_token_union": 65.3233, "num_word_context": 202.3096, "num_word_doc": 49.9651, "num_word_query": 23.3937, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9318.7408, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1017, "query_norm": 1.2419, "queue_k_norm": 1.3318, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.6645, "sent_len_1": 66.9559, "sent_len_max_0": 127.4112, "sent_len_max_1": 191.5062, "stdk": 0.0468, "stdq": 0.0421, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36000 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.214, "doc_norm": 1.3373, "encoder_q-embeddings": 4212.7676, "encoder_q-layer.0": 3013.4331, "encoder_q-layer.1": 3520.9082, "encoder_q-layer.10": 1244.192, "encoder_q-layer.11": 2971.5688, "encoder_q-layer.2": 3926.168, "encoder_q-layer.3": 4133.894, "encoder_q-layer.4": 4284.5107, "encoder_q-layer.5": 4411.5752, "encoder_q-layer.6": 4441.6968, "encoder_q-layer.7": 3936.0225, "encoder_q-layer.8": 2627.0913, "encoder_q-layer.9": 1353.8159, "epoch": 0.23, "inbatch_neg_score": 0.0968, "inbatch_pos_score": 0.6074, "learning_rate": 3.55e-05, "loss": 4.214, "norm_diff": 0.1106, "norm_loss": 0.0, "num_token_doc": 67.0317, "num_token_overlap": 11.7227, "num_token_query": 32.0242, "num_token_union": 65.5381, "num_word_context": 202.4768, "num_word_doc": 49.9963, "num_word_query": 23.6741, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5397.182, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0964, "query_norm": 1.2267, "queue_k_norm": 1.3306, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0242, "sent_len_1": 67.0317, "sent_len_max_0": 127.5687, "sent_len_max_1": 191.2738, "stdk": 0.0469, "stdq": 0.0415, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36100 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.2228, "doc_norm": 1.3288, "encoder_q-embeddings": 1192.595, "encoder_q-layer.0": 793.317, "encoder_q-layer.1": 831.6827, "encoder_q-layer.10": 1342.6548, "encoder_q-layer.11": 3108.4595, "encoder_q-layer.2": 942.977, "encoder_q-layer.3": 962.8303, "encoder_q-layer.4": 1014.5302, "encoder_q-layer.5": 1024.0477, "encoder_q-layer.6": 1079.137, "encoder_q-layer.7": 1135.6625, "encoder_q-layer.8": 1327.7222, "encoder_q-layer.9": 1201.752, "epoch": 0.24, "inbatch_neg_score": 0.0863, "inbatch_pos_score": 0.6182, "learning_rate": 3.5444444444444445e-05, "loss": 4.2228, "norm_diff": 0.0732, "norm_loss": 0.0, "num_token_doc": 66.6627, "num_token_overlap": 11.6594, "num_token_query": 31.8969, "num_token_union": 65.2871, "num_word_context": 201.9458, "num_word_doc": 49.7046, "num_word_query": 23.5239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1981.3495, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0859, "query_norm": 1.2555, "queue_k_norm": 1.3311, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8969, "sent_len_1": 66.6627, "sent_len_max_0": 127.7025, "sent_len_max_1": 191.1625, "stdk": 0.0466, "stdq": 0.0428, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36200 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.2271, "doc_norm": 1.3208, "encoder_q-embeddings": 1322.2115, "encoder_q-layer.0": 858.2141, "encoder_q-layer.1": 943.3273, "encoder_q-layer.10": 675.0237, "encoder_q-layer.11": 1580.2827, "encoder_q-layer.2": 1120.097, "encoder_q-layer.3": 1124.5396, "encoder_q-layer.4": 1235.8486, "encoder_q-layer.5": 1048.9631, "encoder_q-layer.6": 1109.9174, "encoder_q-layer.7": 1039.725, "encoder_q-layer.8": 858.8973, "encoder_q-layer.9": 620.1706, "epoch": 0.24, "inbatch_neg_score": 0.0827, "inbatch_pos_score": 0.6021, "learning_rate": 3.538888888888889e-05, "loss": 4.2271, "norm_diff": 0.0873, "norm_loss": 0.0, "num_token_doc": 66.8881, "num_token_overlap": 11.7432, "num_token_query": 32.0725, "num_token_union": 65.4975, "num_word_context": 202.6162, "num_word_doc": 49.9136, "num_word_query": 23.6844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1620.5014, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0828, "query_norm": 1.2334, "queue_k_norm": 1.3323, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0725, "sent_len_1": 66.8881, "sent_len_max_0": 127.5212, "sent_len_max_1": 188.9238, "stdk": 0.0464, "stdq": 0.0418, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 36300 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.2389, "doc_norm": 1.321, "encoder_q-embeddings": 984.7516, "encoder_q-layer.0": 668.1675, "encoder_q-layer.1": 745.0484, "encoder_q-layer.10": 597.7369, "encoder_q-layer.11": 1487.0186, "encoder_q-layer.2": 853.8988, "encoder_q-layer.3": 902.6306, "encoder_q-layer.4": 888.324, "encoder_q-layer.5": 910.3353, "encoder_q-layer.6": 915.3752, "encoder_q-layer.7": 859.1865, "encoder_q-layer.8": 778.4792, "encoder_q-layer.9": 571.3373, "epoch": 0.24, "inbatch_neg_score": 0.0855, "inbatch_pos_score": 0.5996, "learning_rate": 3.5333333333333336e-05, "loss": 4.2389, "norm_diff": 0.0743, "norm_loss": 0.0, "num_token_doc": 66.7146, "num_token_overlap": 11.6417, "num_token_query": 31.812, "num_token_union": 65.2527, "num_word_context": 202.1866, "num_word_doc": 49.7817, "num_word_query": 23.4855, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1330.3251, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0844, "query_norm": 1.2468, "queue_k_norm": 1.3302, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.812, "sent_len_1": 66.7146, "sent_len_max_0": 127.6713, "sent_len_max_1": 188.5, "stdk": 0.0464, "stdq": 0.0422, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36400 }, { "accuracy": 39.9414, "active_queue_size": 16384.0, "cl_loss": 4.2, "doc_norm": 1.334, "encoder_q-embeddings": 941.3266, "encoder_q-layer.0": 672.9044, "encoder_q-layer.1": 686.0236, "encoder_q-layer.10": 632.0007, "encoder_q-layer.11": 1642.8661, "encoder_q-layer.2": 723.2648, "encoder_q-layer.3": 724.0248, "encoder_q-layer.4": 716.4655, "encoder_q-layer.5": 740.7134, "encoder_q-layer.6": 696.1275, "encoder_q-layer.7": 696.8909, "encoder_q-layer.8": 738.0842, "encoder_q-layer.9": 620.1655, "epoch": 0.24, "inbatch_neg_score": 0.0878, "inbatch_pos_score": 0.5796, "learning_rate": 3.527777777777778e-05, "loss": 4.2, "norm_diff": 0.1094, "norm_loss": 0.0, "num_token_doc": 67.0192, "num_token_overlap": 11.7198, "num_token_query": 31.9742, "num_token_union": 65.5679, "num_word_context": 202.4093, "num_word_doc": 50.0297, "num_word_query": 23.6179, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1269.0315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0883, "query_norm": 1.2246, "queue_k_norm": 1.3299, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9742, "sent_len_1": 67.0192, "sent_len_max_0": 127.3487, "sent_len_max_1": 187.2262, "stdk": 0.0469, "stdq": 0.0412, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36500 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 4.1852, "doc_norm": 1.3367, "encoder_q-embeddings": 731.6634, "encoder_q-layer.0": 496.339, "encoder_q-layer.1": 514.0212, "encoder_q-layer.10": 637.5527, "encoder_q-layer.11": 1549.2246, "encoder_q-layer.2": 577.301, "encoder_q-layer.3": 609.857, "encoder_q-layer.4": 639.2559, "encoder_q-layer.5": 607.9256, "encoder_q-layer.6": 642.9197, "encoder_q-layer.7": 645.1682, "encoder_q-layer.8": 678.1881, "encoder_q-layer.9": 570.7568, "epoch": 0.24, "inbatch_neg_score": 0.0888, "inbatch_pos_score": 0.6421, "learning_rate": 3.522222222222222e-05, "loss": 4.1852, "norm_diff": 0.0559, "norm_loss": 0.0, "num_token_doc": 66.5305, "num_token_overlap": 11.6569, "num_token_query": 31.7808, "num_token_union": 65.1468, "num_word_context": 201.9561, "num_word_doc": 49.6125, "num_word_query": 23.4602, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1111.7134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0886, "query_norm": 1.2808, "queue_k_norm": 1.3291, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7808, "sent_len_1": 66.5305, "sent_len_max_0": 127.5625, "sent_len_max_1": 188.2463, "stdk": 0.047, "stdq": 0.0433, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36600 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.1941, "doc_norm": 1.3287, "encoder_q-embeddings": 1806.8704, "encoder_q-layer.0": 1263.9788, "encoder_q-layer.1": 1220.8896, "encoder_q-layer.10": 649.4602, "encoder_q-layer.11": 1594.9031, "encoder_q-layer.2": 1315.3975, "encoder_q-layer.3": 1231.0076, "encoder_q-layer.4": 1332.0021, "encoder_q-layer.5": 1165.4039, "encoder_q-layer.6": 1175.4351, "encoder_q-layer.7": 1022.8596, "encoder_q-layer.8": 855.8275, "encoder_q-layer.9": 658.5193, "epoch": 0.24, "inbatch_neg_score": 0.0887, "inbatch_pos_score": 0.5996, "learning_rate": 3.516666666666667e-05, "loss": 4.1941, "norm_diff": 0.0644, "norm_loss": 0.0, "num_token_doc": 66.8392, "num_token_overlap": 11.679, "num_token_query": 31.8243, "num_token_union": 65.3353, "num_word_context": 202.3624, "num_word_doc": 49.8606, "num_word_query": 23.5166, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1888.9863, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0882, "query_norm": 1.2642, "queue_k_norm": 1.3307, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8243, "sent_len_1": 66.8392, "sent_len_max_0": 127.5, "sent_len_max_1": 190.3875, "stdk": 0.0467, "stdq": 0.0423, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 36700 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.1948, "doc_norm": 1.3288, "encoder_q-embeddings": 684.7654, "encoder_q-layer.0": 455.5755, "encoder_q-layer.1": 475.9491, "encoder_q-layer.10": 607.9802, "encoder_q-layer.11": 1516.5444, "encoder_q-layer.2": 511.1423, "encoder_q-layer.3": 515.3214, "encoder_q-layer.4": 520.658, "encoder_q-layer.5": 501.6346, "encoder_q-layer.6": 550.0569, "encoder_q-layer.7": 569.7257, "encoder_q-layer.8": 663.7833, "encoder_q-layer.9": 601.1552, "epoch": 0.24, "inbatch_neg_score": 0.0902, "inbatch_pos_score": 0.6025, "learning_rate": 3.511111111111111e-05, "loss": 4.1948, "norm_diff": 0.0477, "norm_loss": 0.0, "num_token_doc": 66.7271, "num_token_overlap": 11.6506, "num_token_query": 31.8708, "num_token_union": 65.3132, "num_word_context": 202.2455, "num_word_doc": 49.7249, "num_word_query": 23.5164, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1025.8178, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0895, "query_norm": 1.281, "queue_k_norm": 1.3276, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8708, "sent_len_1": 66.7271, "sent_len_max_0": 127.5113, "sent_len_max_1": 190.9425, "stdk": 0.0467, "stdq": 0.0426, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36800 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.1892, "doc_norm": 1.3306, "encoder_q-embeddings": 1178.8621, "encoder_q-layer.0": 811.1371, "encoder_q-layer.1": 899.2797, "encoder_q-layer.10": 631.1008, "encoder_q-layer.11": 1542.2896, "encoder_q-layer.2": 1001.578, "encoder_q-layer.3": 1028.4426, "encoder_q-layer.4": 1005.4478, "encoder_q-layer.5": 1005.2866, "encoder_q-layer.6": 1049.3353, "encoder_q-layer.7": 1011.4929, "encoder_q-layer.8": 887.4526, "encoder_q-layer.9": 607.0975, "epoch": 0.24, "inbatch_neg_score": 0.0971, "inbatch_pos_score": 0.5884, "learning_rate": 3.505555555555556e-05, "loss": 4.1892, "norm_diff": 0.0578, "norm_loss": 0.0, "num_token_doc": 66.6625, "num_token_overlap": 11.691, "num_token_query": 31.9795, "num_token_union": 65.3179, "num_word_context": 202.2375, "num_word_doc": 49.7178, "num_word_query": 23.6029, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1508.8502, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0945, "query_norm": 1.2728, "queue_k_norm": 1.3279, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9795, "sent_len_1": 66.6625, "sent_len_max_0": 127.5187, "sent_len_max_1": 191.1062, "stdk": 0.0468, "stdq": 0.0421, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 36900 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.2171, "doc_norm": 1.3322, "encoder_q-embeddings": 854.6715, "encoder_q-layer.0": 585.8581, "encoder_q-layer.1": 661.7152, "encoder_q-layer.10": 651.9911, "encoder_q-layer.11": 1542.0681, "encoder_q-layer.2": 724.8875, "encoder_q-layer.3": 751.4727, "encoder_q-layer.4": 796.0989, "encoder_q-layer.5": 787.2787, "encoder_q-layer.6": 783.9033, "encoder_q-layer.7": 680.0988, "encoder_q-layer.8": 735.0727, "encoder_q-layer.9": 612.1118, "epoch": 0.24, "inbatch_neg_score": 0.1007, "inbatch_pos_score": 0.6201, "learning_rate": 3.5e-05, "loss": 4.2171, "norm_diff": 0.0551, "norm_loss": 0.0, "num_token_doc": 66.6213, "num_token_overlap": 11.6642, "num_token_query": 31.9426, "num_token_union": 65.2809, "num_word_context": 202.0251, "num_word_doc": 49.6819, "num_word_query": 23.6054, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1214.2449, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1003, "query_norm": 1.2771, "queue_k_norm": 1.329, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9426, "sent_len_1": 66.6213, "sent_len_max_0": 127.5413, "sent_len_max_1": 188.6775, "stdk": 0.0469, "stdq": 0.0423, "stdqueue_k": 0.0468, "stdqueue_q": 0.0, "step": 37000 }, { "accuracy": 41.0156, "active_queue_size": 16384.0, "cl_loss": 4.1875, "doc_norm": 1.3322, "encoder_q-embeddings": 928.9784, "encoder_q-layer.0": 613.9437, "encoder_q-layer.1": 688.1318, "encoder_q-layer.10": 600.5315, "encoder_q-layer.11": 1513.9967, "encoder_q-layer.2": 810.5642, "encoder_q-layer.3": 829.822, "encoder_q-layer.4": 888.6031, "encoder_q-layer.5": 865.9474, "encoder_q-layer.6": 929.2416, "encoder_q-layer.7": 800.3723, "encoder_q-layer.8": 788.509, "encoder_q-layer.9": 607.7766, "epoch": 0.24, "inbatch_neg_score": 0.1051, "inbatch_pos_score": 0.6016, "learning_rate": 3.4944444444444446e-05, "loss": 4.1875, "norm_diff": 0.0622, "norm_loss": 0.0, "num_token_doc": 66.9907, "num_token_overlap": 11.6625, "num_token_query": 31.9221, "num_token_union": 65.5204, "num_word_context": 202.6551, "num_word_doc": 49.9586, "num_word_query": 23.5645, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1308.8544, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1042, "query_norm": 1.27, "queue_k_norm": 1.3313, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9221, "sent_len_1": 66.9907, "sent_len_max_0": 127.5825, "sent_len_max_1": 191.8512, "stdk": 0.0468, "stdq": 0.0417, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37100 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.2061, "doc_norm": 1.334, "encoder_q-embeddings": 869.6489, "encoder_q-layer.0": 565.268, "encoder_q-layer.1": 584.7144, "encoder_q-layer.10": 641.8464, "encoder_q-layer.11": 1523.1582, "encoder_q-layer.2": 641.8239, "encoder_q-layer.3": 677.3878, "encoder_q-layer.4": 695.8391, "encoder_q-layer.5": 646.1552, "encoder_q-layer.6": 678.4634, "encoder_q-layer.7": 741.2657, "encoder_q-layer.8": 777.72, "encoder_q-layer.9": 672.3436, "epoch": 0.24, "inbatch_neg_score": 0.115, "inbatch_pos_score": 0.6206, "learning_rate": 3.4888888888888895e-05, "loss": 4.2061, "norm_diff": 0.0575, "norm_loss": 0.0, "num_token_doc": 66.9739, "num_token_overlap": 11.6797, "num_token_query": 31.9329, "num_token_union": 65.4829, "num_word_context": 202.7398, "num_word_doc": 49.9633, "num_word_query": 23.5578, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1207.4924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.114, "query_norm": 1.2765, "queue_k_norm": 1.3315, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9329, "sent_len_1": 66.9739, "sent_len_max_0": 127.5288, "sent_len_max_1": 189.7438, "stdk": 0.0469, "stdq": 0.042, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37200 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.1948, "doc_norm": 1.3342, "encoder_q-embeddings": 1005.5423, "encoder_q-layer.0": 684.9691, "encoder_q-layer.1": 762.0978, "encoder_q-layer.10": 660.8759, "encoder_q-layer.11": 1563.1768, "encoder_q-layer.2": 804.6514, "encoder_q-layer.3": 867.6329, "encoder_q-layer.4": 920.0703, "encoder_q-layer.5": 856.221, "encoder_q-layer.6": 824.0046, "encoder_q-layer.7": 909.3055, "encoder_q-layer.8": 809.3045, "encoder_q-layer.9": 615.8554, "epoch": 0.24, "inbatch_neg_score": 0.1158, "inbatch_pos_score": 0.6265, "learning_rate": 3.483333333333334e-05, "loss": 4.1948, "norm_diff": 0.0673, "norm_loss": 0.0, "num_token_doc": 66.9219, "num_token_overlap": 11.694, "num_token_query": 31.8549, "num_token_union": 65.3819, "num_word_context": 202.1927, "num_word_doc": 49.9741, "num_word_query": 23.5419, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1357.8464, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.116, "query_norm": 1.2668, "queue_k_norm": 1.331, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8549, "sent_len_1": 66.9219, "sent_len_max_0": 127.625, "sent_len_max_1": 190.2012, "stdk": 0.0468, "stdq": 0.0423, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37300 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.2061, "doc_norm": 1.3321, "encoder_q-embeddings": 844.0098, "encoder_q-layer.0": 573.5997, "encoder_q-layer.1": 618.1741, "encoder_q-layer.10": 684.8368, "encoder_q-layer.11": 1612.8031, "encoder_q-layer.2": 691.8179, "encoder_q-layer.3": 748.0234, "encoder_q-layer.4": 742.222, "encoder_q-layer.5": 664.903, "encoder_q-layer.6": 627.142, "encoder_q-layer.7": 639.1431, "encoder_q-layer.8": 709.0754, "encoder_q-layer.9": 636.4255, "epoch": 0.24, "inbatch_neg_score": 0.1165, "inbatch_pos_score": 0.6338, "learning_rate": 3.477777777777778e-05, "loss": 4.2061, "norm_diff": 0.0699, "norm_loss": 0.0, "num_token_doc": 66.826, "num_token_overlap": 11.6998, "num_token_query": 32.0637, "num_token_union": 65.4803, "num_word_context": 202.7633, "num_word_doc": 49.8702, "num_word_query": 23.7227, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1177.5137, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1166, "query_norm": 1.2622, "queue_k_norm": 1.3318, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0637, "sent_len_1": 66.826, "sent_len_max_0": 127.5037, "sent_len_max_1": 189.535, "stdk": 0.0468, "stdq": 0.0424, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37400 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.2034, "doc_norm": 1.3325, "encoder_q-embeddings": 752.1066, "encoder_q-layer.0": 501.3268, "encoder_q-layer.1": 533.8021, "encoder_q-layer.10": 628.1588, "encoder_q-layer.11": 1598.3391, "encoder_q-layer.2": 609.4655, "encoder_q-layer.3": 643.4839, "encoder_q-layer.4": 671.7812, "encoder_q-layer.5": 665.196, "encoder_q-layer.6": 730.8207, "encoder_q-layer.7": 712.7258, "encoder_q-layer.8": 704.8749, "encoder_q-layer.9": 601.9977, "epoch": 0.24, "inbatch_neg_score": 0.1191, "inbatch_pos_score": 0.6157, "learning_rate": 3.472222222222222e-05, "loss": 4.2034, "norm_diff": 0.0995, "norm_loss": 0.0, "num_token_doc": 66.684, "num_token_overlap": 11.6514, "num_token_query": 31.8867, "num_token_union": 65.3374, "num_word_context": 202.0312, "num_word_doc": 49.8011, "num_word_query": 23.5532, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1154.7281, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1193, "query_norm": 1.233, "queue_k_norm": 1.3351, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8867, "sent_len_1": 66.684, "sent_len_max_0": 127.6363, "sent_len_max_1": 187.3837, "stdk": 0.0467, "stdq": 0.0413, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37500 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.1959, "doc_norm": 1.3286, "encoder_q-embeddings": 918.356, "encoder_q-layer.0": 626.9265, "encoder_q-layer.1": 683.7198, "encoder_q-layer.10": 640.6749, "encoder_q-layer.11": 1723.8204, "encoder_q-layer.2": 796.5502, "encoder_q-layer.3": 827.4762, "encoder_q-layer.4": 915.3622, "encoder_q-layer.5": 852.7214, "encoder_q-layer.6": 794.6145, "encoder_q-layer.7": 740.0555, "encoder_q-layer.8": 757.9103, "encoder_q-layer.9": 622.537, "epoch": 0.24, "inbatch_neg_score": 0.1159, "inbatch_pos_score": 0.6323, "learning_rate": 3.466666666666667e-05, "loss": 4.1959, "norm_diff": 0.0859, "norm_loss": 0.0, "num_token_doc": 66.9409, "num_token_overlap": 11.7419, "num_token_query": 32.0207, "num_token_union": 65.4824, "num_word_context": 202.2765, "num_word_doc": 49.9302, "num_word_query": 23.6608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1316.9466, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1157, "query_norm": 1.2427, "queue_k_norm": 1.3357, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0207, "sent_len_1": 66.9409, "sent_len_max_0": 127.44, "sent_len_max_1": 189.8625, "stdk": 0.0466, "stdq": 0.0418, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37600 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.1882, "doc_norm": 1.3367, "encoder_q-embeddings": 986.4265, "encoder_q-layer.0": 705.2728, "encoder_q-layer.1": 757.1473, "encoder_q-layer.10": 625.463, "encoder_q-layer.11": 1612.6034, "encoder_q-layer.2": 806.061, "encoder_q-layer.3": 829.7111, "encoder_q-layer.4": 826.296, "encoder_q-layer.5": 787.0577, "encoder_q-layer.6": 732.922, "encoder_q-layer.7": 678.0918, "encoder_q-layer.8": 729.1813, "encoder_q-layer.9": 599.9193, "epoch": 0.25, "inbatch_neg_score": 0.1113, "inbatch_pos_score": 0.6294, "learning_rate": 3.4611111111111114e-05, "loss": 4.1882, "norm_diff": 0.0948, "norm_loss": 0.0, "num_token_doc": 66.7443, "num_token_overlap": 11.7483, "num_token_query": 32.1222, "num_token_union": 65.3959, "num_word_context": 202.1543, "num_word_doc": 49.8231, "num_word_query": 23.7481, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1298.1744, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1116, "query_norm": 1.2419, "queue_k_norm": 1.3374, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.1222, "sent_len_1": 66.7443, "sent_len_max_0": 127.4225, "sent_len_max_1": 190.0788, "stdk": 0.0469, "stdq": 0.0418, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 37700 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.1897, "doc_norm": 1.3376, "encoder_q-embeddings": 1061.3529, "encoder_q-layer.0": 774.3439, "encoder_q-layer.1": 864.5612, "encoder_q-layer.10": 666.5906, "encoder_q-layer.11": 1663.5823, "encoder_q-layer.2": 796.5197, "encoder_q-layer.3": 798.6975, "encoder_q-layer.4": 746.8472, "encoder_q-layer.5": 705.0082, "encoder_q-layer.6": 750.1039, "encoder_q-layer.7": 697.5883, "encoder_q-layer.8": 691.4614, "encoder_q-layer.9": 612.6069, "epoch": 0.25, "inbatch_neg_score": 0.1089, "inbatch_pos_score": 0.6191, "learning_rate": 3.4555555555555556e-05, "loss": 4.1897, "norm_diff": 0.1017, "norm_loss": 0.0, "num_token_doc": 66.6707, "num_token_overlap": 11.6716, "num_token_query": 31.9456, "num_token_union": 65.3065, "num_word_context": 202.1595, "num_word_doc": 49.7762, "num_word_query": 23.5974, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1325.9245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1089, "query_norm": 1.2359, "queue_k_norm": 1.3354, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9456, "sent_len_1": 66.6707, "sent_len_max_0": 127.5588, "sent_len_max_1": 189.1987, "stdk": 0.0469, "stdq": 0.0417, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 37800 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.2052, "doc_norm": 1.3373, "encoder_q-embeddings": 680.623, "encoder_q-layer.0": 458.0573, "encoder_q-layer.1": 501.6741, "encoder_q-layer.10": 607.806, "encoder_q-layer.11": 1483.8044, "encoder_q-layer.2": 580.0751, "encoder_q-layer.3": 590.5854, "encoder_q-layer.4": 648.737, "encoder_q-layer.5": 617.4504, "encoder_q-layer.6": 629.2166, "encoder_q-layer.7": 641.6814, "encoder_q-layer.8": 691.3044, "encoder_q-layer.9": 579.572, "epoch": 0.25, "inbatch_neg_score": 0.1102, "inbatch_pos_score": 0.6328, "learning_rate": 3.45e-05, "loss": 4.2052, "norm_diff": 0.08, "norm_loss": 0.0, "num_token_doc": 66.6985, "num_token_overlap": 11.6854, "num_token_query": 31.8562, "num_token_union": 65.2601, "num_word_context": 202.552, "num_word_doc": 49.7571, "num_word_query": 23.5274, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1061.193, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1092, "query_norm": 1.2573, "queue_k_norm": 1.3367, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8562, "sent_len_1": 66.6985, "sent_len_max_0": 127.4237, "sent_len_max_1": 187.475, "stdk": 0.0469, "stdq": 0.0426, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 37900 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.1963, "doc_norm": 1.3319, "encoder_q-embeddings": 763.4291, "encoder_q-layer.0": 512.9421, "encoder_q-layer.1": 535.6034, "encoder_q-layer.10": 649.0513, "encoder_q-layer.11": 1584.3757, "encoder_q-layer.2": 619.2594, "encoder_q-layer.3": 608.2355, "encoder_q-layer.4": 625.744, "encoder_q-layer.5": 634.1587, "encoder_q-layer.6": 723.3548, "encoder_q-layer.7": 739.5615, "encoder_q-layer.8": 739.2358, "encoder_q-layer.9": 629.1268, "epoch": 0.25, "inbatch_neg_score": 0.1056, "inbatch_pos_score": 0.6113, "learning_rate": 3.444444444444445e-05, "loss": 4.1963, "norm_diff": 0.1024, "norm_loss": 0.0, "num_token_doc": 66.9433, "num_token_overlap": 11.6845, "num_token_query": 31.9286, "num_token_union": 65.4366, "num_word_context": 202.2473, "num_word_doc": 49.8971, "num_word_query": 23.559, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1157.671, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1053, "query_norm": 1.2295, "queue_k_norm": 1.3389, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9286, "sent_len_1": 66.9433, "sent_len_max_0": 127.5512, "sent_len_max_1": 189.5375, "stdk": 0.0466, "stdq": 0.0416, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38000 }, { "accuracy": 42.7734, "active_queue_size": 16384.0, "cl_loss": 4.1943, "doc_norm": 1.3391, "encoder_q-embeddings": 2111.6506, "encoder_q-layer.0": 1607.1631, "encoder_q-layer.1": 1977.8192, "encoder_q-layer.10": 651.2692, "encoder_q-layer.11": 1557.2355, "encoder_q-layer.2": 2234.0464, "encoder_q-layer.3": 1839.8175, "encoder_q-layer.4": 1671.8241, "encoder_q-layer.5": 1712.4287, "encoder_q-layer.6": 1928.9607, "encoder_q-layer.7": 1733.7545, "encoder_q-layer.8": 1051.1691, "encoder_q-layer.9": 648.3228, "epoch": 0.25, "inbatch_neg_score": 0.0994, "inbatch_pos_score": 0.6094, "learning_rate": 3.438888888888889e-05, "loss": 4.1943, "norm_diff": 0.12, "norm_loss": 0.0, "num_token_doc": 66.7702, "num_token_overlap": 11.6873, "num_token_query": 31.9332, "num_token_union": 65.3536, "num_word_context": 202.1106, "num_word_doc": 49.8069, "num_word_query": 23.5785, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2524.4142, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0995, "query_norm": 1.2191, "queue_k_norm": 1.338, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9332, "sent_len_1": 66.7702, "sent_len_max_0": 127.5362, "sent_len_max_1": 189.6987, "stdk": 0.0469, "stdq": 0.0412, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38100 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.1819, "doc_norm": 1.3427, "encoder_q-embeddings": 697.2944, "encoder_q-layer.0": 462.7946, "encoder_q-layer.1": 474.4038, "encoder_q-layer.10": 652.5497, "encoder_q-layer.11": 1586.6486, "encoder_q-layer.2": 551.5476, "encoder_q-layer.3": 559.9506, "encoder_q-layer.4": 588.2076, "encoder_q-layer.5": 577.9332, "encoder_q-layer.6": 615.3834, "encoder_q-layer.7": 623.0646, "encoder_q-layer.8": 670.7243, "encoder_q-layer.9": 595.8032, "epoch": 0.25, "inbatch_neg_score": 0.0963, "inbatch_pos_score": 0.6265, "learning_rate": 3.433333333333333e-05, "loss": 4.1819, "norm_diff": 0.1076, "norm_loss": 0.0, "num_token_doc": 66.9664, "num_token_overlap": 11.6872, "num_token_query": 32.0571, "num_token_union": 65.5681, "num_word_context": 202.2686, "num_word_doc": 49.9733, "num_word_query": 23.6808, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1084.3865, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0966, "query_norm": 1.2351, "queue_k_norm": 1.3366, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0571, "sent_len_1": 66.9664, "sent_len_max_0": 127.4725, "sent_len_max_1": 190.4925, "stdk": 0.0471, "stdq": 0.0418, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38200 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.1721, "doc_norm": 1.3389, "encoder_q-embeddings": 4381.7725, "encoder_q-layer.0": 3282.0808, "encoder_q-layer.1": 3691.4646, "encoder_q-layer.10": 1250.0972, "encoder_q-layer.11": 3056.7373, "encoder_q-layer.2": 4953.8071, "encoder_q-layer.3": 5019.5166, "encoder_q-layer.4": 5630.9062, "encoder_q-layer.5": 4065.3293, "encoder_q-layer.6": 4649.5552, "encoder_q-layer.7": 4814.041, "encoder_q-layer.8": 3380.6904, "encoder_q-layer.9": 1522.5497, "epoch": 0.25, "inbatch_neg_score": 0.0954, "inbatch_pos_score": 0.5947, "learning_rate": 3.427777777777778e-05, "loss": 4.1721, "norm_diff": 0.1108, "norm_loss": 0.0, "num_token_doc": 66.985, "num_token_overlap": 11.6563, "num_token_query": 31.6722, "num_token_union": 65.3262, "num_word_context": 202.6957, "num_word_doc": 49.9468, "num_word_query": 23.3904, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5999.8224, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.0948, "query_norm": 1.2281, "queue_k_norm": 1.3364, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.6722, "sent_len_1": 66.985, "sent_len_max_0": 127.485, "sent_len_max_1": 191.3925, "stdk": 0.047, "stdq": 0.0415, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38300 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.2008, "doc_norm": 1.3329, "encoder_q-embeddings": 1498.5803, "encoder_q-layer.0": 1012.1828, "encoder_q-layer.1": 1143.0923, "encoder_q-layer.10": 1162.254, "encoder_q-layer.11": 2904.4138, "encoder_q-layer.2": 1321.3885, "encoder_q-layer.3": 1353.8867, "encoder_q-layer.4": 1451.7838, "encoder_q-layer.5": 1383.6489, "encoder_q-layer.6": 1501.5967, "encoder_q-layer.7": 1381.3594, "encoder_q-layer.8": 1410.6118, "encoder_q-layer.9": 1173.9102, "epoch": 0.25, "inbatch_neg_score": 0.0851, "inbatch_pos_score": 0.5957, "learning_rate": 3.4222222222222224e-05, "loss": 4.2008, "norm_diff": 0.1047, "norm_loss": 0.0, "num_token_doc": 66.709, "num_token_overlap": 11.6854, "num_token_query": 31.9824, "num_token_union": 65.3362, "num_word_context": 202.3496, "num_word_doc": 49.7889, "num_word_query": 23.6332, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2243.5033, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0857, "query_norm": 1.2283, "queue_k_norm": 1.335, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9824, "sent_len_1": 66.709, "sent_len_max_0": 127.5075, "sent_len_max_1": 188.0163, "stdk": 0.0468, "stdq": 0.0414, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 38400 }, { "accuracy": 39.2578, "active_queue_size": 16384.0, "cl_loss": 4.1813, "doc_norm": 1.3375, "encoder_q-embeddings": 1414.3119, "encoder_q-layer.0": 956.7159, "encoder_q-layer.1": 982.5804, "encoder_q-layer.10": 1355.9365, "encoder_q-layer.11": 2961.4646, "encoder_q-layer.2": 1089.9753, "encoder_q-layer.3": 1195.4091, "encoder_q-layer.4": 1237.2065, "encoder_q-layer.5": 1238.4095, "encoder_q-layer.6": 1362.9691, "encoder_q-layer.7": 1362.3995, "encoder_q-layer.8": 1294.6415, "encoder_q-layer.9": 1240.0695, "epoch": 0.25, "inbatch_neg_score": 0.09, "inbatch_pos_score": 0.5864, "learning_rate": 3.4166666666666666e-05, "loss": 4.1813, "norm_diff": 0.103, "norm_loss": 0.0, "num_token_doc": 66.913, "num_token_overlap": 11.6746, "num_token_query": 31.9687, "num_token_union": 65.4431, "num_word_context": 202.6483, "num_word_doc": 49.9057, "num_word_query": 23.6214, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2148.3921, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0883, "query_norm": 1.2344, "queue_k_norm": 1.3371, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9687, "sent_len_1": 66.913, "sent_len_max_0": 127.4625, "sent_len_max_1": 192.6612, "stdk": 0.0469, "stdq": 0.0416, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38500 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.176, "doc_norm": 1.3332, "encoder_q-embeddings": 1571.3163, "encoder_q-layer.0": 1029.5835, "encoder_q-layer.1": 1164.3337, "encoder_q-layer.10": 1423.3871, "encoder_q-layer.11": 3023.7024, "encoder_q-layer.2": 1350.2734, "encoder_q-layer.3": 1526.9248, "encoder_q-layer.4": 1759.624, "encoder_q-layer.5": 1736.5464, "encoder_q-layer.6": 1848.4147, "encoder_q-layer.7": 1666.3516, "encoder_q-layer.8": 1648.7411, "encoder_q-layer.9": 1415.3229, "epoch": 0.25, "inbatch_neg_score": 0.0921, "inbatch_pos_score": 0.6099, "learning_rate": 3.411111111111111e-05, "loss": 4.176, "norm_diff": 0.0669, "norm_loss": 0.0, "num_token_doc": 66.9093, "num_token_overlap": 11.7266, "num_token_query": 32.002, "num_token_union": 65.4602, "num_word_context": 202.5188, "num_word_doc": 49.928, "num_word_query": 23.6365, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2509.4951, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0916, "query_norm": 1.2663, "queue_k_norm": 1.3362, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.002, "sent_len_1": 66.9093, "sent_len_max_0": 127.335, "sent_len_max_1": 188.8938, "stdk": 0.0468, "stdq": 0.0423, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38600 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.1999, "doc_norm": 1.3315, "encoder_q-embeddings": 1579.6993, "encoder_q-layer.0": 1063.1334, "encoder_q-layer.1": 1069.1105, "encoder_q-layer.10": 1252.0171, "encoder_q-layer.11": 3009.8711, "encoder_q-layer.2": 1289.0096, "encoder_q-layer.3": 1368.5613, "encoder_q-layer.4": 1459.7855, "encoder_q-layer.5": 1316.4589, "encoder_q-layer.6": 1447.3187, "encoder_q-layer.7": 1424.5372, "encoder_q-layer.8": 1478.6542, "encoder_q-layer.9": 1243.344, "epoch": 0.25, "inbatch_neg_score": 0.0915, "inbatch_pos_score": 0.604, "learning_rate": 3.405555555555556e-05, "loss": 4.1999, "norm_diff": 0.0882, "norm_loss": 0.0, "num_token_doc": 66.7982, "num_token_overlap": 11.7126, "num_token_query": 32.0309, "num_token_union": 65.3921, "num_word_context": 202.2062, "num_word_doc": 49.8233, "num_word_query": 23.6711, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2325.3167, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0917, "query_norm": 1.2433, "queue_k_norm": 1.3359, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0309, "sent_len_1": 66.7982, "sent_len_max_0": 127.5375, "sent_len_max_1": 191.4062, "stdk": 0.0468, "stdq": 0.0414, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38700 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 4.1959, "doc_norm": 1.333, "encoder_q-embeddings": 1353.0364, "encoder_q-layer.0": 931.2302, "encoder_q-layer.1": 987.7249, "encoder_q-layer.10": 1170.9419, "encoder_q-layer.11": 2875.2722, "encoder_q-layer.2": 1110.7266, "encoder_q-layer.3": 1165.7595, "encoder_q-layer.4": 1189.2528, "encoder_q-layer.5": 1111.546, "encoder_q-layer.6": 1209.1527, "encoder_q-layer.7": 1218.5695, "encoder_q-layer.8": 1341.8019, "encoder_q-layer.9": 1121.3927, "epoch": 0.25, "inbatch_neg_score": 0.0981, "inbatch_pos_score": 0.6318, "learning_rate": 3.4000000000000007e-05, "loss": 4.1959, "norm_diff": 0.071, "norm_loss": 0.0, "num_token_doc": 66.8513, "num_token_overlap": 11.6298, "num_token_query": 31.6542, "num_token_union": 65.3326, "num_word_context": 202.3644, "num_word_doc": 49.8695, "num_word_query": 23.3592, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2087.2119, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0974, "query_norm": 1.262, "queue_k_norm": 1.3365, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.6542, "sent_len_1": 66.8513, "sent_len_max_0": 127.315, "sent_len_max_1": 188.7525, "stdk": 0.0468, "stdq": 0.0421, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38800 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.1746, "doc_norm": 1.3397, "encoder_q-embeddings": 1357.616, "encoder_q-layer.0": 895.1581, "encoder_q-layer.1": 910.8173, "encoder_q-layer.10": 1248.6664, "encoder_q-layer.11": 2989.1653, "encoder_q-layer.2": 1029.0371, "encoder_q-layer.3": 1068.056, "encoder_q-layer.4": 1151.4358, "encoder_q-layer.5": 1112.1843, "encoder_q-layer.6": 1175.3359, "encoder_q-layer.7": 1193.1893, "encoder_q-layer.8": 1348.0077, "encoder_q-layer.9": 1249.6936, "epoch": 0.25, "inbatch_neg_score": 0.0959, "inbatch_pos_score": 0.6245, "learning_rate": 3.394444444444444e-05, "loss": 4.1746, "norm_diff": 0.0529, "norm_loss": 0.0, "num_token_doc": 66.877, "num_token_overlap": 11.6891, "num_token_query": 31.9892, "num_token_union": 65.4648, "num_word_context": 202.5512, "num_word_doc": 49.8373, "num_word_query": 23.6259, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2125.6495, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0955, "query_norm": 1.2868, "queue_k_norm": 1.3345, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9892, "sent_len_1": 66.877, "sent_len_max_0": 127.4963, "sent_len_max_1": 191.23, "stdk": 0.047, "stdq": 0.0431, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 38900 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.1875, "doc_norm": 1.3365, "encoder_q-embeddings": 1499.9783, "encoder_q-layer.0": 992.1783, "encoder_q-layer.1": 1025.973, "encoder_q-layer.10": 1235.2328, "encoder_q-layer.11": 3030.7781, "encoder_q-layer.2": 1144.7614, "encoder_q-layer.3": 1208.5002, "encoder_q-layer.4": 1345.9762, "encoder_q-layer.5": 1339.1991, "encoder_q-layer.6": 1565.4407, "encoder_q-layer.7": 1445.0089, "encoder_q-layer.8": 1407.2837, "encoder_q-layer.9": 1268.6221, "epoch": 0.25, "inbatch_neg_score": 0.1015, "inbatch_pos_score": 0.6089, "learning_rate": 3.388888888888889e-05, "loss": 4.1875, "norm_diff": 0.0763, "norm_loss": 0.0, "num_token_doc": 66.549, "num_token_overlap": 11.6747, "num_token_query": 31.9218, "num_token_union": 65.2435, "num_word_context": 202.3966, "num_word_doc": 49.7035, "num_word_query": 23.5864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2292.9108, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1003, "query_norm": 1.2602, "queue_k_norm": 1.3326, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9218, "sent_len_1": 66.549, "sent_len_max_0": 127.4175, "sent_len_max_1": 188.355, "stdk": 0.047, "stdq": 0.0422, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 39000 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.2048, "doc_norm": 1.3474, "encoder_q-embeddings": 2118.9426, "encoder_q-layer.0": 1449.3622, "encoder_q-layer.1": 1614.7032, "encoder_q-layer.10": 1435.8171, "encoder_q-layer.11": 3256.2993, "encoder_q-layer.2": 1853.3875, "encoder_q-layer.3": 1928.5118, "encoder_q-layer.4": 1941.2556, "encoder_q-layer.5": 1915.2871, "encoder_q-layer.6": 1904.1761, "encoder_q-layer.7": 1815.8069, "encoder_q-layer.8": 1737.3342, "encoder_q-layer.9": 1429.7021, "epoch": 0.25, "inbatch_neg_score": 0.1008, "inbatch_pos_score": 0.6289, "learning_rate": 3.3833333333333334e-05, "loss": 4.2048, "norm_diff": 0.0613, "norm_loss": 0.0, "num_token_doc": 66.7414, "num_token_overlap": 11.6194, "num_token_query": 31.8809, "num_token_union": 65.352, "num_word_context": 201.8306, "num_word_doc": 49.7682, "num_word_query": 23.5081, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2904.2312, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1005, "query_norm": 1.2861, "queue_k_norm": 1.3347, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8809, "sent_len_1": 66.7414, "sent_len_max_0": 127.61, "sent_len_max_1": 189.9112, "stdk": 0.0473, "stdq": 0.0433, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 39100 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.1796, "doc_norm": 1.339, "encoder_q-embeddings": 1501.6824, "encoder_q-layer.0": 970.683, "encoder_q-layer.1": 1012.5726, "encoder_q-layer.10": 1186.6409, "encoder_q-layer.11": 2960.1057, "encoder_q-layer.2": 1128.7141, "encoder_q-layer.3": 1188.7872, "encoder_q-layer.4": 1343.4989, "encoder_q-layer.5": 1202.2791, "encoder_q-layer.6": 1299.6115, "encoder_q-layer.7": 1301.6106, "encoder_q-layer.8": 1421.7593, "encoder_q-layer.9": 1214.5983, "epoch": 0.26, "inbatch_neg_score": 0.1025, "inbatch_pos_score": 0.6436, "learning_rate": 3.377777777777778e-05, "loss": 4.1796, "norm_diff": 0.0748, "norm_loss": 0.0, "num_token_doc": 66.6585, "num_token_overlap": 11.6439, "num_token_query": 31.8183, "num_token_union": 65.2402, "num_word_context": 201.9074, "num_word_doc": 49.7005, "num_word_query": 23.4942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2228.6747, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1039, "query_norm": 1.2643, "queue_k_norm": 1.3321, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8183, "sent_len_1": 66.6585, "sent_len_max_0": 127.4638, "sent_len_max_1": 190.3887, "stdk": 0.0471, "stdq": 0.0425, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 39200 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.2009, "doc_norm": 1.3378, "encoder_q-embeddings": 1180.6793, "encoder_q-layer.0": 772.9693, "encoder_q-layer.1": 806.4966, "encoder_q-layer.10": 1449.7057, "encoder_q-layer.11": 3339.5044, "encoder_q-layer.2": 887.5291, "encoder_q-layer.3": 976.4055, "encoder_q-layer.4": 1026.9043, "encoder_q-layer.5": 1004.6547, "encoder_q-layer.6": 1170.5475, "encoder_q-layer.7": 1301.769, "encoder_q-layer.8": 1436.2018, "encoder_q-layer.9": 1312.6591, "epoch": 0.26, "inbatch_neg_score": 0.1052, "inbatch_pos_score": 0.604, "learning_rate": 3.3722222222222225e-05, "loss": 4.2009, "norm_diff": 0.0906, "norm_loss": 0.0, "num_token_doc": 66.4759, "num_token_overlap": 11.6333, "num_token_query": 31.7979, "num_token_union": 65.0952, "num_word_context": 202.1761, "num_word_doc": 49.5922, "num_word_query": 23.4692, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2141.9991, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1051, "query_norm": 1.2472, "queue_k_norm": 1.3334, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7979, "sent_len_1": 66.4759, "sent_len_max_0": 127.5713, "sent_len_max_1": 188.9638, "stdk": 0.047, "stdq": 0.0418, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 39300 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.1798, "doc_norm": 1.3363, "encoder_q-embeddings": 3999.4421, "encoder_q-layer.0": 2752.4595, "encoder_q-layer.1": 3109.0312, "encoder_q-layer.10": 1304.8436, "encoder_q-layer.11": 3158.5767, "encoder_q-layer.2": 3302.0308, "encoder_q-layer.3": 3441.9397, "encoder_q-layer.4": 3686.4497, "encoder_q-layer.5": 3428.821, "encoder_q-layer.6": 3636.8271, "encoder_q-layer.7": 3993.1453, "encoder_q-layer.8": 2961.3027, "encoder_q-layer.9": 1567.5161, "epoch": 0.26, "inbatch_neg_score": 0.1029, "inbatch_pos_score": 0.6094, "learning_rate": 3.366666666666667e-05, "loss": 4.1798, "norm_diff": 0.0791, "norm_loss": 0.0, "num_token_doc": 66.6215, "num_token_overlap": 11.6363, "num_token_query": 31.8092, "num_token_union": 65.2839, "num_word_context": 202.3741, "num_word_doc": 49.7663, "num_word_query": 23.4695, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4934.3694, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1031, "query_norm": 1.2572, "queue_k_norm": 1.336, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8092, "sent_len_1": 66.6215, "sent_len_max_0": 127.5563, "sent_len_max_1": 188.6262, "stdk": 0.0469, "stdq": 0.0426, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 39400 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.1658, "doc_norm": 1.3443, "encoder_q-embeddings": 3495.1926, "encoder_q-layer.0": 2409.6799, "encoder_q-layer.1": 3017.4119, "encoder_q-layer.10": 1299.5298, "encoder_q-layer.11": 3120.2373, "encoder_q-layer.2": 3418.6211, "encoder_q-layer.3": 3473.8499, "encoder_q-layer.4": 3296.3674, "encoder_q-layer.5": 3349.8965, "encoder_q-layer.6": 2896.8076, "encoder_q-layer.7": 2332.2083, "encoder_q-layer.8": 1798.9025, "encoder_q-layer.9": 1349.0942, "epoch": 0.26, "inbatch_neg_score": 0.1022, "inbatch_pos_score": 0.6162, "learning_rate": 3.3611111111111116e-05, "loss": 4.1658, "norm_diff": 0.1108, "norm_loss": 0.0, "num_token_doc": 67.0034, "num_token_overlap": 11.6962, "num_token_query": 31.9811, "num_token_union": 65.5229, "num_word_context": 202.5958, "num_word_doc": 50.0008, "num_word_query": 23.6294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4248.4283, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1035, "query_norm": 1.2335, "queue_k_norm": 1.3341, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9811, "sent_len_1": 67.0034, "sent_len_max_0": 127.5075, "sent_len_max_1": 189.1037, "stdk": 0.0472, "stdq": 0.0418, "stdqueue_k": 0.0469, "stdqueue_q": 0.0, "step": 39500 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.1868, "doc_norm": 1.3351, "encoder_q-embeddings": 1630.8344, "encoder_q-layer.0": 1105.0299, "encoder_q-layer.1": 1191.8982, "encoder_q-layer.10": 1284.7955, "encoder_q-layer.11": 3270.686, "encoder_q-layer.2": 1347.9977, "encoder_q-layer.3": 1333.4209, "encoder_q-layer.4": 1377.9362, "encoder_q-layer.5": 1378.6608, "encoder_q-layer.6": 1485.4585, "encoder_q-layer.7": 1266.238, "encoder_q-layer.8": 1273.2882, "encoder_q-layer.9": 1210.2023, "epoch": 0.26, "inbatch_neg_score": 0.1027, "inbatch_pos_score": 0.6279, "learning_rate": 3.355555555555556e-05, "loss": 4.1868, "norm_diff": 0.0876, "norm_loss": 0.0, "num_token_doc": 66.8469, "num_token_overlap": 11.6031, "num_token_query": 31.6371, "num_token_union": 65.2875, "num_word_context": 201.6925, "num_word_doc": 49.9035, "num_word_query": 23.3371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2325.1422, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1027, "query_norm": 1.2474, "queue_k_norm": 1.3362, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.6371, "sent_len_1": 66.8469, "sent_len_max_0": 127.5325, "sent_len_max_1": 190.0962, "stdk": 0.0469, "stdq": 0.0425, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 39600 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.1628, "doc_norm": 1.3407, "encoder_q-embeddings": 1713.8086, "encoder_q-layer.0": 1109.7186, "encoder_q-layer.1": 1219.4053, "encoder_q-layer.10": 1250.8865, "encoder_q-layer.11": 3165.0962, "encoder_q-layer.2": 1496.9974, "encoder_q-layer.3": 1595.762, "encoder_q-layer.4": 1701.9565, "encoder_q-layer.5": 1718.3499, "encoder_q-layer.6": 1658.1658, "encoder_q-layer.7": 1628.1112, "encoder_q-layer.8": 1640.1265, "encoder_q-layer.9": 1296.6847, "epoch": 0.26, "inbatch_neg_score": 0.1003, "inbatch_pos_score": 0.604, "learning_rate": 3.35e-05, "loss": 4.1628, "norm_diff": 0.1175, "norm_loss": 0.0, "num_token_doc": 66.5962, "num_token_overlap": 11.6529, "num_token_query": 31.9154, "num_token_union": 65.2762, "num_word_context": 202.0934, "num_word_doc": 49.7203, "num_word_query": 23.5885, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2521.8557, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1013, "query_norm": 1.2232, "queue_k_norm": 1.3365, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9154, "sent_len_1": 66.5962, "sent_len_max_0": 127.59, "sent_len_max_1": 189.0037, "stdk": 0.0471, "stdq": 0.0414, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 39700 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.1666, "doc_norm": 1.3305, "encoder_q-embeddings": 1265.9227, "encoder_q-layer.0": 870.5502, "encoder_q-layer.1": 932.6779, "encoder_q-layer.10": 1189.2725, "encoder_q-layer.11": 3034.5645, "encoder_q-layer.2": 1045.923, "encoder_q-layer.3": 1066.464, "encoder_q-layer.4": 1161.2799, "encoder_q-layer.5": 1123.5438, "encoder_q-layer.6": 1193.1685, "encoder_q-layer.7": 1311.5594, "encoder_q-layer.8": 1351.5472, "encoder_q-layer.9": 1193.9141, "epoch": 0.26, "inbatch_neg_score": 0.1023, "inbatch_pos_score": 0.6299, "learning_rate": 3.3444444444444443e-05, "loss": 4.1666, "norm_diff": 0.0898, "norm_loss": 0.0, "num_token_doc": 66.6907, "num_token_overlap": 11.7144, "num_token_query": 31.9674, "num_token_union": 65.3545, "num_word_context": 202.3913, "num_word_doc": 49.7973, "num_word_query": 23.6122, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2061.1015, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1016, "query_norm": 1.2407, "queue_k_norm": 1.3385, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9674, "sent_len_1": 66.6907, "sent_len_max_0": 127.4775, "sent_len_max_1": 187.4062, "stdk": 0.0467, "stdq": 0.0421, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 39800 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.1532, "doc_norm": 1.3387, "encoder_q-embeddings": 1954.453, "encoder_q-layer.0": 1323.004, "encoder_q-layer.1": 1494.1083, "encoder_q-layer.10": 1339.5522, "encoder_q-layer.11": 3054.0864, "encoder_q-layer.2": 1685.2229, "encoder_q-layer.3": 1819.3657, "encoder_q-layer.4": 1987.2413, "encoder_q-layer.5": 2156.592, "encoder_q-layer.6": 2102.8176, "encoder_q-layer.7": 1617.0238, "encoder_q-layer.8": 1641.1881, "encoder_q-layer.9": 1344.5568, "epoch": 0.26, "inbatch_neg_score": 0.1046, "inbatch_pos_score": 0.6206, "learning_rate": 3.338888888888889e-05, "loss": 4.1532, "norm_diff": 0.1216, "norm_loss": 0.0, "num_token_doc": 66.954, "num_token_overlap": 11.7561, "num_token_query": 32.0978, "num_token_union": 65.4954, "num_word_context": 202.4349, "num_word_doc": 49.9503, "num_word_query": 23.6978, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2791.6755, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1044, "query_norm": 1.2171, "queue_k_norm": 1.3379, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0978, "sent_len_1": 66.954, "sent_len_max_0": 127.725, "sent_len_max_1": 188.7025, "stdk": 0.047, "stdq": 0.0409, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 39900 }, { "accuracy": 40.4297, "active_queue_size": 16384.0, "cl_loss": 4.18, "doc_norm": 1.3352, "encoder_q-embeddings": 1216.3876, "encoder_q-layer.0": 831.1115, "encoder_q-layer.1": 893.0364, "encoder_q-layer.10": 1445.2928, "encoder_q-layer.11": 3354.2939, "encoder_q-layer.2": 958.0146, "encoder_q-layer.3": 1021.0149, "encoder_q-layer.4": 1033.3541, "encoder_q-layer.5": 1067.7761, "encoder_q-layer.6": 1100.0334, "encoder_q-layer.7": 1237.5332, "encoder_q-layer.8": 1477.0138, "encoder_q-layer.9": 1329.61, "epoch": 0.26, "inbatch_neg_score": 0.0991, "inbatch_pos_score": 0.6226, "learning_rate": 3.3333333333333335e-05, "loss": 4.18, "norm_diff": 0.0789, "norm_loss": 0.0, "num_token_doc": 66.7231, "num_token_overlap": 11.6156, "num_token_query": 31.8192, "num_token_union": 65.3353, "num_word_context": 202.1893, "num_word_doc": 49.7517, "num_word_query": 23.4863, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2118.1388, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0987, "query_norm": 1.2563, "queue_k_norm": 1.3373, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8192, "sent_len_1": 66.7231, "sent_len_max_0": 127.43, "sent_len_max_1": 190.6637, "stdk": 0.0469, "stdq": 0.0427, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 40000 }, { "dev_runtime": 45.0952, "dev_samples_per_second": 1.419, "dev_steps_per_second": 0.022, "epoch": 0.26, "step": 40000, "test_accuracy": 92.83447265625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4226084351539612, "test_doc_norm": 1.2928581237792969, "test_inbatch_neg_score": 0.41331443190574646, "test_inbatch_pos_score": 1.2774237394332886, "test_loss": 0.4226084351539612, "test_loss_align": 1.0103555917739868, "test_loss_unif": 3.964019775390625, "test_loss_unif_q@queue": 3.964020252227783, "test_norm_diff": 0.07321086525917053, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.08782389760017395, "test_query_norm": 1.366068959236145, "test_queue_k_norm": 1.3368239402770996, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.040268830955028534, "test_stdq": 0.04150468856096268, "test_stdqueue_k": 0.04704645276069641, "test_stdqueue_q": 0.0 }, { "dev_runtime": 45.0952, "dev_samples_per_second": 1.419, "dev_steps_per_second": 0.022, "epoch": 0.26, "eval_beir-arguana_ndcg@10": 0.34328, "eval_beir-arguana_recall@10": 0.59033, "eval_beir-arguana_recall@100": 0.88762, "eval_beir-arguana_recall@20": 0.72546, "eval_beir-avg_ndcg@10": 0.36457491666666664, "eval_beir-avg_recall@10": 0.4324675833333334, "eval_beir-avg_recall@100": 0.6121215833333333, "eval_beir-avg_recall@20": 0.4926628333333333, "eval_beir-cqadupstack_ndcg@10": 0.24215916666666668, "eval_beir-cqadupstack_recall@10": 0.33203583333333336, "eval_beir-cqadupstack_recall@100": 0.5584258333333334, "eval_beir-cqadupstack_recall@20": 0.3931883333333334, "eval_beir-fiqa_ndcg@10": 0.20827, "eval_beir-fiqa_recall@10": 0.25998, "eval_beir-fiqa_recall@100": 0.52327, "eval_beir-fiqa_recall@20": 0.35032, "eval_beir-nfcorpus_ndcg@10": 0.2801, "eval_beir-nfcorpus_recall@10": 0.13711, "eval_beir-nfcorpus_recall@100": 0.2715, "eval_beir-nfcorpus_recall@20": 0.17751, "eval_beir-nq_ndcg@10": 0.26464, "eval_beir-nq_recall@10": 0.44549, "eval_beir-nq_recall@100": 0.78614, "eval_beir-nq_recall@20": 0.56269, "eval_beir-quora_ndcg@10": 0.76287, "eval_beir-quora_recall@10": 0.87468, "eval_beir-quora_recall@100": 0.97326, "eval_beir-quora_recall@20": 0.92, "eval_beir-scidocs_ndcg@10": 0.14156, "eval_beir-scidocs_recall@10": 0.14668, "eval_beir-scidocs_recall@100": 0.34747, "eval_beir-scidocs_recall@20": 0.20158, "eval_beir-scifact_ndcg@10": 0.61738, "eval_beir-scifact_recall@10": 0.78833, "eval_beir-scifact_recall@100": 0.92089, "eval_beir-scifact_recall@20": 0.84233, "eval_beir-trec-covid_ndcg@10": 0.56778, "eval_beir-trec-covid_recall@10": 0.618, "eval_beir-trec-covid_recall@100": 0.4132, "eval_beir-trec-covid_recall@20": 0.556, "eval_beir-webis-touche2020_ndcg@10": 0.21771, "eval_beir-webis-touche2020_recall@10": 0.13204, "eval_beir-webis-touche2020_recall@100": 0.43944, "eval_beir-webis-touche2020_recall@20": 0.19755, "eval_senteval-avg_sts": 0.7547684437908047, "eval_senteval-sickr_spearman": 0.7079287710388935, "eval_senteval-stsb_spearman": 0.8016081165427158, "step": 40000, "test_accuracy": 92.83447265625, "test_active_queue_size": 16384.0, "test_cl_loss": 0.4226084351539612, "test_doc_norm": 1.2928581237792969, "test_inbatch_neg_score": 0.41331443190574646, "test_inbatch_pos_score": 1.2774237394332886, "test_loss": 0.4226084351539612, "test_loss_align": 1.0103555917739868, "test_loss_unif": 3.964019775390625, "test_loss_unif_q@queue": 3.964020252227783, "test_norm_diff": 0.07321086525917053, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.08782389760017395, "test_query_norm": 1.366068959236145, "test_queue_k_norm": 1.3368239402770996, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.040268830955028534, "test_stdq": 0.04150468856096268, "test_stdqueue_k": 0.04704645276069641, "test_stdqueue_q": 0.0 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.1503, "doc_norm": 1.3391, "encoder_q-embeddings": 1508.3832, "encoder_q-layer.0": 1053.9673, "encoder_q-layer.1": 1126.5859, "encoder_q-layer.10": 1343.7141, "encoder_q-layer.11": 3246.3708, "encoder_q-layer.2": 1333.1975, "encoder_q-layer.3": 1409.0333, "encoder_q-layer.4": 1567.7815, "encoder_q-layer.5": 1575.9252, "encoder_q-layer.6": 1633.3937, "encoder_q-layer.7": 1696.4126, "encoder_q-layer.8": 1679.3734, "encoder_q-layer.9": 1398.7383, "epoch": 0.26, "inbatch_neg_score": 0.1006, "inbatch_pos_score": 0.6289, "learning_rate": 3.327777777777778e-05, "loss": 4.1503, "norm_diff": 0.0824, "norm_loss": 0.0, "num_token_doc": 66.7861, "num_token_overlap": 11.7348, "num_token_query": 32.0598, "num_token_union": 65.4196, "num_word_context": 202.3372, "num_word_doc": 49.8506, "num_word_query": 23.6807, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2506.1656, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.101, "query_norm": 1.2567, "queue_k_norm": 1.3351, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0598, "sent_len_1": 66.7861, "sent_len_max_0": 127.4925, "sent_len_max_1": 188.5175, "stdk": 0.047, "stdq": 0.0426, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 40100 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.1626, "doc_norm": 1.3313, "encoder_q-embeddings": 2074.9988, "encoder_q-layer.0": 1480.9586, "encoder_q-layer.1": 1579.2539, "encoder_q-layer.10": 1268.9524, "encoder_q-layer.11": 3142.9053, "encoder_q-layer.2": 1810.6918, "encoder_q-layer.3": 1819.2748, "encoder_q-layer.4": 1869.8533, "encoder_q-layer.5": 1663.8921, "encoder_q-layer.6": 1656.6427, "encoder_q-layer.7": 1543.9203, "encoder_q-layer.8": 1607.311, "encoder_q-layer.9": 1297.2964, "epoch": 0.26, "inbatch_neg_score": 0.094, "inbatch_pos_score": 0.6147, "learning_rate": 3.322222222222222e-05, "loss": 4.1626, "norm_diff": 0.0793, "norm_loss": 0.0, "num_token_doc": 66.9275, "num_token_overlap": 11.6892, "num_token_query": 32.0052, "num_token_union": 65.5138, "num_word_context": 202.4153, "num_word_doc": 49.9402, "num_word_query": 23.6517, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2714.4613, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.094, "query_norm": 1.252, "queue_k_norm": 1.3377, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0052, "sent_len_1": 66.9275, "sent_len_max_0": 127.57, "sent_len_max_1": 189.0475, "stdk": 0.0468, "stdq": 0.0426, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 40200 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.1735, "doc_norm": 1.3407, "encoder_q-embeddings": 2808.6348, "encoder_q-layer.0": 1884.6219, "encoder_q-layer.1": 2050.521, "encoder_q-layer.10": 2459.2627, "encoder_q-layer.11": 5977.5977, "encoder_q-layer.2": 2244.3318, "encoder_q-layer.3": 2299.3701, "encoder_q-layer.4": 2380.5195, "encoder_q-layer.5": 2201.6406, "encoder_q-layer.6": 2414.7371, "encoder_q-layer.7": 2568.4626, "encoder_q-layer.8": 2835.959, "encoder_q-layer.9": 2406.0242, "epoch": 0.26, "inbatch_neg_score": 0.0948, "inbatch_pos_score": 0.6089, "learning_rate": 3.316666666666667e-05, "loss": 4.1735, "norm_diff": 0.094, "norm_loss": 0.0, "num_token_doc": 66.9578, "num_token_overlap": 11.6489, "num_token_query": 31.821, "num_token_union": 65.3824, "num_word_context": 202.6282, "num_word_doc": 49.9512, "num_word_query": 23.4819, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4297.48, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0953, "query_norm": 1.2466, "queue_k_norm": 1.3363, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.821, "sent_len_1": 66.9578, "sent_len_max_0": 127.5775, "sent_len_max_1": 192.0637, "stdk": 0.0471, "stdq": 0.0422, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 40300 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.1381, "doc_norm": 1.3409, "encoder_q-embeddings": 2472.8164, "encoder_q-layer.0": 1629.0066, "encoder_q-layer.1": 1674.1777, "encoder_q-layer.10": 2485.9753, "encoder_q-layer.11": 6019.3784, "encoder_q-layer.2": 1799.7302, "encoder_q-layer.3": 1894.3983, "encoder_q-layer.4": 1933.6835, "encoder_q-layer.5": 1983.3363, "encoder_q-layer.6": 2198.3059, "encoder_q-layer.7": 2381.0728, "encoder_q-layer.8": 2860.8276, "encoder_q-layer.9": 2461.9912, "epoch": 0.26, "inbatch_neg_score": 0.0957, "inbatch_pos_score": 0.6245, "learning_rate": 3.311111111111112e-05, "loss": 4.1381, "norm_diff": 0.0897, "norm_loss": 0.0, "num_token_doc": 66.6133, "num_token_overlap": 11.6964, "num_token_query": 31.9445, "num_token_union": 65.2757, "num_word_context": 202.2851, "num_word_doc": 49.7325, "num_word_query": 23.5881, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4039.5473, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0963, "query_norm": 1.2512, "queue_k_norm": 1.3388, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9445, "sent_len_1": 66.6133, "sent_len_max_0": 127.4675, "sent_len_max_1": 190.7562, "stdk": 0.0471, "stdq": 0.0424, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 40400 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 4.1683, "doc_norm": 1.333, "encoder_q-embeddings": 3460.7554, "encoder_q-layer.0": 2417.4597, "encoder_q-layer.1": 2536.1787, "encoder_q-layer.10": 2407.8279, "encoder_q-layer.11": 5681.2378, "encoder_q-layer.2": 2901.9661, "encoder_q-layer.3": 3035.9409, "encoder_q-layer.4": 3213.7634, "encoder_q-layer.5": 2877.1592, "encoder_q-layer.6": 2968.2737, "encoder_q-layer.7": 2943.6333, "encoder_q-layer.8": 2912.4954, "encoder_q-layer.9": 2329.4048, "epoch": 0.26, "inbatch_neg_score": 0.0895, "inbatch_pos_score": 0.6211, "learning_rate": 3.3055555555555553e-05, "loss": 4.1683, "norm_diff": 0.0999, "norm_loss": 0.0, "num_token_doc": 66.814, "num_token_overlap": 11.668, "num_token_query": 31.8635, "num_token_union": 65.3696, "num_word_context": 202.3552, "num_word_doc": 49.8542, "num_word_query": 23.527, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4718.1257, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0894, "query_norm": 1.2331, "queue_k_norm": 1.3362, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8635, "sent_len_1": 66.814, "sent_len_max_0": 127.4387, "sent_len_max_1": 189.245, "stdk": 0.0468, "stdq": 0.0417, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 40500 }, { "accuracy": 41.2109, "active_queue_size": 16384.0, "cl_loss": 4.1491, "doc_norm": 1.3382, "encoder_q-embeddings": 2506.7043, "encoder_q-layer.0": 1678.5972, "encoder_q-layer.1": 1763.2731, "encoder_q-layer.10": 2761.2139, "encoder_q-layer.11": 5963.918, "encoder_q-layer.2": 1871.8204, "encoder_q-layer.3": 1905.1143, "encoder_q-layer.4": 1922.0448, "encoder_q-layer.5": 2037.5095, "encoder_q-layer.6": 2196.5083, "encoder_q-layer.7": 2318.7715, "encoder_q-layer.8": 2520.4458, "encoder_q-layer.9": 2298.9695, "epoch": 0.26, "inbatch_neg_score": 0.0894, "inbatch_pos_score": 0.5981, "learning_rate": 3.3e-05, "loss": 4.1491, "norm_diff": 0.1095, "norm_loss": 0.0, "num_token_doc": 67.0386, "num_token_overlap": 11.7003, "num_token_query": 31.9879, "num_token_union": 65.5732, "num_word_context": 202.8873, "num_word_doc": 50.0179, "num_word_query": 23.6027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3965.8811, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0891, "query_norm": 1.2286, "queue_k_norm": 1.3363, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9879, "sent_len_1": 67.0386, "sent_len_max_0": 127.58, "sent_len_max_1": 191.1037, "stdk": 0.047, "stdq": 0.0414, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 40600 }, { "accuracy": 40.8203, "active_queue_size": 16384.0, "cl_loss": 4.1431, "doc_norm": 1.3413, "encoder_q-embeddings": 1154.1251, "encoder_q-layer.0": 752.7305, "encoder_q-layer.1": 742.0712, "encoder_q-layer.10": 1346.3608, "encoder_q-layer.11": 3105.2234, "encoder_q-layer.2": 829.983, "encoder_q-layer.3": 841.4529, "encoder_q-layer.4": 898.8174, "encoder_q-layer.5": 935.1017, "encoder_q-layer.6": 1026.6533, "encoder_q-layer.7": 1110.5836, "encoder_q-layer.8": 1292.3491, "encoder_q-layer.9": 1202.4943, "epoch": 0.26, "inbatch_neg_score": 0.0926, "inbatch_pos_score": 0.6074, "learning_rate": 3.2944444444444445e-05, "loss": 4.1431, "norm_diff": 0.0846, "norm_loss": 0.0, "num_token_doc": 66.8919, "num_token_overlap": 11.7137, "num_token_query": 32.0503, "num_token_union": 65.4713, "num_word_context": 202.3827, "num_word_doc": 49.8933, "num_word_query": 23.6933, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1976.9433, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0926, "query_norm": 1.2567, "queue_k_norm": 1.3359, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0503, "sent_len_1": 66.8919, "sent_len_max_0": 127.5962, "sent_len_max_1": 191.7488, "stdk": 0.0472, "stdq": 0.0422, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 40700 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.1482, "doc_norm": 1.3309, "encoder_q-embeddings": 1851.4332, "encoder_q-layer.0": 1300.7363, "encoder_q-layer.1": 1457.1765, "encoder_q-layer.10": 1179.3003, "encoder_q-layer.11": 2896.8887, "encoder_q-layer.2": 1679.2588, "encoder_q-layer.3": 1832.4735, "encoder_q-layer.4": 2002.5638, "encoder_q-layer.5": 2060.8503, "encoder_q-layer.6": 2185.6792, "encoder_q-layer.7": 1942.7571, "encoder_q-layer.8": 1478.511, "encoder_q-layer.9": 1188.0345, "epoch": 0.27, "inbatch_neg_score": 0.0908, "inbatch_pos_score": 0.6323, "learning_rate": 3.2888888888888894e-05, "loss": 4.1482, "norm_diff": 0.0387, "norm_loss": 0.0, "num_token_doc": 66.8814, "num_token_overlap": 11.721, "num_token_query": 31.94, "num_token_union": 65.4217, "num_word_context": 202.4815, "num_word_doc": 49.8823, "num_word_query": 23.5836, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2724.9999, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0907, "query_norm": 1.2922, "queue_k_norm": 1.3366, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.94, "sent_len_1": 66.8814, "sent_len_max_0": 127.5525, "sent_len_max_1": 191.8288, "stdk": 0.0468, "stdq": 0.0432, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 40800 }, { "accuracy": 41.5039, "active_queue_size": 16384.0, "cl_loss": 4.1541, "doc_norm": 1.3414, "encoder_q-embeddings": 1890.0757, "encoder_q-layer.0": 1293.6912, "encoder_q-layer.1": 1435.7521, "encoder_q-layer.10": 1236.4878, "encoder_q-layer.11": 2957.5698, "encoder_q-layer.2": 1747.9114, "encoder_q-layer.3": 1942.5447, "encoder_q-layer.4": 2040.8192, "encoder_q-layer.5": 1923.7886, "encoder_q-layer.6": 1780.1626, "encoder_q-layer.7": 1655.9764, "encoder_q-layer.8": 1527.9917, "encoder_q-layer.9": 1178.6877, "epoch": 0.27, "inbatch_neg_score": 0.099, "inbatch_pos_score": 0.6099, "learning_rate": 3.283333333333333e-05, "loss": 4.1541, "norm_diff": 0.0792, "norm_loss": 0.0, "num_token_doc": 66.6979, "num_token_overlap": 11.6633, "num_token_query": 31.761, "num_token_union": 65.2284, "num_word_context": 202.0137, "num_word_doc": 49.7739, "num_word_query": 23.4383, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2695.0437, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.0981, "query_norm": 1.2622, "queue_k_norm": 1.3347, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.761, "sent_len_1": 66.6979, "sent_len_max_0": 127.4737, "sent_len_max_1": 189.8988, "stdk": 0.0472, "stdq": 0.0419, "stdqueue_k": 0.047, "stdqueue_q": 0.0, "step": 40900 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.1296, "doc_norm": 1.3294, "encoder_q-embeddings": 2481.0854, "encoder_q-layer.0": 1775.818, "encoder_q-layer.1": 1944.5387, "encoder_q-layer.10": 1316.7627, "encoder_q-layer.11": 3056.8435, "encoder_q-layer.2": 2180.5425, "encoder_q-layer.3": 2199.8904, "encoder_q-layer.4": 2224.2729, "encoder_q-layer.5": 2218.396, "encoder_q-layer.6": 2116.873, "encoder_q-layer.7": 2136.3396, "encoder_q-layer.8": 1964.6089, "encoder_q-layer.9": 1285.7213, "epoch": 0.27, "inbatch_neg_score": 0.1047, "inbatch_pos_score": 0.6138, "learning_rate": 3.277777777777778e-05, "loss": 4.1296, "norm_diff": 0.0678, "norm_loss": 0.0, "num_token_doc": 66.803, "num_token_overlap": 11.7139, "num_token_query": 31.9797, "num_token_union": 65.366, "num_word_context": 202.0427, "num_word_doc": 49.7995, "num_word_query": 23.6229, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3181.7971, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1045, "query_norm": 1.2616, "queue_k_norm": 1.3357, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9797, "sent_len_1": 66.803, "sent_len_max_0": 127.5763, "sent_len_max_1": 191.0112, "stdk": 0.0467, "stdq": 0.0416, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 41000 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.1571, "doc_norm": 1.3406, "encoder_q-embeddings": 1130.7841, "encoder_q-layer.0": 740.0815, "encoder_q-layer.1": 751.3445, "encoder_q-layer.10": 1346.1094, "encoder_q-layer.11": 2997.2822, "encoder_q-layer.2": 838.1949, "encoder_q-layer.3": 853.0006, "encoder_q-layer.4": 943.7239, "encoder_q-layer.5": 934.6774, "encoder_q-layer.6": 1025.7124, "encoder_q-layer.7": 1156.4099, "encoder_q-layer.8": 1334.8058, "encoder_q-layer.9": 1204.1509, "epoch": 0.27, "inbatch_neg_score": 0.1084, "inbatch_pos_score": 0.6318, "learning_rate": 3.272222222222223e-05, "loss": 4.1571, "norm_diff": 0.0769, "norm_loss": 0.0, "num_token_doc": 66.725, "num_token_overlap": 11.6977, "num_token_query": 32.0079, "num_token_union": 65.3866, "num_word_context": 202.2404, "num_word_doc": 49.792, "num_word_query": 23.6437, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1974.6815, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.108, "query_norm": 1.2636, "queue_k_norm": 1.3375, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0079, "sent_len_1": 66.725, "sent_len_max_0": 127.5475, "sent_len_max_1": 188.3587, "stdk": 0.0472, "stdq": 0.0418, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 41100 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 4.1339, "doc_norm": 1.336, "encoder_q-embeddings": 1340.8337, "encoder_q-layer.0": 905.1991, "encoder_q-layer.1": 923.7068, "encoder_q-layer.10": 1210.7969, "encoder_q-layer.11": 2869.8447, "encoder_q-layer.2": 1034.4606, "encoder_q-layer.3": 1047.5219, "encoder_q-layer.4": 1117.0679, "encoder_q-layer.5": 1094.9999, "encoder_q-layer.6": 1117.0511, "encoder_q-layer.7": 1161.0729, "encoder_q-layer.8": 1326.0237, "encoder_q-layer.9": 1151.3109, "epoch": 0.27, "inbatch_neg_score": 0.1129, "inbatch_pos_score": 0.6479, "learning_rate": 3.266666666666667e-05, "loss": 4.1339, "norm_diff": 0.0565, "norm_loss": 0.0, "num_token_doc": 66.8778, "num_token_overlap": 11.7566, "num_token_query": 32.0568, "num_token_union": 65.4442, "num_word_context": 202.3661, "num_word_doc": 49.9131, "num_word_query": 23.6775, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2037.7654, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1126, "query_norm": 1.2795, "queue_k_norm": 1.3362, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0568, "sent_len_1": 66.8778, "sent_len_max_0": 127.44, "sent_len_max_1": 190.155, "stdk": 0.0469, "stdq": 0.0425, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 41200 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.1717, "doc_norm": 1.3393, "encoder_q-embeddings": 2245.4705, "encoder_q-layer.0": 1647.2393, "encoder_q-layer.1": 1623.7351, "encoder_q-layer.10": 1230.8248, "encoder_q-layer.11": 3021.5112, "encoder_q-layer.2": 1837.4238, "encoder_q-layer.3": 1901.2374, "encoder_q-layer.4": 1903.578, "encoder_q-layer.5": 1849.2261, "encoder_q-layer.6": 1957.4034, "encoder_q-layer.7": 1928.514, "encoder_q-layer.8": 1703.8405, "encoder_q-layer.9": 1282.4451, "epoch": 0.27, "inbatch_neg_score": 0.1131, "inbatch_pos_score": 0.6357, "learning_rate": 3.261111111111111e-05, "loss": 4.1717, "norm_diff": 0.0832, "norm_loss": 0.0, "num_token_doc": 66.8911, "num_token_overlap": 11.6417, "num_token_query": 31.8555, "num_token_union": 65.4092, "num_word_context": 202.1533, "num_word_doc": 49.8861, "num_word_query": 23.5072, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2878.5766, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1133, "query_norm": 1.2561, "queue_k_norm": 1.3381, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8555, "sent_len_1": 66.8911, "sent_len_max_0": 127.6912, "sent_len_max_1": 191.47, "stdk": 0.0471, "stdq": 0.0416, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 41300 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.165, "doc_norm": 1.3445, "encoder_q-embeddings": 9239.21, "encoder_q-layer.0": 6494.3433, "encoder_q-layer.1": 7468.6978, "encoder_q-layer.10": 1286.2332, "encoder_q-layer.11": 3017.1946, "encoder_q-layer.2": 7998.1084, "encoder_q-layer.3": 8112.1807, "encoder_q-layer.4": 7792.1689, "encoder_q-layer.5": 7296.3394, "encoder_q-layer.6": 7495.3618, "encoder_q-layer.7": 4487.8125, "encoder_q-layer.8": 3021.8425, "encoder_q-layer.9": 1349.1589, "epoch": 0.27, "inbatch_neg_score": 0.1181, "inbatch_pos_score": 0.6377, "learning_rate": 3.2555555555555555e-05, "loss": 4.165, "norm_diff": 0.0806, "norm_loss": 0.0, "num_token_doc": 66.9288, "num_token_overlap": 11.6172, "num_token_query": 31.7612, "num_token_union": 65.3869, "num_word_context": 202.3077, "num_word_doc": 49.8886, "num_word_query": 23.4435, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9846.1089, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1173, "query_norm": 1.2638, "queue_k_norm": 1.3374, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7612, "sent_len_1": 66.9288, "sent_len_max_0": 127.465, "sent_len_max_1": 189.465, "stdk": 0.0472, "stdq": 0.0424, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 41400 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.1611, "doc_norm": 1.3419, "encoder_q-embeddings": 2456.5901, "encoder_q-layer.0": 1702.37, "encoder_q-layer.1": 1902.5487, "encoder_q-layer.10": 1271.8611, "encoder_q-layer.11": 3280.1685, "encoder_q-layer.2": 2180.9365, "encoder_q-layer.3": 2407.2061, "encoder_q-layer.4": 2465.7683, "encoder_q-layer.5": 2541.5156, "encoder_q-layer.6": 2400.6189, "encoder_q-layer.7": 2208.5933, "encoder_q-layer.8": 1700.8257, "encoder_q-layer.9": 1220.6178, "epoch": 0.27, "inbatch_neg_score": 0.1129, "inbatch_pos_score": 0.6377, "learning_rate": 3.2500000000000004e-05, "loss": 4.1611, "norm_diff": 0.0966, "norm_loss": 0.0, "num_token_doc": 66.9439, "num_token_overlap": 11.7013, "num_token_query": 31.9232, "num_token_union": 65.4199, "num_word_context": 202.5468, "num_word_doc": 49.938, "num_word_query": 23.5827, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3344.1369, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1135, "query_norm": 1.2453, "queue_k_norm": 1.3393, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9232, "sent_len_1": 66.9439, "sent_len_max_0": 127.5025, "sent_len_max_1": 190.0, "stdk": 0.0471, "stdq": 0.042, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 41500 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 4.1552, "doc_norm": 1.3405, "encoder_q-embeddings": 1315.8281, "encoder_q-layer.0": 842.9537, "encoder_q-layer.1": 897.462, "encoder_q-layer.10": 1279.2393, "encoder_q-layer.11": 3126.9949, "encoder_q-layer.2": 1013.275, "encoder_q-layer.3": 1037.8743, "encoder_q-layer.4": 1109.0907, "encoder_q-layer.5": 1090.7493, "encoder_q-layer.6": 1167.7703, "encoder_q-layer.7": 1266.7175, "encoder_q-layer.8": 1387.856, "encoder_q-layer.9": 1231.9563, "epoch": 0.27, "inbatch_neg_score": 0.1144, "inbatch_pos_score": 0.6704, "learning_rate": 3.2444444444444446e-05, "loss": 4.1552, "norm_diff": 0.0677, "norm_loss": 0.0, "num_token_doc": 66.5741, "num_token_overlap": 11.6943, "num_token_query": 31.9801, "num_token_union": 65.2392, "num_word_context": 202.2015, "num_word_doc": 49.6671, "num_word_query": 23.6085, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2124.009, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1146, "query_norm": 1.2728, "queue_k_norm": 1.3386, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9801, "sent_len_1": 66.5741, "sent_len_max_0": 127.41, "sent_len_max_1": 188.6725, "stdk": 0.047, "stdq": 0.043, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 41600 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 4.1197, "doc_norm": 1.3448, "encoder_q-embeddings": 1496.1187, "encoder_q-layer.0": 1034.7639, "encoder_q-layer.1": 1113.099, "encoder_q-layer.10": 1292.8154, "encoder_q-layer.11": 3156.542, "encoder_q-layer.2": 1197.5952, "encoder_q-layer.3": 1221.9509, "encoder_q-layer.4": 1228.5648, "encoder_q-layer.5": 1190.3044, "encoder_q-layer.6": 1240.078, "encoder_q-layer.7": 1197.8617, "encoder_q-layer.8": 1322.4769, "encoder_q-layer.9": 1155.6083, "epoch": 0.27, "inbatch_neg_score": 0.1147, "inbatch_pos_score": 0.6504, "learning_rate": 3.238888888888889e-05, "loss": 4.1197, "norm_diff": 0.1057, "norm_loss": 0.0, "num_token_doc": 66.8575, "num_token_overlap": 11.7003, "num_token_query": 31.9545, "num_token_union": 65.4284, "num_word_context": 202.0027, "num_word_doc": 49.8948, "num_word_query": 23.6012, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2199.2102, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1143, "query_norm": 1.239, "queue_k_norm": 1.3408, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9545, "sent_len_1": 66.8575, "sent_len_max_0": 127.59, "sent_len_max_1": 188.2488, "stdk": 0.0472, "stdq": 0.0418, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 41700 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.1466, "doc_norm": 1.3378, "encoder_q-embeddings": 1426.3804, "encoder_q-layer.0": 972.8076, "encoder_q-layer.1": 1036.7484, "encoder_q-layer.10": 1291.0702, "encoder_q-layer.11": 2888.0771, "encoder_q-layer.2": 1194.8361, "encoder_q-layer.3": 1210.8159, "encoder_q-layer.4": 1269.0195, "encoder_q-layer.5": 1285.5541, "encoder_q-layer.6": 1341.644, "encoder_q-layer.7": 1373.1443, "encoder_q-layer.8": 1457.4381, "encoder_q-layer.9": 1189.4646, "epoch": 0.27, "inbatch_neg_score": 0.1166, "inbatch_pos_score": 0.6421, "learning_rate": 3.233333333333333e-05, "loss": 4.1466, "norm_diff": 0.0844, "norm_loss": 0.0, "num_token_doc": 66.8229, "num_token_overlap": 11.6839, "num_token_query": 31.7615, "num_token_union": 65.2712, "num_word_context": 202.4758, "num_word_doc": 49.8854, "num_word_query": 23.4342, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2172.6124, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1163, "query_norm": 1.2534, "queue_k_norm": 1.3419, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7615, "sent_len_1": 66.8229, "sent_len_max_0": 127.5162, "sent_len_max_1": 189.125, "stdk": 0.0469, "stdq": 0.0423, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 41800 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.1297, "doc_norm": 1.3472, "encoder_q-embeddings": 1301.224, "encoder_q-layer.0": 929.5218, "encoder_q-layer.1": 966.4006, "encoder_q-layer.10": 1249.078, "encoder_q-layer.11": 3194.0269, "encoder_q-layer.2": 1070.0802, "encoder_q-layer.3": 1101.8833, "encoder_q-layer.4": 1127.5421, "encoder_q-layer.5": 1157.0944, "encoder_q-layer.6": 1199.4036, "encoder_q-layer.7": 1199.8228, "encoder_q-layer.8": 1317.1412, "encoder_q-layer.9": 1213.7692, "epoch": 0.27, "inbatch_neg_score": 0.1172, "inbatch_pos_score": 0.6226, "learning_rate": 3.227777777777778e-05, "loss": 4.1297, "norm_diff": 0.093, "norm_loss": 0.0, "num_token_doc": 66.7353, "num_token_overlap": 11.708, "num_token_query": 31.9576, "num_token_union": 65.3304, "num_word_context": 202.3584, "num_word_doc": 49.8029, "num_word_query": 23.6138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2154.7716, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1165, "query_norm": 1.2542, "queue_k_norm": 1.3437, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9576, "sent_len_1": 66.7353, "sent_len_max_0": 127.6388, "sent_len_max_1": 189.2738, "stdk": 0.0472, "stdq": 0.0424, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 41900 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.1111, "doc_norm": 1.3475, "encoder_q-embeddings": 1370.0334, "encoder_q-layer.0": 921.5609, "encoder_q-layer.1": 957.6112, "encoder_q-layer.10": 1275.6561, "encoder_q-layer.11": 3086.7417, "encoder_q-layer.2": 1093.093, "encoder_q-layer.3": 1156.6276, "encoder_q-layer.4": 1208.9658, "encoder_q-layer.5": 1294.6587, "encoder_q-layer.6": 1320.5685, "encoder_q-layer.7": 1327.8684, "encoder_q-layer.8": 1360.172, "encoder_q-layer.9": 1253.0171, "epoch": 0.27, "inbatch_neg_score": 0.1116, "inbatch_pos_score": 0.6382, "learning_rate": 3.222222222222223e-05, "loss": 4.1111, "norm_diff": 0.1014, "norm_loss": 0.0, "num_token_doc": 66.7678, "num_token_overlap": 11.7111, "num_token_query": 31.9089, "num_token_union": 65.3573, "num_word_context": 202.5734, "num_word_doc": 49.8301, "num_word_query": 23.5642, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2203.5523, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1131, "query_norm": 1.2461, "queue_k_norm": 1.3407, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9089, "sent_len_1": 66.7678, "sent_len_max_0": 127.6325, "sent_len_max_1": 189.6788, "stdk": 0.0473, "stdq": 0.0421, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 42000 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.1365, "doc_norm": 1.3471, "encoder_q-embeddings": 2010.4742, "encoder_q-layer.0": 1428.6084, "encoder_q-layer.1": 1551.16, "encoder_q-layer.10": 1284.1316, "encoder_q-layer.11": 3260.5845, "encoder_q-layer.2": 1881.8024, "encoder_q-layer.3": 1990.4515, "encoder_q-layer.4": 2041.528, "encoder_q-layer.5": 1954.1846, "encoder_q-layer.6": 1761.2522, "encoder_q-layer.7": 1512.0005, "encoder_q-layer.8": 1465.6056, "encoder_q-layer.9": 1183.2854, "epoch": 0.27, "inbatch_neg_score": 0.1127, "inbatch_pos_score": 0.6245, "learning_rate": 3.2166666666666665e-05, "loss": 4.1365, "norm_diff": 0.1136, "norm_loss": 0.0, "num_token_doc": 66.8689, "num_token_overlap": 11.672, "num_token_query": 31.921, "num_token_union": 65.437, "num_word_context": 202.2085, "num_word_doc": 49.8516, "num_word_query": 23.5948, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2783.2482, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1122, "query_norm": 1.2335, "queue_k_norm": 1.3425, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.921, "sent_len_1": 66.8689, "sent_len_max_0": 127.5312, "sent_len_max_1": 189.9062, "stdk": 0.0472, "stdq": 0.0417, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 42100 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.1433, "doc_norm": 1.337, "encoder_q-embeddings": 1343.7393, "encoder_q-layer.0": 908.5625, "encoder_q-layer.1": 952.0399, "encoder_q-layer.10": 1467.432, "encoder_q-layer.11": 3085.9902, "encoder_q-layer.2": 1028.1155, "encoder_q-layer.3": 1031.0548, "encoder_q-layer.4": 1050.4122, "encoder_q-layer.5": 1064.7739, "encoder_q-layer.6": 1104.0881, "encoder_q-layer.7": 1134.9109, "encoder_q-layer.8": 1361.9221, "encoder_q-layer.9": 1244.5735, "epoch": 0.27, "inbatch_neg_score": 0.1093, "inbatch_pos_score": 0.625, "learning_rate": 3.2111111111111114e-05, "loss": 4.1433, "norm_diff": 0.102, "norm_loss": 0.0, "num_token_doc": 66.9314, "num_token_overlap": 11.6903, "num_token_query": 31.9371, "num_token_union": 65.4907, "num_word_context": 202.194, "num_word_doc": 49.9293, "num_word_query": 23.5757, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2071.3819, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1089, "query_norm": 1.2351, "queue_k_norm": 1.3421, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9371, "sent_len_1": 66.9314, "sent_len_max_0": 127.615, "sent_len_max_1": 190.6763, "stdk": 0.0469, "stdq": 0.0419, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 42200 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.1473, "doc_norm": 1.3436, "encoder_q-embeddings": 2317.4902, "encoder_q-layer.0": 1530.9226, "encoder_q-layer.1": 1616.2957, "encoder_q-layer.10": 1219.7191, "encoder_q-layer.11": 2989.5183, "encoder_q-layer.2": 1988.8164, "encoder_q-layer.3": 2058.1077, "encoder_q-layer.4": 2037.1235, "encoder_q-layer.5": 1662.4242, "encoder_q-layer.6": 1367.0819, "encoder_q-layer.7": 1285.9492, "encoder_q-layer.8": 1366.9387, "encoder_q-layer.9": 1168.446, "epoch": 0.28, "inbatch_neg_score": 0.1106, "inbatch_pos_score": 0.6548, "learning_rate": 3.2055555555555556e-05, "loss": 4.1473, "norm_diff": 0.0944, "norm_loss": 0.0, "num_token_doc": 66.7266, "num_token_overlap": 11.6463, "num_token_query": 31.8644, "num_token_union": 65.3593, "num_word_context": 202.2396, "num_word_doc": 49.791, "num_word_query": 23.5371, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2760.9676, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1104, "query_norm": 1.2492, "queue_k_norm": 1.343, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8644, "sent_len_1": 66.7266, "sent_len_max_0": 127.6125, "sent_len_max_1": 187.2587, "stdk": 0.0471, "stdq": 0.0423, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 42300 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 4.1422, "doc_norm": 1.3481, "encoder_q-embeddings": 1471.0646, "encoder_q-layer.0": 1025.7715, "encoder_q-layer.1": 1127.1091, "encoder_q-layer.10": 1230.4832, "encoder_q-layer.11": 3018.6396, "encoder_q-layer.2": 1332.8564, "encoder_q-layer.3": 1381.8934, "encoder_q-layer.4": 1460.556, "encoder_q-layer.5": 1330.3405, "encoder_q-layer.6": 1480.788, "encoder_q-layer.7": 1420.4529, "encoder_q-layer.8": 1452.1227, "encoder_q-layer.9": 1219.0474, "epoch": 0.28, "inbatch_neg_score": 0.1122, "inbatch_pos_score": 0.6484, "learning_rate": 3.2000000000000005e-05, "loss": 4.1422, "norm_diff": 0.0829, "norm_loss": 0.0, "num_token_doc": 66.7529, "num_token_overlap": 11.6622, "num_token_query": 31.9063, "num_token_union": 65.3074, "num_word_context": 202.3579, "num_word_doc": 49.7644, "num_word_query": 23.5615, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2274.2151, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1116, "query_norm": 1.2652, "queue_k_norm": 1.3416, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9063, "sent_len_1": 66.7529, "sent_len_max_0": 127.4287, "sent_len_max_1": 190.3425, "stdk": 0.0473, "stdq": 0.0429, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 42400 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.1516, "doc_norm": 1.3468, "encoder_q-embeddings": 1364.147, "encoder_q-layer.0": 876.3189, "encoder_q-layer.1": 910.7849, "encoder_q-layer.10": 1218.9003, "encoder_q-layer.11": 3094.4644, "encoder_q-layer.2": 1030.4592, "encoder_q-layer.3": 1072.9293, "encoder_q-layer.4": 1202.572, "encoder_q-layer.5": 1107.5267, "encoder_q-layer.6": 1210.1283, "encoder_q-layer.7": 1231.5913, "encoder_q-layer.8": 1337.8954, "encoder_q-layer.9": 1200.7069, "epoch": 0.28, "inbatch_neg_score": 0.1132, "inbatch_pos_score": 0.6338, "learning_rate": 3.194444444444444e-05, "loss": 4.1516, "norm_diff": 0.1021, "norm_loss": 0.0, "num_token_doc": 66.8629, "num_token_overlap": 11.666, "num_token_query": 31.7571, "num_token_union": 65.3067, "num_word_context": 202.1141, "num_word_doc": 49.9068, "num_word_query": 23.4263, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2107.0686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.113, "query_norm": 1.2447, "queue_k_norm": 1.3422, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7571, "sent_len_1": 66.8629, "sent_len_max_0": 127.42, "sent_len_max_1": 187.3663, "stdk": 0.0472, "stdq": 0.0419, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 42500 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.114, "doc_norm": 1.3508, "encoder_q-embeddings": 1740.0679, "encoder_q-layer.0": 1151.5131, "encoder_q-layer.1": 1350.8433, "encoder_q-layer.10": 1346.8971, "encoder_q-layer.11": 3350.2373, "encoder_q-layer.2": 1522.0303, "encoder_q-layer.3": 1490.6511, "encoder_q-layer.4": 1411.087, "encoder_q-layer.5": 1142.2106, "encoder_q-layer.6": 1153.175, "encoder_q-layer.7": 1229.0167, "encoder_q-layer.8": 1425.6609, "encoder_q-layer.9": 1276.6608, "epoch": 0.28, "inbatch_neg_score": 0.1124, "inbatch_pos_score": 0.645, "learning_rate": 3.188888888888889e-05, "loss": 4.114, "norm_diff": 0.1051, "norm_loss": 0.0, "num_token_doc": 66.8545, "num_token_overlap": 11.6802, "num_token_query": 31.9236, "num_token_union": 65.405, "num_word_context": 202.2711, "num_word_doc": 49.9117, "num_word_query": 23.5907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2459.1722, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1115, "query_norm": 1.2457, "queue_k_norm": 1.3424, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9236, "sent_len_1": 66.8545, "sent_len_max_0": 127.5863, "sent_len_max_1": 188.6062, "stdk": 0.0473, "stdq": 0.042, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 42600 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.1208, "doc_norm": 1.3392, "encoder_q-embeddings": 3537.2385, "encoder_q-layer.0": 2333.5066, "encoder_q-layer.1": 2534.3894, "encoder_q-layer.10": 2357.1926, "encoder_q-layer.11": 5874.0083, "encoder_q-layer.2": 3043.0854, "encoder_q-layer.3": 3374.1228, "encoder_q-layer.4": 3256.0999, "encoder_q-layer.5": 2994.176, "encoder_q-layer.6": 2840.0723, "encoder_q-layer.7": 2817.9888, "encoder_q-layer.8": 2785.8066, "encoder_q-layer.9": 2262.7937, "epoch": 0.28, "inbatch_neg_score": 0.109, "inbatch_pos_score": 0.6431, "learning_rate": 3.183333333333334e-05, "loss": 4.1208, "norm_diff": 0.0752, "norm_loss": 0.0, "num_token_doc": 66.63, "num_token_overlap": 11.6585, "num_token_query": 31.7803, "num_token_union": 65.1857, "num_word_context": 201.8645, "num_word_doc": 49.73, "num_word_query": 23.4466, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4837.489, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1093, "query_norm": 1.264, "queue_k_norm": 1.3412, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7803, "sent_len_1": 66.63, "sent_len_max_0": 127.5175, "sent_len_max_1": 189.8313, "stdk": 0.047, "stdq": 0.0427, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 42700 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.1265, "doc_norm": 1.345, "encoder_q-embeddings": 2693.0164, "encoder_q-layer.0": 1846.9961, "encoder_q-layer.1": 1947.9307, "encoder_q-layer.10": 2476.8875, "encoder_q-layer.11": 5879.7031, "encoder_q-layer.2": 2182.991, "encoder_q-layer.3": 2343.25, "encoder_q-layer.4": 2330.6899, "encoder_q-layer.5": 2069.7507, "encoder_q-layer.6": 2179.3997, "encoder_q-layer.7": 2285.0461, "encoder_q-layer.8": 2600.8303, "encoder_q-layer.9": 2277.2239, "epoch": 0.28, "inbatch_neg_score": 0.1078, "inbatch_pos_score": 0.6348, "learning_rate": 3.177777777777778e-05, "loss": 4.1265, "norm_diff": 0.0827, "norm_loss": 0.0, "num_token_doc": 66.898, "num_token_overlap": 11.7217, "num_token_query": 31.9995, "num_token_union": 65.4803, "num_word_context": 202.4912, "num_word_doc": 49.9587, "num_word_query": 23.648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4100.1024, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1067, "query_norm": 1.2623, "queue_k_norm": 1.3429, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9995, "sent_len_1": 66.898, "sent_len_max_0": 127.6562, "sent_len_max_1": 188.725, "stdk": 0.0471, "stdq": 0.0424, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 42800 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.1264, "doc_norm": 1.3404, "encoder_q-embeddings": 4338.4077, "encoder_q-layer.0": 3087.6082, "encoder_q-layer.1": 3367.5303, "encoder_q-layer.10": 2702.4167, "encoder_q-layer.11": 6292.9722, "encoder_q-layer.2": 3746.8936, "encoder_q-layer.3": 4010.7524, "encoder_q-layer.4": 4327.2417, "encoder_q-layer.5": 4273.2212, "encoder_q-layer.6": 3786.9326, "encoder_q-layer.7": 3813.7344, "encoder_q-layer.8": 3722.8403, "encoder_q-layer.9": 2639.2083, "epoch": 0.28, "inbatch_neg_score": 0.1073, "inbatch_pos_score": 0.6509, "learning_rate": 3.1722222222222224e-05, "loss": 4.1264, "norm_diff": 0.0479, "norm_loss": 0.0, "num_token_doc": 66.8512, "num_token_overlap": 11.7084, "num_token_query": 32.0419, "num_token_union": 65.4883, "num_word_context": 202.3444, "num_word_doc": 49.8775, "num_word_query": 23.6496, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5833.7825, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1075, "query_norm": 1.2925, "queue_k_norm": 1.3412, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0419, "sent_len_1": 66.8512, "sent_len_max_0": 127.5613, "sent_len_max_1": 189.3887, "stdk": 0.047, "stdq": 0.0435, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 42900 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.1297, "doc_norm": 1.3419, "encoder_q-embeddings": 3212.6255, "encoder_q-layer.0": 2087.4478, "encoder_q-layer.1": 2204.3105, "encoder_q-layer.10": 2518.4719, "encoder_q-layer.11": 6499.0562, "encoder_q-layer.2": 2463.54, "encoder_q-layer.3": 2471.3442, "encoder_q-layer.4": 2519.7102, "encoder_q-layer.5": 2277.9326, "encoder_q-layer.6": 2367.6462, "encoder_q-layer.7": 2292.5469, "encoder_q-layer.8": 2604.9431, "encoder_q-layer.9": 2427.6921, "epoch": 0.28, "inbatch_neg_score": 0.1079, "inbatch_pos_score": 0.6309, "learning_rate": 3.1666666666666666e-05, "loss": 4.1297, "norm_diff": 0.0787, "norm_loss": 0.0, "num_token_doc": 66.9236, "num_token_overlap": 11.6883, "num_token_query": 31.9623, "num_token_union": 65.4943, "num_word_context": 202.7621, "num_word_doc": 49.9285, "num_word_query": 23.5965, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4554.0881, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1083, "query_norm": 1.2632, "queue_k_norm": 1.3426, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9623, "sent_len_1": 66.9236, "sent_len_max_0": 127.5525, "sent_len_max_1": 188.835, "stdk": 0.047, "stdq": 0.0425, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 43000 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.1083, "doc_norm": 1.3367, "encoder_q-embeddings": 6320.0513, "encoder_q-layer.0": 4263.2876, "encoder_q-layer.1": 4905.3472, "encoder_q-layer.10": 2968.7114, "encoder_q-layer.11": 6402.1309, "encoder_q-layer.2": 6175.1519, "encoder_q-layer.3": 6065.7075, "encoder_q-layer.4": 6192.1704, "encoder_q-layer.5": 5073.8423, "encoder_q-layer.6": 4692.4077, "encoder_q-layer.7": 3393.0352, "encoder_q-layer.8": 3237.2681, "encoder_q-layer.9": 2602.8379, "epoch": 0.28, "inbatch_neg_score": 0.1101, "inbatch_pos_score": 0.6201, "learning_rate": 3.1611111111111115e-05, "loss": 4.1083, "norm_diff": 0.0831, "norm_loss": 0.0, "num_token_doc": 66.9999, "num_token_overlap": 11.6973, "num_token_query": 32.0269, "num_token_union": 65.5733, "num_word_context": 202.6719, "num_word_doc": 49.9741, "num_word_query": 23.6601, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7520.0914, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1099, "query_norm": 1.2536, "queue_k_norm": 1.3445, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0269, "sent_len_1": 66.9999, "sent_len_max_0": 127.4887, "sent_len_max_1": 190.1213, "stdk": 0.0468, "stdq": 0.042, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 43100 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.1223, "doc_norm": 1.3372, "encoder_q-embeddings": 3099.5793, "encoder_q-layer.0": 2167.8191, "encoder_q-layer.1": 2478.4019, "encoder_q-layer.10": 2380.8606, "encoder_q-layer.11": 6209.647, "encoder_q-layer.2": 2885.238, "encoder_q-layer.3": 3098.2849, "encoder_q-layer.4": 3435.9006, "encoder_q-layer.5": 3334.5869, "encoder_q-layer.6": 3335.1499, "encoder_q-layer.7": 3172.949, "encoder_q-layer.8": 3284.8237, "encoder_q-layer.9": 2612.5342, "epoch": 0.28, "inbatch_neg_score": 0.109, "inbatch_pos_score": 0.6182, "learning_rate": 3.155555555555556e-05, "loss": 4.1223, "norm_diff": 0.085, "norm_loss": 0.0, "num_token_doc": 66.9837, "num_token_overlap": 11.7117, "num_token_query": 31.9205, "num_token_union": 65.4557, "num_word_context": 202.4981, "num_word_doc": 49.9989, "num_word_query": 23.5654, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5030.3396, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.11, "query_norm": 1.2522, "queue_k_norm": 1.3434, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9205, "sent_len_1": 66.9837, "sent_len_max_0": 127.4925, "sent_len_max_1": 189.3438, "stdk": 0.0469, "stdq": 0.0415, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 43200 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.1394, "doc_norm": 1.3502, "encoder_q-embeddings": 3464.1089, "encoder_q-layer.0": 2282.3108, "encoder_q-layer.1": 2656.9316, "encoder_q-layer.10": 2691.1545, "encoder_q-layer.11": 6390.3965, "encoder_q-layer.2": 2755.6782, "encoder_q-layer.3": 2523.6226, "encoder_q-layer.4": 2498.429, "encoder_q-layer.5": 2628.8508, "encoder_q-layer.6": 2937.729, "encoder_q-layer.7": 3114.7744, "encoder_q-layer.8": 3418.7246, "encoder_q-layer.9": 2652.751, "epoch": 0.28, "inbatch_neg_score": 0.1158, "inbatch_pos_score": 0.6602, "learning_rate": 3.15e-05, "loss": 4.1394, "norm_diff": 0.0626, "norm_loss": 0.0, "num_token_doc": 66.7478, "num_token_overlap": 11.6678, "num_token_query": 31.8308, "num_token_union": 65.3271, "num_word_context": 202.2699, "num_word_doc": 49.8386, "num_word_query": 23.5144, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4872.845, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.116, "query_norm": 1.2876, "queue_k_norm": 1.3426, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8308, "sent_len_1": 66.7478, "sent_len_max_0": 127.5075, "sent_len_max_1": 188.7463, "stdk": 0.0473, "stdq": 0.0426, "stdqueue_k": 0.0471, "stdqueue_q": 0.0, "step": 43300 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 4.1199, "doc_norm": 1.3472, "encoder_q-embeddings": 10580.4873, "encoder_q-layer.0": 8030.8496, "encoder_q-layer.1": 8494.6104, "encoder_q-layer.10": 2613.4932, "encoder_q-layer.11": 6309.5254, "encoder_q-layer.2": 10629.7754, "encoder_q-layer.3": 10891.417, "encoder_q-layer.4": 11246.8643, "encoder_q-layer.5": 10716.8145, "encoder_q-layer.6": 9853.6172, "encoder_q-layer.7": 11672.1182, "encoder_q-layer.8": 8911.374, "encoder_q-layer.9": 4830.0581, "epoch": 0.28, "inbatch_neg_score": 0.119, "inbatch_pos_score": 0.6543, "learning_rate": 3.144444444444445e-05, "loss": 4.1199, "norm_diff": 0.0727, "norm_loss": 0.0, "num_token_doc": 66.8517, "num_token_overlap": 11.6138, "num_token_query": 31.7403, "num_token_union": 65.373, "num_word_context": 202.3794, "num_word_doc": 49.8486, "num_word_query": 23.425, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13971.1402, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1192, "query_norm": 1.2745, "queue_k_norm": 1.3461, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7403, "sent_len_1": 66.8517, "sent_len_max_0": 127.575, "sent_len_max_1": 188.87, "stdk": 0.0472, "stdq": 0.0424, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 43400 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.1037, "doc_norm": 1.3503, "encoder_q-embeddings": 3717.8486, "encoder_q-layer.0": 2493.3943, "encoder_q-layer.1": 2884.4751, "encoder_q-layer.10": 2574.0337, "encoder_q-layer.11": 5952.833, "encoder_q-layer.2": 3281.7744, "encoder_q-layer.3": 3358.5146, "encoder_q-layer.4": 3507.0305, "encoder_q-layer.5": 3648.28, "encoder_q-layer.6": 3658.4055, "encoder_q-layer.7": 3351.7007, "encoder_q-layer.8": 3615.3127, "encoder_q-layer.9": 2791.6165, "epoch": 0.28, "inbatch_neg_score": 0.1293, "inbatch_pos_score": 0.6475, "learning_rate": 3.138888888888889e-05, "loss": 4.1037, "norm_diff": 0.0921, "norm_loss": 0.0, "num_token_doc": 66.721, "num_token_overlap": 11.7301, "num_token_query": 31.9821, "num_token_union": 65.3758, "num_word_context": 202.0931, "num_word_doc": 49.7807, "num_word_query": 23.6126, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5238.5536, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1282, "query_norm": 1.2582, "queue_k_norm": 1.3455, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9821, "sent_len_1": 66.721, "sent_len_max_0": 127.6238, "sent_len_max_1": 187.4425, "stdk": 0.0473, "stdq": 0.0416, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 43500 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.1206, "doc_norm": 1.3452, "encoder_q-embeddings": 2924.9465, "encoder_q-layer.0": 2000.0001, "encoder_q-layer.1": 2302.4568, "encoder_q-layer.10": 2471.092, "encoder_q-layer.11": 6077.3599, "encoder_q-layer.2": 2455.366, "encoder_q-layer.3": 2641.2505, "encoder_q-layer.4": 2757.0442, "encoder_q-layer.5": 2676.1055, "encoder_q-layer.6": 2621.3616, "encoder_q-layer.7": 2435.9539, "encoder_q-layer.8": 2616.916, "encoder_q-layer.9": 2247.0408, "epoch": 0.28, "inbatch_neg_score": 0.1296, "inbatch_pos_score": 0.6509, "learning_rate": 3.1333333333333334e-05, "loss": 4.1206, "norm_diff": 0.0772, "norm_loss": 0.0, "num_token_doc": 66.6841, "num_token_overlap": 11.6888, "num_token_query": 31.9312, "num_token_union": 65.3036, "num_word_context": 202.2182, "num_word_doc": 49.811, "num_word_query": 23.582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4448.987, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1295, "query_norm": 1.268, "queue_k_norm": 1.3461, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9312, "sent_len_1": 66.6841, "sent_len_max_0": 127.5162, "sent_len_max_1": 186.8063, "stdk": 0.0471, "stdq": 0.042, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 43600 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.1335, "doc_norm": 1.3471, "encoder_q-embeddings": 6057.0942, "encoder_q-layer.0": 4684.5615, "encoder_q-layer.1": 4920.0571, "encoder_q-layer.10": 2776.6943, "encoder_q-layer.11": 6538.0532, "encoder_q-layer.2": 5442.6147, "encoder_q-layer.3": 5884.7158, "encoder_q-layer.4": 5093.0659, "encoder_q-layer.5": 4734.8286, "encoder_q-layer.6": 4384.7104, "encoder_q-layer.7": 4090.4858, "encoder_q-layer.8": 3655.7754, "encoder_q-layer.9": 2793.6355, "epoch": 0.28, "inbatch_neg_score": 0.1304, "inbatch_pos_score": 0.6548, "learning_rate": 3.1277777777777776e-05, "loss": 4.1335, "norm_diff": 0.0833, "norm_loss": 0.0, "num_token_doc": 66.7126, "num_token_overlap": 11.6957, "num_token_query": 31.8776, "num_token_union": 65.2319, "num_word_context": 202.1887, "num_word_doc": 49.803, "num_word_query": 23.5404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7440.5144, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1298, "query_norm": 1.2637, "queue_k_norm": 1.3478, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8776, "sent_len_1": 66.7126, "sent_len_max_0": 127.6562, "sent_len_max_1": 189.245, "stdk": 0.0472, "stdq": 0.0421, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 43700 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.1317, "doc_norm": 1.3462, "encoder_q-embeddings": 5293.1807, "encoder_q-layer.0": 3582.5369, "encoder_q-layer.1": 4065.4731, "encoder_q-layer.10": 2624.7959, "encoder_q-layer.11": 6356.3438, "encoder_q-layer.2": 4519.499, "encoder_q-layer.3": 4886.8813, "encoder_q-layer.4": 5069.1636, "encoder_q-layer.5": 4414.3511, "encoder_q-layer.6": 3852.4058, "encoder_q-layer.7": 3327.9563, "encoder_q-layer.8": 3057.5569, "encoder_q-layer.9": 2442.1936, "epoch": 0.29, "inbatch_neg_score": 0.1294, "inbatch_pos_score": 0.6426, "learning_rate": 3.1222222222222225e-05, "loss": 4.1317, "norm_diff": 0.0936, "norm_loss": 0.0, "num_token_doc": 67.1176, "num_token_overlap": 11.7102, "num_token_query": 31.9752, "num_token_union": 65.6011, "num_word_context": 202.8036, "num_word_doc": 50.102, "num_word_query": 23.6207, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6462.9482, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1305, "query_norm": 1.2526, "queue_k_norm": 1.3478, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9752, "sent_len_1": 67.1176, "sent_len_max_0": 127.4975, "sent_len_max_1": 190.7812, "stdk": 0.0471, "stdq": 0.0419, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 43800 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 4.1289, "doc_norm": 1.3514, "encoder_q-embeddings": 2804.6536, "encoder_q-layer.0": 1944.9037, "encoder_q-layer.1": 2138.5596, "encoder_q-layer.10": 2818.385, "encoder_q-layer.11": 6253.5869, "encoder_q-layer.2": 2468.2573, "encoder_q-layer.3": 2585.9707, "encoder_q-layer.4": 2595.7173, "encoder_q-layer.5": 2442.9993, "encoder_q-layer.6": 2370.8665, "encoder_q-layer.7": 2356.3384, "encoder_q-layer.8": 2735.9666, "encoder_q-layer.9": 2411.5515, "epoch": 0.29, "inbatch_neg_score": 0.1312, "inbatch_pos_score": 0.6694, "learning_rate": 3.116666666666667e-05, "loss": 4.1289, "norm_diff": 0.1079, "norm_loss": 0.0, "num_token_doc": 66.9246, "num_token_overlap": 11.6842, "num_token_query": 31.9523, "num_token_union": 65.4847, "num_word_context": 202.7136, "num_word_doc": 49.9156, "num_word_query": 23.5731, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4475.643, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1313, "query_norm": 1.2435, "queue_k_norm": 1.3493, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9523, "sent_len_1": 66.9246, "sent_len_max_0": 127.6188, "sent_len_max_1": 189.0213, "stdk": 0.0473, "stdq": 0.0417, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 43900 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.1269, "doc_norm": 1.3567, "encoder_q-embeddings": 15273.9775, "encoder_q-layer.0": 10873.3848, "encoder_q-layer.1": 13011.1143, "encoder_q-layer.10": 2698.0522, "encoder_q-layer.11": 6600.7856, "encoder_q-layer.2": 16296.0244, "encoder_q-layer.3": 17973.3125, "encoder_q-layer.4": 21119.334, "encoder_q-layer.5": 20159.2637, "encoder_q-layer.6": 15355.6348, "encoder_q-layer.7": 9167.1953, "encoder_q-layer.8": 8481.2637, "encoder_q-layer.9": 5417.9673, "epoch": 0.29, "inbatch_neg_score": 0.1292, "inbatch_pos_score": 0.6514, "learning_rate": 3.111111111111111e-05, "loss": 4.1269, "norm_diff": 0.1125, "norm_loss": 0.0, "num_token_doc": 66.7025, "num_token_overlap": 11.7033, "num_token_query": 32.0113, "num_token_union": 65.3714, "num_word_context": 202.4367, "num_word_doc": 49.8077, "num_word_query": 23.6357, "postclip_grad_norm": 1.0, "preclip_grad_norm": 20208.092, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1296, "query_norm": 1.2442, "queue_k_norm": 1.3482, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0113, "sent_len_1": 66.7025, "sent_len_max_0": 127.3388, "sent_len_max_1": 189.1438, "stdk": 0.0474, "stdq": 0.042, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 44000 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.1235, "doc_norm": 1.346, "encoder_q-embeddings": 2175.3792, "encoder_q-layer.0": 1496.5576, "encoder_q-layer.1": 1532.3326, "encoder_q-layer.10": 2506.9358, "encoder_q-layer.11": 6196.5132, "encoder_q-layer.2": 1666.1295, "encoder_q-layer.3": 1755.3195, "encoder_q-layer.4": 1827.3395, "encoder_q-layer.5": 1831.1957, "encoder_q-layer.6": 1965.7112, "encoder_q-layer.7": 2218.6045, "encoder_q-layer.8": 2584.1416, "encoder_q-layer.9": 2456.752, "epoch": 0.29, "inbatch_neg_score": 0.1271, "inbatch_pos_score": 0.6382, "learning_rate": 3.105555555555555e-05, "loss": 4.1235, "norm_diff": 0.1024, "norm_loss": 0.0, "num_token_doc": 66.5499, "num_token_overlap": 11.7145, "num_token_query": 32.0718, "num_token_union": 65.3182, "num_word_context": 202.0515, "num_word_doc": 49.6641, "num_word_query": 23.7196, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3896.9657, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1262, "query_norm": 1.2435, "queue_k_norm": 1.3524, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0718, "sent_len_1": 66.5499, "sent_len_max_0": 127.6462, "sent_len_max_1": 186.6312, "stdk": 0.047, "stdq": 0.0421, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 44100 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.1082, "doc_norm": 1.3445, "encoder_q-embeddings": 2398.0098, "encoder_q-layer.0": 1610.9343, "encoder_q-layer.1": 1743.1161, "encoder_q-layer.10": 2448.9006, "encoder_q-layer.11": 6106.2759, "encoder_q-layer.2": 1944.1036, "encoder_q-layer.3": 1950.8738, "encoder_q-layer.4": 2109.7517, "encoder_q-layer.5": 2045.1797, "encoder_q-layer.6": 2292.9871, "encoder_q-layer.7": 2301.2515, "encoder_q-layer.8": 2649.8601, "encoder_q-layer.9": 2336.25, "epoch": 0.29, "inbatch_neg_score": 0.128, "inbatch_pos_score": 0.665, "learning_rate": 3.1e-05, "loss": 4.1082, "norm_diff": 0.0867, "norm_loss": 0.0, "num_token_doc": 67.0814, "num_token_overlap": 11.7053, "num_token_query": 32.0019, "num_token_union": 65.5699, "num_word_context": 202.5527, "num_word_doc": 50.0198, "num_word_query": 23.6261, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4074.7044, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1271, "query_norm": 1.2578, "queue_k_norm": 1.3491, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0019, "sent_len_1": 67.0814, "sent_len_max_0": 127.6188, "sent_len_max_1": 191.6937, "stdk": 0.0469, "stdq": 0.0425, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 44200 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.1068, "doc_norm": 1.3479, "encoder_q-embeddings": 2436.5408, "encoder_q-layer.0": 1657.5713, "encoder_q-layer.1": 1704.2023, "encoder_q-layer.10": 2751.0732, "encoder_q-layer.11": 6377.0137, "encoder_q-layer.2": 1891.3739, "encoder_q-layer.3": 1904.1888, "encoder_q-layer.4": 1942.6332, "encoder_q-layer.5": 1957.6796, "encoder_q-layer.6": 2183.5505, "encoder_q-layer.7": 2344.8499, "encoder_q-layer.8": 2719.738, "encoder_q-layer.9": 2555.8977, "epoch": 0.29, "inbatch_neg_score": 0.1235, "inbatch_pos_score": 0.6426, "learning_rate": 3.094444444444445e-05, "loss": 4.1068, "norm_diff": 0.1006, "norm_loss": 0.0, "num_token_doc": 66.7411, "num_token_overlap": 11.6912, "num_token_query": 31.9443, "num_token_union": 65.3512, "num_word_context": 202.3912, "num_word_doc": 49.798, "num_word_query": 23.5759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4141.1237, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1227, "query_norm": 1.2472, "queue_k_norm": 1.3518, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9443, "sent_len_1": 66.7411, "sent_len_max_0": 127.4663, "sent_len_max_1": 189.8038, "stdk": 0.0471, "stdq": 0.0423, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 44300 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.1106, "doc_norm": 1.3392, "encoder_q-embeddings": 3331.1335, "encoder_q-layer.0": 2218.4558, "encoder_q-layer.1": 2399.6001, "encoder_q-layer.10": 2354.6104, "encoder_q-layer.11": 5923.6313, "encoder_q-layer.2": 2822.9128, "encoder_q-layer.3": 3166.6033, "encoder_q-layer.4": 3691.2329, "encoder_q-layer.5": 4087.4475, "encoder_q-layer.6": 4196.1348, "encoder_q-layer.7": 3712.5344, "encoder_q-layer.8": 3570.1851, "encoder_q-layer.9": 2463.8516, "epoch": 0.29, "inbatch_neg_score": 0.1267, "inbatch_pos_score": 0.6484, "learning_rate": 3.088888888888889e-05, "loss": 4.1106, "norm_diff": 0.0819, "norm_loss": 0.0, "num_token_doc": 66.7682, "num_token_overlap": 11.7068, "num_token_query": 32.032, "num_token_union": 65.3821, "num_word_context": 202.5648, "num_word_doc": 49.8144, "num_word_query": 23.6598, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5197.7074, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1262, "query_norm": 1.2573, "queue_k_norm": 1.3513, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.032, "sent_len_1": 66.7682, "sent_len_max_0": 127.6688, "sent_len_max_1": 189.7512, "stdk": 0.0468, "stdq": 0.0424, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 44400 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 4.1141, "doc_norm": 1.3548, "encoder_q-embeddings": 3005.4265, "encoder_q-layer.0": 2020.7273, "encoder_q-layer.1": 2252.9885, "encoder_q-layer.10": 2324.532, "encoder_q-layer.11": 6084.5098, "encoder_q-layer.2": 2582.3989, "encoder_q-layer.3": 2877.6885, "encoder_q-layer.4": 2984.8567, "encoder_q-layer.5": 3227.4849, "encoder_q-layer.6": 3588.6963, "encoder_q-layer.7": 3148.1392, "encoder_q-layer.8": 3421.6514, "encoder_q-layer.9": 2556.5352, "epoch": 0.29, "inbatch_neg_score": 0.1268, "inbatch_pos_score": 0.6343, "learning_rate": 3.0833333333333335e-05, "loss": 4.1141, "norm_diff": 0.1361, "norm_loss": 0.0, "num_token_doc": 66.7897, "num_token_overlap": 11.6683, "num_token_query": 31.9778, "num_token_union": 65.4192, "num_word_context": 202.4366, "num_word_doc": 49.8262, "num_word_query": 23.6177, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4878.1832, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1265, "query_norm": 1.2188, "queue_k_norm": 1.3513, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9778, "sent_len_1": 66.7897, "sent_len_max_0": 127.5037, "sent_len_max_1": 189.9487, "stdk": 0.0474, "stdq": 0.0408, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 44500 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 4.1265, "doc_norm": 1.3505, "encoder_q-embeddings": 2409.2217, "encoder_q-layer.0": 1609.3131, "encoder_q-layer.1": 1730.407, "encoder_q-layer.10": 2461.3589, "encoder_q-layer.11": 5858.4771, "encoder_q-layer.2": 1962.3771, "encoder_q-layer.3": 1976.3218, "encoder_q-layer.4": 2121.6985, "encoder_q-layer.5": 2107.2358, "encoder_q-layer.6": 2458.3516, "encoder_q-layer.7": 2447.7566, "encoder_q-layer.8": 2704.689, "encoder_q-layer.9": 2397.2881, "epoch": 0.29, "inbatch_neg_score": 0.1274, "inbatch_pos_score": 0.6602, "learning_rate": 3.077777777777778e-05, "loss": 4.1265, "norm_diff": 0.0947, "norm_loss": 0.0, "num_token_doc": 66.5106, "num_token_overlap": 11.6542, "num_token_query": 31.8617, "num_token_union": 65.2231, "num_word_context": 202.0512, "num_word_doc": 49.6494, "num_word_query": 23.5471, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4014.0302, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.127, "query_norm": 1.2559, "queue_k_norm": 1.3511, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8617, "sent_len_1": 66.5106, "sent_len_max_0": 127.5037, "sent_len_max_1": 190.145, "stdk": 0.0472, "stdq": 0.0423, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 44600 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.1158, "doc_norm": 1.3561, "encoder_q-embeddings": 5514.5522, "encoder_q-layer.0": 3700.0835, "encoder_q-layer.1": 3915.3123, "encoder_q-layer.10": 5287.7783, "encoder_q-layer.11": 12267.1426, "encoder_q-layer.2": 4470.9009, "encoder_q-layer.3": 4776.0186, "encoder_q-layer.4": 5074.459, "encoder_q-layer.5": 5207.0723, "encoder_q-layer.6": 5568.3848, "encoder_q-layer.7": 5112.2627, "encoder_q-layer.8": 5677.8794, "encoder_q-layer.9": 4636.5586, "epoch": 0.29, "inbatch_neg_score": 0.1231, "inbatch_pos_score": 0.6592, "learning_rate": 3.0722222222222227e-05, "loss": 4.1158, "norm_diff": 0.1136, "norm_loss": 0.0, "num_token_doc": 66.6322, "num_token_overlap": 11.7127, "num_token_query": 32.0171, "num_token_union": 65.307, "num_word_context": 201.9523, "num_word_doc": 49.712, "num_word_query": 23.6223, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8837.1216, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.123, "query_norm": 1.2425, "queue_k_norm": 1.3527, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0171, "sent_len_1": 66.6322, "sent_len_max_0": 127.5138, "sent_len_max_1": 188.8038, "stdk": 0.0474, "stdq": 0.042, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 44700 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 4.1156, "doc_norm": 1.3458, "encoder_q-embeddings": 37239.0703, "encoder_q-layer.0": 27583.9121, "encoder_q-layer.1": 26278.7812, "encoder_q-layer.10": 5458.1577, "encoder_q-layer.11": 12202.959, "encoder_q-layer.2": 32690.0547, "encoder_q-layer.3": 35127.9492, "encoder_q-layer.4": 31152.1699, "encoder_q-layer.5": 29996.2188, "encoder_q-layer.6": 30838.7891, "encoder_q-layer.7": 23316.0508, "encoder_q-layer.8": 19472.4863, "encoder_q-layer.9": 11331.1084, "epoch": 0.29, "inbatch_neg_score": 0.1166, "inbatch_pos_score": 0.6621, "learning_rate": 3.066666666666667e-05, "loss": 4.1156, "norm_diff": 0.1127, "norm_loss": 0.0, "num_token_doc": 66.5325, "num_token_overlap": 11.716, "num_token_query": 31.986, "num_token_union": 65.1962, "num_word_context": 201.9943, "num_word_doc": 49.6458, "num_word_query": 23.6196, "postclip_grad_norm": 1.0, "preclip_grad_norm": 41058.5616, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.1178, "query_norm": 1.233, "queue_k_norm": 1.3514, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.986, "sent_len_1": 66.5325, "sent_len_max_0": 127.4712, "sent_len_max_1": 189.6262, "stdk": 0.047, "stdq": 0.0416, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 44800 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.1188, "doc_norm": 1.3576, "encoder_q-embeddings": 10537.1016, "encoder_q-layer.0": 7532.6968, "encoder_q-layer.1": 8614.2158, "encoder_q-layer.10": 4676.6943, "encoder_q-layer.11": 11837.5215, "encoder_q-layer.2": 10431.9707, "encoder_q-layer.3": 11873.0088, "encoder_q-layer.4": 14199.9053, "encoder_q-layer.5": 11516.5918, "encoder_q-layer.6": 12505.0635, "encoder_q-layer.7": 12873.2217, "encoder_q-layer.8": 10121.4609, "encoder_q-layer.9": 5980.3291, "epoch": 0.29, "inbatch_neg_score": 0.1132, "inbatch_pos_score": 0.6475, "learning_rate": 3.061111111111111e-05, "loss": 4.1188, "norm_diff": 0.1086, "norm_loss": 0.0, "num_token_doc": 66.8321, "num_token_overlap": 11.6633, "num_token_query": 31.8426, "num_token_union": 65.4013, "num_word_context": 202.4317, "num_word_doc": 49.9152, "num_word_query": 23.5358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15761.1185, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1134, "query_norm": 1.249, "queue_k_norm": 1.3505, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8426, "sent_len_1": 66.8321, "sent_len_max_0": 127.5175, "sent_len_max_1": 189.4325, "stdk": 0.0474, "stdq": 0.0424, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 44900 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 4.1031, "doc_norm": 1.356, "encoder_q-embeddings": 5811.2671, "encoder_q-layer.0": 4013.1292, "encoder_q-layer.1": 4400.4023, "encoder_q-layer.10": 5554.3413, "encoder_q-layer.11": 12020.5312, "encoder_q-layer.2": 4930.5679, "encoder_q-layer.3": 5091.688, "encoder_q-layer.4": 5255.3545, "encoder_q-layer.5": 5276.1382, "encoder_q-layer.6": 5335.665, "encoder_q-layer.7": 5213.2793, "encoder_q-layer.8": 5625.6621, "encoder_q-layer.9": 5166.0259, "epoch": 0.29, "inbatch_neg_score": 0.116, "inbatch_pos_score": 0.6475, "learning_rate": 3.055555555555556e-05, "loss": 4.1031, "norm_diff": 0.1212, "norm_loss": 0.0, "num_token_doc": 66.7469, "num_token_overlap": 11.7133, "num_token_query": 32.073, "num_token_union": 65.4323, "num_word_context": 202.4364, "num_word_doc": 49.8158, "num_word_query": 23.686, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8889.6403, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.116, "query_norm": 1.2348, "queue_k_norm": 1.351, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.073, "sent_len_1": 66.7469, "sent_len_max_0": 127.4613, "sent_len_max_1": 189.405, "stdk": 0.0474, "stdq": 0.0416, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 45000 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.0832, "doc_norm": 1.3516, "encoder_q-embeddings": 8337.8203, "encoder_q-layer.0": 5371.4448, "encoder_q-layer.1": 6409.2739, "encoder_q-layer.10": 5361.9365, "encoder_q-layer.11": 12812.7891, "encoder_q-layer.2": 7356.7173, "encoder_q-layer.3": 8179.8677, "encoder_q-layer.4": 9536.7617, "encoder_q-layer.5": 10241.9082, "encoder_q-layer.6": 9345.0781, "encoder_q-layer.7": 8234.0576, "encoder_q-layer.8": 8104.7383, "encoder_q-layer.9": 5988.6816, "epoch": 0.29, "inbatch_neg_score": 0.1253, "inbatch_pos_score": 0.6587, "learning_rate": 3.05e-05, "loss": 4.0832, "norm_diff": 0.0768, "norm_loss": 0.0, "num_token_doc": 66.6245, "num_token_overlap": 11.6371, "num_token_query": 31.7852, "num_token_union": 65.2388, "num_word_context": 202.214, "num_word_doc": 49.7309, "num_word_query": 23.4826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12544.8622, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1256, "query_norm": 1.2747, "queue_k_norm": 1.3509, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7852, "sent_len_1": 66.6245, "sent_len_max_0": 127.4625, "sent_len_max_1": 187.3575, "stdk": 0.0473, "stdq": 0.0429, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 45100 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 4.111, "doc_norm": 1.3512, "encoder_q-embeddings": 36850.3789, "encoder_q-layer.0": 24160.1172, "encoder_q-layer.1": 26571.9414, "encoder_q-layer.10": 4912.3213, "encoder_q-layer.11": 11926.1807, "encoder_q-layer.2": 29893.2852, "encoder_q-layer.3": 27729.4492, "encoder_q-layer.4": 25161.7949, "encoder_q-layer.5": 20313.5391, "encoder_q-layer.6": 19311.4043, "encoder_q-layer.7": 17476.8242, "encoder_q-layer.8": 12148.9609, "encoder_q-layer.9": 5851.8633, "epoch": 0.29, "inbatch_neg_score": 0.1224, "inbatch_pos_score": 0.6675, "learning_rate": 3.044444444444445e-05, "loss": 4.111, "norm_diff": 0.0928, "norm_loss": 0.0, "num_token_doc": 66.8142, "num_token_overlap": 11.6896, "num_token_query": 31.9519, "num_token_union": 65.3949, "num_word_context": 202.5771, "num_word_doc": 49.8283, "num_word_query": 23.6134, "postclip_grad_norm": 1.0, "preclip_grad_norm": 35236.1632, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1233, "query_norm": 1.2584, "queue_k_norm": 1.3503, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9519, "sent_len_1": 66.8142, "sent_len_max_0": 127.4062, "sent_len_max_1": 190.855, "stdk": 0.0472, "stdq": 0.0422, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 45200 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.1003, "doc_norm": 1.346, "encoder_q-embeddings": 5077.8906, "encoder_q-layer.0": 3428.1189, "encoder_q-layer.1": 3698.6401, "encoder_q-layer.10": 5438.646, "encoder_q-layer.11": 12410.748, "encoder_q-layer.2": 4064.7671, "encoder_q-layer.3": 4332.3887, "encoder_q-layer.4": 4671.0312, "encoder_q-layer.5": 4596.1191, "encoder_q-layer.6": 5459.0229, "encoder_q-layer.7": 5030.9341, "encoder_q-layer.8": 5580.0103, "encoder_q-layer.9": 5161.376, "epoch": 0.29, "inbatch_neg_score": 0.128, "inbatch_pos_score": 0.6592, "learning_rate": 3.0388888888888887e-05, "loss": 4.1003, "norm_diff": 0.0651, "norm_loss": 0.0, "num_token_doc": 66.8156, "num_token_overlap": 11.6741, "num_token_query": 31.8515, "num_token_union": 65.3655, "num_word_context": 202.307, "num_word_doc": 49.8513, "num_word_query": 23.5131, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8610.0178, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1279, "query_norm": 1.2808, "queue_k_norm": 1.3498, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8515, "sent_len_1": 66.8156, "sent_len_max_0": 127.4725, "sent_len_max_1": 188.7163, "stdk": 0.047, "stdq": 0.0429, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 45300 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.0931, "doc_norm": 1.3561, "encoder_q-embeddings": 10142.1523, "encoder_q-layer.0": 7711.8706, "encoder_q-layer.1": 8975.0791, "encoder_q-layer.10": 4853.02, "encoder_q-layer.11": 11834.3184, "encoder_q-layer.2": 9992.1338, "encoder_q-layer.3": 10564.2705, "encoder_q-layer.4": 11535.6621, "encoder_q-layer.5": 11213.4756, "encoder_q-layer.6": 11787.9736, "encoder_q-layer.7": 10465.3555, "encoder_q-layer.8": 10350.9209, "encoder_q-layer.9": 6854.8325, "epoch": 0.3, "inbatch_neg_score": 0.1291, "inbatch_pos_score": 0.6548, "learning_rate": 3.0333333333333337e-05, "loss": 4.0931, "norm_diff": 0.0842, "norm_loss": 0.0, "num_token_doc": 66.8749, "num_token_overlap": 11.6561, "num_token_query": 31.7531, "num_token_union": 65.362, "num_word_context": 202.3038, "num_word_doc": 49.9155, "num_word_query": 23.4427, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14923.3192, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1294, "query_norm": 1.2719, "queue_k_norm": 1.3489, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7531, "sent_len_1": 66.8749, "sent_len_max_0": 127.4475, "sent_len_max_1": 189.7575, "stdk": 0.0474, "stdq": 0.0424, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 45400 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.1088, "doc_norm": 1.351, "encoder_q-embeddings": 5669.0986, "encoder_q-layer.0": 3849.9648, "encoder_q-layer.1": 4238.7764, "encoder_q-layer.10": 5120.1543, "encoder_q-layer.11": 12088.9014, "encoder_q-layer.2": 4612.9473, "encoder_q-layer.3": 4841.188, "encoder_q-layer.4": 5319.1406, "encoder_q-layer.5": 5434.0112, "encoder_q-layer.6": 5260.3252, "encoder_q-layer.7": 5426.1675, "encoder_q-layer.8": 5518.793, "encoder_q-layer.9": 4774.0347, "epoch": 0.3, "inbatch_neg_score": 0.1336, "inbatch_pos_score": 0.6504, "learning_rate": 3.0277777777777776e-05, "loss": 4.1088, "norm_diff": 0.0989, "norm_loss": 0.0, "num_token_doc": 66.8069, "num_token_overlap": 11.6612, "num_token_query": 31.8691, "num_token_union": 65.3329, "num_word_context": 202.485, "num_word_doc": 49.8544, "num_word_query": 23.5447, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8828.2192, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1324, "query_norm": 1.2521, "queue_k_norm": 1.3513, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8691, "sent_len_1": 66.8069, "sent_len_max_0": 127.3637, "sent_len_max_1": 190.1362, "stdk": 0.0472, "stdq": 0.0415, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 45500 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.1008, "doc_norm": 1.3526, "encoder_q-embeddings": 5165.6099, "encoder_q-layer.0": 3384.8069, "encoder_q-layer.1": 3560.7505, "encoder_q-layer.10": 5228.3809, "encoder_q-layer.11": 11927.2324, "encoder_q-layer.2": 3911.6392, "encoder_q-layer.3": 4021.2563, "encoder_q-layer.4": 4322.4727, "encoder_q-layer.5": 4435.9727, "encoder_q-layer.6": 4683.981, "encoder_q-layer.7": 4966.5361, "encoder_q-layer.8": 5337.064, "encoder_q-layer.9": 4766.814, "epoch": 0.3, "inbatch_neg_score": 0.1329, "inbatch_pos_score": 0.6606, "learning_rate": 3.0222222222222225e-05, "loss": 4.1008, "norm_diff": 0.0746, "norm_loss": 0.0, "num_token_doc": 66.8483, "num_token_overlap": 11.6121, "num_token_query": 31.7786, "num_token_union": 65.3894, "num_word_context": 202.2755, "num_word_doc": 49.916, "num_word_query": 23.4524, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8279.2617, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1329, "query_norm": 1.2779, "queue_k_norm": 1.3503, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7786, "sent_len_1": 66.8483, "sent_len_max_0": 127.5075, "sent_len_max_1": 187.8487, "stdk": 0.0472, "stdq": 0.0424, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 45600 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.1094, "doc_norm": 1.3482, "encoder_q-embeddings": 15922.2578, "encoder_q-layer.0": 10371.96, "encoder_q-layer.1": 11815.0557, "encoder_q-layer.10": 4752.5186, "encoder_q-layer.11": 12065.7793, "encoder_q-layer.2": 12683.3213, "encoder_q-layer.3": 13770.8076, "encoder_q-layer.4": 13702.4287, "encoder_q-layer.5": 12703.9121, "encoder_q-layer.6": 10877.1416, "encoder_q-layer.7": 8135.2666, "encoder_q-layer.8": 6814.644, "encoder_q-layer.9": 4697.875, "epoch": 0.3, "inbatch_neg_score": 0.1357, "inbatch_pos_score": 0.6626, "learning_rate": 3.016666666666667e-05, "loss": 4.1094, "norm_diff": 0.0945, "norm_loss": 0.0, "num_token_doc": 66.5637, "num_token_overlap": 11.6478, "num_token_query": 31.8734, "num_token_union": 65.1934, "num_word_context": 202.3051, "num_word_doc": 49.6303, "num_word_query": 23.5239, "postclip_grad_norm": 1.0, "preclip_grad_norm": 17165.6611, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.136, "query_norm": 1.2537, "queue_k_norm": 1.3493, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8734, "sent_len_1": 66.5637, "sent_len_max_0": 127.595, "sent_len_max_1": 190.9, "stdk": 0.0471, "stdq": 0.0414, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 45700 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.0944, "doc_norm": 1.3544, "encoder_q-embeddings": 4974.1655, "encoder_q-layer.0": 3222.9282, "encoder_q-layer.1": 3492.334, "encoder_q-layer.10": 4950.9766, "encoder_q-layer.11": 12332.0137, "encoder_q-layer.2": 3966.3899, "encoder_q-layer.3": 4063.8904, "encoder_q-layer.4": 4516.9414, "encoder_q-layer.5": 4246.9424, "encoder_q-layer.6": 4543.9204, "encoder_q-layer.7": 4458.7993, "encoder_q-layer.8": 4937.2002, "encoder_q-layer.9": 4701.5752, "epoch": 0.3, "inbatch_neg_score": 0.1422, "inbatch_pos_score": 0.6621, "learning_rate": 3.0111111111111113e-05, "loss": 4.0944, "norm_diff": 0.0882, "norm_loss": 0.0, "num_token_doc": 66.8095, "num_token_overlap": 11.6871, "num_token_query": 31.9136, "num_token_union": 65.3744, "num_word_context": 202.4989, "num_word_doc": 49.8359, "num_word_query": 23.5737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8295.3994, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1417, "query_norm": 1.2662, "queue_k_norm": 1.3509, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9136, "sent_len_1": 66.8095, "sent_len_max_0": 127.595, "sent_len_max_1": 188.9187, "stdk": 0.0473, "stdq": 0.0419, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 45800 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 4.1248, "doc_norm": 1.3499, "encoder_q-embeddings": 6739.0405, "encoder_q-layer.0": 4476.9912, "encoder_q-layer.1": 4814.9639, "encoder_q-layer.10": 5607.5078, "encoder_q-layer.11": 12814.4023, "encoder_q-layer.2": 5537.8228, "encoder_q-layer.3": 5923.9824, "encoder_q-layer.4": 6440.4976, "encoder_q-layer.5": 6349.9048, "encoder_q-layer.6": 6340.3184, "encoder_q-layer.7": 5654.3135, "encoder_q-layer.8": 5649.8872, "encoder_q-layer.9": 5304.2236, "epoch": 0.3, "inbatch_neg_score": 0.1449, "inbatch_pos_score": 0.7109, "learning_rate": 3.005555555555556e-05, "loss": 4.1248, "norm_diff": 0.0635, "norm_loss": 0.0, "num_token_doc": 66.9132, "num_token_overlap": 11.6941, "num_token_query": 31.9521, "num_token_union": 65.4388, "num_word_context": 202.8091, "num_word_doc": 49.9407, "num_word_query": 23.605, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9800.5994, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1442, "query_norm": 1.2864, "queue_k_norm": 1.3514, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9521, "sent_len_1": 66.9132, "sent_len_max_0": 127.67, "sent_len_max_1": 188.49, "stdk": 0.0471, "stdq": 0.0427, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 45900 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 4.089, "doc_norm": 1.3499, "encoder_q-embeddings": 2577.9097, "encoder_q-layer.0": 1726.1455, "encoder_q-layer.1": 1864.5758, "encoder_q-layer.10": 2432.085, "encoder_q-layer.11": 6337.2744, "encoder_q-layer.2": 2133.6973, "encoder_q-layer.3": 2278.6858, "encoder_q-layer.4": 2319.0471, "encoder_q-layer.5": 2430.4036, "encoder_q-layer.6": 2691.8337, "encoder_q-layer.7": 2572.0977, "encoder_q-layer.8": 2795.7722, "encoder_q-layer.9": 2427.8677, "epoch": 0.3, "inbatch_neg_score": 0.1508, "inbatch_pos_score": 0.6792, "learning_rate": 3e-05, "loss": 4.089, "norm_diff": 0.0768, "norm_loss": 0.0, "num_token_doc": 66.701, "num_token_overlap": 11.6746, "num_token_query": 32.0181, "num_token_union": 65.3802, "num_word_context": 201.9786, "num_word_doc": 49.7807, "num_word_query": 23.648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4331.1384, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1501, "query_norm": 1.2731, "queue_k_norm": 1.3512, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0181, "sent_len_1": 66.701, "sent_len_max_0": 127.3275, "sent_len_max_1": 189.5513, "stdk": 0.047, "stdq": 0.0422, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 46000 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 4.0987, "doc_norm": 1.3519, "encoder_q-embeddings": 4027.1826, "encoder_q-layer.0": 2791.3589, "encoder_q-layer.1": 3076.114, "encoder_q-layer.10": 2660.3765, "encoder_q-layer.11": 6387.5073, "encoder_q-layer.2": 3473.4417, "encoder_q-layer.3": 3621.5337, "encoder_q-layer.4": 3501.9646, "encoder_q-layer.5": 3493.8469, "encoder_q-layer.6": 3357.8442, "encoder_q-layer.7": 3412.9102, "encoder_q-layer.8": 3380.7788, "encoder_q-layer.9": 2530.9602, "epoch": 0.3, "inbatch_neg_score": 0.1521, "inbatch_pos_score": 0.6846, "learning_rate": 2.9944444444444446e-05, "loss": 4.0987, "norm_diff": 0.0752, "norm_loss": 0.0, "num_token_doc": 66.7087, "num_token_overlap": 11.7033, "num_token_query": 31.9918, "num_token_union": 65.3534, "num_word_context": 202.3381, "num_word_doc": 49.7582, "num_word_query": 23.6301, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5524.4443, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1526, "query_norm": 1.2767, "queue_k_norm": 1.3528, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9918, "sent_len_1": 66.7087, "sent_len_max_0": 127.4287, "sent_len_max_1": 191.6625, "stdk": 0.0471, "stdq": 0.0425, "stdqueue_k": 0.0472, "stdqueue_q": 0.0, "step": 46100 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.1063, "doc_norm": 1.3519, "encoder_q-embeddings": 3953.9617, "encoder_q-layer.0": 2850.1831, "encoder_q-layer.1": 3237.655, "encoder_q-layer.10": 2480.9102, "encoder_q-layer.11": 6321.3667, "encoder_q-layer.2": 3613.3982, "encoder_q-layer.3": 3845.8938, "encoder_q-layer.4": 3945.7437, "encoder_q-layer.5": 3981.1614, "encoder_q-layer.6": 3643.0171, "encoder_q-layer.7": 3537.1772, "encoder_q-layer.8": 3273.6802, "encoder_q-layer.9": 2484.1545, "epoch": 0.3, "inbatch_neg_score": 0.1508, "inbatch_pos_score": 0.6758, "learning_rate": 2.988888888888889e-05, "loss": 4.1063, "norm_diff": 0.0776, "norm_loss": 0.0, "num_token_doc": 66.7802, "num_token_overlap": 11.7062, "num_token_query": 31.981, "num_token_union": 65.3697, "num_word_context": 201.9823, "num_word_doc": 49.834, "num_word_query": 23.615, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5655.3297, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1509, "query_norm": 1.2743, "queue_k_norm": 1.3558, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.981, "sent_len_1": 66.7802, "sent_len_max_0": 127.6412, "sent_len_max_1": 188.8713, "stdk": 0.0471, "stdq": 0.0425, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 46200 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.1113, "doc_norm": 1.3513, "encoder_q-embeddings": 7623.4043, "encoder_q-layer.0": 4746.812, "encoder_q-layer.1": 5504.7964, "encoder_q-layer.10": 2570.915, "encoder_q-layer.11": 6075.4785, "encoder_q-layer.2": 6241.2642, "encoder_q-layer.3": 6455.7417, "encoder_q-layer.4": 5919.5317, "encoder_q-layer.5": 5423.8105, "encoder_q-layer.6": 4802.2383, "encoder_q-layer.7": 3391.5552, "encoder_q-layer.8": 2842.6377, "encoder_q-layer.9": 2365.0112, "epoch": 0.3, "inbatch_neg_score": 0.1512, "inbatch_pos_score": 0.6704, "learning_rate": 2.9833333333333335e-05, "loss": 4.1113, "norm_diff": 0.0878, "norm_loss": 0.0, "num_token_doc": 66.69, "num_token_overlap": 11.6277, "num_token_query": 31.7716, "num_token_union": 65.2891, "num_word_context": 202.004, "num_word_doc": 49.7883, "num_word_query": 23.4685, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7944.1801, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1501, "query_norm": 1.2635, "queue_k_norm": 1.3573, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7716, "sent_len_1": 66.69, "sent_len_max_0": 127.5113, "sent_len_max_1": 187.71, "stdk": 0.047, "stdq": 0.0423, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 46300 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.1056, "doc_norm": 1.3612, "encoder_q-embeddings": 3391.5669, "encoder_q-layer.0": 2407.1362, "encoder_q-layer.1": 2897.4292, "encoder_q-layer.10": 2782.8672, "encoder_q-layer.11": 6192.2393, "encoder_q-layer.2": 2441.8545, "encoder_q-layer.3": 2470.4888, "encoder_q-layer.4": 2552.8945, "encoder_q-layer.5": 2425.0649, "encoder_q-layer.6": 2653.1411, "encoder_q-layer.7": 2607.8342, "encoder_q-layer.8": 2824.9661, "encoder_q-layer.9": 2432.3262, "epoch": 0.3, "inbatch_neg_score": 0.1464, "inbatch_pos_score": 0.6895, "learning_rate": 2.9777777777777777e-05, "loss": 4.1056, "norm_diff": 0.0959, "norm_loss": 0.0, "num_token_doc": 66.6663, "num_token_overlap": 11.6235, "num_token_query": 31.746, "num_token_union": 65.2394, "num_word_context": 202.0891, "num_word_doc": 49.7788, "num_word_query": 23.4284, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4639.47, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1475, "query_norm": 1.2652, "queue_k_norm": 1.3566, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.746, "sent_len_1": 66.6663, "sent_len_max_0": 127.5238, "sent_len_max_1": 186.7512, "stdk": 0.0474, "stdq": 0.0425, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 46400 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.0635, "doc_norm": 1.3564, "encoder_q-embeddings": 2810.3052, "encoder_q-layer.0": 1859.0668, "encoder_q-layer.1": 2036.5304, "encoder_q-layer.10": 2778.801, "encoder_q-layer.11": 6435.2388, "encoder_q-layer.2": 2513.2988, "encoder_q-layer.3": 2538.0107, "encoder_q-layer.4": 2836.6921, "encoder_q-layer.5": 2730.2717, "encoder_q-layer.6": 2825.4905, "encoder_q-layer.7": 2764.3972, "encoder_q-layer.8": 2989.9045, "encoder_q-layer.9": 2547.2446, "epoch": 0.3, "inbatch_neg_score": 0.146, "inbatch_pos_score": 0.6772, "learning_rate": 2.9722222222222223e-05, "loss": 4.0635, "norm_diff": 0.077, "norm_loss": 0.0, "num_token_doc": 67.0296, "num_token_overlap": 11.7329, "num_token_query": 32.0019, "num_token_union": 65.5102, "num_word_context": 202.3033, "num_word_doc": 50.0065, "num_word_query": 23.6407, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4622.7871, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1475, "query_norm": 1.2794, "queue_k_norm": 1.3597, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0019, "sent_len_1": 67.0296, "sent_len_max_0": 127.6175, "sent_len_max_1": 189.5263, "stdk": 0.0472, "stdq": 0.0431, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 46500 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.0848, "doc_norm": 1.3659, "encoder_q-embeddings": 4519.8359, "encoder_q-layer.0": 3158.5923, "encoder_q-layer.1": 3427.2473, "encoder_q-layer.10": 2643.7478, "encoder_q-layer.11": 6101.5518, "encoder_q-layer.2": 4106.6348, "encoder_q-layer.3": 4345.6562, "encoder_q-layer.4": 4655.4116, "encoder_q-layer.5": 4594.5952, "encoder_q-layer.6": 4864.5361, "encoder_q-layer.7": 4393.1655, "encoder_q-layer.8": 4461.4648, "encoder_q-layer.9": 2974.3154, "epoch": 0.3, "inbatch_neg_score": 0.148, "inbatch_pos_score": 0.6919, "learning_rate": 2.9666666666666672e-05, "loss": 4.0848, "norm_diff": 0.1013, "norm_loss": 0.0, "num_token_doc": 66.7066, "num_token_overlap": 11.7133, "num_token_query": 32.0043, "num_token_union": 65.3349, "num_word_context": 201.9967, "num_word_doc": 49.7795, "num_word_query": 23.6382, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6350.8714, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.147, "query_norm": 1.2646, "queue_k_norm": 1.3586, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0043, "sent_len_1": 66.7066, "sent_len_max_0": 127.6112, "sent_len_max_1": 189.6312, "stdk": 0.0475, "stdq": 0.0426, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 46600 }, { "accuracy": 42.8711, "active_queue_size": 16384.0, "cl_loss": 4.0969, "doc_norm": 1.3565, "encoder_q-embeddings": 3176.1963, "encoder_q-layer.0": 2111.0684, "encoder_q-layer.1": 2201.6125, "encoder_q-layer.10": 2563.905, "encoder_q-layer.11": 6176.998, "encoder_q-layer.2": 2578.7773, "encoder_q-layer.3": 2623.0527, "encoder_q-layer.4": 2720.6272, "encoder_q-layer.5": 2676.0295, "encoder_q-layer.6": 2882.1084, "encoder_q-layer.7": 2835.4773, "encoder_q-layer.8": 3040.3745, "encoder_q-layer.9": 2692.8315, "epoch": 0.3, "inbatch_neg_score": 0.1428, "inbatch_pos_score": 0.6602, "learning_rate": 2.961111111111111e-05, "loss": 4.0969, "norm_diff": 0.1159, "norm_loss": 0.0, "num_token_doc": 66.8172, "num_token_overlap": 11.6257, "num_token_query": 31.7962, "num_token_union": 65.3423, "num_word_context": 202.3488, "num_word_doc": 49.8392, "num_word_query": 23.4757, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4671.7133, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1421, "query_norm": 1.2406, "queue_k_norm": 1.3618, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7962, "sent_len_1": 66.8172, "sent_len_max_0": 127.6488, "sent_len_max_1": 191.4837, "stdk": 0.0472, "stdq": 0.0417, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 46700 }, { "accuracy": 41.9922, "active_queue_size": 16384.0, "cl_loss": 4.0848, "doc_norm": 1.3598, "encoder_q-embeddings": 2392.9839, "encoder_q-layer.0": 1610.6393, "encoder_q-layer.1": 1717.314, "encoder_q-layer.10": 2499.3083, "encoder_q-layer.11": 6379.7056, "encoder_q-layer.2": 1907.4669, "encoder_q-layer.3": 2034.9172, "encoder_q-layer.4": 2278.2139, "encoder_q-layer.5": 2251.7932, "encoder_q-layer.6": 2371.7144, "encoder_q-layer.7": 2571.7373, "encoder_q-layer.8": 2572.8706, "encoder_q-layer.9": 2423.0798, "epoch": 0.3, "inbatch_neg_score": 0.1448, "inbatch_pos_score": 0.6494, "learning_rate": 2.955555555555556e-05, "loss": 4.0848, "norm_diff": 0.1242, "norm_loss": 0.0, "num_token_doc": 66.9418, "num_token_overlap": 11.6399, "num_token_query": 31.7508, "num_token_union": 65.4142, "num_word_context": 202.7473, "num_word_doc": 49.9744, "num_word_query": 23.4333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4244.3597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1448, "query_norm": 1.2356, "queue_k_norm": 1.3591, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7508, "sent_len_1": 66.9418, "sent_len_max_0": 127.4537, "sent_len_max_1": 188.9925, "stdk": 0.0473, "stdq": 0.0413, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 46800 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.083, "doc_norm": 1.3609, "encoder_q-embeddings": 8898.6367, "encoder_q-layer.0": 5672.4463, "encoder_q-layer.1": 5699.0566, "encoder_q-layer.10": 2639.5791, "encoder_q-layer.11": 6018.4526, "encoder_q-layer.2": 5734.479, "encoder_q-layer.3": 5355.9282, "encoder_q-layer.4": 5645.4062, "encoder_q-layer.5": 4636.0117, "encoder_q-layer.6": 4138.9751, "encoder_q-layer.7": 3648.3479, "encoder_q-layer.8": 3418.0479, "encoder_q-layer.9": 2490.1931, "epoch": 0.31, "inbatch_neg_score": 0.1422, "inbatch_pos_score": 0.6719, "learning_rate": 2.95e-05, "loss": 4.083, "norm_diff": 0.1071, "norm_loss": 0.0, "num_token_doc": 66.6169, "num_token_overlap": 11.697, "num_token_query": 31.8739, "num_token_union": 65.2322, "num_word_context": 201.9573, "num_word_doc": 49.7268, "num_word_query": 23.5306, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8287.0252, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1429, "query_norm": 1.2538, "queue_k_norm": 1.3593, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8739, "sent_len_1": 66.6169, "sent_len_max_0": 127.4488, "sent_len_max_1": 189.9412, "stdk": 0.0473, "stdq": 0.0421, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 46900 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.0875, "doc_norm": 1.3551, "encoder_q-embeddings": 3786.9988, "encoder_q-layer.0": 2686.251, "encoder_q-layer.1": 2861.2839, "encoder_q-layer.10": 2563.0691, "encoder_q-layer.11": 5925.0264, "encoder_q-layer.2": 3415.0249, "encoder_q-layer.3": 3669.7944, "encoder_q-layer.4": 3987.7603, "encoder_q-layer.5": 3926.5134, "encoder_q-layer.6": 4250.6787, "encoder_q-layer.7": 3737.4438, "encoder_q-layer.8": 3913.6187, "encoder_q-layer.9": 2893.2405, "epoch": 0.31, "inbatch_neg_score": 0.1444, "inbatch_pos_score": 0.6543, "learning_rate": 2.9444444444444448e-05, "loss": 4.0875, "norm_diff": 0.1252, "norm_loss": 0.0, "num_token_doc": 66.7824, "num_token_overlap": 11.7028, "num_token_query": 32.0035, "num_token_union": 65.3812, "num_word_context": 202.1053, "num_word_doc": 49.8136, "num_word_query": 23.6334, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5602.9736, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1436, "query_norm": 1.2299, "queue_k_norm": 1.3589, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0035, "sent_len_1": 66.7824, "sent_len_max_0": 127.475, "sent_len_max_1": 189.9925, "stdk": 0.0471, "stdq": 0.0411, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 47000 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.1113, "doc_norm": 1.3567, "encoder_q-embeddings": 2479.1313, "encoder_q-layer.0": 1620.8335, "encoder_q-layer.1": 1680.1273, "encoder_q-layer.10": 2426.3604, "encoder_q-layer.11": 5981.7964, "encoder_q-layer.2": 1877.4648, "encoder_q-layer.3": 1943.7537, "encoder_q-layer.4": 2092.8352, "encoder_q-layer.5": 2082.71, "encoder_q-layer.6": 2155.6331, "encoder_q-layer.7": 2302.2319, "encoder_q-layer.8": 2569.3057, "encoder_q-layer.9": 2352.1038, "epoch": 0.31, "inbatch_neg_score": 0.149, "inbatch_pos_score": 0.6704, "learning_rate": 2.9388888888888887e-05, "loss": 4.1113, "norm_diff": 0.1038, "norm_loss": 0.0, "num_token_doc": 66.8447, "num_token_overlap": 11.6105, "num_token_query": 31.7461, "num_token_union": 65.3641, "num_word_context": 202.3791, "num_word_doc": 49.8923, "num_word_query": 23.4206, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4101.7806, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1484, "query_norm": 1.2529, "queue_k_norm": 1.3624, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7461, "sent_len_1": 66.8447, "sent_len_max_0": 127.4525, "sent_len_max_1": 189.455, "stdk": 0.0472, "stdq": 0.0418, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 47100 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.1081, "doc_norm": 1.3651, "encoder_q-embeddings": 6055.2886, "encoder_q-layer.0": 4376.188, "encoder_q-layer.1": 5219.6997, "encoder_q-layer.10": 2346.8557, "encoder_q-layer.11": 6255.4966, "encoder_q-layer.2": 6144.585, "encoder_q-layer.3": 6533.1865, "encoder_q-layer.4": 6494.9463, "encoder_q-layer.5": 6128.9863, "encoder_q-layer.6": 6407.5117, "encoder_q-layer.7": 4689.4526, "encoder_q-layer.8": 3639.0295, "encoder_q-layer.9": 2566.9219, "epoch": 0.31, "inbatch_neg_score": 0.1476, "inbatch_pos_score": 0.6899, "learning_rate": 2.9333333333333336e-05, "loss": 4.1081, "norm_diff": 0.105, "norm_loss": 0.0, "num_token_doc": 66.5787, "num_token_overlap": 11.6717, "num_token_query": 31.927, "num_token_union": 65.2445, "num_word_context": 201.9589, "num_word_doc": 49.6914, "num_word_query": 23.5939, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8000.0395, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1477, "query_norm": 1.2601, "queue_k_norm": 1.3583, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.927, "sent_len_1": 66.5787, "sent_len_max_0": 127.5825, "sent_len_max_1": 190.2275, "stdk": 0.0475, "stdq": 0.0422, "stdqueue_k": 0.0473, "stdqueue_q": 0.0, "step": 47200 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.0828, "doc_norm": 1.351, "encoder_q-embeddings": 4707.1133, "encoder_q-layer.0": 3201.3442, "encoder_q-layer.1": 3676.6179, "encoder_q-layer.10": 2743.0435, "encoder_q-layer.11": 6210.3096, "encoder_q-layer.2": 4462.5254, "encoder_q-layer.3": 4643.6528, "encoder_q-layer.4": 5084.4829, "encoder_q-layer.5": 4973.6865, "encoder_q-layer.6": 4786.9277, "encoder_q-layer.7": 4596.2329, "encoder_q-layer.8": 4206.8271, "encoder_q-layer.9": 2760.2195, "epoch": 0.31, "inbatch_neg_score": 0.1486, "inbatch_pos_score": 0.6572, "learning_rate": 2.927777777777778e-05, "loss": 4.0828, "norm_diff": 0.0937, "norm_loss": 0.0, "num_token_doc": 66.5561, "num_token_overlap": 11.6794, "num_token_query": 31.9746, "num_token_union": 65.2778, "num_word_context": 202.1876, "num_word_doc": 49.7539, "num_word_query": 23.6199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6611.9189, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1489, "query_norm": 1.2573, "queue_k_norm": 1.3633, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9746, "sent_len_1": 66.5561, "sent_len_max_0": 127.6375, "sent_len_max_1": 185.7012, "stdk": 0.047, "stdq": 0.0421, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 47300 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.0726, "doc_norm": 1.3544, "encoder_q-embeddings": 2670.3342, "encoder_q-layer.0": 1772.4921, "encoder_q-layer.1": 1902.8557, "encoder_q-layer.10": 2648.106, "encoder_q-layer.11": 6233.793, "encoder_q-layer.2": 2238.3835, "encoder_q-layer.3": 2389.8928, "encoder_q-layer.4": 2625.2969, "encoder_q-layer.5": 2699.116, "encoder_q-layer.6": 3092.6194, "encoder_q-layer.7": 2922.584, "encoder_q-layer.8": 3185.4177, "encoder_q-layer.9": 2680.3958, "epoch": 0.31, "inbatch_neg_score": 0.1488, "inbatch_pos_score": 0.6729, "learning_rate": 2.9222222222222224e-05, "loss": 4.0726, "norm_diff": 0.0879, "norm_loss": 0.0, "num_token_doc": 66.7554, "num_token_overlap": 11.6911, "num_token_query": 31.859, "num_token_union": 65.2986, "num_word_context": 202.1045, "num_word_doc": 49.8076, "num_word_query": 23.5416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4504.4208, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1487, "query_norm": 1.2665, "queue_k_norm": 1.3616, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.859, "sent_len_1": 66.7554, "sent_len_max_0": 127.5675, "sent_len_max_1": 187.6325, "stdk": 0.047, "stdq": 0.0423, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 47400 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.0934, "doc_norm": 1.3585, "encoder_q-embeddings": 5628.2114, "encoder_q-layer.0": 4071.105, "encoder_q-layer.1": 4702.043, "encoder_q-layer.10": 2512.9707, "encoder_q-layer.11": 6123.9365, "encoder_q-layer.2": 5232.4907, "encoder_q-layer.3": 5456.3921, "encoder_q-layer.4": 5939.7642, "encoder_q-layer.5": 6264.0913, "encoder_q-layer.6": 5209.7661, "encoder_q-layer.7": 3903.8335, "encoder_q-layer.8": 3242.5466, "encoder_q-layer.9": 2659.8254, "epoch": 0.31, "inbatch_neg_score": 0.1468, "inbatch_pos_score": 0.6665, "learning_rate": 2.916666666666667e-05, "loss": 4.0934, "norm_diff": 0.0902, "norm_loss": 0.0, "num_token_doc": 66.7542, "num_token_overlap": 11.6423, "num_token_query": 31.7232, "num_token_union": 65.2874, "num_word_context": 202.0134, "num_word_doc": 49.8118, "num_word_query": 23.4014, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7351.3804, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.146, "query_norm": 1.2682, "queue_k_norm": 1.3618, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7232, "sent_len_1": 66.7542, "sent_len_max_0": 127.3675, "sent_len_max_1": 189.885, "stdk": 0.0472, "stdq": 0.0423, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 47500 }, { "accuracy": 40.2344, "active_queue_size": 16384.0, "cl_loss": 4.1008, "doc_norm": 1.3518, "encoder_q-embeddings": 3843.1372, "encoder_q-layer.0": 2806.1084, "encoder_q-layer.1": 3153.5188, "encoder_q-layer.10": 2524.2781, "encoder_q-layer.11": 6249.3125, "encoder_q-layer.2": 3377.2146, "encoder_q-layer.3": 3527.8699, "encoder_q-layer.4": 3535.9019, "encoder_q-layer.5": 3596.0479, "encoder_q-layer.6": 3475.5315, "encoder_q-layer.7": 3214.7549, "encoder_q-layer.8": 2955.7454, "encoder_q-layer.9": 2468.7761, "epoch": 0.31, "inbatch_neg_score": 0.1509, "inbatch_pos_score": 0.6582, "learning_rate": 2.9111111111111112e-05, "loss": 4.1008, "norm_diff": 0.0953, "norm_loss": 0.0, "num_token_doc": 66.9326, "num_token_overlap": 11.6659, "num_token_query": 31.7882, "num_token_union": 65.3764, "num_word_context": 202.0368, "num_word_doc": 49.9669, "num_word_query": 23.4616, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5472.5221, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1503, "query_norm": 1.2565, "queue_k_norm": 1.3629, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7882, "sent_len_1": 66.9326, "sent_len_max_0": 127.4325, "sent_len_max_1": 190.3625, "stdk": 0.047, "stdq": 0.0417, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 47600 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.0787, "doc_norm": 1.3633, "encoder_q-embeddings": 5470.5679, "encoder_q-layer.0": 3783.5525, "encoder_q-layer.1": 4568.3721, "encoder_q-layer.10": 2523.0442, "encoder_q-layer.11": 5939.4468, "encoder_q-layer.2": 5648.5811, "encoder_q-layer.3": 5326.6274, "encoder_q-layer.4": 6011.6694, "encoder_q-layer.5": 4961.3564, "encoder_q-layer.6": 4293.9067, "encoder_q-layer.7": 3417.3608, "encoder_q-layer.8": 3332.0217, "encoder_q-layer.9": 2496.4866, "epoch": 0.31, "inbatch_neg_score": 0.1545, "inbatch_pos_score": 0.6816, "learning_rate": 2.9055555555555558e-05, "loss": 4.0787, "norm_diff": 0.0789, "norm_loss": 0.0, "num_token_doc": 66.7325, "num_token_overlap": 11.6632, "num_token_query": 31.8942, "num_token_union": 65.3054, "num_word_context": 202.0986, "num_word_doc": 49.7825, "num_word_query": 23.5301, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6885.617, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1532, "query_norm": 1.2843, "queue_k_norm": 1.3612, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8942, "sent_len_1": 66.7325, "sent_len_max_0": 127.5537, "sent_len_max_1": 190.0687, "stdk": 0.0474, "stdq": 0.0427, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 47700 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 4.0782, "doc_norm": 1.3661, "encoder_q-embeddings": 3126.4939, "encoder_q-layer.0": 2133.6746, "encoder_q-layer.1": 2266.5518, "encoder_q-layer.10": 2472.4937, "encoder_q-layer.11": 6088.062, "encoder_q-layer.2": 2375.5403, "encoder_q-layer.3": 2361.2583, "encoder_q-layer.4": 2460.9553, "encoder_q-layer.5": 2346.3079, "encoder_q-layer.6": 2561.9233, "encoder_q-layer.7": 2570.9722, "encoder_q-layer.8": 2766.6677, "encoder_q-layer.9": 2425.7085, "epoch": 0.31, "inbatch_neg_score": 0.1545, "inbatch_pos_score": 0.6797, "learning_rate": 2.9e-05, "loss": 4.0782, "norm_diff": 0.1082, "norm_loss": 0.0, "num_token_doc": 66.7442, "num_token_overlap": 11.7014, "num_token_query": 31.9071, "num_token_union": 65.3103, "num_word_context": 202.0578, "num_word_doc": 49.8438, "num_word_query": 23.5625, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4484.2134, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1545, "query_norm": 1.2579, "queue_k_norm": 1.3633, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9071, "sent_len_1": 66.7442, "sent_len_max_0": 127.4762, "sent_len_max_1": 189.3262, "stdk": 0.0475, "stdq": 0.0414, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 47800 }, { "accuracy": 42.4805, "active_queue_size": 16384.0, "cl_loss": 4.0569, "doc_norm": 1.3662, "encoder_q-embeddings": 2433.4744, "encoder_q-layer.0": 1652.6879, "encoder_q-layer.1": 1709.8578, "encoder_q-layer.10": 2637.5303, "encoder_q-layer.11": 6433.7998, "encoder_q-layer.2": 1950.1931, "encoder_q-layer.3": 2105.9534, "encoder_q-layer.4": 2235.2515, "encoder_q-layer.5": 2179.1887, "encoder_q-layer.6": 2545.1746, "encoder_q-layer.7": 2619.7432, "encoder_q-layer.8": 2866.4033, "encoder_q-layer.9": 2758.0583, "epoch": 0.31, "inbatch_neg_score": 0.1601, "inbatch_pos_score": 0.6748, "learning_rate": 2.8944444444444446e-05, "loss": 4.0569, "norm_diff": 0.0782, "norm_loss": 0.0, "num_token_doc": 66.894, "num_token_overlap": 11.744, "num_token_query": 31.9958, "num_token_union": 65.4094, "num_word_context": 202.3223, "num_word_doc": 49.929, "num_word_query": 23.6215, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4284.2877, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1595, "query_norm": 1.288, "queue_k_norm": 1.3627, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9958, "sent_len_1": 66.894, "sent_len_max_0": 127.7225, "sent_len_max_1": 189.7463, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 47900 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 4.0875, "doc_norm": 1.3635, "encoder_q-embeddings": 5009.2397, "encoder_q-layer.0": 3257.3088, "encoder_q-layer.1": 3375.7229, "encoder_q-layer.10": 5383.4248, "encoder_q-layer.11": 11581.3135, "encoder_q-layer.2": 3774.9165, "encoder_q-layer.3": 3917.926, "encoder_q-layer.4": 4151.6821, "encoder_q-layer.5": 4194.0371, "encoder_q-layer.6": 4586.2993, "encoder_q-layer.7": 4757.4194, "encoder_q-layer.8": 5055.1514, "encoder_q-layer.9": 4563.5337, "epoch": 0.31, "inbatch_neg_score": 0.162, "inbatch_pos_score": 0.7085, "learning_rate": 2.8888888888888888e-05, "loss": 4.0875, "norm_diff": 0.0829, "norm_loss": 0.0, "num_token_doc": 66.8395, "num_token_overlap": 11.6548, "num_token_query": 31.8832, "num_token_union": 65.4427, "num_word_context": 202.6689, "num_word_doc": 49.8919, "num_word_query": 23.5545, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8028.567, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1615, "query_norm": 1.2806, "queue_k_norm": 1.3657, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8832, "sent_len_1": 66.8395, "sent_len_max_0": 127.6475, "sent_len_max_1": 188.0387, "stdk": 0.0473, "stdq": 0.042, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 48000 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.0729, "doc_norm": 1.3672, "encoder_q-embeddings": 6992.3652, "encoder_q-layer.0": 4794.479, "encoder_q-layer.1": 5545.71, "encoder_q-layer.10": 5167.8433, "encoder_q-layer.11": 11889.3291, "encoder_q-layer.2": 6665.1841, "encoder_q-layer.3": 7136.5732, "encoder_q-layer.4": 8112.0371, "encoder_q-layer.5": 8020.4785, "encoder_q-layer.6": 7674.8652, "encoder_q-layer.7": 7594.0391, "encoder_q-layer.8": 6782.603, "encoder_q-layer.9": 4933.293, "epoch": 0.31, "inbatch_neg_score": 0.1643, "inbatch_pos_score": 0.6816, "learning_rate": 2.8833333333333334e-05, "loss": 4.0729, "norm_diff": 0.0839, "norm_loss": 0.0, "num_token_doc": 66.4895, "num_token_overlap": 11.6531, "num_token_query": 31.9676, "num_token_union": 65.2443, "num_word_context": 201.9289, "num_word_doc": 49.6396, "num_word_query": 23.5964, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10783.065, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1641, "query_norm": 1.2834, "queue_k_norm": 1.364, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9676, "sent_len_1": 66.4895, "sent_len_max_0": 127.5487, "sent_len_max_1": 187.9913, "stdk": 0.0474, "stdq": 0.0422, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 48100 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.0752, "doc_norm": 1.3718, "encoder_q-embeddings": 5935.27, "encoder_q-layer.0": 3983.9836, "encoder_q-layer.1": 4519.3101, "encoder_q-layer.10": 5520.5127, "encoder_q-layer.11": 12234.8291, "encoder_q-layer.2": 5102.4312, "encoder_q-layer.3": 5181.1753, "encoder_q-layer.4": 5449.2412, "encoder_q-layer.5": 5405.0552, "encoder_q-layer.6": 5859.748, "encoder_q-layer.7": 5986.5259, "encoder_q-layer.8": 5516.9341, "encoder_q-layer.9": 4674.9688, "epoch": 0.31, "inbatch_neg_score": 0.1662, "inbatch_pos_score": 0.7119, "learning_rate": 2.877777777777778e-05, "loss": 4.0752, "norm_diff": 0.0703, "norm_loss": 0.0, "num_token_doc": 66.77, "num_token_overlap": 11.7055, "num_token_query": 31.9338, "num_token_union": 65.3135, "num_word_context": 202.0455, "num_word_doc": 49.8545, "num_word_query": 23.5919, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9264.0628, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.165, "query_norm": 1.3015, "queue_k_norm": 1.3647, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9338, "sent_len_1": 66.77, "sent_len_max_0": 127.5187, "sent_len_max_1": 188.8688, "stdk": 0.0476, "stdq": 0.0429, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 48200 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 4.0728, "doc_norm": 1.3664, "encoder_q-embeddings": 4879.9097, "encoder_q-layer.0": 3335.031, "encoder_q-layer.1": 3530.9062, "encoder_q-layer.10": 4840.7104, "encoder_q-layer.11": 11780.7891, "encoder_q-layer.2": 4008.4644, "encoder_q-layer.3": 4177.1152, "encoder_q-layer.4": 4384.5654, "encoder_q-layer.5": 4769.3086, "encoder_q-layer.6": 4910.9922, "encoder_q-layer.7": 4575.2241, "encoder_q-layer.8": 4909.1758, "encoder_q-layer.9": 4711.4443, "epoch": 0.31, "inbatch_neg_score": 0.1664, "inbatch_pos_score": 0.7271, "learning_rate": 2.8722222222222222e-05, "loss": 4.0728, "norm_diff": 0.0773, "norm_loss": 0.0, "num_token_doc": 66.7688, "num_token_overlap": 11.6929, "num_token_query": 31.7778, "num_token_union": 65.2732, "num_word_context": 202.4315, "num_word_doc": 49.808, "num_word_query": 23.455, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8158.8921, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1666, "query_norm": 1.2891, "queue_k_norm": 1.3663, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7778, "sent_len_1": 66.7688, "sent_len_max_0": 127.4737, "sent_len_max_1": 187.4225, "stdk": 0.0474, "stdq": 0.0427, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 48300 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 4.0886, "doc_norm": 1.3622, "encoder_q-embeddings": 4194.0981, "encoder_q-layer.0": 2933.5337, "encoder_q-layer.1": 3035.3274, "encoder_q-layer.10": 4769.5293, "encoder_q-layer.11": 12026.5752, "encoder_q-layer.2": 3339.2617, "encoder_q-layer.3": 3515.1829, "encoder_q-layer.4": 3512.0723, "encoder_q-layer.5": 3458.9939, "encoder_q-layer.6": 3792.1113, "encoder_q-layer.7": 4234.02, "encoder_q-layer.8": 5128.5117, "encoder_q-layer.9": 4594.1797, "epoch": 0.32, "inbatch_neg_score": 0.1699, "inbatch_pos_score": 0.7104, "learning_rate": 2.8666666666666668e-05, "loss": 4.0886, "norm_diff": 0.0834, "norm_loss": 0.0, "num_token_doc": 66.9262, "num_token_overlap": 11.7117, "num_token_query": 32.067, "num_token_union": 65.5037, "num_word_context": 202.3432, "num_word_doc": 49.9264, "num_word_query": 23.6736, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7614.7487, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1691, "query_norm": 1.2788, "queue_k_norm": 1.366, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.067, "sent_len_1": 66.9262, "sent_len_max_0": 127.4, "sent_len_max_1": 191.0563, "stdk": 0.0472, "stdq": 0.0425, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 48400 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 4.0625, "doc_norm": 1.3772, "encoder_q-embeddings": 5929.8706, "encoder_q-layer.0": 4112.6494, "encoder_q-layer.1": 4746.5811, "encoder_q-layer.10": 5351.3228, "encoder_q-layer.11": 11523.5684, "encoder_q-layer.2": 4831.2627, "encoder_q-layer.3": 4399.6099, "encoder_q-layer.4": 4822.2788, "encoder_q-layer.5": 4612.5103, "encoder_q-layer.6": 4910.9521, "encoder_q-layer.7": 5145.7583, "encoder_q-layer.8": 5384.4546, "encoder_q-layer.9": 4664.6196, "epoch": 0.32, "inbatch_neg_score": 0.166, "inbatch_pos_score": 0.7222, "learning_rate": 2.861111111111111e-05, "loss": 4.0625, "norm_diff": 0.1058, "norm_loss": 0.0, "num_token_doc": 66.7233, "num_token_overlap": 11.677, "num_token_query": 31.9387, "num_token_union": 65.3991, "num_word_context": 202.0777, "num_word_doc": 49.8267, "num_word_query": 23.5871, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8712.7416, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1656, "query_norm": 1.2713, "queue_k_norm": 1.3663, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9387, "sent_len_1": 66.7233, "sent_len_max_0": 127.4938, "sent_len_max_1": 187.7237, "stdk": 0.0477, "stdq": 0.0425, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 48500 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.0935, "doc_norm": 1.3665, "encoder_q-embeddings": 38443.375, "encoder_q-layer.0": 27767.5566, "encoder_q-layer.1": 31902.8848, "encoder_q-layer.10": 5249.8877, "encoder_q-layer.11": 12579.457, "encoder_q-layer.2": 36309.6406, "encoder_q-layer.3": 39239.3711, "encoder_q-layer.4": 36492.6914, "encoder_q-layer.5": 35759.7109, "encoder_q-layer.6": 36048.0742, "encoder_q-layer.7": 22799.9824, "encoder_q-layer.8": 14452.3496, "encoder_q-layer.9": 6177.4634, "epoch": 0.32, "inbatch_neg_score": 0.1594, "inbatch_pos_score": 0.6895, "learning_rate": 2.855555555555556e-05, "loss": 4.0935, "norm_diff": 0.0958, "norm_loss": 0.0, "num_token_doc": 66.5584, "num_token_overlap": 11.6535, "num_token_query": 31.9859, "num_token_union": 65.3238, "num_word_context": 202.1669, "num_word_doc": 49.6889, "num_word_query": 23.616, "postclip_grad_norm": 1.0, "preclip_grad_norm": 44368.8264, "preclip_grad_norm_avg": 0.0004, "q@queue_neg_score": 0.1602, "query_norm": 1.2706, "queue_k_norm": 1.3675, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9859, "sent_len_1": 66.5584, "sent_len_max_0": 127.4275, "sent_len_max_1": 188.6337, "stdk": 0.0473, "stdq": 0.0428, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 48600 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 4.0845, "doc_norm": 1.3762, "encoder_q-embeddings": 4681.2041, "encoder_q-layer.0": 3030.6106, "encoder_q-layer.1": 3184.9038, "encoder_q-layer.10": 4905.9922, "encoder_q-layer.11": 12467.0986, "encoder_q-layer.2": 3900.6707, "encoder_q-layer.3": 3639.0596, "encoder_q-layer.4": 3711.5786, "encoder_q-layer.5": 3717.7866, "encoder_q-layer.6": 4041.5759, "encoder_q-layer.7": 4432.7988, "encoder_q-layer.8": 5180.9619, "encoder_q-layer.9": 4631.0181, "epoch": 0.32, "inbatch_neg_score": 0.1576, "inbatch_pos_score": 0.7163, "learning_rate": 2.8499999999999998e-05, "loss": 4.0845, "norm_diff": 0.1206, "norm_loss": 0.0, "num_token_doc": 66.833, "num_token_overlap": 11.71, "num_token_query": 31.9451, "num_token_union": 65.3886, "num_word_context": 202.7266, "num_word_doc": 49.8621, "num_word_query": 23.6021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7929.8023, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1594, "query_norm": 1.2557, "queue_k_norm": 1.3674, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9451, "sent_len_1": 66.833, "sent_len_max_0": 127.5675, "sent_len_max_1": 188.4087, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 48700 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.0812, "doc_norm": 1.3649, "encoder_q-embeddings": 4795.2754, "encoder_q-layer.0": 2970.146, "encoder_q-layer.1": 3088.0713, "encoder_q-layer.10": 5002.1553, "encoder_q-layer.11": 11907.1279, "encoder_q-layer.2": 3348.9805, "encoder_q-layer.3": 3416.2505, "encoder_q-layer.4": 3627.5674, "encoder_q-layer.5": 3600.418, "encoder_q-layer.6": 3998.2478, "encoder_q-layer.7": 4579.604, "encoder_q-layer.8": 5087.6909, "encoder_q-layer.9": 4612.542, "epoch": 0.32, "inbatch_neg_score": 0.1625, "inbatch_pos_score": 0.6914, "learning_rate": 2.8444444444444447e-05, "loss": 4.0812, "norm_diff": 0.1039, "norm_loss": 0.0, "num_token_doc": 66.6633, "num_token_overlap": 11.6672, "num_token_query": 31.9235, "num_token_union": 65.3048, "num_word_context": 202.4249, "num_word_doc": 49.7416, "num_word_query": 23.5909, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7814.2638, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1619, "query_norm": 1.261, "queue_k_norm": 1.3672, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9235, "sent_len_1": 66.6633, "sent_len_max_0": 127.4075, "sent_len_max_1": 188.5875, "stdk": 0.0473, "stdq": 0.0422, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 48800 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 4.0807, "doc_norm": 1.3645, "encoder_q-embeddings": 2306.1221, "encoder_q-layer.0": 1514.0574, "encoder_q-layer.1": 1524.0706, "encoder_q-layer.10": 2652.7502, "encoder_q-layer.11": 6324.0488, "encoder_q-layer.2": 1707.5833, "encoder_q-layer.3": 1915.0718, "encoder_q-layer.4": 1974.717, "encoder_q-layer.5": 1971.0002, "encoder_q-layer.6": 2275.2722, "encoder_q-layer.7": 2478.6545, "encoder_q-layer.8": 2875.002, "encoder_q-layer.9": 2550.4138, "epoch": 0.32, "inbatch_neg_score": 0.1574, "inbatch_pos_score": 0.6753, "learning_rate": 2.8388888888888893e-05, "loss": 4.0807, "norm_diff": 0.1289, "norm_loss": 0.0, "num_token_doc": 66.5607, "num_token_overlap": 11.6984, "num_token_query": 32.1867, "num_token_union": 65.3725, "num_word_context": 202.3252, "num_word_doc": 49.6444, "num_word_query": 23.7823, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4101.4399, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1566, "query_norm": 1.2355, "queue_k_norm": 1.3678, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.1867, "sent_len_1": 66.5607, "sent_len_max_0": 127.4338, "sent_len_max_1": 190.575, "stdk": 0.0472, "stdq": 0.0412, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 48900 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.06, "doc_norm": 1.3645, "encoder_q-embeddings": 2125.4553, "encoder_q-layer.0": 1456.0963, "encoder_q-layer.1": 1499.1621, "encoder_q-layer.10": 2648.4592, "encoder_q-layer.11": 5905.0361, "encoder_q-layer.2": 1687.3457, "encoder_q-layer.3": 1763.1372, "encoder_q-layer.4": 1958.1366, "encoder_q-layer.5": 1936.8136, "encoder_q-layer.6": 2191.269, "encoder_q-layer.7": 2299.4866, "encoder_q-layer.8": 2502.3584, "encoder_q-layer.9": 2389.5969, "epoch": 0.32, "inbatch_neg_score": 0.1529, "inbatch_pos_score": 0.6826, "learning_rate": 2.8333333333333335e-05, "loss": 4.06, "norm_diff": 0.1165, "norm_loss": 0.0, "num_token_doc": 66.6392, "num_token_overlap": 11.6431, "num_token_query": 31.9053, "num_token_union": 65.2747, "num_word_context": 201.9592, "num_word_doc": 49.68, "num_word_query": 23.5651, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3929.9214, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1548, "query_norm": 1.248, "queue_k_norm": 1.3685, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9053, "sent_len_1": 66.6392, "sent_len_max_0": 127.4425, "sent_len_max_1": 189.59, "stdk": 0.0473, "stdq": 0.0418, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 49000 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.0832, "doc_norm": 1.3644, "encoder_q-embeddings": 2909.26, "encoder_q-layer.0": 1919.7954, "encoder_q-layer.1": 2036.5389, "encoder_q-layer.10": 2883.4233, "encoder_q-layer.11": 6530.3237, "encoder_q-layer.2": 2372.8154, "encoder_q-layer.3": 2415.9717, "encoder_q-layer.4": 2690.1233, "encoder_q-layer.5": 2733.9778, "encoder_q-layer.6": 2913.2561, "encoder_q-layer.7": 2917.1833, "encoder_q-layer.8": 3143.4858, "encoder_q-layer.9": 2573.2737, "epoch": 0.32, "inbatch_neg_score": 0.1564, "inbatch_pos_score": 0.6479, "learning_rate": 2.827777777777778e-05, "loss": 4.0832, "norm_diff": 0.1231, "norm_loss": 0.0, "num_token_doc": 66.862, "num_token_overlap": 11.7031, "num_token_query": 31.9739, "num_token_union": 65.4247, "num_word_context": 202.4595, "num_word_doc": 49.865, "num_word_query": 23.6131, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4687.6449, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1562, "query_norm": 1.2414, "queue_k_norm": 1.3676, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9739, "sent_len_1": 66.862, "sent_len_max_0": 127.3538, "sent_len_max_1": 190.1225, "stdk": 0.0473, "stdq": 0.0413, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 49100 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 4.0463, "doc_norm": 1.3732, "encoder_q-embeddings": 2557.0393, "encoder_q-layer.0": 1836.4215, "encoder_q-layer.1": 1923.0541, "encoder_q-layer.10": 2517.4866, "encoder_q-layer.11": 5731.3916, "encoder_q-layer.2": 2192.7332, "encoder_q-layer.3": 2280.6685, "encoder_q-layer.4": 2406.844, "encoder_q-layer.5": 2455.7419, "encoder_q-layer.6": 2419.7949, "encoder_q-layer.7": 2253.8459, "encoder_q-layer.8": 2522.2776, "encoder_q-layer.9": 2376.6023, "epoch": 0.32, "inbatch_neg_score": 0.1522, "inbatch_pos_score": 0.6948, "learning_rate": 2.8222222222222223e-05, "loss": 4.0463, "norm_diff": 0.1163, "norm_loss": 0.0, "num_token_doc": 66.9061, "num_token_overlap": 11.7263, "num_token_query": 32.1015, "num_token_union": 65.5717, "num_word_context": 202.6333, "num_word_doc": 49.9449, "num_word_query": 23.7256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4081.9858, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1532, "query_norm": 1.2569, "queue_k_norm": 1.3682, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.1015, "sent_len_1": 66.9061, "sent_len_max_0": 127.3487, "sent_len_max_1": 188.905, "stdk": 0.0476, "stdq": 0.0419, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 49200 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.0985, "doc_norm": 1.3728, "encoder_q-embeddings": 31238.3535, "encoder_q-layer.0": 22589.1543, "encoder_q-layer.1": 23844.5078, "encoder_q-layer.10": 2687.1472, "encoder_q-layer.11": 6362.9912, "encoder_q-layer.2": 25490.7578, "encoder_q-layer.3": 27833.3359, "encoder_q-layer.4": 29926.9805, "encoder_q-layer.5": 25823.7344, "encoder_q-layer.6": 24172.543, "encoder_q-layer.7": 15620.4453, "encoder_q-layer.8": 9764.6455, "encoder_q-layer.9": 4279.0127, "epoch": 0.32, "inbatch_neg_score": 0.155, "inbatch_pos_score": 0.6802, "learning_rate": 2.816666666666667e-05, "loss": 4.0985, "norm_diff": 0.1135, "norm_loss": 0.0, "num_token_doc": 66.7852, "num_token_overlap": 11.6344, "num_token_query": 31.8803, "num_token_union": 65.3664, "num_word_context": 202.3688, "num_word_doc": 49.8102, "num_word_query": 23.5366, "postclip_grad_norm": 1.0, "preclip_grad_norm": 33671.4813, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1545, "query_norm": 1.2593, "queue_k_norm": 1.3694, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8803, "sent_len_1": 66.7852, "sent_len_max_0": 127.5, "sent_len_max_1": 190.3237, "stdk": 0.0476, "stdq": 0.042, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 49300 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.0503, "doc_norm": 1.3714, "encoder_q-embeddings": 2816.77, "encoder_q-layer.0": 1931.741, "encoder_q-layer.1": 1914.277, "encoder_q-layer.10": 2393.2803, "encoder_q-layer.11": 6058.4072, "encoder_q-layer.2": 2237.1843, "encoder_q-layer.3": 2363.6602, "encoder_q-layer.4": 2670.1897, "encoder_q-layer.5": 2706.4092, "encoder_q-layer.6": 3052.2319, "encoder_q-layer.7": 2872.8599, "encoder_q-layer.8": 3039.4495, "encoder_q-layer.9": 2603.9255, "epoch": 0.32, "inbatch_neg_score": 0.1594, "inbatch_pos_score": 0.6953, "learning_rate": 2.811111111111111e-05, "loss": 4.0503, "norm_diff": 0.0961, "norm_loss": 0.0, "num_token_doc": 67.3469, "num_token_overlap": 11.7967, "num_token_query": 32.0858, "num_token_union": 65.7459, "num_word_context": 202.9383, "num_word_doc": 50.2753, "num_word_query": 23.702, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4487.8635, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1592, "query_norm": 1.2753, "queue_k_norm": 1.3703, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0858, "sent_len_1": 67.3469, "sent_len_max_0": 127.4862, "sent_len_max_1": 189.595, "stdk": 0.0475, "stdq": 0.0423, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 49400 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.0599, "doc_norm": 1.3654, "encoder_q-embeddings": 2831.9727, "encoder_q-layer.0": 1966.526, "encoder_q-layer.1": 2030.476, "encoder_q-layer.10": 2568.9126, "encoder_q-layer.11": 6037.9585, "encoder_q-layer.2": 2191.2949, "encoder_q-layer.3": 2330.218, "encoder_q-layer.4": 2297.3145, "encoder_q-layer.5": 2412.6555, "encoder_q-layer.6": 2549.2014, "encoder_q-layer.7": 2378.2263, "encoder_q-layer.8": 2530.4065, "encoder_q-layer.9": 2257.0269, "epoch": 0.32, "inbatch_neg_score": 0.1591, "inbatch_pos_score": 0.6826, "learning_rate": 2.8055555555555557e-05, "loss": 4.0599, "norm_diff": 0.107, "norm_loss": 0.0, "num_token_doc": 66.7032, "num_token_overlap": 11.7013, "num_token_query": 31.8939, "num_token_union": 65.3017, "num_word_context": 202.3773, "num_word_doc": 49.852, "num_word_query": 23.5648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4305.4792, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1587, "query_norm": 1.2585, "queue_k_norm": 1.3692, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8939, "sent_len_1": 66.7032, "sent_len_max_0": 127.3263, "sent_len_max_1": 187.73, "stdk": 0.0473, "stdq": 0.0417, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 49500 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 4.0659, "doc_norm": 1.365, "encoder_q-embeddings": 3678.3279, "encoder_q-layer.0": 2491.6453, "encoder_q-layer.1": 2674.3762, "encoder_q-layer.10": 2838.9604, "encoder_q-layer.11": 6531.5845, "encoder_q-layer.2": 2729.5488, "encoder_q-layer.3": 2752.6472, "encoder_q-layer.4": 2974.9666, "encoder_q-layer.5": 2813.6946, "encoder_q-layer.6": 3032.1196, "encoder_q-layer.7": 2784.228, "encoder_q-layer.8": 2755.2898, "encoder_q-layer.9": 2604.3042, "epoch": 0.32, "inbatch_neg_score": 0.1609, "inbatch_pos_score": 0.6938, "learning_rate": 2.8000000000000003e-05, "loss": 4.0659, "norm_diff": 0.0726, "norm_loss": 0.0, "num_token_doc": 66.823, "num_token_overlap": 11.6384, "num_token_query": 31.7457, "num_token_union": 65.3272, "num_word_context": 202.3091, "num_word_doc": 49.8389, "num_word_query": 23.4412, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4973.1273, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.161, "query_norm": 1.2924, "queue_k_norm": 1.3709, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7457, "sent_len_1": 66.823, "sent_len_max_0": 127.47, "sent_len_max_1": 190.5288, "stdk": 0.0472, "stdq": 0.0428, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 49600 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.0657, "doc_norm": 1.374, "encoder_q-embeddings": 2492.1824, "encoder_q-layer.0": 1590.4779, "encoder_q-layer.1": 1681.3058, "encoder_q-layer.10": 2437.812, "encoder_q-layer.11": 6140.416, "encoder_q-layer.2": 1862.0084, "encoder_q-layer.3": 1913.9116, "encoder_q-layer.4": 1930.3054, "encoder_q-layer.5": 1970.8042, "encoder_q-layer.6": 2330.5178, "encoder_q-layer.7": 2559.0784, "encoder_q-layer.8": 2840.9985, "encoder_q-layer.9": 2401.5845, "epoch": 0.32, "inbatch_neg_score": 0.1605, "inbatch_pos_score": 0.6997, "learning_rate": 2.7944444444444445e-05, "loss": 4.0657, "norm_diff": 0.088, "norm_loss": 0.0, "num_token_doc": 67.0829, "num_token_overlap": 11.6609, "num_token_query": 31.7586, "num_token_union": 65.433, "num_word_context": 201.9643, "num_word_doc": 50.0562, "num_word_query": 23.4377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4132.654, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1615, "query_norm": 1.286, "queue_k_norm": 1.372, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7586, "sent_len_1": 67.0829, "sent_len_max_0": 127.465, "sent_len_max_1": 189.99, "stdk": 0.0476, "stdq": 0.0423, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 49700 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.057, "doc_norm": 1.3681, "encoder_q-embeddings": 3387.3684, "encoder_q-layer.0": 2135.4221, "encoder_q-layer.1": 2383.5518, "encoder_q-layer.10": 2611.5715, "encoder_q-layer.11": 6233.9453, "encoder_q-layer.2": 2765.2439, "encoder_q-layer.3": 3002.0227, "encoder_q-layer.4": 3259.343, "encoder_q-layer.5": 2959.1145, "encoder_q-layer.6": 3119.7031, "encoder_q-layer.7": 2851.8535, "encoder_q-layer.8": 2803.4583, "encoder_q-layer.9": 2340.5281, "epoch": 0.32, "inbatch_neg_score": 0.165, "inbatch_pos_score": 0.6797, "learning_rate": 2.788888888888889e-05, "loss": 4.057, "norm_diff": 0.0997, "norm_loss": 0.0, "num_token_doc": 66.6363, "num_token_overlap": 11.6675, "num_token_query": 31.8473, "num_token_union": 65.2502, "num_word_context": 202.2635, "num_word_doc": 49.7667, "num_word_query": 23.5369, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4846.7874, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1646, "query_norm": 1.2684, "queue_k_norm": 1.3714, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8473, "sent_len_1": 66.6363, "sent_len_max_0": 127.5913, "sent_len_max_1": 187.4737, "stdk": 0.0473, "stdq": 0.0411, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 49800 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 4.0472, "doc_norm": 1.3745, "encoder_q-embeddings": 2295.0251, "encoder_q-layer.0": 1581.1342, "encoder_q-layer.1": 1669.1903, "encoder_q-layer.10": 2638.8367, "encoder_q-layer.11": 6211.6401, "encoder_q-layer.2": 1857.5614, "encoder_q-layer.3": 1905.1296, "encoder_q-layer.4": 1999.2793, "encoder_q-layer.5": 2029.7581, "encoder_q-layer.6": 2283.7512, "encoder_q-layer.7": 2628.5884, "encoder_q-layer.8": 2739.9233, "encoder_q-layer.9": 2436.8206, "epoch": 0.32, "inbatch_neg_score": 0.1685, "inbatch_pos_score": 0.7012, "learning_rate": 2.7833333333333333e-05, "loss": 4.0472, "norm_diff": 0.0509, "norm_loss": 0.0, "num_token_doc": 66.6976, "num_token_overlap": 11.693, "num_token_query": 31.8604, "num_token_union": 65.2626, "num_word_context": 202.174, "num_word_doc": 49.7718, "num_word_query": 23.5405, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4043.9251, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1685, "query_norm": 1.3237, "queue_k_norm": 1.3707, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8604, "sent_len_1": 66.6976, "sent_len_max_0": 127.4025, "sent_len_max_1": 188.96, "stdk": 0.0476, "stdq": 0.0432, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 49900 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 4.0362, "doc_norm": 1.3717, "encoder_q-embeddings": 27886.5293, "encoder_q-layer.0": 21340.6699, "encoder_q-layer.1": 21763.3594, "encoder_q-layer.10": 2710.9463, "encoder_q-layer.11": 6204.4956, "encoder_q-layer.2": 33439.0039, "encoder_q-layer.3": 28088.1855, "encoder_q-layer.4": 22173.9902, "encoder_q-layer.5": 25132.5059, "encoder_q-layer.6": 20546.8457, "encoder_q-layer.7": 13338.8438, "encoder_q-layer.8": 8587.0723, "encoder_q-layer.9": 3577.033, "epoch": 0.33, "inbatch_neg_score": 0.1781, "inbatch_pos_score": 0.7261, "learning_rate": 2.777777777777778e-05, "loss": 4.0362, "norm_diff": 0.0567, "norm_loss": 0.0, "num_token_doc": 66.9184, "num_token_overlap": 11.7354, "num_token_query": 31.9044, "num_token_union": 65.3916, "num_word_context": 202.3459, "num_word_doc": 49.9156, "num_word_query": 23.557, "postclip_grad_norm": 1.0, "preclip_grad_norm": 31424.7856, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.178, "query_norm": 1.315, "queue_k_norm": 1.3696, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9044, "sent_len_1": 66.9184, "sent_len_max_0": 127.5413, "sent_len_max_1": 190.5925, "stdk": 0.0475, "stdq": 0.0428, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 50000 }, { "dev_runtime": 44.1285, "dev_samples_per_second": 1.45, "dev_steps_per_second": 0.023, "epoch": 0.33, "step": 50000, "test_accuracy": 92.7001953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.42511239647865295, "test_doc_norm": 1.3370426893234253, "test_inbatch_neg_score": 0.5048930644989014, "test_inbatch_pos_score": 1.377486228942871, "test_loss": 0.42511239647865295, "test_loss_align": 1.0712120532989502, "test_loss_unif": 3.924814462661743, "test_loss_unif_q@queue": 3.924814224243164, "test_norm_diff": 0.08010803163051605, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.1694939136505127, "test_query_norm": 1.4171507358551025, "test_queue_k_norm": 1.3695976734161377, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04095403477549553, "test_stdq": 0.04126439616084099, "test_stdqueue_k": 0.04748940095305443, "test_stdqueue_q": 0.0 }, { "dev_runtime": 44.1285, "dev_samples_per_second": 1.45, "dev_steps_per_second": 0.023, "epoch": 0.33, "eval_beir-arguana_ndcg@10": 0.373, "eval_beir-arguana_recall@10": 0.62731, "eval_beir-arguana_recall@100": 0.91607, "eval_beir-arguana_recall@20": 0.77027, "eval_beir-avg_ndcg@10": 0.3653441666666667, "eval_beir-avg_recall@10": 0.43518466666666666, "eval_beir-avg_recall@100": 0.6186349166666666, "eval_beir-avg_recall@20": 0.49978, "eval_beir-cqadupstack_ndcg@10": 0.25131166666666666, "eval_beir-cqadupstack_recall@10": 0.3438566666666667, "eval_beir-cqadupstack_recall@100": 0.5749491666666666, "eval_beir-cqadupstack_recall@20": 0.41431, "eval_beir-fiqa_ndcg@10": 0.22449, "eval_beir-fiqa_recall@10": 0.28143, "eval_beir-fiqa_recall@100": 0.53684, "eval_beir-fiqa_recall@20": 0.3657, "eval_beir-nfcorpus_ndcg@10": 0.29427, "eval_beir-nfcorpus_recall@10": 0.14354, "eval_beir-nfcorpus_recall@100": 0.28041, "eval_beir-nfcorpus_recall@20": 0.1799, "eval_beir-nq_ndcg@10": 0.26495, "eval_beir-nq_recall@10": 0.43955, "eval_beir-nq_recall@100": 0.78681, "eval_beir-nq_recall@20": 0.56194, "eval_beir-quora_ndcg@10": 0.73785, "eval_beir-quora_recall@10": 0.85263, "eval_beir-quora_recall@100": 0.96619, "eval_beir-quora_recall@20": 0.90253, "eval_beir-scidocs_ndcg@10": 0.14515, "eval_beir-scidocs_recall@10": 0.15208, "eval_beir-scidocs_recall@100": 0.35082, "eval_beir-scidocs_recall@20": 0.20595, "eval_beir-scifact_ndcg@10": 0.61474, "eval_beir-scifact_recall@10": 0.77967, "eval_beir-scifact_recall@100": 0.90489, "eval_beir-scifact_recall@20": 0.823, "eval_beir-trec-covid_ndcg@10": 0.54317, "eval_beir-trec-covid_recall@10": 0.584, "eval_beir-trec-covid_recall@100": 0.4264, "eval_beir-trec-covid_recall@20": 0.565, "eval_beir-webis-touche2020_ndcg@10": 0.20451, "eval_beir-webis-touche2020_recall@10": 0.14778, "eval_beir-webis-touche2020_recall@100": 0.44297, "eval_beir-webis-touche2020_recall@20": 0.2092, "eval_senteval-avg_sts": 0.7565156731721747, "eval_senteval-sickr_spearman": 0.7123100720498754, "eval_senteval-stsb_spearman": 0.8007212742944742, "step": 50000, "test_accuracy": 92.7001953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.42511239647865295, "test_doc_norm": 1.3370426893234253, "test_inbatch_neg_score": 0.5048930644989014, "test_inbatch_pos_score": 1.377486228942871, "test_loss": 0.42511239647865295, "test_loss_align": 1.0712120532989502, "test_loss_unif": 3.924814462661743, "test_loss_unif_q@queue": 3.924814224243164, "test_norm_diff": 0.08010803163051605, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.1694939136505127, "test_query_norm": 1.4171507358551025, "test_queue_k_norm": 1.3695976734161377, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04095403477549553, "test_stdq": 0.04126439616084099, "test_stdqueue_k": 0.04748940095305443, "test_stdqueue_q": 0.0 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.0592, "doc_norm": 1.3743, "encoder_q-embeddings": 19348.4121, "encoder_q-layer.0": 13951.7275, "encoder_q-layer.1": 17024.4863, "encoder_q-layer.10": 3022.73, "encoder_q-layer.11": 6358.0146, "encoder_q-layer.2": 17854.9121, "encoder_q-layer.3": 19366.1211, "encoder_q-layer.4": 20353.377, "encoder_q-layer.5": 20539.041, "encoder_q-layer.6": 22693.0703, "encoder_q-layer.7": 18398.9551, "encoder_q-layer.8": 10313.8359, "encoder_q-layer.9": 3298.4956, "epoch": 0.33, "inbatch_neg_score": 0.182, "inbatch_pos_score": 0.7163, "learning_rate": 2.772222222222222e-05, "loss": 4.0592, "norm_diff": 0.0791, "norm_loss": 0.0, "num_token_doc": 66.8077, "num_token_overlap": 11.6987, "num_token_query": 31.9656, "num_token_union": 65.3493, "num_word_context": 202.2202, "num_word_doc": 49.839, "num_word_query": 23.6054, "postclip_grad_norm": 1.0, "preclip_grad_norm": 24470.4877, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1823, "query_norm": 1.2952, "queue_k_norm": 1.3711, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9656, "sent_len_1": 66.8077, "sent_len_max_0": 127.4338, "sent_len_max_1": 189.9737, "stdk": 0.0475, "stdq": 0.0423, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 50100 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.058, "doc_norm": 1.3738, "encoder_q-embeddings": 4925.4883, "encoder_q-layer.0": 3475.7158, "encoder_q-layer.1": 4046.9185, "encoder_q-layer.10": 2697.8, "encoder_q-layer.11": 6675.3735, "encoder_q-layer.2": 4650.0054, "encoder_q-layer.3": 4627.2153, "encoder_q-layer.4": 5012.8545, "encoder_q-layer.5": 5122.6704, "encoder_q-layer.6": 4870.335, "encoder_q-layer.7": 4434.0044, "encoder_q-layer.8": 3527.9556, "encoder_q-layer.9": 2628.2039, "epoch": 0.33, "inbatch_neg_score": 0.1848, "inbatch_pos_score": 0.7075, "learning_rate": 2.7666666666666667e-05, "loss": 4.058, "norm_diff": 0.0888, "norm_loss": 0.0, "num_token_doc": 66.7512, "num_token_overlap": 11.7186, "num_token_query": 31.9694, "num_token_union": 65.3003, "num_word_context": 202.3166, "num_word_doc": 49.7907, "num_word_query": 23.6041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6768.0019, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1853, "query_norm": 1.2851, "queue_k_norm": 1.3726, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9694, "sent_len_1": 66.7512, "sent_len_max_0": 127.4613, "sent_len_max_1": 190.6138, "stdk": 0.0475, "stdq": 0.0419, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 50200 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.0486, "doc_norm": 1.3674, "encoder_q-embeddings": 2938.772, "encoder_q-layer.0": 2070.1995, "encoder_q-layer.1": 2246.4072, "encoder_q-layer.10": 2358.2202, "encoder_q-layer.11": 5962.3613, "encoder_q-layer.2": 2643.1821, "encoder_q-layer.3": 2651.2363, "encoder_q-layer.4": 2733.0266, "encoder_q-layer.5": 2739.1331, "encoder_q-layer.6": 2709.6653, "encoder_q-layer.7": 2802.8752, "encoder_q-layer.8": 3008.8921, "encoder_q-layer.9": 2437.9775, "epoch": 0.33, "inbatch_neg_score": 0.1887, "inbatch_pos_score": 0.7109, "learning_rate": 2.761111111111111e-05, "loss": 4.0486, "norm_diff": 0.0808, "norm_loss": 0.0, "num_token_doc": 67.0726, "num_token_overlap": 11.7335, "num_token_query": 31.9513, "num_token_union": 65.5234, "num_word_context": 202.5689, "num_word_doc": 50.0851, "num_word_query": 23.6215, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4562.4949, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1882, "query_norm": 1.2866, "queue_k_norm": 1.3743, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9513, "sent_len_1": 67.0726, "sent_len_max_0": 127.3612, "sent_len_max_1": 188.3713, "stdk": 0.0472, "stdq": 0.0419, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 50300 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 4.0567, "doc_norm": 1.372, "encoder_q-embeddings": 3548.2549, "encoder_q-layer.0": 2429.2207, "encoder_q-layer.1": 2985.9451, "encoder_q-layer.10": 2540.3711, "encoder_q-layer.11": 6350.7622, "encoder_q-layer.2": 3339.7886, "encoder_q-layer.3": 3305.0332, "encoder_q-layer.4": 3236.425, "encoder_q-layer.5": 3003.7644, "encoder_q-layer.6": 3121.5959, "encoder_q-layer.7": 3020.4514, "encoder_q-layer.8": 2853.1619, "encoder_q-layer.9": 2395.0593, "epoch": 0.33, "inbatch_neg_score": 0.1899, "inbatch_pos_score": 0.7446, "learning_rate": 2.7555555555555555e-05, "loss": 4.0567, "norm_diff": 0.0766, "norm_loss": 0.0, "num_token_doc": 66.7379, "num_token_overlap": 11.6612, "num_token_query": 31.9548, "num_token_union": 65.3687, "num_word_context": 202.3829, "num_word_doc": 49.8016, "num_word_query": 23.5787, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5034.5865, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1899, "query_norm": 1.2954, "queue_k_norm": 1.3742, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9548, "sent_len_1": 66.7379, "sent_len_max_0": 127.57, "sent_len_max_1": 189.1087, "stdk": 0.0474, "stdq": 0.0426, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 50400 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 4.0781, "doc_norm": 1.3755, "encoder_q-embeddings": 3347.5527, "encoder_q-layer.0": 2552.397, "encoder_q-layer.1": 2671.0383, "encoder_q-layer.10": 2565.2781, "encoder_q-layer.11": 6132.7593, "encoder_q-layer.2": 2416.1206, "encoder_q-layer.3": 2360.4993, "encoder_q-layer.4": 2514.1401, "encoder_q-layer.5": 2347.6118, "encoder_q-layer.6": 2371.0027, "encoder_q-layer.7": 2459.2112, "encoder_q-layer.8": 2710.3662, "encoder_q-layer.9": 2429.3855, "epoch": 0.33, "inbatch_neg_score": 0.1878, "inbatch_pos_score": 0.7119, "learning_rate": 2.7500000000000004e-05, "loss": 4.0781, "norm_diff": 0.1106, "norm_loss": 0.0, "num_token_doc": 66.9812, "num_token_overlap": 11.6695, "num_token_query": 31.7695, "num_token_union": 65.4036, "num_word_context": 202.8314, "num_word_doc": 50.0103, "num_word_query": 23.4614, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4517.7395, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1879, "query_norm": 1.265, "queue_k_norm": 1.373, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7695, "sent_len_1": 66.9812, "sent_len_max_0": 127.57, "sent_len_max_1": 188.225, "stdk": 0.0475, "stdq": 0.0417, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 50500 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.0841, "doc_norm": 1.375, "encoder_q-embeddings": 2535.021, "encoder_q-layer.0": 1680.0117, "encoder_q-layer.1": 1765.5319, "encoder_q-layer.10": 2703.8621, "encoder_q-layer.11": 6444.0205, "encoder_q-layer.2": 2068.5251, "encoder_q-layer.3": 2130.4446, "encoder_q-layer.4": 2272.1785, "encoder_q-layer.5": 2356.4966, "encoder_q-layer.6": 2750.449, "encoder_q-layer.7": 2469.1162, "encoder_q-layer.8": 2584.7429, "encoder_q-layer.9": 2403.1411, "epoch": 0.33, "inbatch_neg_score": 0.1894, "inbatch_pos_score": 0.7383, "learning_rate": 2.7444444444444443e-05, "loss": 4.0841, "norm_diff": 0.085, "norm_loss": 0.0, "num_token_doc": 66.8218, "num_token_overlap": 11.6177, "num_token_query": 31.8228, "num_token_union": 65.3932, "num_word_context": 202.3826, "num_word_doc": 49.8324, "num_word_query": 23.5097, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4260.3221, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1891, "query_norm": 1.29, "queue_k_norm": 1.3737, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8228, "sent_len_1": 66.8218, "sent_len_max_0": 127.6363, "sent_len_max_1": 191.2163, "stdk": 0.0474, "stdq": 0.0429, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 50600 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.0573, "doc_norm": 1.3841, "encoder_q-embeddings": 2651.5312, "encoder_q-layer.0": 1787.9557, "encoder_q-layer.1": 1908.1281, "encoder_q-layer.10": 2649.6755, "encoder_q-layer.11": 6617.7661, "encoder_q-layer.2": 2134.6189, "encoder_q-layer.3": 2311.824, "encoder_q-layer.4": 2510.2803, "encoder_q-layer.5": 2414.7056, "encoder_q-layer.6": 2505.9878, "encoder_q-layer.7": 2650.1331, "encoder_q-layer.8": 3108.1626, "encoder_q-layer.9": 2552.9521, "epoch": 0.33, "inbatch_neg_score": 0.1896, "inbatch_pos_score": 0.7271, "learning_rate": 2.7388888888888892e-05, "loss": 4.0573, "norm_diff": 0.1174, "norm_loss": 0.0, "num_token_doc": 66.5989, "num_token_overlap": 11.6701, "num_token_query": 31.9917, "num_token_union": 65.2876, "num_word_context": 201.8817, "num_word_doc": 49.7102, "num_word_query": 23.6261, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4462.3975, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1896, "query_norm": 1.2667, "queue_k_norm": 1.3744, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9917, "sent_len_1": 66.5989, "sent_len_max_0": 127.5812, "sent_len_max_1": 189.5337, "stdk": 0.0477, "stdq": 0.042, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 50700 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.0851, "doc_norm": 1.3706, "encoder_q-embeddings": 22465.9492, "encoder_q-layer.0": 16600.9688, "encoder_q-layer.1": 16443.207, "encoder_q-layer.10": 2359.8198, "encoder_q-layer.11": 5879.9214, "encoder_q-layer.2": 20732.6699, "encoder_q-layer.3": 21507.1738, "encoder_q-layer.4": 25624.3789, "encoder_q-layer.5": 21433.3281, "encoder_q-layer.6": 23652.7051, "encoder_q-layer.7": 18438.373, "encoder_q-layer.8": 11487.9883, "encoder_q-layer.9": 4152.2222, "epoch": 0.33, "inbatch_neg_score": 0.1858, "inbatch_pos_score": 0.728, "learning_rate": 2.733333333333333e-05, "loss": 4.0851, "norm_diff": 0.0944, "norm_loss": 0.0, "num_token_doc": 66.5903, "num_token_overlap": 11.6796, "num_token_query": 31.8661, "num_token_union": 65.2021, "num_word_context": 202.4075, "num_word_doc": 49.7141, "num_word_query": 23.5125, "postclip_grad_norm": 1.0, "preclip_grad_norm": 26756.5743, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.1859, "query_norm": 1.2763, "queue_k_norm": 1.374, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8661, "sent_len_1": 66.5903, "sent_len_max_0": 127.585, "sent_len_max_1": 188.4988, "stdk": 0.0473, "stdq": 0.0426, "stdqueue_k": 0.0474, "stdqueue_q": 0.0, "step": 50800 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 4.068, "doc_norm": 1.3713, "encoder_q-embeddings": 32088.3477, "encoder_q-layer.0": 21004.4707, "encoder_q-layer.1": 20613.1641, "encoder_q-layer.10": 5128.8657, "encoder_q-layer.11": 12679.8311, "encoder_q-layer.2": 19280.3164, "encoder_q-layer.3": 18774.6641, "encoder_q-layer.4": 22277.5762, "encoder_q-layer.5": 19851.4727, "encoder_q-layer.6": 14954.0752, "encoder_q-layer.7": 9362.5078, "encoder_q-layer.8": 6590.7705, "encoder_q-layer.9": 5103.7612, "epoch": 0.33, "inbatch_neg_score": 0.1895, "inbatch_pos_score": 0.7285, "learning_rate": 2.727777777777778e-05, "loss": 4.068, "norm_diff": 0.0966, "norm_loss": 0.0, "num_token_doc": 66.8393, "num_token_overlap": 11.6064, "num_token_query": 31.8368, "num_token_union": 65.398, "num_word_context": 202.4861, "num_word_doc": 49.8908, "num_word_query": 23.5269, "postclip_grad_norm": 1.0, "preclip_grad_norm": 28288.0469, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.1893, "query_norm": 1.2747, "queue_k_norm": 1.3773, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8368, "sent_len_1": 66.8393, "sent_len_max_0": 127.5175, "sent_len_max_1": 189.6213, "stdk": 0.0472, "stdq": 0.0423, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 50900 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.0512, "doc_norm": 1.3766, "encoder_q-embeddings": 6306.4194, "encoder_q-layer.0": 4424.7646, "encoder_q-layer.1": 4937.4517, "encoder_q-layer.10": 4656.896, "encoder_q-layer.11": 11840.5811, "encoder_q-layer.2": 5779.6963, "encoder_q-layer.3": 5611.3604, "encoder_q-layer.4": 5857.7905, "encoder_q-layer.5": 6087.7559, "encoder_q-layer.6": 6347.2944, "encoder_q-layer.7": 5593.0425, "encoder_q-layer.8": 5421.9644, "encoder_q-layer.9": 4726.2671, "epoch": 0.33, "inbatch_neg_score": 0.1877, "inbatch_pos_score": 0.7061, "learning_rate": 2.7222222222222223e-05, "loss": 4.0512, "norm_diff": 0.1137, "norm_loss": 0.0, "num_token_doc": 66.6933, "num_token_overlap": 11.7113, "num_token_query": 31.9189, "num_token_union": 65.2926, "num_word_context": 202.4591, "num_word_doc": 49.7665, "num_word_query": 23.5824, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9448.4824, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1873, "query_norm": 1.2628, "queue_k_norm": 1.3772, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9189, "sent_len_1": 66.6933, "sent_len_max_0": 127.705, "sent_len_max_1": 189.695, "stdk": 0.0474, "stdq": 0.0418, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 51000 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.0431, "doc_norm": 1.3821, "encoder_q-embeddings": 7551.4067, "encoder_q-layer.0": 5156.0312, "encoder_q-layer.1": 6044.4614, "encoder_q-layer.10": 5369.6582, "encoder_q-layer.11": 12346.7148, "encoder_q-layer.2": 7296.3921, "encoder_q-layer.3": 7304.7793, "encoder_q-layer.4": 7114.2632, "encoder_q-layer.5": 7428.8057, "encoder_q-layer.6": 7126.7534, "encoder_q-layer.7": 7167.5908, "encoder_q-layer.8": 7093.7236, "encoder_q-layer.9": 5316.4214, "epoch": 0.33, "inbatch_neg_score": 0.1891, "inbatch_pos_score": 0.7227, "learning_rate": 2.716666666666667e-05, "loss": 4.0431, "norm_diff": 0.0895, "norm_loss": 0.0, "num_token_doc": 66.7295, "num_token_overlap": 11.6844, "num_token_query": 31.8435, "num_token_union": 65.2535, "num_word_context": 202.2233, "num_word_doc": 49.7829, "num_word_query": 23.4977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10901.7221, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1885, "query_norm": 1.2926, "queue_k_norm": 1.3757, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8435, "sent_len_1": 66.7295, "sent_len_max_0": 127.6562, "sent_len_max_1": 190.385, "stdk": 0.0476, "stdq": 0.043, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 51100 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 4.0557, "doc_norm": 1.3841, "encoder_q-embeddings": 9846.9502, "encoder_q-layer.0": 6733.5444, "encoder_q-layer.1": 7775.2954, "encoder_q-layer.10": 4893.4004, "encoder_q-layer.11": 12134.7998, "encoder_q-layer.2": 8149.1182, "encoder_q-layer.3": 8693.2822, "encoder_q-layer.4": 8786.2744, "encoder_q-layer.5": 8658.0508, "encoder_q-layer.6": 9576.2949, "encoder_q-layer.7": 11101.7832, "encoder_q-layer.8": 9291.0586, "encoder_q-layer.9": 5550.291, "epoch": 0.33, "inbatch_neg_score": 0.1914, "inbatch_pos_score": 0.7402, "learning_rate": 2.7111111111111114e-05, "loss": 4.0557, "norm_diff": 0.1136, "norm_loss": 0.0, "num_token_doc": 66.6947, "num_token_overlap": 11.6442, "num_token_query": 31.7546, "num_token_union": 65.2269, "num_word_context": 202.4654, "num_word_doc": 49.7762, "num_word_query": 23.4425, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13211.2491, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1915, "query_norm": 1.2705, "queue_k_norm": 1.3775, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7546, "sent_len_1": 66.6947, "sent_len_max_0": 127.485, "sent_len_max_1": 189.315, "stdk": 0.0477, "stdq": 0.042, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 51200 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 4.0694, "doc_norm": 1.381, "encoder_q-embeddings": 2598.2336, "encoder_q-layer.0": 1750.1331, "encoder_q-layer.1": 1811.9595, "encoder_q-layer.10": 2418.3071, "encoder_q-layer.11": 6065.1279, "encoder_q-layer.2": 2075.3181, "encoder_q-layer.3": 2138.822, "encoder_q-layer.4": 2237.032, "encoder_q-layer.5": 2249.4641, "encoder_q-layer.6": 2328.4658, "encoder_q-layer.7": 2450.1804, "encoder_q-layer.8": 2623.3416, "encoder_q-layer.9": 2303.4363, "epoch": 0.33, "inbatch_neg_score": 0.1929, "inbatch_pos_score": 0.7358, "learning_rate": 2.7055555555555557e-05, "loss": 4.0694, "norm_diff": 0.0965, "norm_loss": 0.0, "num_token_doc": 67.0171, "num_token_overlap": 11.7003, "num_token_query": 32.0396, "num_token_union": 65.5544, "num_word_context": 202.4969, "num_word_doc": 49.9587, "num_word_query": 23.6739, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4205.4519, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1932, "query_norm": 1.2846, "queue_k_norm": 1.3779, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0396, "sent_len_1": 67.0171, "sent_len_max_0": 127.6325, "sent_len_max_1": 190.1612, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 51300 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.0777, "doc_norm": 1.3812, "encoder_q-embeddings": 3819.3801, "encoder_q-layer.0": 2691.7039, "encoder_q-layer.1": 2850.1699, "encoder_q-layer.10": 2556.9873, "encoder_q-layer.11": 6432.2539, "encoder_q-layer.2": 3130.4224, "encoder_q-layer.3": 3044.9785, "encoder_q-layer.4": 3163.51, "encoder_q-layer.5": 2807.9746, "encoder_q-layer.6": 3142.4631, "encoder_q-layer.7": 3243.624, "encoder_q-layer.8": 3096.606, "encoder_q-layer.9": 2592.4202, "epoch": 0.33, "inbatch_neg_score": 0.1958, "inbatch_pos_score": 0.7173, "learning_rate": 2.7000000000000002e-05, "loss": 4.0777, "norm_diff": 0.0951, "norm_loss": 0.0, "num_token_doc": 66.4829, "num_token_overlap": 11.6133, "num_token_query": 31.8543, "num_token_union": 65.1996, "num_word_context": 202.0677, "num_word_doc": 49.5956, "num_word_query": 23.5033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5154.0332, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1948, "query_norm": 1.2861, "queue_k_norm": 1.3779, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8543, "sent_len_1": 66.4829, "sent_len_max_0": 127.7375, "sent_len_max_1": 189.2925, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 51400 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.0584, "doc_norm": 1.381, "encoder_q-embeddings": 8288.9336, "encoder_q-layer.0": 5758.48, "encoder_q-layer.1": 6652.5693, "encoder_q-layer.10": 2398.2002, "encoder_q-layer.11": 5965.3232, "encoder_q-layer.2": 8824.6768, "encoder_q-layer.3": 10777.9893, "encoder_q-layer.4": 12108.2168, "encoder_q-layer.5": 11429.6621, "encoder_q-layer.6": 10838.1396, "encoder_q-layer.7": 9072.8535, "encoder_q-layer.8": 7217.9976, "encoder_q-layer.9": 3099.2637, "epoch": 0.34, "inbatch_neg_score": 0.1963, "inbatch_pos_score": 0.729, "learning_rate": 2.6944444444444445e-05, "loss": 4.0584, "norm_diff": 0.1285, "norm_loss": 0.0, "num_token_doc": 66.9025, "num_token_overlap": 11.732, "num_token_query": 32.0232, "num_token_union": 65.4271, "num_word_context": 202.5639, "num_word_doc": 49.8868, "num_word_query": 23.6393, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12774.6584, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1963, "query_norm": 1.2525, "queue_k_norm": 1.3795, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0232, "sent_len_1": 66.9025, "sent_len_max_0": 127.5938, "sent_len_max_1": 189.9512, "stdk": 0.0475, "stdq": 0.0411, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 51500 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.0658, "doc_norm": 1.3797, "encoder_q-embeddings": 4146.8037, "encoder_q-layer.0": 2760.3892, "encoder_q-layer.1": 2956.3792, "encoder_q-layer.10": 2504.6289, "encoder_q-layer.11": 5935.2075, "encoder_q-layer.2": 3481.6672, "encoder_q-layer.3": 3497.1069, "encoder_q-layer.4": 3870.1628, "encoder_q-layer.5": 3764.0024, "encoder_q-layer.6": 3561.1765, "encoder_q-layer.7": 3340.8076, "encoder_q-layer.8": 3213.9019, "encoder_q-layer.9": 2491.4949, "epoch": 0.34, "inbatch_neg_score": 0.1981, "inbatch_pos_score": 0.73, "learning_rate": 2.688888888888889e-05, "loss": 4.0658, "norm_diff": 0.0992, "norm_loss": 0.0, "num_token_doc": 66.5461, "num_token_overlap": 11.6525, "num_token_query": 31.9704, "num_token_union": 65.294, "num_word_context": 202.6096, "num_word_doc": 49.6889, "num_word_query": 23.6179, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5529.7385, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.1971, "query_norm": 1.2806, "queue_k_norm": 1.3786, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9704, "sent_len_1": 66.5461, "sent_len_max_0": 127.5837, "sent_len_max_1": 188.5275, "stdk": 0.0474, "stdq": 0.042, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 51600 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 4.0571, "doc_norm": 1.3855, "encoder_q-embeddings": 2420.0042, "encoder_q-layer.0": 1542.1598, "encoder_q-layer.1": 1706.9767, "encoder_q-layer.10": 2642.7275, "encoder_q-layer.11": 6311.312, "encoder_q-layer.2": 1939.9471, "encoder_q-layer.3": 2029.6686, "encoder_q-layer.4": 2121.3269, "encoder_q-layer.5": 2113.0081, "encoder_q-layer.6": 2215.3125, "encoder_q-layer.7": 2314.6494, "encoder_q-layer.8": 2788.0857, "encoder_q-layer.9": 2399.7764, "epoch": 0.34, "inbatch_neg_score": 0.1966, "inbatch_pos_score": 0.7393, "learning_rate": 2.6833333333333333e-05, "loss": 4.0571, "norm_diff": 0.1135, "norm_loss": 0.0, "num_token_doc": 66.8583, "num_token_overlap": 11.6995, "num_token_query": 31.9298, "num_token_union": 65.4397, "num_word_context": 202.3094, "num_word_doc": 49.9107, "num_word_query": 23.5896, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4151.4739, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1984, "query_norm": 1.272, "queue_k_norm": 1.3814, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9298, "sent_len_1": 66.8583, "sent_len_max_0": 127.3375, "sent_len_max_1": 189.4825, "stdk": 0.0477, "stdq": 0.0416, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 51700 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 4.0322, "doc_norm": 1.3857, "encoder_q-embeddings": 3645.446, "encoder_q-layer.0": 2401.03, "encoder_q-layer.1": 2562.7268, "encoder_q-layer.10": 2664.821, "encoder_q-layer.11": 6094.2808, "encoder_q-layer.2": 2750.5312, "encoder_q-layer.3": 2743.8328, "encoder_q-layer.4": 2682.2261, "encoder_q-layer.5": 2468.2979, "encoder_q-layer.6": 2478.2034, "encoder_q-layer.7": 2565.2188, "encoder_q-layer.8": 2774.4075, "encoder_q-layer.9": 2459.7898, "epoch": 0.34, "inbatch_neg_score": 0.1977, "inbatch_pos_score": 0.7368, "learning_rate": 2.677777777777778e-05, "loss": 4.0322, "norm_diff": 0.1067, "norm_loss": 0.0, "num_token_doc": 66.9666, "num_token_overlap": 11.6905, "num_token_query": 31.8336, "num_token_union": 65.3596, "num_word_context": 202.1304, "num_word_doc": 49.9503, "num_word_query": 23.5057, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4733.3041, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.198, "query_norm": 1.279, "queue_k_norm": 1.3806, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8336, "sent_len_1": 66.9666, "sent_len_max_0": 127.6925, "sent_len_max_1": 190.7612, "stdk": 0.0476, "stdq": 0.0418, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 51800 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 4.043, "doc_norm": 1.3846, "encoder_q-embeddings": 3402.3882, "encoder_q-layer.0": 2228.5764, "encoder_q-layer.1": 2440.0503, "encoder_q-layer.10": 2306.5547, "encoder_q-layer.11": 6011.1309, "encoder_q-layer.2": 2788.0903, "encoder_q-layer.3": 3046.1575, "encoder_q-layer.4": 3377.5305, "encoder_q-layer.5": 3270.8228, "encoder_q-layer.6": 3195.1294, "encoder_q-layer.7": 3149.9834, "encoder_q-layer.8": 3219.2637, "encoder_q-layer.9": 2594.0925, "epoch": 0.34, "inbatch_neg_score": 0.199, "inbatch_pos_score": 0.7427, "learning_rate": 2.6722222222222228e-05, "loss": 4.043, "norm_diff": 0.1055, "norm_loss": 0.0, "num_token_doc": 66.8834, "num_token_overlap": 11.7382, "num_token_query": 31.9747, "num_token_union": 65.4218, "num_word_context": 202.5911, "num_word_doc": 49.9294, "num_word_query": 23.6219, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5019.4841, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.1973, "query_norm": 1.2791, "queue_k_norm": 1.3809, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9747, "sent_len_1": 66.8834, "sent_len_max_0": 127.4275, "sent_len_max_1": 189.4162, "stdk": 0.0475, "stdq": 0.0416, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 51900 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 4.0745, "doc_norm": 1.3852, "encoder_q-embeddings": 3003.345, "encoder_q-layer.0": 2120.1882, "encoder_q-layer.1": 2240.5498, "encoder_q-layer.10": 2729.6648, "encoder_q-layer.11": 6464.5117, "encoder_q-layer.2": 2561.3767, "encoder_q-layer.3": 2790.9675, "encoder_q-layer.4": 3046.8455, "encoder_q-layer.5": 2730.501, "encoder_q-layer.6": 2713.8401, "encoder_q-layer.7": 2902.0503, "encoder_q-layer.8": 2893.2998, "encoder_q-layer.9": 2665.1262, "epoch": 0.34, "inbatch_neg_score": 0.2053, "inbatch_pos_score": 0.75, "learning_rate": 2.6666666666666667e-05, "loss": 4.0745, "norm_diff": 0.0915, "norm_loss": 0.0, "num_token_doc": 66.7198, "num_token_overlap": 11.6735, "num_token_query": 31.8843, "num_token_union": 65.3094, "num_word_context": 202.2429, "num_word_doc": 49.8118, "num_word_query": 23.5619, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4801.9363, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2046, "query_norm": 1.2937, "queue_k_norm": 1.3813, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8843, "sent_len_1": 66.7198, "sent_len_max_0": 127.5625, "sent_len_max_1": 189.7012, "stdk": 0.0475, "stdq": 0.042, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 52000 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.0586, "doc_norm": 1.3821, "encoder_q-embeddings": 2353.375, "encoder_q-layer.0": 1465.5002, "encoder_q-layer.1": 1495.2173, "encoder_q-layer.10": 2577.2046, "encoder_q-layer.11": 6259.061, "encoder_q-layer.2": 1610.9933, "encoder_q-layer.3": 1669.6117, "encoder_q-layer.4": 1734.7236, "encoder_q-layer.5": 1731.0009, "encoder_q-layer.6": 1950.251, "encoder_q-layer.7": 2125.4607, "encoder_q-layer.8": 2563.3318, "encoder_q-layer.9": 2407.3032, "epoch": 0.34, "inbatch_neg_score": 0.2104, "inbatch_pos_score": 0.751, "learning_rate": 2.6611111111111116e-05, "loss": 4.0586, "norm_diff": 0.0881, "norm_loss": 0.0, "num_token_doc": 66.6839, "num_token_overlap": 11.6849, "num_token_query": 31.8473, "num_token_union": 65.2799, "num_word_context": 202.4451, "num_word_doc": 49.7668, "num_word_query": 23.5327, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4011.2508, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2101, "query_norm": 1.294, "queue_k_norm": 1.3821, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8473, "sent_len_1": 66.6839, "sent_len_max_0": 127.5325, "sent_len_max_1": 189.245, "stdk": 0.0474, "stdq": 0.042, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 52100 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.0383, "doc_norm": 1.3818, "encoder_q-embeddings": 3286.4131, "encoder_q-layer.0": 2200.7769, "encoder_q-layer.1": 2651.5044, "encoder_q-layer.10": 2518.3057, "encoder_q-layer.11": 6077.4248, "encoder_q-layer.2": 3098.4282, "encoder_q-layer.3": 3542.6409, "encoder_q-layer.4": 3683.7805, "encoder_q-layer.5": 4114.3799, "encoder_q-layer.6": 4128.7891, "encoder_q-layer.7": 3621.6187, "encoder_q-layer.8": 2999.574, "encoder_q-layer.9": 2516.9114, "epoch": 0.34, "inbatch_neg_score": 0.2155, "inbatch_pos_score": 0.7368, "learning_rate": 2.6555555555555555e-05, "loss": 4.0383, "norm_diff": 0.0757, "norm_loss": 0.0, "num_token_doc": 66.9858, "num_token_overlap": 11.6596, "num_token_query": 31.8725, "num_token_union": 65.4784, "num_word_context": 202.6058, "num_word_doc": 50.0129, "num_word_query": 23.5438, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5370.7433, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2148, "query_norm": 1.3061, "queue_k_norm": 1.3834, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8725, "sent_len_1": 66.9858, "sent_len_max_0": 127.3125, "sent_len_max_1": 189.405, "stdk": 0.0473, "stdq": 0.0422, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 52200 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 4.0358, "doc_norm": 1.3775, "encoder_q-embeddings": 2249.3687, "encoder_q-layer.0": 1500.9969, "encoder_q-layer.1": 1604.6464, "encoder_q-layer.10": 2754.4751, "encoder_q-layer.11": 6103.2637, "encoder_q-layer.2": 1831.7275, "encoder_q-layer.3": 1895.8995, "encoder_q-layer.4": 1957.2102, "encoder_q-layer.5": 1998.443, "encoder_q-layer.6": 2228.3823, "encoder_q-layer.7": 2470.6001, "encoder_q-layer.8": 2972.3015, "encoder_q-layer.9": 2719.9324, "epoch": 0.34, "inbatch_neg_score": 0.221, "inbatch_pos_score": 0.7642, "learning_rate": 2.6500000000000004e-05, "loss": 4.0358, "norm_diff": 0.0436, "norm_loss": 0.0, "num_token_doc": 66.9009, "num_token_overlap": 11.7508, "num_token_query": 32.0471, "num_token_union": 65.4613, "num_word_context": 202.4439, "num_word_doc": 49.9137, "num_word_query": 23.6649, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4069.9536, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2205, "query_norm": 1.3339, "queue_k_norm": 1.387, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0471, "sent_len_1": 66.9009, "sent_len_max_0": 127.5337, "sent_len_max_1": 190.2512, "stdk": 0.0472, "stdq": 0.0433, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 52300 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.0406, "doc_norm": 1.3844, "encoder_q-embeddings": 2897.5686, "encoder_q-layer.0": 1921.8229, "encoder_q-layer.1": 2081.7117, "encoder_q-layer.10": 2448.6602, "encoder_q-layer.11": 6071.8979, "encoder_q-layer.2": 2402.6404, "encoder_q-layer.3": 2383.6895, "encoder_q-layer.4": 2542.1714, "encoder_q-layer.5": 2365.0942, "encoder_q-layer.6": 2382.1604, "encoder_q-layer.7": 2485.6548, "encoder_q-layer.8": 2619.2693, "encoder_q-layer.9": 2389.7827, "epoch": 0.34, "inbatch_neg_score": 0.2245, "inbatch_pos_score": 0.7617, "learning_rate": 2.6444444444444443e-05, "loss": 4.0406, "norm_diff": 0.0591, "norm_loss": 0.0, "num_token_doc": 67.0865, "num_token_overlap": 11.6702, "num_token_query": 31.9033, "num_token_union": 65.4991, "num_word_context": 202.5266, "num_word_doc": 50.0099, "num_word_query": 23.5573, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4381.1078, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2253, "query_norm": 1.3253, "queue_k_norm": 1.3839, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9033, "sent_len_1": 67.0865, "sent_len_max_0": 127.5487, "sent_len_max_1": 192.1538, "stdk": 0.0474, "stdq": 0.0427, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 52400 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 4.0473, "doc_norm": 1.3801, "encoder_q-embeddings": 2962.8628, "encoder_q-layer.0": 2081.6353, "encoder_q-layer.1": 2347.7146, "encoder_q-layer.10": 2556.0981, "encoder_q-layer.11": 6139.2964, "encoder_q-layer.2": 2580.7183, "encoder_q-layer.3": 2652.1572, "encoder_q-layer.4": 2715.3621, "encoder_q-layer.5": 2656.7288, "encoder_q-layer.6": 2669.9617, "encoder_q-layer.7": 2484.2886, "encoder_q-layer.8": 2576.1885, "encoder_q-layer.9": 2287.0942, "epoch": 0.34, "inbatch_neg_score": 0.2317, "inbatch_pos_score": 0.7686, "learning_rate": 2.6388888888888892e-05, "loss": 4.0473, "norm_diff": 0.0816, "norm_loss": 0.0, "num_token_doc": 66.6885, "num_token_overlap": 11.6459, "num_token_query": 31.8552, "num_token_union": 65.3048, "num_word_context": 202.4124, "num_word_doc": 49.7498, "num_word_query": 23.5276, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4545.1118, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.231, "query_norm": 1.2985, "queue_k_norm": 1.3871, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8552, "sent_len_1": 66.6885, "sent_len_max_0": 127.4163, "sent_len_max_1": 190.1125, "stdk": 0.0472, "stdq": 0.0417, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 52500 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.043, "doc_norm": 1.3844, "encoder_q-embeddings": 2366.9702, "encoder_q-layer.0": 1544.9843, "encoder_q-layer.1": 1591.1498, "encoder_q-layer.10": 2584.7417, "encoder_q-layer.11": 6454.6689, "encoder_q-layer.2": 1748.9712, "encoder_q-layer.3": 1885.0243, "encoder_q-layer.4": 1857.6775, "encoder_q-layer.5": 1905.0162, "encoder_q-layer.6": 2115.3154, "encoder_q-layer.7": 2368.2153, "encoder_q-layer.8": 2855.7598, "encoder_q-layer.9": 2448.5117, "epoch": 0.34, "inbatch_neg_score": 0.2338, "inbatch_pos_score": 0.751, "learning_rate": 2.633333333333333e-05, "loss": 4.043, "norm_diff": 0.0879, "norm_loss": 0.0, "num_token_doc": 66.6994, "num_token_overlap": 11.6968, "num_token_query": 31.9402, "num_token_union": 65.2973, "num_word_context": 201.8531, "num_word_doc": 49.7296, "num_word_query": 23.5905, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4226.9779, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2339, "query_norm": 1.2965, "queue_k_norm": 1.3873, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9402, "sent_len_1": 66.6994, "sent_len_max_0": 127.3725, "sent_len_max_1": 191.2337, "stdk": 0.0473, "stdq": 0.0416, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 52600 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.0487, "doc_norm": 1.3899, "encoder_q-embeddings": 2616.1536, "encoder_q-layer.0": 1696.3226, "encoder_q-layer.1": 1786.2734, "encoder_q-layer.10": 2553.1633, "encoder_q-layer.11": 6619.2505, "encoder_q-layer.2": 2051.5825, "encoder_q-layer.3": 2053.3342, "encoder_q-layer.4": 2081.833, "encoder_q-layer.5": 2024.5267, "encoder_q-layer.6": 2096.8833, "encoder_q-layer.7": 2262.6982, "encoder_q-layer.8": 2622.4109, "encoder_q-layer.9": 2469.7908, "epoch": 0.34, "inbatch_neg_score": 0.2361, "inbatch_pos_score": 0.7583, "learning_rate": 2.627777777777778e-05, "loss": 4.0487, "norm_diff": 0.0911, "norm_loss": 0.0, "num_token_doc": 66.9574, "num_token_overlap": 11.6996, "num_token_query": 31.9048, "num_token_union": 65.4016, "num_word_context": 202.5908, "num_word_doc": 49.8661, "num_word_query": 23.5391, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4286.1945, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2355, "query_norm": 1.2988, "queue_k_norm": 1.3898, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9048, "sent_len_1": 66.9574, "sent_len_max_0": 127.5337, "sent_len_max_1": 192.985, "stdk": 0.0475, "stdq": 0.0419, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 52700 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 4.0417, "doc_norm": 1.3956, "encoder_q-embeddings": 4449.4277, "encoder_q-layer.0": 2967.2524, "encoder_q-layer.1": 3575.0745, "encoder_q-layer.10": 2818.7366, "encoder_q-layer.11": 6210.3687, "encoder_q-layer.2": 3968.1267, "encoder_q-layer.3": 3738.8608, "encoder_q-layer.4": 3769.9075, "encoder_q-layer.5": 3353.1289, "encoder_q-layer.6": 2838.2954, "encoder_q-layer.7": 2636.0432, "encoder_q-layer.8": 2762.4575, "encoder_q-layer.9": 2413.4216, "epoch": 0.34, "inbatch_neg_score": 0.2374, "inbatch_pos_score": 0.7656, "learning_rate": 2.6222222222222226e-05, "loss": 4.0417, "norm_diff": 0.0946, "norm_loss": 0.0, "num_token_doc": 66.6732, "num_token_overlap": 11.6983, "num_token_query": 31.8515, "num_token_union": 65.2478, "num_word_context": 202.0767, "num_word_doc": 49.7062, "num_word_query": 23.4996, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5604.3551, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2372, "query_norm": 1.301, "queue_k_norm": 1.3911, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8515, "sent_len_1": 66.6732, "sent_len_max_0": 127.6737, "sent_len_max_1": 189.7088, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 52800 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 4.0684, "doc_norm": 1.3959, "encoder_q-embeddings": 2427.386, "encoder_q-layer.0": 1550.3193, "encoder_q-layer.1": 1606.3987, "encoder_q-layer.10": 2511.0518, "encoder_q-layer.11": 6398.2104, "encoder_q-layer.2": 1830.8472, "encoder_q-layer.3": 1948.7087, "encoder_q-layer.4": 1905.6282, "encoder_q-layer.5": 1843.1373, "encoder_q-layer.6": 2059.5886, "encoder_q-layer.7": 2357.8062, "encoder_q-layer.8": 2543.3679, "encoder_q-layer.9": 2477.5776, "epoch": 0.34, "inbatch_neg_score": 0.2381, "inbatch_pos_score": 0.7871, "learning_rate": 2.6166666666666668e-05, "loss": 4.0684, "norm_diff": 0.0842, "norm_loss": 0.0, "num_token_doc": 66.9653, "num_token_overlap": 11.698, "num_token_query": 31.8297, "num_token_union": 65.3948, "num_word_context": 202.5527, "num_word_doc": 49.9872, "num_word_query": 23.5044, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4140.5491, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2385, "query_norm": 1.3117, "queue_k_norm": 1.3929, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8297, "sent_len_1": 66.9653, "sent_len_max_0": 127.465, "sent_len_max_1": 189.8787, "stdk": 0.0476, "stdq": 0.0428, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 52900 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.0275, "doc_norm": 1.3914, "encoder_q-embeddings": 2609.1311, "encoder_q-layer.0": 1710.0325, "encoder_q-layer.1": 1822.6296, "encoder_q-layer.10": 2586.5806, "encoder_q-layer.11": 6248.5498, "encoder_q-layer.2": 2197.4456, "encoder_q-layer.3": 2371.3179, "encoder_q-layer.4": 2627.6716, "encoder_q-layer.5": 2642.3032, "encoder_q-layer.6": 2745.5498, "encoder_q-layer.7": 2726.7866, "encoder_q-layer.8": 2703.8374, "encoder_q-layer.9": 2424.2905, "epoch": 0.34, "inbatch_neg_score": 0.2376, "inbatch_pos_score": 0.7563, "learning_rate": 2.6111111111111114e-05, "loss": 4.0275, "norm_diff": 0.0997, "norm_loss": 0.0, "num_token_doc": 66.9212, "num_token_overlap": 11.7258, "num_token_query": 32.0345, "num_token_union": 65.4649, "num_word_context": 202.4329, "num_word_doc": 49.8987, "num_word_query": 23.6562, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4409.6089, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2378, "query_norm": 1.2917, "queue_k_norm": 1.3942, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0345, "sent_len_1": 66.9212, "sent_len_max_0": 127.4437, "sent_len_max_1": 191.0012, "stdk": 0.0475, "stdq": 0.0422, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 53000 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.0284, "doc_norm": 1.4013, "encoder_q-embeddings": 2537.7139, "encoder_q-layer.0": 1608.8259, "encoder_q-layer.1": 1682.7791, "encoder_q-layer.10": 2386.7307, "encoder_q-layer.11": 6007.623, "encoder_q-layer.2": 1906.6995, "encoder_q-layer.3": 2079.072, "encoder_q-layer.4": 2158.1855, "encoder_q-layer.5": 2061.7126, "encoder_q-layer.6": 2164.7175, "encoder_q-layer.7": 2372.4065, "encoder_q-layer.8": 2563.3896, "encoder_q-layer.9": 2342.1182, "epoch": 0.35, "inbatch_neg_score": 0.2384, "inbatch_pos_score": 0.7715, "learning_rate": 2.6055555555555556e-05, "loss": 4.0284, "norm_diff": 0.1024, "norm_loss": 0.0, "num_token_doc": 66.7386, "num_token_overlap": 11.7053, "num_token_query": 31.8531, "num_token_union": 65.2995, "num_word_context": 202.345, "num_word_doc": 49.8447, "num_word_query": 23.554, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4135.6355, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2373, "query_norm": 1.2989, "queue_k_norm": 1.3941, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8531, "sent_len_1": 66.7386, "sent_len_max_0": 127.4862, "sent_len_max_1": 189.19, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 53100 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 4.0596, "doc_norm": 1.3992, "encoder_q-embeddings": 4315.1377, "encoder_q-layer.0": 2875.6084, "encoder_q-layer.1": 3332.2407, "encoder_q-layer.10": 2607.3354, "encoder_q-layer.11": 6142.1406, "encoder_q-layer.2": 4149.6821, "encoder_q-layer.3": 4612.7085, "encoder_q-layer.4": 5013.4312, "encoder_q-layer.5": 5238.7051, "encoder_q-layer.6": 5338.1636, "encoder_q-layer.7": 4310.2246, "encoder_q-layer.8": 4053.446, "encoder_q-layer.9": 2905.217, "epoch": 0.35, "inbatch_neg_score": 0.2406, "inbatch_pos_score": 0.7939, "learning_rate": 2.6000000000000002e-05, "loss": 4.0596, "norm_diff": 0.1066, "norm_loss": 0.0, "num_token_doc": 66.8256, "num_token_overlap": 11.6489, "num_token_query": 31.9361, "num_token_union": 65.4235, "num_word_context": 202.2125, "num_word_doc": 49.7892, "num_word_query": 23.5821, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6496.2648, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2407, "query_norm": 1.2926, "queue_k_norm": 1.3936, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9361, "sent_len_1": 66.8256, "sent_len_max_0": 127.6937, "sent_len_max_1": 192.6325, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 53200 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.0451, "doc_norm": 1.4021, "encoder_q-embeddings": 2969.3303, "encoder_q-layer.0": 1999.8492, "encoder_q-layer.1": 2046.3752, "encoder_q-layer.10": 2733.9128, "encoder_q-layer.11": 6629.2319, "encoder_q-layer.2": 2370.917, "encoder_q-layer.3": 2514.8669, "encoder_q-layer.4": 2556.7092, "encoder_q-layer.5": 2485.584, "encoder_q-layer.6": 2749.9299, "encoder_q-layer.7": 2859.7966, "encoder_q-layer.8": 3275.7942, "encoder_q-layer.9": 2711.6492, "epoch": 0.35, "inbatch_neg_score": 0.2382, "inbatch_pos_score": 0.7671, "learning_rate": 2.5944444444444444e-05, "loss": 4.0451, "norm_diff": 0.1046, "norm_loss": 0.0, "num_token_doc": 66.8482, "num_token_overlap": 11.6411, "num_token_query": 31.8524, "num_token_union": 65.4292, "num_word_context": 202.1925, "num_word_doc": 49.8816, "num_word_query": 23.5351, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4671.1687, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2382, "query_norm": 1.2975, "queue_k_norm": 1.3944, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8524, "sent_len_1": 66.8482, "sent_len_max_0": 127.5212, "sent_len_max_1": 189.5025, "stdk": 0.0478, "stdq": 0.0425, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 53300 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.0771, "doc_norm": 1.3936, "encoder_q-embeddings": 6170.7158, "encoder_q-layer.0": 4230.2119, "encoder_q-layer.1": 4940.8828, "encoder_q-layer.10": 2621.9202, "encoder_q-layer.11": 6253.8374, "encoder_q-layer.2": 6610.3906, "encoder_q-layer.3": 6584.7026, "encoder_q-layer.4": 6327.4316, "encoder_q-layer.5": 6075.6772, "encoder_q-layer.6": 5978.3984, "encoder_q-layer.7": 4445.4331, "encoder_q-layer.8": 4434.2573, "encoder_q-layer.9": 2864.0059, "epoch": 0.35, "inbatch_neg_score": 0.2386, "inbatch_pos_score": 0.7612, "learning_rate": 2.588888888888889e-05, "loss": 4.0771, "norm_diff": 0.1202, "norm_loss": 0.0, "num_token_doc": 66.8348, "num_token_overlap": 11.6672, "num_token_query": 31.9363, "num_token_union": 65.4016, "num_word_context": 202.5375, "num_word_doc": 49.8646, "num_word_query": 23.5816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7978.8713, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2388, "query_norm": 1.2734, "queue_k_norm": 1.3953, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9363, "sent_len_1": 66.8348, "sent_len_max_0": 127.6375, "sent_len_max_1": 189.16, "stdk": 0.0474, "stdq": 0.0415, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 53400 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 4.0515, "doc_norm": 1.3866, "encoder_q-embeddings": 2952.5681, "encoder_q-layer.0": 1968.1266, "encoder_q-layer.1": 2156.856, "encoder_q-layer.10": 2554.6033, "encoder_q-layer.11": 6032.8745, "encoder_q-layer.2": 2437.9866, "encoder_q-layer.3": 2506.0942, "encoder_q-layer.4": 2733.2959, "encoder_q-layer.5": 2795.3877, "encoder_q-layer.6": 3011.4966, "encoder_q-layer.7": 3058.5063, "encoder_q-layer.8": 3119.1462, "encoder_q-layer.9": 2491.3955, "epoch": 0.35, "inbatch_neg_score": 0.2419, "inbatch_pos_score": 0.7817, "learning_rate": 2.5833333333333336e-05, "loss": 4.0515, "norm_diff": 0.0907, "norm_loss": 0.0, "num_token_doc": 66.7411, "num_token_overlap": 11.6514, "num_token_query": 31.909, "num_token_union": 65.3804, "num_word_context": 202.6908, "num_word_doc": 49.8412, "num_word_query": 23.5734, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4565.9104, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2397, "query_norm": 1.2959, "queue_k_norm": 1.3962, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.909, "sent_len_1": 66.7411, "sent_len_max_0": 127.475, "sent_len_max_1": 188.0875, "stdk": 0.0471, "stdq": 0.0424, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 53500 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 4.0686, "doc_norm": 1.4054, "encoder_q-embeddings": 2880.8403, "encoder_q-layer.0": 1978.6938, "encoder_q-layer.1": 2301.2295, "encoder_q-layer.10": 2647.1553, "encoder_q-layer.11": 6169.6343, "encoder_q-layer.2": 2702.24, "encoder_q-layer.3": 2778.4895, "encoder_q-layer.4": 2760.6455, "encoder_q-layer.5": 2570.3345, "encoder_q-layer.6": 2588.8196, "encoder_q-layer.7": 2519.5986, "encoder_q-layer.8": 2761.0764, "encoder_q-layer.9": 2606.7605, "epoch": 0.35, "inbatch_neg_score": 0.2452, "inbatch_pos_score": 0.7891, "learning_rate": 2.5777777777777778e-05, "loss": 4.0686, "norm_diff": 0.1189, "norm_loss": 0.0, "num_token_doc": 66.7661, "num_token_overlap": 11.6393, "num_token_query": 31.8549, "num_token_union": 65.3992, "num_word_context": 202.2755, "num_word_doc": 49.8439, "num_word_query": 23.5246, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4545.132, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2454, "query_norm": 1.2865, "queue_k_norm": 1.3963, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8549, "sent_len_1": 66.7661, "sent_len_max_0": 127.5375, "sent_len_max_1": 188.9313, "stdk": 0.0478, "stdq": 0.0419, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 53600 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 4.0382, "doc_norm": 1.3995, "encoder_q-embeddings": 1073.6199, "encoder_q-layer.0": 715.2687, "encoder_q-layer.1": 763.8167, "encoder_q-layer.10": 1368.223, "encoder_q-layer.11": 2945.9048, "encoder_q-layer.2": 866.9803, "encoder_q-layer.3": 869.308, "encoder_q-layer.4": 923.638, "encoder_q-layer.5": 951.282, "encoder_q-layer.6": 999.7683, "encoder_q-layer.7": 1069.5884, "encoder_q-layer.8": 1217.9296, "encoder_q-layer.9": 1093.1852, "epoch": 0.35, "inbatch_neg_score": 0.2421, "inbatch_pos_score": 0.7944, "learning_rate": 2.5722222222222224e-05, "loss": 4.0382, "norm_diff": 0.1089, "norm_loss": 0.0, "num_token_doc": 66.9034, "num_token_overlap": 11.6894, "num_token_query": 31.9041, "num_token_union": 65.4537, "num_word_context": 202.1177, "num_word_doc": 49.9213, "num_word_query": 23.5501, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1903.9681, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2424, "query_norm": 1.2906, "queue_k_norm": 1.3951, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9041, "sent_len_1": 66.9034, "sent_len_max_0": 127.4537, "sent_len_max_1": 189.4575, "stdk": 0.0476, "stdq": 0.0422, "stdqueue_k": 0.0475, "stdqueue_q": 0.0, "step": 53700 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 4.0424, "doc_norm": 1.3968, "encoder_q-embeddings": 1139.1653, "encoder_q-layer.0": 735.6263, "encoder_q-layer.1": 764.0717, "encoder_q-layer.10": 1291.5404, "encoder_q-layer.11": 3107.6697, "encoder_q-layer.2": 878.6585, "encoder_q-layer.3": 943.2234, "encoder_q-layer.4": 1030.9115, "encoder_q-layer.5": 1021.0955, "encoder_q-layer.6": 1064.7839, "encoder_q-layer.7": 1148.175, "encoder_q-layer.8": 1272.7932, "encoder_q-layer.9": 1165.0935, "epoch": 0.35, "inbatch_neg_score": 0.2395, "inbatch_pos_score": 0.7749, "learning_rate": 2.5666666666666666e-05, "loss": 4.0424, "norm_diff": 0.1147, "norm_loss": 0.0, "num_token_doc": 66.6746, "num_token_overlap": 11.6525, "num_token_query": 31.8164, "num_token_union": 65.2493, "num_word_context": 202.1175, "num_word_doc": 49.7561, "num_word_query": 23.4874, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2024.2041, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2397, "query_norm": 1.2821, "queue_k_norm": 1.3984, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8164, "sent_len_1": 66.6746, "sent_len_max_0": 127.455, "sent_len_max_1": 190.4038, "stdk": 0.0475, "stdq": 0.042, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 53800 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.0768, "doc_norm": 1.3941, "encoder_q-embeddings": 1144.7103, "encoder_q-layer.0": 772.0582, "encoder_q-layer.1": 803.9816, "encoder_q-layer.10": 1277.0693, "encoder_q-layer.11": 2902.7026, "encoder_q-layer.2": 910.2455, "encoder_q-layer.3": 941.63, "encoder_q-layer.4": 964.2706, "encoder_q-layer.5": 1003.1682, "encoder_q-layer.6": 1152.0249, "encoder_q-layer.7": 1181.7203, "encoder_q-layer.8": 1321.3254, "encoder_q-layer.9": 1195.0239, "epoch": 0.35, "inbatch_neg_score": 0.2404, "inbatch_pos_score": 0.7715, "learning_rate": 2.5611111111111115e-05, "loss": 4.0768, "norm_diff": 0.1017, "norm_loss": 0.0, "num_token_doc": 66.5244, "num_token_overlap": 11.5917, "num_token_query": 31.7825, "num_token_union": 65.2034, "num_word_context": 202.6277, "num_word_doc": 49.6505, "num_word_query": 23.4774, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1971.2065, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2405, "query_norm": 1.2924, "queue_k_norm": 1.4001, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7825, "sent_len_1": 66.5244, "sent_len_max_0": 127.3175, "sent_len_max_1": 188.1387, "stdk": 0.0474, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 53900 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 4.0315, "doc_norm": 1.4012, "encoder_q-embeddings": 1430.0325, "encoder_q-layer.0": 954.5585, "encoder_q-layer.1": 1026.7085, "encoder_q-layer.10": 1217.6293, "encoder_q-layer.11": 3033.042, "encoder_q-layer.2": 1153.0812, "encoder_q-layer.3": 1140.7966, "encoder_q-layer.4": 1220.2764, "encoder_q-layer.5": 1227.4945, "encoder_q-layer.6": 1315.1685, "encoder_q-layer.7": 1282.2952, "encoder_q-layer.8": 1325.9838, "encoder_q-layer.9": 1168.4485, "epoch": 0.35, "inbatch_neg_score": 0.2379, "inbatch_pos_score": 0.7715, "learning_rate": 2.5555555555555554e-05, "loss": 4.0315, "norm_diff": 0.1126, "norm_loss": 0.0, "num_token_doc": 66.9522, "num_token_overlap": 11.6383, "num_token_query": 31.8725, "num_token_union": 65.4072, "num_word_context": 202.1658, "num_word_doc": 49.9438, "num_word_query": 23.5324, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2211.9432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2389, "query_norm": 1.2885, "queue_k_norm": 1.3991, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8725, "sent_len_1": 66.9522, "sent_len_max_0": 127.4712, "sent_len_max_1": 190.5225, "stdk": 0.0476, "stdq": 0.0423, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 54000 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.0405, "doc_norm": 1.4048, "encoder_q-embeddings": 1112.6503, "encoder_q-layer.0": 750.9363, "encoder_q-layer.1": 785.6469, "encoder_q-layer.10": 1336.0011, "encoder_q-layer.11": 3201.4141, "encoder_q-layer.2": 870.7951, "encoder_q-layer.3": 896.3981, "encoder_q-layer.4": 964.2142, "encoder_q-layer.5": 933.8774, "encoder_q-layer.6": 1028.2748, "encoder_q-layer.7": 1247.7354, "encoder_q-layer.8": 1348.3475, "encoder_q-layer.9": 1214.1115, "epoch": 0.35, "inbatch_neg_score": 0.2403, "inbatch_pos_score": 0.7627, "learning_rate": 2.5500000000000003e-05, "loss": 4.0405, "norm_diff": 0.1236, "norm_loss": 0.0, "num_token_doc": 66.7748, "num_token_overlap": 11.6547, "num_token_query": 31.8018, "num_token_union": 65.3332, "num_word_context": 202.2037, "num_word_doc": 49.84, "num_word_query": 23.4747, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2031.8942, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2401, "query_norm": 1.2812, "queue_k_norm": 1.401, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8018, "sent_len_1": 66.7748, "sent_len_max_0": 127.4525, "sent_len_max_1": 188.9975, "stdk": 0.0477, "stdq": 0.0419, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 54100 }, { "accuracy": 41.7969, "active_queue_size": 16384.0, "cl_loss": 4.0506, "doc_norm": 1.3957, "encoder_q-embeddings": 1178.0978, "encoder_q-layer.0": 798.2947, "encoder_q-layer.1": 831.251, "encoder_q-layer.10": 1482.7528, "encoder_q-layer.11": 3381.8215, "encoder_q-layer.2": 958.6602, "encoder_q-layer.3": 1022.8904, "encoder_q-layer.4": 1045.9359, "encoder_q-layer.5": 1032.207, "encoder_q-layer.6": 1089.8638, "encoder_q-layer.7": 1208.7629, "encoder_q-layer.8": 1465.6141, "encoder_q-layer.9": 1371.5039, "epoch": 0.35, "inbatch_neg_score": 0.2397, "inbatch_pos_score": 0.7559, "learning_rate": 2.5444444444444442e-05, "loss": 4.0506, "norm_diff": 0.1208, "norm_loss": 0.0, "num_token_doc": 66.7835, "num_token_overlap": 11.6283, "num_token_query": 31.8154, "num_token_union": 65.3942, "num_word_context": 202.3297, "num_word_doc": 49.809, "num_word_query": 23.4719, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2142.2094, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2393, "query_norm": 1.2749, "queue_k_norm": 1.3991, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8154, "sent_len_1": 66.7835, "sent_len_max_0": 127.3162, "sent_len_max_1": 190.3475, "stdk": 0.0474, "stdq": 0.0417, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 54200 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 4.0464, "doc_norm": 1.4037, "encoder_q-embeddings": 1319.8506, "encoder_q-layer.0": 935.7194, "encoder_q-layer.1": 999.7686, "encoder_q-layer.10": 1285.3865, "encoder_q-layer.11": 3086.8296, "encoder_q-layer.2": 1187.4723, "encoder_q-layer.3": 1243.8109, "encoder_q-layer.4": 1245.192, "encoder_q-layer.5": 1158.4515, "encoder_q-layer.6": 1252.0603, "encoder_q-layer.7": 1213.8506, "encoder_q-layer.8": 1282.4545, "encoder_q-layer.9": 1157.006, "epoch": 0.35, "inbatch_neg_score": 0.2432, "inbatch_pos_score": 0.7861, "learning_rate": 2.538888888888889e-05, "loss": 4.0464, "norm_diff": 0.0962, "norm_loss": 0.0, "num_token_doc": 66.7955, "num_token_overlap": 11.6937, "num_token_query": 31.9629, "num_token_union": 65.3872, "num_word_context": 202.1029, "num_word_doc": 49.8052, "num_word_query": 23.6041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2147.2173, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2435, "query_norm": 1.3075, "queue_k_norm": 1.4021, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9629, "sent_len_1": 66.7955, "sent_len_max_0": 127.5312, "sent_len_max_1": 189.4837, "stdk": 0.0477, "stdq": 0.0429, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 54300 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 4.0436, "doc_norm": 1.4022, "encoder_q-embeddings": 2307.3647, "encoder_q-layer.0": 1601.4281, "encoder_q-layer.1": 1814.8295, "encoder_q-layer.10": 1226.0702, "encoder_q-layer.11": 3090.8557, "encoder_q-layer.2": 2000.8086, "encoder_q-layer.3": 2009.1899, "encoder_q-layer.4": 1888.5537, "encoder_q-layer.5": 1725.6727, "encoder_q-layer.6": 1466.6429, "encoder_q-layer.7": 1352.7427, "encoder_q-layer.8": 1301.6759, "encoder_q-layer.9": 1141.0743, "epoch": 0.35, "inbatch_neg_score": 0.2394, "inbatch_pos_score": 0.7725, "learning_rate": 2.5333333333333337e-05, "loss": 4.0436, "norm_diff": 0.1218, "norm_loss": 0.0, "num_token_doc": 66.5072, "num_token_overlap": 11.6613, "num_token_query": 31.8039, "num_token_union": 65.112, "num_word_context": 202.3448, "num_word_doc": 49.6352, "num_word_query": 23.4952, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2831.6382, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2397, "query_norm": 1.2804, "queue_k_norm": 1.4003, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8039, "sent_len_1": 66.5072, "sent_len_max_0": 127.4363, "sent_len_max_1": 189.0362, "stdk": 0.0476, "stdq": 0.0419, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 54400 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.0339, "doc_norm": 1.3962, "encoder_q-embeddings": 1091.891, "encoder_q-layer.0": 701.4324, "encoder_q-layer.1": 730.5007, "encoder_q-layer.10": 1318.9375, "encoder_q-layer.11": 3011.7493, "encoder_q-layer.2": 840.9708, "encoder_q-layer.3": 864.7579, "encoder_q-layer.4": 900.6915, "encoder_q-layer.5": 905.7252, "encoder_q-layer.6": 1050.5713, "encoder_q-layer.7": 1101.4781, "encoder_q-layer.8": 1342.6205, "encoder_q-layer.9": 1249.8595, "epoch": 0.35, "inbatch_neg_score": 0.2414, "inbatch_pos_score": 0.7852, "learning_rate": 2.527777777777778e-05, "loss": 4.0339, "norm_diff": 0.0962, "norm_loss": 0.0, "num_token_doc": 66.7206, "num_token_overlap": 11.6559, "num_token_query": 31.8582, "num_token_union": 65.2995, "num_word_context": 202.3427, "num_word_doc": 49.7653, "num_word_query": 23.5389, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1950.8425, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2407, "query_norm": 1.2999, "queue_k_norm": 1.4007, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8582, "sent_len_1": 66.7206, "sent_len_max_0": 127.3912, "sent_len_max_1": 189.4913, "stdk": 0.0473, "stdq": 0.0427, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 54500 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.0389, "doc_norm": 1.4004, "encoder_q-embeddings": 1268.5415, "encoder_q-layer.0": 827.5089, "encoder_q-layer.1": 856.2794, "encoder_q-layer.10": 1271.8303, "encoder_q-layer.11": 3156.783, "encoder_q-layer.2": 961.3606, "encoder_q-layer.3": 971.4651, "encoder_q-layer.4": 1036.0787, "encoder_q-layer.5": 1016.5536, "encoder_q-layer.6": 1109.2803, "encoder_q-layer.7": 1097.9937, "encoder_q-layer.8": 1353.4557, "encoder_q-layer.9": 1238.0492, "epoch": 0.36, "inbatch_neg_score": 0.2374, "inbatch_pos_score": 0.7725, "learning_rate": 2.5222222222222225e-05, "loss": 4.0389, "norm_diff": 0.1033, "norm_loss": 0.0, "num_token_doc": 66.8094, "num_token_overlap": 11.6882, "num_token_query": 31.8838, "num_token_union": 65.3317, "num_word_context": 202.1716, "num_word_doc": 49.8139, "num_word_query": 23.5449, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2072.9839, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.238, "query_norm": 1.2971, "queue_k_norm": 1.4015, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8838, "sent_len_1": 66.8094, "sent_len_max_0": 127.6675, "sent_len_max_1": 192.2138, "stdk": 0.0475, "stdq": 0.0426, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 54600 }, { "accuracy": 41.6016, "active_queue_size": 16384.0, "cl_loss": 4.0389, "doc_norm": 1.3999, "encoder_q-embeddings": 1932.4296, "encoder_q-layer.0": 1310.7993, "encoder_q-layer.1": 1468.8027, "encoder_q-layer.10": 1224.0924, "encoder_q-layer.11": 2988.3926, "encoder_q-layer.2": 1756.8295, "encoder_q-layer.3": 1863.0107, "encoder_q-layer.4": 2110.366, "encoder_q-layer.5": 1939.1133, "encoder_q-layer.6": 1790.4733, "encoder_q-layer.7": 1601.4816, "encoder_q-layer.8": 1509.0302, "encoder_q-layer.9": 1203.0295, "epoch": 0.36, "inbatch_neg_score": 0.2368, "inbatch_pos_score": 0.7563, "learning_rate": 2.5166666666666667e-05, "loss": 4.0389, "norm_diff": 0.1153, "norm_loss": 0.0, "num_token_doc": 66.8296, "num_token_overlap": 11.6295, "num_token_query": 31.8305, "num_token_union": 65.4032, "num_word_context": 202.5272, "num_word_doc": 49.8963, "num_word_query": 23.5207, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2699.4392, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2358, "query_norm": 1.2846, "queue_k_norm": 1.4027, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8305, "sent_len_1": 66.8296, "sent_len_max_0": 127.5337, "sent_len_max_1": 189.0525, "stdk": 0.0475, "stdq": 0.0422, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 54700 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.0341, "doc_norm": 1.3928, "encoder_q-embeddings": 2228.5815, "encoder_q-layer.0": 1726.5239, "encoder_q-layer.1": 1728.6819, "encoder_q-layer.10": 1295.3638, "encoder_q-layer.11": 3133.0647, "encoder_q-layer.2": 1879.0997, "encoder_q-layer.3": 1779.6587, "encoder_q-layer.4": 1768.2933, "encoder_q-layer.5": 1562.9954, "encoder_q-layer.6": 1423.5413, "encoder_q-layer.7": 1333.2695, "encoder_q-layer.8": 1374.3954, "encoder_q-layer.9": 1206.542, "epoch": 0.36, "inbatch_neg_score": 0.2402, "inbatch_pos_score": 0.7661, "learning_rate": 2.5111111111111113e-05, "loss": 4.0341, "norm_diff": 0.1065, "norm_loss": 0.0, "num_token_doc": 66.7031, "num_token_overlap": 11.6726, "num_token_query": 31.8592, "num_token_union": 65.3054, "num_word_context": 202.2131, "num_word_doc": 49.7861, "num_word_query": 23.5353, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2725.6459, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2399, "query_norm": 1.2863, "queue_k_norm": 1.3997, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8592, "sent_len_1": 66.7031, "sent_len_max_0": 127.5138, "sent_len_max_1": 188.0938, "stdk": 0.0473, "stdq": 0.042, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 54800 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.0275, "doc_norm": 1.404, "encoder_q-embeddings": 6174.5552, "encoder_q-layer.0": 4088.3125, "encoder_q-layer.1": 4867.7261, "encoder_q-layer.10": 1335.6615, "encoder_q-layer.11": 2957.6101, "encoder_q-layer.2": 5753.564, "encoder_q-layer.3": 5959.5615, "encoder_q-layer.4": 6180.9902, "encoder_q-layer.5": 7210.1694, "encoder_q-layer.6": 6622.5049, "encoder_q-layer.7": 5352.2505, "encoder_q-layer.8": 4024.6912, "encoder_q-layer.9": 2017.9702, "epoch": 0.36, "inbatch_neg_score": 0.2363, "inbatch_pos_score": 0.7734, "learning_rate": 2.5055555555555555e-05, "loss": 4.0275, "norm_diff": 0.116, "norm_loss": 0.0, "num_token_doc": 66.517, "num_token_overlap": 11.6889, "num_token_query": 31.9921, "num_token_union": 65.2274, "num_word_context": 202.0955, "num_word_doc": 49.6162, "num_word_query": 23.6214, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7661.9699, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.2366, "query_norm": 1.2879, "queue_k_norm": 1.4004, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9921, "sent_len_1": 66.517, "sent_len_max_0": 127.5825, "sent_len_max_1": 190.3562, "stdk": 0.0476, "stdq": 0.0421, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 54900 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 4.0295, "doc_norm": 1.4039, "encoder_q-embeddings": 1986.5404, "encoder_q-layer.0": 1308.2026, "encoder_q-layer.1": 1562.4694, "encoder_q-layer.10": 1245.4244, "encoder_q-layer.11": 3020.8953, "encoder_q-layer.2": 1790.6333, "encoder_q-layer.3": 1960.1514, "encoder_q-layer.4": 2035.3486, "encoder_q-layer.5": 1777.1306, "encoder_q-layer.6": 1625.5485, "encoder_q-layer.7": 1522.4517, "encoder_q-layer.8": 1386.0316, "encoder_q-layer.9": 1211.2672, "epoch": 0.36, "inbatch_neg_score": 0.2353, "inbatch_pos_score": 0.7964, "learning_rate": 2.5e-05, "loss": 4.0295, "norm_diff": 0.1019, "norm_loss": 0.0, "num_token_doc": 67.0622, "num_token_overlap": 11.7191, "num_token_query": 31.973, "num_token_union": 65.5368, "num_word_context": 202.8136, "num_word_doc": 50.0455, "num_word_query": 23.6102, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2714.2363, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2363, "query_norm": 1.302, "queue_k_norm": 1.4012, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.973, "sent_len_1": 67.0622, "sent_len_max_0": 127.4762, "sent_len_max_1": 189.74, "stdk": 0.0477, "stdq": 0.0426, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 55000 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.0311, "doc_norm": 1.4036, "encoder_q-embeddings": 1271.7483, "encoder_q-layer.0": 823.1752, "encoder_q-layer.1": 856.5222, "encoder_q-layer.10": 1233.5912, "encoder_q-layer.11": 3029.675, "encoder_q-layer.2": 922.1838, "encoder_q-layer.3": 958.4785, "encoder_q-layer.4": 975.1064, "encoder_q-layer.5": 964.6553, "encoder_q-layer.6": 1085.3722, "encoder_q-layer.7": 1196.3558, "encoder_q-layer.8": 1321.8239, "encoder_q-layer.9": 1237.005, "epoch": 0.36, "inbatch_neg_score": 0.2375, "inbatch_pos_score": 0.7793, "learning_rate": 2.4944444444444447e-05, "loss": 4.0311, "norm_diff": 0.1037, "norm_loss": 0.0, "num_token_doc": 66.8777, "num_token_overlap": 11.6421, "num_token_query": 31.8372, "num_token_union": 65.3636, "num_word_context": 201.9634, "num_word_doc": 49.8717, "num_word_query": 23.5021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2052.2146, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2373, "query_norm": 1.3, "queue_k_norm": 1.3999, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8372, "sent_len_1": 66.8777, "sent_len_max_0": 127.5238, "sent_len_max_1": 190.25, "stdk": 0.0476, "stdq": 0.0424, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 55100 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.0175, "doc_norm": 1.3976, "encoder_q-embeddings": 1117.3514, "encoder_q-layer.0": 747.2034, "encoder_q-layer.1": 767.4171, "encoder_q-layer.10": 1286.7119, "encoder_q-layer.11": 3073.3147, "encoder_q-layer.2": 856.9982, "encoder_q-layer.3": 896.6791, "encoder_q-layer.4": 937.2405, "encoder_q-layer.5": 939.4366, "encoder_q-layer.6": 1040.4475, "encoder_q-layer.7": 1221.9989, "encoder_q-layer.8": 1355.1814, "encoder_q-layer.9": 1221.4701, "epoch": 0.36, "inbatch_neg_score": 0.2381, "inbatch_pos_score": 0.7637, "learning_rate": 2.488888888888889e-05, "loss": 4.0175, "norm_diff": 0.0878, "norm_loss": 0.0, "num_token_doc": 66.9893, "num_token_overlap": 11.7223, "num_token_query": 31.8934, "num_token_union": 65.4174, "num_word_context": 202.2583, "num_word_doc": 50.0271, "num_word_query": 23.5441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2003.8732, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2368, "query_norm": 1.3098, "queue_k_norm": 1.4016, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8934, "sent_len_1": 66.9893, "sent_len_max_0": 127.4813, "sent_len_max_1": 189.2038, "stdk": 0.0473, "stdq": 0.0425, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 55200 }, { "accuracy": 40.332, "active_queue_size": 16384.0, "cl_loss": 4.0376, "doc_norm": 1.3992, "encoder_q-embeddings": 1984.9966, "encoder_q-layer.0": 1344.3285, "encoder_q-layer.1": 1478.1174, "encoder_q-layer.10": 1391.0176, "encoder_q-layer.11": 3194.4946, "encoder_q-layer.2": 1661.8418, "encoder_q-layer.3": 1671.3689, "encoder_q-layer.4": 1872.3035, "encoder_q-layer.5": 1734.6011, "encoder_q-layer.6": 1709.4469, "encoder_q-layer.7": 1570.502, "encoder_q-layer.8": 1599.5435, "encoder_q-layer.9": 1322.4969, "epoch": 0.36, "inbatch_neg_score": 0.2412, "inbatch_pos_score": 0.7529, "learning_rate": 2.4833333333333335e-05, "loss": 4.0376, "norm_diff": 0.0859, "norm_loss": 0.0, "num_token_doc": 66.8888, "num_token_overlap": 11.7285, "num_token_query": 32.0058, "num_token_union": 65.4369, "num_word_context": 202.6081, "num_word_doc": 49.919, "num_word_query": 23.6401, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2736.8027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2394, "query_norm": 1.3133, "queue_k_norm": 1.4024, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0058, "sent_len_1": 66.8888, "sent_len_max_0": 127.55, "sent_len_max_1": 188.3862, "stdk": 0.0474, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 55300 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 4.0458, "doc_norm": 1.4023, "encoder_q-embeddings": 2823.1628, "encoder_q-layer.0": 2137.3953, "encoder_q-layer.1": 2513.6565, "encoder_q-layer.10": 1264.5878, "encoder_q-layer.11": 3121.6763, "encoder_q-layer.2": 2883.2056, "encoder_q-layer.3": 2325.1211, "encoder_q-layer.4": 2172.0107, "encoder_q-layer.5": 2270.8887, "encoder_q-layer.6": 2174.4553, "encoder_q-layer.7": 1614.6791, "encoder_q-layer.8": 1440.6217, "encoder_q-layer.9": 1276.696, "epoch": 0.36, "inbatch_neg_score": 0.2471, "inbatch_pos_score": 0.7734, "learning_rate": 2.477777777777778e-05, "loss": 4.0458, "norm_diff": 0.0928, "norm_loss": 0.0, "num_token_doc": 66.8588, "num_token_overlap": 11.5975, "num_token_query": 31.755, "num_token_union": 65.4272, "num_word_context": 202.8481, "num_word_doc": 49.872, "num_word_query": 23.4333, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3424.5892, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.246, "query_norm": 1.3095, "queue_k_norm": 1.4021, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.755, "sent_len_1": 66.8588, "sent_len_max_0": 127.5725, "sent_len_max_1": 190.3887, "stdk": 0.0475, "stdq": 0.0421, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 55400 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.0236, "doc_norm": 1.4089, "encoder_q-embeddings": 1295.9772, "encoder_q-layer.0": 887.6005, "encoder_q-layer.1": 977.4176, "encoder_q-layer.10": 1149.1005, "encoder_q-layer.11": 3068.439, "encoder_q-layer.2": 1127.8859, "encoder_q-layer.3": 1185.1157, "encoder_q-layer.4": 1273.8372, "encoder_q-layer.5": 1287.5853, "encoder_q-layer.6": 1282.7998, "encoder_q-layer.7": 1294.9463, "encoder_q-layer.8": 1310.7644, "encoder_q-layer.9": 1153.8918, "epoch": 0.36, "inbatch_neg_score": 0.249, "inbatch_pos_score": 0.7739, "learning_rate": 2.4722222222222223e-05, "loss": 4.0236, "norm_diff": 0.096, "norm_loss": 0.0, "num_token_doc": 67.1137, "num_token_overlap": 11.6699, "num_token_query": 31.744, "num_token_union": 65.4483, "num_word_context": 202.7323, "num_word_doc": 50.0411, "num_word_query": 23.4434, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2174.267, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.248, "query_norm": 1.3129, "queue_k_norm": 1.403, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.744, "sent_len_1": 67.1137, "sent_len_max_0": 127.5212, "sent_len_max_1": 191.9038, "stdk": 0.0478, "stdq": 0.0422, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 55500 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 4.0313, "doc_norm": 1.4018, "encoder_q-embeddings": 1300.5193, "encoder_q-layer.0": 890.9401, "encoder_q-layer.1": 987.9252, "encoder_q-layer.10": 1305.2172, "encoder_q-layer.11": 2909.9031, "encoder_q-layer.2": 1116.7737, "encoder_q-layer.3": 1164.5229, "encoder_q-layer.4": 1171.9591, "encoder_q-layer.5": 1226.5548, "encoder_q-layer.6": 1244.4581, "encoder_q-layer.7": 1247.3916, "encoder_q-layer.8": 1310.1447, "encoder_q-layer.9": 1191.4006, "epoch": 0.36, "inbatch_neg_score": 0.2541, "inbatch_pos_score": 0.7798, "learning_rate": 2.466666666666667e-05, "loss": 4.0313, "norm_diff": 0.0852, "norm_loss": 0.0, "num_token_doc": 66.9075, "num_token_overlap": 11.7147, "num_token_query": 32.1238, "num_token_union": 65.5724, "num_word_context": 202.149, "num_word_doc": 49.9621, "num_word_query": 23.7452, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2108.9694, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2532, "query_norm": 1.3166, "queue_k_norm": 1.402, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.1238, "sent_len_1": 66.9075, "sent_len_max_0": 127.4638, "sent_len_max_1": 189.5538, "stdk": 0.0475, "stdq": 0.0422, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 55600 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 4.0256, "doc_norm": 1.4055, "encoder_q-embeddings": 2164.9365, "encoder_q-layer.0": 1461.7937, "encoder_q-layer.1": 1540.0071, "encoder_q-layer.10": 2803.2429, "encoder_q-layer.11": 6106.0591, "encoder_q-layer.2": 1711.0513, "encoder_q-layer.3": 1724.2916, "encoder_q-layer.4": 1874.4534, "encoder_q-layer.5": 1860.4182, "encoder_q-layer.6": 2032.1698, "encoder_q-layer.7": 2177.405, "encoder_q-layer.8": 2474.0054, "encoder_q-layer.9": 2308.4443, "epoch": 0.36, "inbatch_neg_score": 0.2562, "inbatch_pos_score": 0.793, "learning_rate": 2.461111111111111e-05, "loss": 4.0256, "norm_diff": 0.0915, "norm_loss": 0.0, "num_token_doc": 66.8749, "num_token_overlap": 11.6471, "num_token_query": 31.8322, "num_token_union": 65.417, "num_word_context": 202.3771, "num_word_doc": 49.9175, "num_word_query": 23.5009, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3924.337, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2559, "query_norm": 1.314, "queue_k_norm": 1.4029, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8322, "sent_len_1": 66.8749, "sent_len_max_0": 127.51, "sent_len_max_1": 189.6525, "stdk": 0.0476, "stdq": 0.042, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 55700 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 4.0342, "doc_norm": 1.4008, "encoder_q-embeddings": 2337.426, "encoder_q-layer.0": 1605.7559, "encoder_q-layer.1": 1710.9343, "encoder_q-layer.10": 2862.2393, "encoder_q-layer.11": 6796.9951, "encoder_q-layer.2": 1917.4993, "encoder_q-layer.3": 1916.4022, "encoder_q-layer.4": 1938.2104, "encoder_q-layer.5": 1969.4586, "encoder_q-layer.6": 2102.271, "encoder_q-layer.7": 2314.0288, "encoder_q-layer.8": 2854.406, "encoder_q-layer.9": 2542.5691, "epoch": 0.36, "inbatch_neg_score": 0.2591, "inbatch_pos_score": 0.8018, "learning_rate": 2.4555555555555557e-05, "loss": 4.0342, "norm_diff": 0.073, "norm_loss": 0.0, "num_token_doc": 66.7887, "num_token_overlap": 11.6171, "num_token_query": 31.7438, "num_token_union": 65.2815, "num_word_context": 202.4596, "num_word_doc": 49.8394, "num_word_query": 23.4522, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4157.1207, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2593, "query_norm": 1.3278, "queue_k_norm": 1.4052, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7438, "sent_len_1": 66.7887, "sent_len_max_0": 127.4375, "sent_len_max_1": 190.26, "stdk": 0.0474, "stdq": 0.0427, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 55800 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 4.018, "doc_norm": 1.4061, "encoder_q-embeddings": 2369.5627, "encoder_q-layer.0": 1547.2977, "encoder_q-layer.1": 1610.3298, "encoder_q-layer.10": 2558.5369, "encoder_q-layer.11": 6327.5303, "encoder_q-layer.2": 1781.5186, "encoder_q-layer.3": 1887.8818, "encoder_q-layer.4": 2079.7205, "encoder_q-layer.5": 2017.9337, "encoder_q-layer.6": 2121.499, "encoder_q-layer.7": 2358.9189, "encoder_q-layer.8": 2565.6174, "encoder_q-layer.9": 2476.9893, "epoch": 0.36, "inbatch_neg_score": 0.2615, "inbatch_pos_score": 0.8032, "learning_rate": 2.45e-05, "loss": 4.018, "norm_diff": 0.0952, "norm_loss": 0.0, "num_token_doc": 66.852, "num_token_overlap": 11.6523, "num_token_query": 31.8776, "num_token_union": 65.4142, "num_word_context": 202.6729, "num_word_doc": 49.8798, "num_word_query": 23.5441, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4104.2725, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2615, "query_norm": 1.3109, "queue_k_norm": 1.4073, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8776, "sent_len_1": 66.852, "sent_len_max_0": 127.5825, "sent_len_max_1": 189.0588, "stdk": 0.0476, "stdq": 0.0423, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 55900 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 4.0147, "doc_norm": 1.4073, "encoder_q-embeddings": 3744.781, "encoder_q-layer.0": 2446.2783, "encoder_q-layer.1": 2699.9614, "encoder_q-layer.10": 2525.6184, "encoder_q-layer.11": 6257.8682, "encoder_q-layer.2": 3190.252, "encoder_q-layer.3": 3588.5896, "encoder_q-layer.4": 3744.8672, "encoder_q-layer.5": 3647.8293, "encoder_q-layer.6": 3586.0933, "encoder_q-layer.7": 2988.502, "encoder_q-layer.8": 2860.3057, "encoder_q-layer.9": 2417.2351, "epoch": 0.36, "inbatch_neg_score": 0.2617, "inbatch_pos_score": 0.8008, "learning_rate": 2.4444444444444445e-05, "loss": 4.0147, "norm_diff": 0.1091, "norm_loss": 0.0, "num_token_doc": 66.7248, "num_token_overlap": 11.6366, "num_token_query": 31.7916, "num_token_union": 65.2864, "num_word_context": 202.3402, "num_word_doc": 49.7686, "num_word_query": 23.4838, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5253.4046, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2625, "query_norm": 1.2982, "queue_k_norm": 1.405, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7916, "sent_len_1": 66.7248, "sent_len_max_0": 127.4675, "sent_len_max_1": 190.0075, "stdk": 0.0477, "stdq": 0.0421, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 56000 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 4.0205, "doc_norm": 1.4057, "encoder_q-embeddings": 3300.3267, "encoder_q-layer.0": 2270.7805, "encoder_q-layer.1": 2430.6912, "encoder_q-layer.10": 2368.8926, "encoder_q-layer.11": 6432.5137, "encoder_q-layer.2": 2854.6912, "encoder_q-layer.3": 3205.2773, "encoder_q-layer.4": 2946.1721, "encoder_q-layer.5": 3048.9385, "encoder_q-layer.6": 2978.3633, "encoder_q-layer.7": 3072.1062, "encoder_q-layer.8": 2801.1135, "encoder_q-layer.9": 2416.561, "epoch": 0.37, "inbatch_neg_score": 0.2604, "inbatch_pos_score": 0.79, "learning_rate": 2.4388888888888887e-05, "loss": 4.0205, "norm_diff": 0.109, "norm_loss": 0.0, "num_token_doc": 66.6989, "num_token_overlap": 11.6955, "num_token_query": 32.0273, "num_token_union": 65.3917, "num_word_context": 202.318, "num_word_doc": 49.7829, "num_word_query": 23.6266, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4944.9858, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2607, "query_norm": 1.2966, "queue_k_norm": 1.4068, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0273, "sent_len_1": 66.6989, "sent_len_max_0": 127.5025, "sent_len_max_1": 189.0938, "stdk": 0.0476, "stdq": 0.0422, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 56100 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.0028, "doc_norm": 1.4036, "encoder_q-embeddings": 2258.9255, "encoder_q-layer.0": 1424.9524, "encoder_q-layer.1": 1483.9133, "encoder_q-layer.10": 2681.6179, "encoder_q-layer.11": 6712.873, "encoder_q-layer.2": 1665.7528, "encoder_q-layer.3": 1699.7814, "encoder_q-layer.4": 1827.2898, "encoder_q-layer.5": 1831.4639, "encoder_q-layer.6": 2067.3301, "encoder_q-layer.7": 2266.729, "encoder_q-layer.8": 2867.2268, "encoder_q-layer.9": 2529.9172, "epoch": 0.37, "inbatch_neg_score": 0.2604, "inbatch_pos_score": 0.8086, "learning_rate": 2.4333333333333336e-05, "loss": 4.0028, "norm_diff": 0.0959, "norm_loss": 0.0, "num_token_doc": 66.7984, "num_token_overlap": 11.709, "num_token_query": 31.8952, "num_token_union": 65.3488, "num_word_context": 202.6569, "num_word_doc": 49.8812, "num_word_query": 23.5555, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4155.174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2615, "query_norm": 1.3077, "queue_k_norm": 1.4075, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8952, "sent_len_1": 66.7984, "sent_len_max_0": 127.53, "sent_len_max_1": 188.2675, "stdk": 0.0475, "stdq": 0.0427, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 56200 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 4.0289, "doc_norm": 1.4151, "encoder_q-embeddings": 2323.8882, "encoder_q-layer.0": 1537.4674, "encoder_q-layer.1": 1593.1884, "encoder_q-layer.10": 2542.78, "encoder_q-layer.11": 6186.4043, "encoder_q-layer.2": 1809.7888, "encoder_q-layer.3": 1904.0444, "encoder_q-layer.4": 1931.8733, "encoder_q-layer.5": 1949.7482, "encoder_q-layer.6": 2237.415, "encoder_q-layer.7": 2287.5071, "encoder_q-layer.8": 2842.5583, "encoder_q-layer.9": 2467.491, "epoch": 0.37, "inbatch_neg_score": 0.2575, "inbatch_pos_score": 0.8013, "learning_rate": 2.427777777777778e-05, "loss": 4.0289, "norm_diff": 0.1172, "norm_loss": 0.0, "num_token_doc": 66.7707, "num_token_overlap": 11.6261, "num_token_query": 31.8651, "num_token_union": 65.4029, "num_word_context": 202.3291, "num_word_doc": 49.825, "num_word_query": 23.5432, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4098.3314, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2583, "query_norm": 1.2979, "queue_k_norm": 1.4078, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8651, "sent_len_1": 66.7707, "sent_len_max_0": 127.52, "sent_len_max_1": 188.9038, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 56300 }, { "accuracy": 41.8945, "active_queue_size": 16384.0, "cl_loss": 4.0163, "doc_norm": 1.4047, "encoder_q-embeddings": 2319.4292, "encoder_q-layer.0": 1526.5247, "encoder_q-layer.1": 1583.5852, "encoder_q-layer.10": 2711.5364, "encoder_q-layer.11": 6967.7983, "encoder_q-layer.2": 1780.7887, "encoder_q-layer.3": 1785.2294, "encoder_q-layer.4": 1863.6484, "encoder_q-layer.5": 1790.2125, "encoder_q-layer.6": 1913.8853, "encoder_q-layer.7": 2149.1404, "encoder_q-layer.8": 2696.5974, "encoder_q-layer.9": 2469.0344, "epoch": 0.37, "inbatch_neg_score": 0.2562, "inbatch_pos_score": 0.7832, "learning_rate": 2.4222222222222224e-05, "loss": 4.0163, "norm_diff": 0.1065, "norm_loss": 0.0, "num_token_doc": 66.8715, "num_token_overlap": 11.6463, "num_token_query": 31.8207, "num_token_union": 65.3818, "num_word_context": 202.5427, "num_word_doc": 49.8506, "num_word_query": 23.5077, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4179.9578, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2559, "query_norm": 1.2982, "queue_k_norm": 1.4088, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8207, "sent_len_1": 66.8715, "sent_len_max_0": 127.6575, "sent_len_max_1": 190.895, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 56400 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 4.0205, "doc_norm": 1.4021, "encoder_q-embeddings": 2474.2393, "encoder_q-layer.0": 1652.8746, "encoder_q-layer.1": 1780.2595, "encoder_q-layer.10": 2543.2761, "encoder_q-layer.11": 6267.0645, "encoder_q-layer.2": 2004.5139, "encoder_q-layer.3": 2054.7966, "encoder_q-layer.4": 2147.9468, "encoder_q-layer.5": 2233.1182, "encoder_q-layer.6": 2437.4883, "encoder_q-layer.7": 2428.0874, "encoder_q-layer.8": 2605.4695, "encoder_q-layer.9": 2377.5503, "epoch": 0.37, "inbatch_neg_score": 0.2549, "inbatch_pos_score": 0.7744, "learning_rate": 2.4166666666666667e-05, "loss": 4.0205, "norm_diff": 0.116, "norm_loss": 0.0, "num_token_doc": 66.8271, "num_token_overlap": 11.6515, "num_token_query": 31.8024, "num_token_union": 65.3354, "num_word_context": 202.0487, "num_word_doc": 49.8653, "num_word_query": 23.4738, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4223.0625, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2546, "query_norm": 1.2861, "queue_k_norm": 1.4072, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8024, "sent_len_1": 66.8271, "sent_len_max_0": 127.4475, "sent_len_max_1": 189.885, "stdk": 0.0474, "stdq": 0.0418, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 56500 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 4.0266, "doc_norm": 1.4185, "encoder_q-embeddings": 1852.4579, "encoder_q-layer.0": 1316.3702, "encoder_q-layer.1": 1518.6698, "encoder_q-layer.10": 1207.5546, "encoder_q-layer.11": 3018.0024, "encoder_q-layer.2": 1859.0004, "encoder_q-layer.3": 1851.4601, "encoder_q-layer.4": 1866.5317, "encoder_q-layer.5": 1796.5056, "encoder_q-layer.6": 1633.8373, "encoder_q-layer.7": 1508.3506, "encoder_q-layer.8": 1419.0283, "encoder_q-layer.9": 1238.2771, "epoch": 0.37, "inbatch_neg_score": 0.2554, "inbatch_pos_score": 0.7769, "learning_rate": 2.4111111111111113e-05, "loss": 4.0266, "norm_diff": 0.1386, "norm_loss": 0.0, "num_token_doc": 66.9358, "num_token_overlap": 11.6768, "num_token_query": 31.9487, "num_token_union": 65.5208, "num_word_context": 202.5387, "num_word_doc": 49.9909, "num_word_query": 23.5855, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2683.857, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2559, "query_norm": 1.28, "queue_k_norm": 1.4098, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9487, "sent_len_1": 66.9358, "sent_len_max_0": 127.52, "sent_len_max_1": 187.7125, "stdk": 0.048, "stdq": 0.0416, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 56600 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 4.0291, "doc_norm": 1.4098, "encoder_q-embeddings": 1135.5968, "encoder_q-layer.0": 758.0306, "encoder_q-layer.1": 854.9226, "encoder_q-layer.10": 1373.1783, "encoder_q-layer.11": 3038.5771, "encoder_q-layer.2": 933.827, "encoder_q-layer.3": 979.0671, "encoder_q-layer.4": 989.2108, "encoder_q-layer.5": 1029.5669, "encoder_q-layer.6": 1144.3517, "encoder_q-layer.7": 1215.4592, "encoder_q-layer.8": 1289.0901, "encoder_q-layer.9": 1165.3348, "epoch": 0.37, "inbatch_neg_score": 0.2552, "inbatch_pos_score": 0.8164, "learning_rate": 2.4055555555555555e-05, "loss": 4.0291, "norm_diff": 0.1177, "norm_loss": 0.0, "num_token_doc": 66.6704, "num_token_overlap": 11.6692, "num_token_query": 31.8897, "num_token_union": 65.3163, "num_word_context": 202.1715, "num_word_doc": 49.7464, "num_word_query": 23.5416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1981.3844, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2544, "query_norm": 1.2921, "queue_k_norm": 1.4069, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8897, "sent_len_1": 66.6704, "sent_len_max_0": 127.6225, "sent_len_max_1": 189.8175, "stdk": 0.0476, "stdq": 0.042, "stdqueue_k": 0.0476, "stdqueue_q": 0.0, "step": 56700 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 4.0302, "doc_norm": 1.4108, "encoder_q-embeddings": 1020.3953, "encoder_q-layer.0": 678.6591, "encoder_q-layer.1": 717.3639, "encoder_q-layer.10": 1209.1143, "encoder_q-layer.11": 3205.259, "encoder_q-layer.2": 824.5897, "encoder_q-layer.3": 841.043, "encoder_q-layer.4": 892.7513, "encoder_q-layer.5": 917.1703, "encoder_q-layer.6": 1003.9569, "encoder_q-layer.7": 1055.2262, "encoder_q-layer.8": 1259.8002, "encoder_q-layer.9": 1206.6115, "epoch": 0.37, "inbatch_neg_score": 0.255, "inbatch_pos_score": 0.7764, "learning_rate": 2.4e-05, "loss": 4.0302, "norm_diff": 0.1216, "norm_loss": 0.0, "num_token_doc": 66.9626, "num_token_overlap": 11.6505, "num_token_query": 31.868, "num_token_union": 65.4748, "num_word_context": 202.5516, "num_word_doc": 49.9722, "num_word_query": 23.5353, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1977.5362, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2549, "query_norm": 1.2892, "queue_k_norm": 1.4111, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.868, "sent_len_1": 66.9626, "sent_len_max_0": 127.4963, "sent_len_max_1": 190.3562, "stdk": 0.0477, "stdq": 0.0417, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 56800 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 4.0072, "doc_norm": 1.4135, "encoder_q-embeddings": 1153.7906, "encoder_q-layer.0": 757.8936, "encoder_q-layer.1": 813.7961, "encoder_q-layer.10": 1214.4688, "encoder_q-layer.11": 3087.634, "encoder_q-layer.2": 901.8354, "encoder_q-layer.3": 970.8525, "encoder_q-layer.4": 953.2316, "encoder_q-layer.5": 949.73, "encoder_q-layer.6": 1018.5786, "encoder_q-layer.7": 1081.7134, "encoder_q-layer.8": 1268.163, "encoder_q-layer.9": 1182.1526, "epoch": 0.37, "inbatch_neg_score": 0.2527, "inbatch_pos_score": 0.8032, "learning_rate": 2.3944444444444443e-05, "loss": 4.0072, "norm_diff": 0.1143, "norm_loss": 0.0, "num_token_doc": 66.9214, "num_token_overlap": 11.6666, "num_token_query": 31.8907, "num_token_union": 65.4614, "num_word_context": 202.3134, "num_word_doc": 49.9382, "num_word_query": 23.5509, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2021.3836, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2544, "query_norm": 1.2993, "queue_k_norm": 1.4107, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8907, "sent_len_1": 66.9214, "sent_len_max_0": 127.5787, "sent_len_max_1": 188.3, "stdk": 0.0478, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 56900 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 4.0323, "doc_norm": 1.401, "encoder_q-embeddings": 3341.1243, "encoder_q-layer.0": 2452.1799, "encoder_q-layer.1": 2387.6055, "encoder_q-layer.10": 1378.2349, "encoder_q-layer.11": 3210.5723, "encoder_q-layer.2": 2884.175, "encoder_q-layer.3": 2687.3721, "encoder_q-layer.4": 2436.7366, "encoder_q-layer.5": 2022.3301, "encoder_q-layer.6": 2246.1504, "encoder_q-layer.7": 2447.0518, "encoder_q-layer.8": 1779.2996, "encoder_q-layer.9": 1368.1902, "epoch": 0.37, "inbatch_neg_score": 0.2583, "inbatch_pos_score": 0.7964, "learning_rate": 2.3888888888888892e-05, "loss": 4.0323, "norm_diff": 0.0971, "norm_loss": 0.0, "num_token_doc": 66.5701, "num_token_overlap": 11.6061, "num_token_query": 31.7255, "num_token_union": 65.1504, "num_word_context": 202.2861, "num_word_doc": 49.6766, "num_word_query": 23.4199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3747.5161, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2578, "query_norm": 1.3039, "queue_k_norm": 1.4086, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7255, "sent_len_1": 66.5701, "sent_len_max_0": 127.595, "sent_len_max_1": 189.4938, "stdk": 0.0473, "stdq": 0.0421, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57000 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 4.0284, "doc_norm": 1.4047, "encoder_q-embeddings": 1645.7158, "encoder_q-layer.0": 1196.0903, "encoder_q-layer.1": 1329.1453, "encoder_q-layer.10": 1210.5447, "encoder_q-layer.11": 3143.5479, "encoder_q-layer.2": 1632.6578, "encoder_q-layer.3": 1507.8224, "encoder_q-layer.4": 1540.3413, "encoder_q-layer.5": 1416.3605, "encoder_q-layer.6": 1314.1169, "encoder_q-layer.7": 1444.0685, "encoder_q-layer.8": 1438.7499, "encoder_q-layer.9": 1218.6755, "epoch": 0.37, "inbatch_neg_score": 0.2606, "inbatch_pos_score": 0.8013, "learning_rate": 2.3833333333333334e-05, "loss": 4.0284, "norm_diff": 0.1004, "norm_loss": 0.0, "num_token_doc": 66.8094, "num_token_overlap": 11.6613, "num_token_query": 31.8718, "num_token_union": 65.3787, "num_word_context": 202.4817, "num_word_doc": 49.8671, "num_word_query": 23.54, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2484.3798, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2603, "query_norm": 1.3043, "queue_k_norm": 1.411, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8718, "sent_len_1": 66.8094, "sent_len_max_0": 127.4587, "sent_len_max_1": 189.01, "stdk": 0.0474, "stdq": 0.042, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 57100 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 4.0198, "doc_norm": 1.4096, "encoder_q-embeddings": 2016.9163, "encoder_q-layer.0": 1548.0725, "encoder_q-layer.1": 1638.3135, "encoder_q-layer.10": 1229.615, "encoder_q-layer.11": 3125.4165, "encoder_q-layer.2": 2046.9243, "encoder_q-layer.3": 2221.7969, "encoder_q-layer.4": 2410.4978, "encoder_q-layer.5": 2047.4868, "encoder_q-layer.6": 1799.1237, "encoder_q-layer.7": 1867.2556, "encoder_q-layer.8": 1644.6842, "encoder_q-layer.9": 1236.7065, "epoch": 0.37, "inbatch_neg_score": 0.2635, "inbatch_pos_score": 0.8101, "learning_rate": 2.377777777777778e-05, "loss": 4.0198, "norm_diff": 0.0941, "norm_loss": 0.0, "num_token_doc": 66.7645, "num_token_overlap": 11.7194, "num_token_query": 31.9846, "num_token_union": 65.3563, "num_word_context": 202.4737, "num_word_doc": 49.8261, "num_word_query": 23.6271, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2975.8438, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2639, "query_norm": 1.3155, "queue_k_norm": 1.4091, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9846, "sent_len_1": 66.7645, "sent_len_max_0": 127.6088, "sent_len_max_1": 186.8587, "stdk": 0.0476, "stdq": 0.0423, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57200 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 4.0165, "doc_norm": 1.4171, "encoder_q-embeddings": 1381.1637, "encoder_q-layer.0": 959.6713, "encoder_q-layer.1": 1020.1285, "encoder_q-layer.10": 1387.3927, "encoder_q-layer.11": 3086.3323, "encoder_q-layer.2": 1156.5492, "encoder_q-layer.3": 1202.8577, "encoder_q-layer.4": 1263.8677, "encoder_q-layer.5": 1228.9634, "encoder_q-layer.6": 1319.1631, "encoder_q-layer.7": 1301.5022, "encoder_q-layer.8": 1368.5471, "encoder_q-layer.9": 1267.0963, "epoch": 0.37, "inbatch_neg_score": 0.2693, "inbatch_pos_score": 0.8306, "learning_rate": 2.3722222222222222e-05, "loss": 4.0165, "norm_diff": 0.0952, "norm_loss": 0.0, "num_token_doc": 67.0503, "num_token_overlap": 11.6562, "num_token_query": 31.7832, "num_token_union": 65.4803, "num_word_context": 203.0142, "num_word_doc": 50.0572, "num_word_query": 23.4894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2193.2903, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2686, "query_norm": 1.3219, "queue_k_norm": 1.4113, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7832, "sent_len_1": 67.0503, "sent_len_max_0": 127.4125, "sent_len_max_1": 191.1375, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57300 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 4.0168, "doc_norm": 1.4128, "encoder_q-embeddings": 1940.9889, "encoder_q-layer.0": 1362.7323, "encoder_q-layer.1": 1424.7974, "encoder_q-layer.10": 1202.4641, "encoder_q-layer.11": 2883.106, "encoder_q-layer.2": 1764.1112, "encoder_q-layer.3": 1711.1458, "encoder_q-layer.4": 1569.229, "encoder_q-layer.5": 1424.9398, "encoder_q-layer.6": 1526.2728, "encoder_q-layer.7": 1408.5764, "encoder_q-layer.8": 1319.3173, "encoder_q-layer.9": 1135.6748, "epoch": 0.37, "inbatch_neg_score": 0.2729, "inbatch_pos_score": 0.8242, "learning_rate": 2.3666666666666668e-05, "loss": 4.0168, "norm_diff": 0.0859, "norm_loss": 0.0, "num_token_doc": 66.6734, "num_token_overlap": 11.6482, "num_token_query": 31.9414, "num_token_union": 65.3529, "num_word_context": 202.3155, "num_word_doc": 49.7054, "num_word_query": 23.582, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2509.6199, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2729, "query_norm": 1.3269, "queue_k_norm": 1.4101, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9414, "sent_len_1": 66.6734, "sent_len_max_0": 127.6363, "sent_len_max_1": 191.1087, "stdk": 0.0477, "stdq": 0.0423, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57400 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.0157, "doc_norm": 1.4095, "encoder_q-embeddings": 3861.2781, "encoder_q-layer.0": 2727.3665, "encoder_q-layer.1": 3364.9392, "encoder_q-layer.10": 1214.5532, "encoder_q-layer.11": 3033.6826, "encoder_q-layer.2": 3879.5359, "encoder_q-layer.3": 3504.7856, "encoder_q-layer.4": 3787.0093, "encoder_q-layer.5": 3056.9495, "encoder_q-layer.6": 2366.0471, "encoder_q-layer.7": 2100.9285, "encoder_q-layer.8": 1727.9915, "encoder_q-layer.9": 1225.0933, "epoch": 0.37, "inbatch_neg_score": 0.2808, "inbatch_pos_score": 0.7988, "learning_rate": 2.361111111111111e-05, "loss": 4.0157, "norm_diff": 0.0885, "norm_loss": 0.0, "num_token_doc": 66.5962, "num_token_overlap": 11.6134, "num_token_query": 31.83, "num_token_union": 65.3124, "num_word_context": 202.2929, "num_word_doc": 49.6779, "num_word_query": 23.5334, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4508.9464, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.28, "query_norm": 1.3209, "queue_k_norm": 1.4127, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.83, "sent_len_1": 66.5962, "sent_len_max_0": 127.4475, "sent_len_max_1": 188.6488, "stdk": 0.0475, "stdq": 0.0419, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57500 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.0075, "doc_norm": 1.4176, "encoder_q-embeddings": 1173.5145, "encoder_q-layer.0": 823.3098, "encoder_q-layer.1": 835.798, "encoder_q-layer.10": 1390.0693, "encoder_q-layer.11": 3007.6431, "encoder_q-layer.2": 969.695, "encoder_q-layer.3": 980.448, "encoder_q-layer.4": 1023.649, "encoder_q-layer.5": 1011.7924, "encoder_q-layer.6": 1053.4563, "encoder_q-layer.7": 1100.5947, "encoder_q-layer.8": 1220.1838, "encoder_q-layer.9": 1099.1373, "epoch": 0.37, "inbatch_neg_score": 0.2856, "inbatch_pos_score": 0.8257, "learning_rate": 2.3555555555555556e-05, "loss": 4.0075, "norm_diff": 0.0813, "norm_loss": 0.0, "num_token_doc": 67.0949, "num_token_overlap": 11.7018, "num_token_query": 31.9804, "num_token_union": 65.6113, "num_word_context": 202.5112, "num_word_doc": 50.0458, "num_word_query": 23.5909, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2009.6456, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2852, "query_norm": 1.3364, "queue_k_norm": 1.4151, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9804, "sent_len_1": 67.0949, "sent_len_max_0": 127.625, "sent_len_max_1": 190.1438, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 57600 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 4.0067, "doc_norm": 1.4129, "encoder_q-embeddings": 1870.1771, "encoder_q-layer.0": 1271.7073, "encoder_q-layer.1": 1451.2323, "encoder_q-layer.10": 1193.2573, "encoder_q-layer.11": 3139.188, "encoder_q-layer.2": 1665.3193, "encoder_q-layer.3": 1635.9835, "encoder_q-layer.4": 1767.205, "encoder_q-layer.5": 1860.6064, "encoder_q-layer.6": 1793.9766, "encoder_q-layer.7": 1667.4574, "encoder_q-layer.8": 1546.7142, "encoder_q-layer.9": 1197.3591, "epoch": 0.38, "inbatch_neg_score": 0.2915, "inbatch_pos_score": 0.8223, "learning_rate": 2.35e-05, "loss": 4.0067, "norm_diff": 0.0786, "norm_loss": 0.0, "num_token_doc": 66.6838, "num_token_overlap": 11.6922, "num_token_query": 32.024, "num_token_union": 65.3553, "num_word_context": 202.1414, "num_word_doc": 49.7286, "num_word_query": 23.6577, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2676.9347, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2913, "query_norm": 1.3343, "queue_k_norm": 1.4163, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.024, "sent_len_1": 66.6838, "sent_len_max_0": 127.55, "sent_len_max_1": 190.1062, "stdk": 0.0476, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 57700 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 4.0125, "doc_norm": 1.412, "encoder_q-embeddings": 1972.4827, "encoder_q-layer.0": 1403.8424, "encoder_q-layer.1": 1490.4836, "encoder_q-layer.10": 1310.8171, "encoder_q-layer.11": 3076.7485, "encoder_q-layer.2": 1795.9176, "encoder_q-layer.3": 1883.4536, "encoder_q-layer.4": 1945.0084, "encoder_q-layer.5": 1833.9054, "encoder_q-layer.6": 1805.7648, "encoder_q-layer.7": 1560.6897, "encoder_q-layer.8": 1550.2631, "encoder_q-layer.9": 1323.5237, "epoch": 0.38, "inbatch_neg_score": 0.2967, "inbatch_pos_score": 0.8379, "learning_rate": 2.3444444444444448e-05, "loss": 4.0125, "norm_diff": 0.0467, "norm_loss": 0.0, "num_token_doc": 66.775, "num_token_overlap": 11.5951, "num_token_query": 31.691, "num_token_union": 65.2625, "num_word_context": 202.4796, "num_word_doc": 49.8361, "num_word_query": 23.3826, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2771.6434, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.2964, "query_norm": 1.3653, "queue_k_norm": 1.4149, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.691, "sent_len_1": 66.775, "sent_len_max_0": 127.3487, "sent_len_max_1": 190.2575, "stdk": 0.0475, "stdq": 0.0431, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57800 }, { "accuracy": 42.5781, "active_queue_size": 16384.0, "cl_loss": 4.0221, "doc_norm": 1.4165, "encoder_q-embeddings": 1034.4359, "encoder_q-layer.0": 695.5641, "encoder_q-layer.1": 697.4407, "encoder_q-layer.10": 1324.2817, "encoder_q-layer.11": 3131.9624, "encoder_q-layer.2": 777.8165, "encoder_q-layer.3": 821.4846, "encoder_q-layer.4": 857.1165, "encoder_q-layer.5": 877.7979, "encoder_q-layer.6": 966.4404, "encoder_q-layer.7": 1034.2538, "encoder_q-layer.8": 1228.7836, "encoder_q-layer.9": 1141.4695, "epoch": 0.38, "inbatch_neg_score": 0.3029, "inbatch_pos_score": 0.8262, "learning_rate": 2.338888888888889e-05, "loss": 4.0221, "norm_diff": 0.0776, "norm_loss": 0.0, "num_token_doc": 66.536, "num_token_overlap": 11.6017, "num_token_query": 31.8787, "num_token_union": 65.2452, "num_word_context": 202.2522, "num_word_doc": 49.5937, "num_word_query": 23.5218, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1975.0586, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.303, "query_norm": 1.3389, "queue_k_norm": 1.4143, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8787, "sent_len_1": 66.536, "sent_len_max_0": 127.6887, "sent_len_max_1": 189.8512, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 57900 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 4.0009, "doc_norm": 1.413, "encoder_q-embeddings": 1280.0461, "encoder_q-layer.0": 856.4728, "encoder_q-layer.1": 919.293, "encoder_q-layer.10": 1205.0573, "encoder_q-layer.11": 3105.8284, "encoder_q-layer.2": 1049.2439, "encoder_q-layer.3": 1111.8313, "encoder_q-layer.4": 1181.9788, "encoder_q-layer.5": 1089.7079, "encoder_q-layer.6": 1175.2649, "encoder_q-layer.7": 1208.2501, "encoder_q-layer.8": 1381.3389, "encoder_q-layer.9": 1226.6089, "epoch": 0.38, "inbatch_neg_score": 0.3079, "inbatch_pos_score": 0.8364, "learning_rate": 2.3333333333333336e-05, "loss": 4.0009, "norm_diff": 0.0775, "norm_loss": 0.0, "num_token_doc": 66.5089, "num_token_overlap": 11.6673, "num_token_query": 31.9415, "num_token_union": 65.2167, "num_word_context": 202.0222, "num_word_doc": 49.6413, "num_word_query": 23.5659, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2146.0918, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3069, "query_norm": 1.3355, "queue_k_norm": 1.4177, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9415, "sent_len_1": 66.5089, "sent_len_max_0": 127.5863, "sent_len_max_1": 188.065, "stdk": 0.0475, "stdq": 0.0421, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 58000 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.0263, "doc_norm": 1.4171, "encoder_q-embeddings": 1410.4438, "encoder_q-layer.0": 952.6658, "encoder_q-layer.1": 979.7356, "encoder_q-layer.10": 1375.2799, "encoder_q-layer.11": 3286.4927, "encoder_q-layer.2": 1099.3561, "encoder_q-layer.3": 1166.1387, "encoder_q-layer.4": 1217.9749, "encoder_q-layer.5": 1149.4662, "encoder_q-layer.6": 1177.8322, "encoder_q-layer.7": 1307.976, "encoder_q-layer.8": 1430.1041, "encoder_q-layer.9": 1305.4524, "epoch": 0.38, "inbatch_neg_score": 0.3133, "inbatch_pos_score": 0.8374, "learning_rate": 2.3277777777777778e-05, "loss": 4.0263, "norm_diff": 0.0764, "norm_loss": 0.0, "num_token_doc": 66.6913, "num_token_overlap": 11.7152, "num_token_query": 32.0614, "num_token_union": 65.4138, "num_word_context": 202.291, "num_word_doc": 49.8158, "num_word_query": 23.7175, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2215.6833, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3118, "query_norm": 1.3407, "queue_k_norm": 1.4192, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0614, "sent_len_1": 66.6913, "sent_len_max_0": 127.4875, "sent_len_max_1": 187.6012, "stdk": 0.0476, "stdq": 0.0424, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 58100 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 4.0118, "doc_norm": 1.4201, "encoder_q-embeddings": 1920.1572, "encoder_q-layer.0": 1401.1593, "encoder_q-layer.1": 1616.2596, "encoder_q-layer.10": 1229.0724, "encoder_q-layer.11": 3134.8618, "encoder_q-layer.2": 1881.7313, "encoder_q-layer.3": 2031.5509, "encoder_q-layer.4": 2147.3352, "encoder_q-layer.5": 2375.0847, "encoder_q-layer.6": 2338.5205, "encoder_q-layer.7": 2133.4333, "encoder_q-layer.8": 1587.6105, "encoder_q-layer.9": 1179.4808, "epoch": 0.38, "inbatch_neg_score": 0.314, "inbatch_pos_score": 0.8672, "learning_rate": 2.3222222222222224e-05, "loss": 4.0118, "norm_diff": 0.0749, "norm_loss": 0.0, "num_token_doc": 66.6802, "num_token_overlap": 11.6308, "num_token_query": 31.8454, "num_token_union": 65.3055, "num_word_context": 201.8949, "num_word_doc": 49.7232, "num_word_query": 23.5133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2965.1931, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.313, "query_norm": 1.3452, "queue_k_norm": 1.4184, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8454, "sent_len_1": 66.6802, "sent_len_max_0": 127.4275, "sent_len_max_1": 190.5838, "stdk": 0.0477, "stdq": 0.0429, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 58200 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 4.0147, "doc_norm": 1.4244, "encoder_q-embeddings": 1531.2493, "encoder_q-layer.0": 1051.2539, "encoder_q-layer.1": 1152.8828, "encoder_q-layer.10": 1249.9697, "encoder_q-layer.11": 3189.4172, "encoder_q-layer.2": 1329.405, "encoder_q-layer.3": 1389.6342, "encoder_q-layer.4": 1448.7598, "encoder_q-layer.5": 1485.562, "encoder_q-layer.6": 1536.9951, "encoder_q-layer.7": 1586.5441, "encoder_q-layer.8": 1441.0404, "encoder_q-layer.9": 1191.3521, "epoch": 0.38, "inbatch_neg_score": 0.3117, "inbatch_pos_score": 0.8291, "learning_rate": 2.3166666666666666e-05, "loss": 4.0147, "norm_diff": 0.1094, "norm_loss": 0.0, "num_token_doc": 66.9044, "num_token_overlap": 11.648, "num_token_query": 31.8487, "num_token_union": 65.4348, "num_word_context": 202.2223, "num_word_doc": 49.9182, "num_word_query": 23.5368, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2394.6179, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.313, "query_norm": 1.315, "queue_k_norm": 1.4237, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8487, "sent_len_1": 66.9044, "sent_len_max_0": 127.4262, "sent_len_max_1": 190.3862, "stdk": 0.0478, "stdq": 0.0417, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 58300 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 4.0161, "doc_norm": 1.4258, "encoder_q-embeddings": 1213.736, "encoder_q-layer.0": 791.2933, "encoder_q-layer.1": 838.4062, "encoder_q-layer.10": 1293.9464, "encoder_q-layer.11": 3156.7251, "encoder_q-layer.2": 973.722, "encoder_q-layer.3": 1000.7336, "encoder_q-layer.4": 1041.1914, "encoder_q-layer.5": 1090.1384, "encoder_q-layer.6": 1160.8219, "encoder_q-layer.7": 1222.1365, "encoder_q-layer.8": 1390.8662, "encoder_q-layer.9": 1279.3977, "epoch": 0.38, "inbatch_neg_score": 0.3151, "inbatch_pos_score": 0.8501, "learning_rate": 2.3111111111111112e-05, "loss": 4.0161, "norm_diff": 0.1053, "norm_loss": 0.0, "num_token_doc": 66.7188, "num_token_overlap": 11.6383, "num_token_query": 31.7638, "num_token_union": 65.2244, "num_word_context": 202.219, "num_word_doc": 49.7783, "num_word_query": 23.4379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2120.2136, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3145, "query_norm": 1.3206, "queue_k_norm": 1.423, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7638, "sent_len_1": 66.7188, "sent_len_max_0": 127.4975, "sent_len_max_1": 190.7287, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 58400 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 4.0154, "doc_norm": 1.4254, "encoder_q-embeddings": 2364.4363, "encoder_q-layer.0": 1566.2043, "encoder_q-layer.1": 1702.7091, "encoder_q-layer.10": 1345.3433, "encoder_q-layer.11": 3226.5833, "encoder_q-layer.2": 1934.3723, "encoder_q-layer.3": 1774.3737, "encoder_q-layer.4": 1847.5356, "encoder_q-layer.5": 1690.7555, "encoder_q-layer.6": 1642.2247, "encoder_q-layer.7": 1473.3794, "encoder_q-layer.8": 1500.3782, "encoder_q-layer.9": 1240.2568, "epoch": 0.38, "inbatch_neg_score": 0.3133, "inbatch_pos_score": 0.8574, "learning_rate": 2.3055555555555558e-05, "loss": 4.0154, "norm_diff": 0.0991, "norm_loss": 0.0, "num_token_doc": 66.8312, "num_token_overlap": 11.6326, "num_token_query": 31.7975, "num_token_union": 65.3649, "num_word_context": 202.08, "num_word_doc": 49.8627, "num_word_query": 23.4817, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2888.6251, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3147, "query_norm": 1.3263, "queue_k_norm": 1.4258, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7975, "sent_len_1": 66.8312, "sent_len_max_0": 127.5637, "sent_len_max_1": 189.8537, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 58500 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 4.0002, "doc_norm": 1.4246, "encoder_q-embeddings": 2043.1166, "encoder_q-layer.0": 1372.5955, "encoder_q-layer.1": 1445.2506, "encoder_q-layer.10": 2701.9414, "encoder_q-layer.11": 6438.4487, "encoder_q-layer.2": 1597.8336, "encoder_q-layer.3": 1702.668, "encoder_q-layer.4": 1728.1631, "encoder_q-layer.5": 1718.7633, "encoder_q-layer.6": 1929.1508, "encoder_q-layer.7": 2056.1216, "encoder_q-layer.8": 2518.7815, "encoder_q-layer.9": 2370.822, "epoch": 0.38, "inbatch_neg_score": 0.3163, "inbatch_pos_score": 0.8594, "learning_rate": 2.3000000000000003e-05, "loss": 4.0002, "norm_diff": 0.0916, "norm_loss": 0.0, "num_token_doc": 66.9868, "num_token_overlap": 11.662, "num_token_query": 31.7811, "num_token_union": 65.3622, "num_word_context": 202.1568, "num_word_doc": 49.9653, "num_word_query": 23.445, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3973.498, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3164, "query_norm": 1.333, "queue_k_norm": 1.4252, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7811, "sent_len_1": 66.9868, "sent_len_max_0": 127.5263, "sent_len_max_1": 191.9187, "stdk": 0.0477, "stdq": 0.0427, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 58600 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 4.0293, "doc_norm": 1.4283, "encoder_q-embeddings": 3639.8665, "encoder_q-layer.0": 2713.7703, "encoder_q-layer.1": 2841.3752, "encoder_q-layer.10": 2494.2949, "encoder_q-layer.11": 5987.0215, "encoder_q-layer.2": 3177.2734, "encoder_q-layer.3": 3022.3733, "encoder_q-layer.4": 3042.0327, "encoder_q-layer.5": 3327.6433, "encoder_q-layer.6": 3405.9246, "encoder_q-layer.7": 3285.395, "encoder_q-layer.8": 2919.3975, "encoder_q-layer.9": 2422.3369, "epoch": 0.38, "inbatch_neg_score": 0.3177, "inbatch_pos_score": 0.875, "learning_rate": 2.2944444444444446e-05, "loss": 4.0293, "norm_diff": 0.0978, "norm_loss": 0.0, "num_token_doc": 66.7156, "num_token_overlap": 11.6541, "num_token_query": 31.8505, "num_token_union": 65.2861, "num_word_context": 202.6492, "num_word_doc": 49.8305, "num_word_query": 23.5236, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5082.9932, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3179, "query_norm": 1.3305, "queue_k_norm": 1.425, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8505, "sent_len_1": 66.7156, "sent_len_max_0": 127.4025, "sent_len_max_1": 188.2125, "stdk": 0.0478, "stdq": 0.0426, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 58700 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 4.0015, "doc_norm": 1.43, "encoder_q-embeddings": 2918.677, "encoder_q-layer.0": 2045.7161, "encoder_q-layer.1": 2249.7424, "encoder_q-layer.10": 2544.3594, "encoder_q-layer.11": 6703.1362, "encoder_q-layer.2": 2560.0308, "encoder_q-layer.3": 2856.9443, "encoder_q-layer.4": 3127.4102, "encoder_q-layer.5": 3152.5281, "encoder_q-layer.6": 3080.2952, "encoder_q-layer.7": 3058.4675, "encoder_q-layer.8": 2791.7073, "encoder_q-layer.9": 2484.2998, "epoch": 0.38, "inbatch_neg_score": 0.3162, "inbatch_pos_score": 0.8389, "learning_rate": 2.288888888888889e-05, "loss": 4.0015, "norm_diff": 0.1205, "norm_loss": 0.0, "num_token_doc": 66.6948, "num_token_overlap": 11.6363, "num_token_query": 31.8608, "num_token_union": 65.3256, "num_word_context": 201.9854, "num_word_doc": 49.7774, "num_word_query": 23.5256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4933.0253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3169, "query_norm": 1.3095, "queue_k_norm": 1.4261, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8608, "sent_len_1": 66.6948, "sent_len_max_0": 127.3812, "sent_len_max_1": 189.115, "stdk": 0.0478, "stdq": 0.0418, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 58800 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 4.0206, "doc_norm": 1.4339, "encoder_q-embeddings": 2412.4033, "encoder_q-layer.0": 1595.5133, "encoder_q-layer.1": 1714.6713, "encoder_q-layer.10": 2294.9382, "encoder_q-layer.11": 6023.9185, "encoder_q-layer.2": 1905.506, "encoder_q-layer.3": 1989.7943, "encoder_q-layer.4": 2087.7761, "encoder_q-layer.5": 2057.4419, "encoder_q-layer.6": 2142.5715, "encoder_q-layer.7": 2357.8857, "encoder_q-layer.8": 2566.3655, "encoder_q-layer.9": 2273.9692, "epoch": 0.38, "inbatch_neg_score": 0.3187, "inbatch_pos_score": 0.8535, "learning_rate": 2.2833333333333334e-05, "loss": 4.0206, "norm_diff": 0.1147, "norm_loss": 0.0, "num_token_doc": 66.7423, "num_token_overlap": 11.6473, "num_token_query": 31.8189, "num_token_union": 65.3219, "num_word_context": 202.3971, "num_word_doc": 49.7569, "num_word_query": 23.4886, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4056.4843, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3193, "query_norm": 1.3192, "queue_k_norm": 1.4289, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8189, "sent_len_1": 66.7423, "sent_len_max_0": 127.4925, "sent_len_max_1": 190.5087, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 58900 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.9905, "doc_norm": 1.4263, "encoder_q-embeddings": 3709.4543, "encoder_q-layer.0": 2494.1799, "encoder_q-layer.1": 2623.833, "encoder_q-layer.10": 2568.4006, "encoder_q-layer.11": 6386.4556, "encoder_q-layer.2": 2894.1482, "encoder_q-layer.3": 3020.1399, "encoder_q-layer.4": 3384.7041, "encoder_q-layer.5": 3327.0493, "encoder_q-layer.6": 3374.1306, "encoder_q-layer.7": 2992.3872, "encoder_q-layer.8": 3079.2063, "encoder_q-layer.9": 2562.6453, "epoch": 0.38, "inbatch_neg_score": 0.3166, "inbatch_pos_score": 0.8647, "learning_rate": 2.277777777777778e-05, "loss": 3.9905, "norm_diff": 0.1002, "norm_loss": 0.0, "num_token_doc": 66.9288, "num_token_overlap": 11.7339, "num_token_query": 32.0385, "num_token_union": 65.4429, "num_word_context": 201.9089, "num_word_doc": 49.8977, "num_word_query": 23.6646, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5160.7458, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3171, "query_norm": 1.3262, "queue_k_norm": 1.4286, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0385, "sent_len_1": 66.9288, "sent_len_max_0": 127.5463, "sent_len_max_1": 192.3775, "stdk": 0.0477, "stdq": 0.0424, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 59000 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 4.0091, "doc_norm": 1.4234, "encoder_q-embeddings": 1482.7236, "encoder_q-layer.0": 1014.0932, "encoder_q-layer.1": 1167.8168, "encoder_q-layer.10": 1275.0708, "encoder_q-layer.11": 3039.2842, "encoder_q-layer.2": 1343.1375, "encoder_q-layer.3": 1442.9943, "encoder_q-layer.4": 1474.4366, "encoder_q-layer.5": 1422.5018, "encoder_q-layer.6": 1308.8876, "encoder_q-layer.7": 1190.7183, "encoder_q-layer.8": 1254.7296, "encoder_q-layer.9": 1152.3638, "epoch": 0.38, "inbatch_neg_score": 0.3142, "inbatch_pos_score": 0.8501, "learning_rate": 2.2722222222222222e-05, "loss": 4.0091, "norm_diff": 0.107, "norm_loss": 0.0, "num_token_doc": 66.8812, "num_token_overlap": 11.611, "num_token_query": 31.7028, "num_token_union": 65.3085, "num_word_context": 202.2291, "num_word_doc": 49.8797, "num_word_query": 23.4076, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2323.0416, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3154, "query_norm": 1.3164, "queue_k_norm": 1.4275, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7028, "sent_len_1": 66.8812, "sent_len_max_0": 127.4313, "sent_len_max_1": 191.505, "stdk": 0.0475, "stdq": 0.0421, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 59100 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 3.9886, "doc_norm": 1.4314, "encoder_q-embeddings": 1365.5811, "encoder_q-layer.0": 908.3026, "encoder_q-layer.1": 961.1686, "encoder_q-layer.10": 1254.8828, "encoder_q-layer.11": 3058.218, "encoder_q-layer.2": 1093.4706, "encoder_q-layer.3": 1189.6343, "encoder_q-layer.4": 1381.6226, "encoder_q-layer.5": 1245.4062, "encoder_q-layer.6": 1468.0393, "encoder_q-layer.7": 1457.2192, "encoder_q-layer.8": 1519.624, "encoder_q-layer.9": 1242.1022, "epoch": 0.39, "inbatch_neg_score": 0.3149, "inbatch_pos_score": 0.8447, "learning_rate": 2.2666666666666668e-05, "loss": 3.9886, "norm_diff": 0.1178, "norm_loss": 0.0, "num_token_doc": 66.7213, "num_token_overlap": 11.721, "num_token_query": 32.0685, "num_token_union": 65.399, "num_word_context": 202.1896, "num_word_doc": 49.8208, "num_word_query": 23.6864, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2251.7522, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3154, "query_norm": 1.3136, "queue_k_norm": 1.4289, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0685, "sent_len_1": 66.7213, "sent_len_max_0": 127.5288, "sent_len_max_1": 189.8438, "stdk": 0.0478, "stdq": 0.042, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 59200 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9955, "doc_norm": 1.4307, "encoder_q-embeddings": 1981.4819, "encoder_q-layer.0": 1426.0917, "encoder_q-layer.1": 1560.8147, "encoder_q-layer.10": 1283.7075, "encoder_q-layer.11": 2947.3958, "encoder_q-layer.2": 1875.963, "encoder_q-layer.3": 1674.056, "encoder_q-layer.4": 1876.1787, "encoder_q-layer.5": 1872.1128, "encoder_q-layer.6": 1752.7427, "encoder_q-layer.7": 1336.4539, "encoder_q-layer.8": 1377.1678, "encoder_q-layer.9": 1197.0496, "epoch": 0.39, "inbatch_neg_score": 0.3181, "inbatch_pos_score": 0.8657, "learning_rate": 2.2611111111111113e-05, "loss": 3.9955, "norm_diff": 0.1024, "norm_loss": 0.0, "num_token_doc": 66.9398, "num_token_overlap": 11.7275, "num_token_query": 31.9498, "num_token_union": 65.4637, "num_word_context": 202.3328, "num_word_doc": 49.9542, "num_word_query": 23.5997, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2699.0954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3181, "query_norm": 1.3283, "queue_k_norm": 1.4301, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9498, "sent_len_1": 66.9398, "sent_len_max_0": 127.5238, "sent_len_max_1": 188.2825, "stdk": 0.0478, "stdq": 0.0425, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 59300 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 4.0135, "doc_norm": 1.4265, "encoder_q-embeddings": 1627.2706, "encoder_q-layer.0": 1134.6693, "encoder_q-layer.1": 1323.7821, "encoder_q-layer.10": 1254.1013, "encoder_q-layer.11": 3136.1443, "encoder_q-layer.2": 1488.8146, "encoder_q-layer.3": 1610.8215, "encoder_q-layer.4": 1626.8394, "encoder_q-layer.5": 1579.2759, "encoder_q-layer.6": 1513.3403, "encoder_q-layer.7": 1533.3138, "encoder_q-layer.8": 1644.0062, "encoder_q-layer.9": 1334.2866, "epoch": 0.39, "inbatch_neg_score": 0.3172, "inbatch_pos_score": 0.8618, "learning_rate": 2.255555555555556e-05, "loss": 4.0135, "norm_diff": 0.0979, "norm_loss": 0.0, "num_token_doc": 66.5407, "num_token_overlap": 11.6683, "num_token_query": 31.9254, "num_token_union": 65.2602, "num_word_context": 202.0247, "num_word_doc": 49.662, "num_word_query": 23.5613, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2554.597, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3169, "query_norm": 1.3285, "queue_k_norm": 1.4279, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9254, "sent_len_1": 66.5407, "sent_len_max_0": 127.365, "sent_len_max_1": 188.9663, "stdk": 0.0475, "stdq": 0.0425, "stdqueue_k": 0.0477, "stdqueue_q": 0.0, "step": 59400 }, { "accuracy": 40.918, "active_queue_size": 16384.0, "cl_loss": 4.0068, "doc_norm": 1.4212, "encoder_q-embeddings": 1931.2281, "encoder_q-layer.0": 1330.1071, "encoder_q-layer.1": 1564.642, "encoder_q-layer.10": 1289.6508, "encoder_q-layer.11": 3237.3569, "encoder_q-layer.2": 1760.8972, "encoder_q-layer.3": 1844.4673, "encoder_q-layer.4": 1956.5398, "encoder_q-layer.5": 2264.4316, "encoder_q-layer.6": 2154.8286, "encoder_q-layer.7": 2011.5548, "encoder_q-layer.8": 1680.8711, "encoder_q-layer.9": 1232.6267, "epoch": 0.39, "inbatch_neg_score": 0.3165, "inbatch_pos_score": 0.835, "learning_rate": 2.25e-05, "loss": 4.0068, "norm_diff": 0.1091, "norm_loss": 0.0, "num_token_doc": 66.7234, "num_token_overlap": 11.6589, "num_token_query": 31.8734, "num_token_union": 65.3045, "num_word_context": 202.3291, "num_word_doc": 49.8052, "num_word_query": 23.5435, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2894.537, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3164, "query_norm": 1.3121, "queue_k_norm": 1.4314, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8734, "sent_len_1": 66.7234, "sent_len_max_0": 127.6088, "sent_len_max_1": 189.2537, "stdk": 0.0473, "stdq": 0.0419, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 59500 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 4.012, "doc_norm": 1.43, "encoder_q-embeddings": 1241.6151, "encoder_q-layer.0": 815.0737, "encoder_q-layer.1": 869.2209, "encoder_q-layer.10": 1286.4412, "encoder_q-layer.11": 3029.2854, "encoder_q-layer.2": 972.6122, "encoder_q-layer.3": 1000.8167, "encoder_q-layer.4": 1009.3257, "encoder_q-layer.5": 1076.6641, "encoder_q-layer.6": 1102.059, "encoder_q-layer.7": 1154.6871, "encoder_q-layer.8": 1313.0695, "encoder_q-layer.9": 1212.9188, "epoch": 0.39, "inbatch_neg_score": 0.3164, "inbatch_pos_score": 0.8569, "learning_rate": 2.2444444444444447e-05, "loss": 4.012, "norm_diff": 0.1011, "norm_loss": 0.0, "num_token_doc": 66.602, "num_token_overlap": 11.6654, "num_token_query": 31.9678, "num_token_union": 65.2695, "num_word_context": 202.0585, "num_word_doc": 49.7023, "num_word_query": 23.6104, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2066.7084, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3164, "query_norm": 1.329, "queue_k_norm": 1.4323, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9678, "sent_len_1": 66.602, "sent_len_max_0": 127.585, "sent_len_max_1": 189.9412, "stdk": 0.0477, "stdq": 0.0426, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 59600 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 4.0361, "doc_norm": 1.432, "encoder_q-embeddings": 994.4001, "encoder_q-layer.0": 675.4562, "encoder_q-layer.1": 705.1501, "encoder_q-layer.10": 1234.6184, "encoder_q-layer.11": 2911.7917, "encoder_q-layer.2": 790.0145, "encoder_q-layer.3": 802.0645, "encoder_q-layer.4": 828.4463, "encoder_q-layer.5": 844.7371, "encoder_q-layer.6": 902.0994, "encoder_q-layer.7": 994.9679, "encoder_q-layer.8": 1218.4298, "encoder_q-layer.9": 1176.7568, "epoch": 0.39, "inbatch_neg_score": 0.3172, "inbatch_pos_score": 0.8579, "learning_rate": 2.238888888888889e-05, "loss": 4.0361, "norm_diff": 0.114, "norm_loss": 0.0, "num_token_doc": 66.7883, "num_token_overlap": 11.58, "num_token_query": 31.636, "num_token_union": 65.2423, "num_word_context": 202.2984, "num_word_doc": 49.857, "num_word_query": 23.3719, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1849.7859, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3179, "query_norm": 1.318, "queue_k_norm": 1.4313, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.636, "sent_len_1": 66.7883, "sent_len_max_0": 127.4412, "sent_len_max_1": 188.4863, "stdk": 0.0477, "stdq": 0.0421, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 59700 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.9853, "doc_norm": 1.4383, "encoder_q-embeddings": 2728.4412, "encoder_q-layer.0": 2041.8787, "encoder_q-layer.1": 2218.095, "encoder_q-layer.10": 1342.1283, "encoder_q-layer.11": 2971.4734, "encoder_q-layer.2": 2723.5437, "encoder_q-layer.3": 2951.1118, "encoder_q-layer.4": 3100.6587, "encoder_q-layer.5": 2765.031, "encoder_q-layer.6": 2770.8469, "encoder_q-layer.7": 2168.791, "encoder_q-layer.8": 1869.9905, "encoder_q-layer.9": 1310.1167, "epoch": 0.39, "inbatch_neg_score": 0.3144, "inbatch_pos_score": 0.8672, "learning_rate": 2.2333333333333335e-05, "loss": 3.9853, "norm_diff": 0.1238, "norm_loss": 0.0, "num_token_doc": 66.634, "num_token_overlap": 11.6879, "num_token_query": 31.9531, "num_token_union": 65.3378, "num_word_context": 202.4143, "num_word_doc": 49.7317, "num_word_query": 23.6027, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3696.174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3149, "query_norm": 1.3145, "queue_k_norm": 1.4307, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9531, "sent_len_1": 66.634, "sent_len_max_0": 127.5487, "sent_len_max_1": 188.4837, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 59800 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 4.027, "doc_norm": 1.4358, "encoder_q-embeddings": 1141.4084, "encoder_q-layer.0": 770.1513, "encoder_q-layer.1": 813.107, "encoder_q-layer.10": 1210.3024, "encoder_q-layer.11": 2930.7791, "encoder_q-layer.2": 933.0175, "encoder_q-layer.3": 928.4073, "encoder_q-layer.4": 941.8205, "encoder_q-layer.5": 998.693, "encoder_q-layer.6": 1058.0958, "encoder_q-layer.7": 1117.3173, "encoder_q-layer.8": 1346.8298, "encoder_q-layer.9": 1175.311, "epoch": 0.39, "inbatch_neg_score": 0.3176, "inbatch_pos_score": 0.8584, "learning_rate": 2.2277777777777778e-05, "loss": 4.027, "norm_diff": 0.1127, "norm_loss": 0.0, "num_token_doc": 66.5352, "num_token_overlap": 11.6479, "num_token_query": 31.8527, "num_token_union": 65.1964, "num_word_context": 202.251, "num_word_doc": 49.651, "num_word_query": 23.5162, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1962.226, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3179, "query_norm": 1.3231, "queue_k_norm": 1.4308, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8527, "sent_len_1": 66.5352, "sent_len_max_0": 127.4213, "sent_len_max_1": 188.5475, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 59900 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 4.0025, "doc_norm": 1.439, "encoder_q-embeddings": 1218.9099, "encoder_q-layer.0": 795.4626, "encoder_q-layer.1": 871.1693, "encoder_q-layer.10": 1331.168, "encoder_q-layer.11": 3203.345, "encoder_q-layer.2": 1075.6519, "encoder_q-layer.3": 1105.0126, "encoder_q-layer.4": 1160.7632, "encoder_q-layer.5": 1124.007, "encoder_q-layer.6": 1135.2148, "encoder_q-layer.7": 1178.9944, "encoder_q-layer.8": 1398.298, "encoder_q-layer.9": 1245.8073, "epoch": 0.39, "inbatch_neg_score": 0.3189, "inbatch_pos_score": 0.8833, "learning_rate": 2.2222222222222223e-05, "loss": 4.0025, "norm_diff": 0.1048, "norm_loss": 0.0, "num_token_doc": 66.7342, "num_token_overlap": 11.6834, "num_token_query": 31.9436, "num_token_union": 65.3258, "num_word_context": 202.405, "num_word_doc": 49.7725, "num_word_query": 23.581, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2172.6105, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3191, "query_norm": 1.3342, "queue_k_norm": 1.4327, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9436, "sent_len_1": 66.7342, "sent_len_max_0": 127.4488, "sent_len_max_1": 190.0362, "stdk": 0.048, "stdq": 0.0428, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60000 }, { "dev_runtime": 43.2397, "dev_samples_per_second": 1.48, "dev_steps_per_second": 0.023, "epoch": 0.39, "step": 60000, "test_accuracy": 93.26171875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3919990360736847, "test_doc_norm": 1.4016526937484741, "test_inbatch_neg_score": 0.653478741645813, "test_inbatch_pos_score": 1.5614413022994995, "test_loss": 0.3919990360736847, "test_loss_align": 1.0082471370697021, "test_loss_unif": 3.8016343116760254, "test_loss_unif_q@queue": 3.8016340732574463, "test_norm_diff": 0.05992225557565689, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.304887056350708, "test_query_norm": 1.461574912071228, "test_queue_k_norm": 1.4328831434249878, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04135196655988693, "test_stdq": 0.04239710420370102, "test_stdqueue_k": 0.04783887788653374, "test_stdqueue_q": 0.0 }, { "dev_runtime": 43.2397, "dev_samples_per_second": 1.48, "dev_steps_per_second": 0.023, "epoch": 0.39, "eval_beir-arguana_ndcg@10": 0.34916, "eval_beir-arguana_recall@10": 0.59317, "eval_beir-arguana_recall@100": 0.90114, "eval_beir-arguana_recall@20": 0.72475, "eval_beir-avg_ndcg@10": 0.370052, "eval_beir-avg_recall@10": 0.4398418333333334, "eval_beir-avg_recall@100": 0.62216225, "eval_beir-avg_recall@20": 0.5002880833333333, "eval_beir-cqadupstack_ndcg@10": 0.25311999999999996, "eval_beir-cqadupstack_recall@10": 0.34567833333333337, "eval_beir-cqadupstack_recall@100": 0.5804524999999999, "eval_beir-cqadupstack_recall@20": 0.41322083333333337, "eval_beir-fiqa_ndcg@10": 0.23818, "eval_beir-fiqa_recall@10": 0.29306, "eval_beir-fiqa_recall@100": 0.55076, "eval_beir-fiqa_recall@20": 0.38148, "eval_beir-nfcorpus_ndcg@10": 0.29705, "eval_beir-nfcorpus_recall@10": 0.15121, "eval_beir-nfcorpus_recall@100": 0.27494, "eval_beir-nfcorpus_recall@20": 0.18324, "eval_beir-nq_ndcg@10": 0.27249, "eval_beir-nq_recall@10": 0.44976, "eval_beir-nq_recall@100": 0.7921, "eval_beir-nq_recall@20": 0.57508, "eval_beir-quora_ndcg@10": 0.77799, "eval_beir-quora_recall@10": 0.88497, "eval_beir-quora_recall@100": 0.97617, "eval_beir-quora_recall@20": 0.92731, "eval_beir-scidocs_ndcg@10": 0.14695, "eval_beir-scidocs_recall@10": 0.15473, "eval_beir-scidocs_recall@100": 0.35583, "eval_beir-scidocs_recall@20": 0.20813, "eval_beir-scifact_ndcg@10": 0.61854, "eval_beir-scifact_recall@10": 0.78467, "eval_beir-scifact_recall@100": 0.92989, "eval_beir-scifact_recall@20": 0.80967, "eval_beir-trec-covid_ndcg@10": 0.56226, "eval_beir-trec-covid_recall@10": 0.608, "eval_beir-trec-covid_recall@100": 0.4388, "eval_beir-trec-covid_recall@20": 0.583, "eval_beir-webis-touche2020_ndcg@10": 0.18478, "eval_beir-webis-touche2020_recall@10": 0.13317, "eval_beir-webis-touche2020_recall@100": 0.42154, "eval_beir-webis-touche2020_recall@20": 0.197, "eval_senteval-avg_sts": 0.7532813627087043, "eval_senteval-sickr_spearman": 0.7152072120264195, "eval_senteval-stsb_spearman": 0.7913555133909892, "step": 60000, "test_accuracy": 93.26171875, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3919990360736847, "test_doc_norm": 1.4016526937484741, "test_inbatch_neg_score": 0.653478741645813, "test_inbatch_pos_score": 1.5614413022994995, "test_loss": 0.3919990360736847, "test_loss_align": 1.0082471370697021, "test_loss_unif": 3.8016343116760254, "test_loss_unif_q@queue": 3.8016340732574463, "test_norm_diff": 0.05992225557565689, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.304887056350708, "test_query_norm": 1.461574912071228, "test_queue_k_norm": 1.4328831434249878, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04135196655988693, "test_stdq": 0.04239710420370102, "test_stdqueue_k": 0.04783887788653374, "test_stdqueue_q": 0.0 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 4.0012, "doc_norm": 1.4315, "encoder_q-embeddings": 1583.2384, "encoder_q-layer.0": 1127.8441, "encoder_q-layer.1": 1286.4442, "encoder_q-layer.10": 1300.9961, "encoder_q-layer.11": 3072.147, "encoder_q-layer.2": 1481.1909, "encoder_q-layer.3": 1592.0912, "encoder_q-layer.4": 1504.8043, "encoder_q-layer.5": 1660.2645, "encoder_q-layer.6": 1667.3242, "encoder_q-layer.7": 1579.912, "encoder_q-layer.8": 1469.5804, "encoder_q-layer.9": 1211.9835, "epoch": 0.39, "inbatch_neg_score": 0.3183, "inbatch_pos_score": 0.8745, "learning_rate": 2.216666666666667e-05, "loss": 4.0012, "norm_diff": 0.1086, "norm_loss": 0.0, "num_token_doc": 66.503, "num_token_overlap": 11.6718, "num_token_query": 31.9248, "num_token_union": 65.1964, "num_word_context": 202.1175, "num_word_doc": 49.5966, "num_word_query": 23.5809, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2485.545, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3174, "query_norm": 1.3229, "queue_k_norm": 1.4327, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9248, "sent_len_1": 66.503, "sent_len_max_0": 127.5037, "sent_len_max_1": 189.6962, "stdk": 0.0477, "stdq": 0.0423, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60100 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 4.0106, "doc_norm": 1.43, "encoder_q-embeddings": 2015.0757, "encoder_q-layer.0": 1420.3651, "encoder_q-layer.1": 1455.3044, "encoder_q-layer.10": 1285.6089, "encoder_q-layer.11": 2935.7908, "encoder_q-layer.2": 1836.9243, "encoder_q-layer.3": 1697.8246, "encoder_q-layer.4": 1743.8911, "encoder_q-layer.5": 1600.6556, "encoder_q-layer.6": 1337.1152, "encoder_q-layer.7": 1278.119, "encoder_q-layer.8": 1328.1704, "encoder_q-layer.9": 1132.0718, "epoch": 0.39, "inbatch_neg_score": 0.3199, "inbatch_pos_score": 0.8774, "learning_rate": 2.211111111111111e-05, "loss": 4.0106, "norm_diff": 0.0963, "norm_loss": 0.0, "num_token_doc": 66.8272, "num_token_overlap": 11.6587, "num_token_query": 31.8826, "num_token_union": 65.4098, "num_word_context": 202.2591, "num_word_doc": 49.8938, "num_word_query": 23.5476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2574.8821, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3196, "query_norm": 1.3337, "queue_k_norm": 1.433, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8826, "sent_len_1": 66.8272, "sent_len_max_0": 127.6075, "sent_len_max_1": 188.4487, "stdk": 0.0476, "stdq": 0.0426, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60200 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 4.005, "doc_norm": 1.4328, "encoder_q-embeddings": 1100.2922, "encoder_q-layer.0": 722.6335, "encoder_q-layer.1": 751.8882, "encoder_q-layer.10": 1311.4106, "encoder_q-layer.11": 3137.5447, "encoder_q-layer.2": 839.614, "encoder_q-layer.3": 880.011, "encoder_q-layer.4": 915.5579, "encoder_q-layer.5": 952.7416, "encoder_q-layer.6": 1069.6969, "encoder_q-layer.7": 1139.3848, "encoder_q-layer.8": 1317.3533, "encoder_q-layer.9": 1178.6031, "epoch": 0.39, "inbatch_neg_score": 0.3175, "inbatch_pos_score": 0.8506, "learning_rate": 2.2055555555555557e-05, "loss": 4.005, "norm_diff": 0.1177, "norm_loss": 0.0, "num_token_doc": 66.9644, "num_token_overlap": 11.6509, "num_token_query": 31.7736, "num_token_union": 65.4057, "num_word_context": 202.5902, "num_word_doc": 49.989, "num_word_query": 23.4941, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2000.5542, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3179, "query_norm": 1.3151, "queue_k_norm": 1.4332, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7736, "sent_len_1": 66.9644, "sent_len_max_0": 127.5863, "sent_len_max_1": 189.5387, "stdk": 0.0477, "stdq": 0.0418, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60300 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9899, "doc_norm": 1.4365, "encoder_q-embeddings": 1358.3717, "encoder_q-layer.0": 899.3776, "encoder_q-layer.1": 1011.6884, "encoder_q-layer.10": 1205.6996, "encoder_q-layer.11": 3025.0591, "encoder_q-layer.2": 1152.501, "encoder_q-layer.3": 1136.38, "encoder_q-layer.4": 1263.2441, "encoder_q-layer.5": 1246.4517, "encoder_q-layer.6": 1290.9862, "encoder_q-layer.7": 1264.8613, "encoder_q-layer.8": 1407.1757, "encoder_q-layer.9": 1263.4486, "epoch": 0.39, "inbatch_neg_score": 0.32, "inbatch_pos_score": 0.8579, "learning_rate": 2.2000000000000003e-05, "loss": 3.9899, "norm_diff": 0.1161, "norm_loss": 0.0, "num_token_doc": 66.6313, "num_token_overlap": 11.7083, "num_token_query": 31.976, "num_token_union": 65.2571, "num_word_context": 202.0708, "num_word_doc": 49.7191, "num_word_query": 23.6186, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2180.5668, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3196, "query_norm": 1.3204, "queue_k_norm": 1.4339, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.976, "sent_len_1": 66.6313, "sent_len_max_0": 127.6125, "sent_len_max_1": 188.59, "stdk": 0.0478, "stdq": 0.0419, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60400 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 4.0275, "doc_norm": 1.4388, "encoder_q-embeddings": 1437.1544, "encoder_q-layer.0": 987.678, "encoder_q-layer.1": 1150.8156, "encoder_q-layer.10": 1511.9537, "encoder_q-layer.11": 3225.7512, "encoder_q-layer.2": 1298.9851, "encoder_q-layer.3": 1410.7561, "encoder_q-layer.4": 1568.1556, "encoder_q-layer.5": 1399.6921, "encoder_q-layer.6": 1432.8175, "encoder_q-layer.7": 1299.6495, "encoder_q-layer.8": 1453.5731, "encoder_q-layer.9": 1301.7617, "epoch": 0.39, "inbatch_neg_score": 0.3226, "inbatch_pos_score": 0.8574, "learning_rate": 2.1944444444444445e-05, "loss": 4.0275, "norm_diff": 0.0941, "norm_loss": 0.0, "num_token_doc": 66.8592, "num_token_overlap": 11.6229, "num_token_query": 31.8681, "num_token_union": 65.3944, "num_word_context": 202.3199, "num_word_doc": 49.8815, "num_word_query": 23.5178, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2338.234, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3228, "query_norm": 1.3447, "queue_k_norm": 1.4331, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8681, "sent_len_1": 66.8592, "sent_len_max_0": 127.4625, "sent_len_max_1": 190.5725, "stdk": 0.0479, "stdq": 0.0428, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60500 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 4.0042, "doc_norm": 1.4314, "encoder_q-embeddings": 1038.9238, "encoder_q-layer.0": 695.0352, "encoder_q-layer.1": 728.8002, "encoder_q-layer.10": 1522.7992, "encoder_q-layer.11": 3145.9053, "encoder_q-layer.2": 799.4999, "encoder_q-layer.3": 790.6248, "encoder_q-layer.4": 849.3837, "encoder_q-layer.5": 904.5429, "encoder_q-layer.6": 1023.6115, "encoder_q-layer.7": 1147.356, "encoder_q-layer.8": 1300.454, "encoder_q-layer.9": 1241.6017, "epoch": 0.39, "inbatch_neg_score": 0.3249, "inbatch_pos_score": 0.855, "learning_rate": 2.188888888888889e-05, "loss": 4.0042, "norm_diff": 0.0896, "norm_loss": 0.0, "num_token_doc": 66.7198, "num_token_overlap": 11.6733, "num_token_query": 31.895, "num_token_union": 65.3502, "num_word_context": 202.2463, "num_word_doc": 49.807, "num_word_query": 23.5661, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1999.7817, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3242, "query_norm": 1.3418, "queue_k_norm": 1.4347, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.895, "sent_len_1": 66.7198, "sent_len_max_0": 127.2913, "sent_len_max_1": 188.2, "stdk": 0.0476, "stdq": 0.0424, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60600 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.9854, "doc_norm": 1.439, "encoder_q-embeddings": 1165.2366, "encoder_q-layer.0": 755.6461, "encoder_q-layer.1": 783.6238, "encoder_q-layer.10": 1264.4585, "encoder_q-layer.11": 3066.2732, "encoder_q-layer.2": 868.9715, "encoder_q-layer.3": 898.1066, "encoder_q-layer.4": 942.6758, "encoder_q-layer.5": 937.2477, "encoder_q-layer.6": 1010.1816, "encoder_q-layer.7": 1112.6731, "encoder_q-layer.8": 1289.818, "encoder_q-layer.9": 1148.8953, "epoch": 0.4, "inbatch_neg_score": 0.3309, "inbatch_pos_score": 0.873, "learning_rate": 2.1833333333333333e-05, "loss": 3.9854, "norm_diff": 0.1105, "norm_loss": 0.0, "num_token_doc": 66.8606, "num_token_overlap": 11.7018, "num_token_query": 31.9184, "num_token_union": 65.3969, "num_word_context": 202.5921, "num_word_doc": 49.8965, "num_word_query": 23.5834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2007.7759, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3306, "query_norm": 1.3285, "queue_k_norm": 1.4335, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9184, "sent_len_1": 66.8606, "sent_len_max_0": 127.545, "sent_len_max_1": 189.675, "stdk": 0.0479, "stdq": 0.0418, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60700 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.0013, "doc_norm": 1.4369, "encoder_q-embeddings": 1160.8176, "encoder_q-layer.0": 752.1323, "encoder_q-layer.1": 808.3065, "encoder_q-layer.10": 1285.1011, "encoder_q-layer.11": 3080.9331, "encoder_q-layer.2": 942.79, "encoder_q-layer.3": 1035.2235, "encoder_q-layer.4": 1081.6837, "encoder_q-layer.5": 1168.8589, "encoder_q-layer.6": 1231.082, "encoder_q-layer.7": 1283.9651, "encoder_q-layer.8": 1430.5455, "encoder_q-layer.9": 1220.1613, "epoch": 0.4, "inbatch_neg_score": 0.3341, "inbatch_pos_score": 0.8765, "learning_rate": 2.177777777777778e-05, "loss": 4.0013, "norm_diff": 0.0954, "norm_loss": 0.0, "num_token_doc": 66.6928, "num_token_overlap": 11.6348, "num_token_query": 31.9072, "num_token_union": 65.3417, "num_word_context": 202.3688, "num_word_doc": 49.7776, "num_word_query": 23.5653, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2113.9721, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3345, "query_norm": 1.3415, "queue_k_norm": 1.435, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9072, "sent_len_1": 66.6928, "sent_len_max_0": 127.5413, "sent_len_max_1": 188.345, "stdk": 0.0478, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60800 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9787, "doc_norm": 1.4374, "encoder_q-embeddings": 1276.2854, "encoder_q-layer.0": 891.2327, "encoder_q-layer.1": 917.3838, "encoder_q-layer.10": 1120.6493, "encoder_q-layer.11": 2937.521, "encoder_q-layer.2": 1012.9906, "encoder_q-layer.3": 1019.8524, "encoder_q-layer.4": 1037.3125, "encoder_q-layer.5": 1011.6738, "encoder_q-layer.6": 1033.0261, "encoder_q-layer.7": 1073.0548, "encoder_q-layer.8": 1231.7554, "encoder_q-layer.9": 1107.1652, "epoch": 0.4, "inbatch_neg_score": 0.3394, "inbatch_pos_score": 0.8848, "learning_rate": 2.1722222222222225e-05, "loss": 3.9787, "norm_diff": 0.0934, "norm_loss": 0.0, "num_token_doc": 66.9327, "num_token_overlap": 11.7167, "num_token_query": 31.9372, "num_token_union": 65.4356, "num_word_context": 202.5681, "num_word_doc": 49.9078, "num_word_query": 23.5782, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2002.6946, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3398, "query_norm": 1.344, "queue_k_norm": 1.4351, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9372, "sent_len_1": 66.9327, "sent_len_max_0": 127.4575, "sent_len_max_1": 191.78, "stdk": 0.0478, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 60900 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.9869, "doc_norm": 1.4321, "encoder_q-embeddings": 1038.2446, "encoder_q-layer.0": 677.986, "encoder_q-layer.1": 726.6124, "encoder_q-layer.10": 1259.328, "encoder_q-layer.11": 3070.1816, "encoder_q-layer.2": 801.3395, "encoder_q-layer.3": 823.0612, "encoder_q-layer.4": 866.7341, "encoder_q-layer.5": 863.9581, "encoder_q-layer.6": 982.2648, "encoder_q-layer.7": 1091.89, "encoder_q-layer.8": 1306.9004, "encoder_q-layer.9": 1251.1821, "epoch": 0.4, "inbatch_neg_score": 0.3456, "inbatch_pos_score": 0.8838, "learning_rate": 2.1666666666666667e-05, "loss": 3.9869, "norm_diff": 0.109, "norm_loss": 0.0, "num_token_doc": 67.0253, "num_token_overlap": 11.7353, "num_token_query": 32.124, "num_token_union": 65.6363, "num_word_context": 202.3442, "num_word_doc": 50.0312, "num_word_query": 23.7405, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1961.4824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3457, "query_norm": 1.3231, "queue_k_norm": 1.437, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.124, "sent_len_1": 67.0253, "sent_len_max_0": 127.6663, "sent_len_max_1": 188.6962, "stdk": 0.0476, "stdq": 0.0413, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 61000 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 4.0111, "doc_norm": 1.435, "encoder_q-embeddings": 2198.6812, "encoder_q-layer.0": 1440.2061, "encoder_q-layer.1": 1467.7738, "encoder_q-layer.10": 2505.4766, "encoder_q-layer.11": 6472.9717, "encoder_q-layer.2": 1630.2253, "encoder_q-layer.3": 1716.0922, "encoder_q-layer.4": 1732.0626, "encoder_q-layer.5": 1727.8112, "encoder_q-layer.6": 1907.1975, "encoder_q-layer.7": 2287.1755, "encoder_q-layer.8": 2623.6345, "encoder_q-layer.9": 2451.9104, "epoch": 0.4, "inbatch_neg_score": 0.3481, "inbatch_pos_score": 0.8779, "learning_rate": 2.1611111111111113e-05, "loss": 4.0111, "norm_diff": 0.1032, "norm_loss": 0.0, "num_token_doc": 66.8919, "num_token_overlap": 11.6354, "num_token_query": 31.8243, "num_token_union": 65.4243, "num_word_context": 202.6062, "num_word_doc": 49.937, "num_word_query": 23.5026, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4079.9782, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3484, "query_norm": 1.3318, "queue_k_norm": 1.4364, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8243, "sent_len_1": 66.8919, "sent_len_max_0": 127.5263, "sent_len_max_1": 188.3313, "stdk": 0.0476, "stdq": 0.0418, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 61100 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 4.0075, "doc_norm": 1.4347, "encoder_q-embeddings": 2337.8528, "encoder_q-layer.0": 1594.1389, "encoder_q-layer.1": 1654.2118, "encoder_q-layer.10": 2421.7576, "encoder_q-layer.11": 6158.1475, "encoder_q-layer.2": 1940.5378, "encoder_q-layer.3": 2052.3633, "encoder_q-layer.4": 2093.2454, "encoder_q-layer.5": 2204.1792, "encoder_q-layer.6": 2402.3931, "encoder_q-layer.7": 2562.2312, "encoder_q-layer.8": 2565.4639, "encoder_q-layer.9": 2239.4998, "epoch": 0.4, "inbatch_neg_score": 0.3532, "inbatch_pos_score": 0.9092, "learning_rate": 2.1555555555555555e-05, "loss": 4.0075, "norm_diff": 0.0922, "norm_loss": 0.0, "num_token_doc": 66.7357, "num_token_overlap": 11.6285, "num_token_query": 31.7409, "num_token_union": 65.2612, "num_word_context": 202.1516, "num_word_doc": 49.7801, "num_word_query": 23.4144, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4150.8545, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3525, "query_norm": 1.3425, "queue_k_norm": 1.4387, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7409, "sent_len_1": 66.7357, "sent_len_max_0": 127.5975, "sent_len_max_1": 190.41, "stdk": 0.0476, "stdq": 0.0423, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 61200 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9897, "doc_norm": 1.4408, "encoder_q-embeddings": 1937.8773, "encoder_q-layer.0": 1330.1797, "encoder_q-layer.1": 1396.6836, "encoder_q-layer.10": 2502.7236, "encoder_q-layer.11": 6519.312, "encoder_q-layer.2": 1646.937, "encoder_q-layer.3": 1626.0284, "encoder_q-layer.4": 1782.6086, "encoder_q-layer.5": 1844.4607, "encoder_q-layer.6": 2039.0272, "encoder_q-layer.7": 2133.1248, "encoder_q-layer.8": 2784.582, "encoder_q-layer.9": 2422.6785, "epoch": 0.4, "inbatch_neg_score": 0.3531, "inbatch_pos_score": 0.8911, "learning_rate": 2.15e-05, "loss": 3.9897, "norm_diff": 0.1054, "norm_loss": 0.0, "num_token_doc": 66.7248, "num_token_overlap": 11.7, "num_token_query": 31.8875, "num_token_union": 65.3133, "num_word_context": 201.794, "num_word_doc": 49.77, "num_word_query": 23.5448, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4001.6599, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3535, "query_norm": 1.3354, "queue_k_norm": 1.4389, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8875, "sent_len_1": 66.7248, "sent_len_max_0": 127.4825, "sent_len_max_1": 188.95, "stdk": 0.0478, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 61300 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9951, "doc_norm": 1.4409, "encoder_q-embeddings": 2458.0381, "encoder_q-layer.0": 1601.3107, "encoder_q-layer.1": 1664.5455, "encoder_q-layer.10": 2554.4482, "encoder_q-layer.11": 6322.6538, "encoder_q-layer.2": 1896.4816, "encoder_q-layer.3": 1940.5867, "encoder_q-layer.4": 2001.0885, "encoder_q-layer.5": 2014.9343, "encoder_q-layer.6": 2252.2424, "encoder_q-layer.7": 2291.5342, "encoder_q-layer.8": 2518.9158, "encoder_q-layer.9": 2466.4082, "epoch": 0.4, "inbatch_neg_score": 0.3562, "inbatch_pos_score": 0.8965, "learning_rate": 2.1444444444444443e-05, "loss": 3.9951, "norm_diff": 0.1026, "norm_loss": 0.0, "num_token_doc": 66.814, "num_token_overlap": 11.6732, "num_token_query": 31.9092, "num_token_union": 65.4097, "num_word_context": 202.2726, "num_word_doc": 49.8827, "num_word_query": 23.5615, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4179.8174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3562, "query_norm": 1.3383, "queue_k_norm": 1.4396, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9092, "sent_len_1": 66.814, "sent_len_max_0": 127.4025, "sent_len_max_1": 188.3725, "stdk": 0.0477, "stdq": 0.0424, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 61400 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 4.0179, "doc_norm": 1.4392, "encoder_q-embeddings": 2530.6541, "encoder_q-layer.0": 1709.3312, "encoder_q-layer.1": 1739.5195, "encoder_q-layer.10": 2766.3152, "encoder_q-layer.11": 6477.7808, "encoder_q-layer.2": 1967.2491, "encoder_q-layer.3": 2054.7146, "encoder_q-layer.4": 2179.9819, "encoder_q-layer.5": 2291.8337, "encoder_q-layer.6": 2473.7737, "encoder_q-layer.7": 2567.0532, "encoder_q-layer.8": 2855.1367, "encoder_q-layer.9": 2448.1807, "epoch": 0.4, "inbatch_neg_score": 0.3535, "inbatch_pos_score": 0.8936, "learning_rate": 2.138888888888889e-05, "loss": 4.0179, "norm_diff": 0.1072, "norm_loss": 0.0, "num_token_doc": 66.7418, "num_token_overlap": 11.6697, "num_token_query": 31.8945, "num_token_union": 65.3289, "num_word_context": 202.2709, "num_word_doc": 49.774, "num_word_query": 23.5442, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4352.1082, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.354, "query_norm": 1.332, "queue_k_norm": 1.4409, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8945, "sent_len_1": 66.7418, "sent_len_max_0": 127.6188, "sent_len_max_1": 190.3487, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 61500 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 4.0191, "doc_norm": 1.4439, "encoder_q-embeddings": 1977.7546, "encoder_q-layer.0": 1295.3829, "encoder_q-layer.1": 1296.1449, "encoder_q-layer.10": 2718.2268, "encoder_q-layer.11": 6483.6099, "encoder_q-layer.2": 1464.4097, "encoder_q-layer.3": 1482.4979, "encoder_q-layer.4": 1552.8434, "encoder_q-layer.5": 1583.7385, "encoder_q-layer.6": 1839.2185, "encoder_q-layer.7": 2120.0535, "encoder_q-layer.8": 2380.5476, "encoder_q-layer.9": 2294.5852, "epoch": 0.4, "inbatch_neg_score": 0.3507, "inbatch_pos_score": 0.8994, "learning_rate": 2.1333333333333335e-05, "loss": 4.0191, "norm_diff": 0.1251, "norm_loss": 0.0, "num_token_doc": 66.6259, "num_token_overlap": 11.6837, "num_token_query": 31.9156, "num_token_union": 65.249, "num_word_context": 202.6231, "num_word_doc": 49.755, "num_word_query": 23.5739, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3916.1747, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3516, "query_norm": 1.3188, "queue_k_norm": 1.4443, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9156, "sent_len_1": 66.6259, "sent_len_max_0": 127.4775, "sent_len_max_1": 189.2825, "stdk": 0.0478, "stdq": 0.0418, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 61600 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 4.0136, "doc_norm": 1.4403, "encoder_q-embeddings": 2192.1504, "encoder_q-layer.0": 1426.6035, "encoder_q-layer.1": 1441.009, "encoder_q-layer.10": 2517.0024, "encoder_q-layer.11": 6336.9375, "encoder_q-layer.2": 1603.7535, "encoder_q-layer.3": 1632.9266, "encoder_q-layer.4": 1714.4592, "encoder_q-layer.5": 1789.8271, "encoder_q-layer.6": 1969.2108, "encoder_q-layer.7": 2221.4368, "encoder_q-layer.8": 2616.2263, "encoder_q-layer.9": 2430.4478, "epoch": 0.4, "inbatch_neg_score": 0.3534, "inbatch_pos_score": 0.8755, "learning_rate": 2.127777777777778e-05, "loss": 4.0136, "norm_diff": 0.1141, "norm_loss": 0.0, "num_token_doc": 66.8512, "num_token_overlap": 11.6989, "num_token_query": 31.9123, "num_token_union": 65.3928, "num_word_context": 202.4322, "num_word_doc": 49.8896, "num_word_query": 23.5765, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3955.9133, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3533, "query_norm": 1.3261, "queue_k_norm": 1.4422, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9123, "sent_len_1": 66.8512, "sent_len_max_0": 127.34, "sent_len_max_1": 190.765, "stdk": 0.0476, "stdq": 0.042, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 61700 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.9803, "doc_norm": 1.4467, "encoder_q-embeddings": 3088.0435, "encoder_q-layer.0": 2150.1743, "encoder_q-layer.1": 2371.3787, "encoder_q-layer.10": 2617.0525, "encoder_q-layer.11": 6428.1465, "encoder_q-layer.2": 2685.9241, "encoder_q-layer.3": 2945.3, "encoder_q-layer.4": 2866.8862, "encoder_q-layer.5": 3030.3555, "encoder_q-layer.6": 3098.4348, "encoder_q-layer.7": 3311.5308, "encoder_q-layer.8": 3184.708, "encoder_q-layer.9": 2692.2014, "epoch": 0.4, "inbatch_neg_score": 0.3528, "inbatch_pos_score": 0.8931, "learning_rate": 2.1222222222222223e-05, "loss": 3.9803, "norm_diff": 0.1122, "norm_loss": 0.0, "num_token_doc": 66.6946, "num_token_overlap": 11.687, "num_token_query": 31.9113, "num_token_union": 65.2937, "num_word_context": 202.2817, "num_word_doc": 49.7857, "num_word_query": 23.5859, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4924.2278, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.353, "query_norm": 1.3344, "queue_k_norm": 1.443, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9113, "sent_len_1": 66.6946, "sent_len_max_0": 127.575, "sent_len_max_1": 189.94, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 61800 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 3.9727, "doc_norm": 1.4408, "encoder_q-embeddings": 3056.0938, "encoder_q-layer.0": 2073.5859, "encoder_q-layer.1": 2205.0193, "encoder_q-layer.10": 2375.5764, "encoder_q-layer.11": 6237.1143, "encoder_q-layer.2": 2603.6213, "encoder_q-layer.3": 2660.051, "encoder_q-layer.4": 2666.9973, "encoder_q-layer.5": 2643.1147, "encoder_q-layer.6": 2788.9089, "encoder_q-layer.7": 2812.4617, "encoder_q-layer.8": 2774.2405, "encoder_q-layer.9": 2393.4441, "epoch": 0.4, "inbatch_neg_score": 0.357, "inbatch_pos_score": 0.9019, "learning_rate": 2.116666666666667e-05, "loss": 3.9727, "norm_diff": 0.0931, "norm_loss": 0.0, "num_token_doc": 66.6353, "num_token_overlap": 11.6881, "num_token_query": 31.9702, "num_token_union": 65.2791, "num_word_context": 202.2179, "num_word_doc": 49.6941, "num_word_query": 23.6274, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4678.8267, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3567, "query_norm": 1.3476, "queue_k_norm": 1.4423, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9702, "sent_len_1": 66.6353, "sent_len_max_0": 127.62, "sent_len_max_1": 189.505, "stdk": 0.0476, "stdq": 0.0426, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 61900 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.9869, "doc_norm": 1.4484, "encoder_q-embeddings": 7815.1543, "encoder_q-layer.0": 5404.6074, "encoder_q-layer.1": 6010.4102, "encoder_q-layer.10": 2535.3843, "encoder_q-layer.11": 6191.6523, "encoder_q-layer.2": 7325.8784, "encoder_q-layer.3": 7680.6157, "encoder_q-layer.4": 8919.4873, "encoder_q-layer.5": 8795.8486, "encoder_q-layer.6": 9435.2422, "encoder_q-layer.7": 8220.874, "encoder_q-layer.8": 6248.3027, "encoder_q-layer.9": 2592.155, "epoch": 0.4, "inbatch_neg_score": 0.3563, "inbatch_pos_score": 0.8828, "learning_rate": 2.111111111111111e-05, "loss": 3.9869, "norm_diff": 0.114, "norm_loss": 0.0, "num_token_doc": 66.7748, "num_token_overlap": 11.7041, "num_token_query": 32.0002, "num_token_union": 65.3699, "num_word_context": 202.2457, "num_word_doc": 49.8289, "num_word_query": 23.6469, "postclip_grad_norm": 1.0, "preclip_grad_norm": 10541.798, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3569, "query_norm": 1.3343, "queue_k_norm": 1.4445, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0002, "sent_len_1": 66.7748, "sent_len_max_0": 127.5625, "sent_len_max_1": 190.8713, "stdk": 0.0479, "stdq": 0.042, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 62000 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.9821, "doc_norm": 1.4407, "encoder_q-embeddings": 3060.4556, "encoder_q-layer.0": 2030.7041, "encoder_q-layer.1": 2255.5898, "encoder_q-layer.10": 2361.0439, "encoder_q-layer.11": 6345.4282, "encoder_q-layer.2": 2603.2141, "encoder_q-layer.3": 2723.5823, "encoder_q-layer.4": 2931.3042, "encoder_q-layer.5": 3020.0222, "encoder_q-layer.6": 3036.2104, "encoder_q-layer.7": 3179.4795, "encoder_q-layer.8": 2945.2585, "encoder_q-layer.9": 2394.1064, "epoch": 0.4, "inbatch_neg_score": 0.3551, "inbatch_pos_score": 0.9014, "learning_rate": 2.1055555555555556e-05, "loss": 3.9821, "norm_diff": 0.1024, "norm_loss": 0.0, "num_token_doc": 66.8188, "num_token_overlap": 11.7055, "num_token_query": 32.0021, "num_token_union": 65.3822, "num_word_context": 202.1815, "num_word_doc": 49.8304, "num_word_query": 23.6478, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4702.887, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3552, "query_norm": 1.3383, "queue_k_norm": 1.4455, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0021, "sent_len_1": 66.8188, "sent_len_max_0": 127.2788, "sent_len_max_1": 190.905, "stdk": 0.0476, "stdq": 0.0423, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 62100 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 4.0035, "doc_norm": 1.4497, "encoder_q-embeddings": 3642.7205, "encoder_q-layer.0": 2527.1052, "encoder_q-layer.1": 2748.0857, "encoder_q-layer.10": 2662.1116, "encoder_q-layer.11": 6245.2847, "encoder_q-layer.2": 3325.1438, "encoder_q-layer.3": 3403.9907, "encoder_q-layer.4": 3471.1379, "encoder_q-layer.5": 3573.4136, "encoder_q-layer.6": 3606.9346, "encoder_q-layer.7": 3141.0154, "encoder_q-layer.8": 3112.325, "encoder_q-layer.9": 2573.7732, "epoch": 0.4, "inbatch_neg_score": 0.357, "inbatch_pos_score": 0.874, "learning_rate": 2.1e-05, "loss": 4.0035, "norm_diff": 0.1326, "norm_loss": 0.0, "num_token_doc": 67.0502, "num_token_overlap": 11.6809, "num_token_query": 31.922, "num_token_union": 65.5459, "num_word_context": 202.527, "num_word_doc": 49.9839, "num_word_query": 23.5396, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5332.0686, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3574, "query_norm": 1.3172, "queue_k_norm": 1.4448, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.922, "sent_len_1": 67.0502, "sent_len_max_0": 127.3825, "sent_len_max_1": 190.5175, "stdk": 0.0479, "stdq": 0.0413, "stdqueue_k": 0.0478, "stdqueue_q": 0.0, "step": 62200 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 3.9956, "doc_norm": 1.4495, "encoder_q-embeddings": 3799.981, "encoder_q-layer.0": 2633.8782, "encoder_q-layer.1": 3135.1836, "encoder_q-layer.10": 2753.9702, "encoder_q-layer.11": 6492.603, "encoder_q-layer.2": 3725.334, "encoder_q-layer.3": 3910.9133, "encoder_q-layer.4": 4272.29, "encoder_q-layer.5": 4055.7588, "encoder_q-layer.6": 3147.6187, "encoder_q-layer.7": 3038.2959, "encoder_q-layer.8": 2960.9624, "encoder_q-layer.9": 2559.4307, "epoch": 0.41, "inbatch_neg_score": 0.3602, "inbatch_pos_score": 0.8857, "learning_rate": 2.0944444444444445e-05, "loss": 3.9956, "norm_diff": 0.1175, "norm_loss": 0.0, "num_token_doc": 66.7703, "num_token_overlap": 11.6927, "num_token_query": 31.9143, "num_token_union": 65.3428, "num_word_context": 202.4188, "num_word_doc": 49.8301, "num_word_query": 23.5777, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5576.8133, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3594, "query_norm": 1.332, "queue_k_norm": 1.4469, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9143, "sent_len_1": 66.7703, "sent_len_max_0": 127.4613, "sent_len_max_1": 188.4812, "stdk": 0.0479, "stdq": 0.0418, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 62300 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 3.9796, "doc_norm": 1.4488, "encoder_q-embeddings": 2426.2922, "encoder_q-layer.0": 1676.8956, "encoder_q-layer.1": 1790.6205, "encoder_q-layer.10": 2461.1062, "encoder_q-layer.11": 6514.5752, "encoder_q-layer.2": 2050.5825, "encoder_q-layer.3": 2156.3787, "encoder_q-layer.4": 2359.6738, "encoder_q-layer.5": 2373.3511, "encoder_q-layer.6": 2536.7507, "encoder_q-layer.7": 2616.8489, "encoder_q-layer.8": 2782.6221, "encoder_q-layer.9": 2484.1582, "epoch": 0.41, "inbatch_neg_score": 0.3642, "inbatch_pos_score": 0.8965, "learning_rate": 2.088888888888889e-05, "loss": 3.9796, "norm_diff": 0.1075, "norm_loss": 0.0, "num_token_doc": 66.7366, "num_token_overlap": 11.672, "num_token_query": 31.918, "num_token_union": 65.3984, "num_word_context": 202.4462, "num_word_doc": 49.8617, "num_word_query": 23.5816, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4315.3593, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3645, "query_norm": 1.3413, "queue_k_norm": 1.4481, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.918, "sent_len_1": 66.7366, "sent_len_max_0": 127.5575, "sent_len_max_1": 188.3638, "stdk": 0.0479, "stdq": 0.042, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 62400 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 3.9993, "doc_norm": 1.4442, "encoder_q-embeddings": 2441.4368, "encoder_q-layer.0": 1713.9479, "encoder_q-layer.1": 1798.882, "encoder_q-layer.10": 2468.1787, "encoder_q-layer.11": 6484.4722, "encoder_q-layer.2": 1967.3195, "encoder_q-layer.3": 2153.0046, "encoder_q-layer.4": 2176.7437, "encoder_q-layer.5": 2188.0945, "encoder_q-layer.6": 2291.2517, "encoder_q-layer.7": 2341.7698, "encoder_q-layer.8": 2696.2278, "encoder_q-layer.9": 2415.3733, "epoch": 0.41, "inbatch_neg_score": 0.3671, "inbatch_pos_score": 0.8989, "learning_rate": 2.0833333333333336e-05, "loss": 3.9993, "norm_diff": 0.0973, "norm_loss": 0.0, "num_token_doc": 66.8328, "num_token_overlap": 11.6091, "num_token_query": 31.7226, "num_token_union": 65.3563, "num_word_context": 202.5288, "num_word_doc": 49.8301, "num_word_query": 23.4204, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4252.4291, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3667, "query_norm": 1.3469, "queue_k_norm": 1.4472, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7226, "sent_len_1": 66.8328, "sent_len_max_0": 127.445, "sent_len_max_1": 190.3025, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 62500 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.9675, "doc_norm": 1.4477, "encoder_q-embeddings": 3976.4985, "encoder_q-layer.0": 2832.0234, "encoder_q-layer.1": 2945.5498, "encoder_q-layer.10": 2617.739, "encoder_q-layer.11": 6204.127, "encoder_q-layer.2": 3568.7603, "encoder_q-layer.3": 3682.8542, "encoder_q-layer.4": 3887.8894, "encoder_q-layer.5": 4279.5444, "encoder_q-layer.6": 3666.4414, "encoder_q-layer.7": 3505.7957, "encoder_q-layer.8": 3032.7361, "encoder_q-layer.9": 2482.3049, "epoch": 0.41, "inbatch_neg_score": 0.3673, "inbatch_pos_score": 0.8994, "learning_rate": 2.077777777777778e-05, "loss": 3.9675, "norm_diff": 0.0979, "norm_loss": 0.0, "num_token_doc": 66.7962, "num_token_overlap": 11.665, "num_token_query": 31.9328, "num_token_union": 65.399, "num_word_context": 202.4919, "num_word_doc": 49.8625, "num_word_query": 23.5908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5599.273, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3679, "query_norm": 1.3498, "queue_k_norm": 1.4488, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9328, "sent_len_1": 66.7962, "sent_len_max_0": 127.545, "sent_len_max_1": 189.6937, "stdk": 0.0478, "stdq": 0.0423, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 62600 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9864, "doc_norm": 1.4434, "encoder_q-embeddings": 2257.1309, "encoder_q-layer.0": 1498.6514, "encoder_q-layer.1": 1589.0228, "encoder_q-layer.10": 2490.6765, "encoder_q-layer.11": 6290.1978, "encoder_q-layer.2": 1778.0122, "encoder_q-layer.3": 1888.8589, "encoder_q-layer.4": 1977.1823, "encoder_q-layer.5": 2078.8074, "encoder_q-layer.6": 2262.3748, "encoder_q-layer.7": 2340.835, "encoder_q-layer.8": 2599.7134, "encoder_q-layer.9": 2317.635, "epoch": 0.41, "inbatch_neg_score": 0.3729, "inbatch_pos_score": 0.9175, "learning_rate": 2.0722222222222224e-05, "loss": 3.9864, "norm_diff": 0.0971, "norm_loss": 0.0, "num_token_doc": 66.6644, "num_token_overlap": 11.6554, "num_token_query": 31.8695, "num_token_union": 65.2909, "num_word_context": 202.0539, "num_word_doc": 49.7384, "num_word_query": 23.5442, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4033.3404, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3721, "query_norm": 1.3463, "queue_k_norm": 1.4484, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8695, "sent_len_1": 66.6644, "sent_len_max_0": 127.5425, "sent_len_max_1": 189.2713, "stdk": 0.0476, "stdq": 0.0419, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 62700 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 3.9736, "doc_norm": 1.4537, "encoder_q-embeddings": 2323.5522, "encoder_q-layer.0": 1568.705, "encoder_q-layer.1": 1680.0212, "encoder_q-layer.10": 2629.0518, "encoder_q-layer.11": 6338.5815, "encoder_q-layer.2": 1854.2183, "encoder_q-layer.3": 1948.3584, "encoder_q-layer.4": 2184.251, "encoder_q-layer.5": 2272.1907, "encoder_q-layer.6": 2318.6096, "encoder_q-layer.7": 2454.124, "encoder_q-layer.8": 2777.2913, "encoder_q-layer.9": 2572.2615, "epoch": 0.41, "inbatch_neg_score": 0.3759, "inbatch_pos_score": 0.8994, "learning_rate": 2.0666666666666666e-05, "loss": 3.9736, "norm_diff": 0.0995, "norm_loss": 0.0, "num_token_doc": 66.6707, "num_token_overlap": 11.6701, "num_token_query": 31.8973, "num_token_union": 65.3305, "num_word_context": 202.1784, "num_word_doc": 49.7765, "num_word_query": 23.5621, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4166.9184, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3745, "query_norm": 1.3542, "queue_k_norm": 1.4493, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8973, "sent_len_1": 66.6707, "sent_len_max_0": 127.5187, "sent_len_max_1": 188.2962, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 62800 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.9836, "doc_norm": 1.45, "encoder_q-embeddings": 2515.1289, "encoder_q-layer.0": 1747.0381, "encoder_q-layer.1": 1806.5599, "encoder_q-layer.10": 2346.356, "encoder_q-layer.11": 6018.6089, "encoder_q-layer.2": 2061.0596, "encoder_q-layer.3": 2185.6475, "encoder_q-layer.4": 2377.8755, "encoder_q-layer.5": 2509.7532, "encoder_q-layer.6": 2411.5164, "encoder_q-layer.7": 2286.2781, "encoder_q-layer.8": 2426.374, "encoder_q-layer.9": 2276.8979, "epoch": 0.41, "inbatch_neg_score": 0.3817, "inbatch_pos_score": 0.915, "learning_rate": 2.0611111111111112e-05, "loss": 3.9836, "norm_diff": 0.101, "norm_loss": 0.0, "num_token_doc": 66.8184, "num_token_overlap": 11.6704, "num_token_query": 31.9028, "num_token_union": 65.4241, "num_word_context": 202.3666, "num_word_doc": 49.8838, "num_word_query": 23.5281, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4157.432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.3813, "query_norm": 1.349, "queue_k_norm": 1.4508, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9028, "sent_len_1": 66.8184, "sent_len_max_0": 127.42, "sent_len_max_1": 188.1325, "stdk": 0.0478, "stdq": 0.0417, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 62900 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 3.9678, "doc_norm": 1.4451, "encoder_q-embeddings": 2157.8298, "encoder_q-layer.0": 1413.4028, "encoder_q-layer.1": 1481.2135, "encoder_q-layer.10": 2721.4526, "encoder_q-layer.11": 6267.4731, "encoder_q-layer.2": 1677.3612, "encoder_q-layer.3": 1706.9718, "encoder_q-layer.4": 1828.1761, "encoder_q-layer.5": 1863.8225, "encoder_q-layer.6": 2085.3057, "encoder_q-layer.7": 2164.2122, "encoder_q-layer.8": 2530.1511, "encoder_q-layer.9": 2442.3987, "epoch": 0.41, "inbatch_neg_score": 0.3851, "inbatch_pos_score": 0.9048, "learning_rate": 2.0555555555555555e-05, "loss": 3.9678, "norm_diff": 0.0997, "norm_loss": 0.0, "num_token_doc": 66.7328, "num_token_overlap": 11.6659, "num_token_query": 31.7976, "num_token_union": 65.2939, "num_word_context": 202.2456, "num_word_doc": 49.8411, "num_word_query": 23.5044, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3984.174, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.385, "query_norm": 1.3454, "queue_k_norm": 1.4513, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7976, "sent_len_1": 66.7328, "sent_len_max_0": 127.4875, "sent_len_max_1": 187.535, "stdk": 0.0475, "stdq": 0.0414, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 63000 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.968, "doc_norm": 1.4514, "encoder_q-embeddings": 40867.2852, "encoder_q-layer.0": 29635.5664, "encoder_q-layer.1": 35201.7617, "encoder_q-layer.10": 4950.5898, "encoder_q-layer.11": 12076.8398, "encoder_q-layer.2": 39057.3359, "encoder_q-layer.3": 41195.2852, "encoder_q-layer.4": 46780.6055, "encoder_q-layer.5": 41451.582, "encoder_q-layer.6": 44734.6992, "encoder_q-layer.7": 44712.8359, "encoder_q-layer.8": 29009.0508, "encoder_q-layer.9": 8576.7227, "epoch": 0.41, "inbatch_neg_score": 0.3919, "inbatch_pos_score": 0.9375, "learning_rate": 2.05e-05, "loss": 3.968, "norm_diff": 0.073, "norm_loss": 0.0, "num_token_doc": 66.8557, "num_token_overlap": 11.662, "num_token_query": 31.8822, "num_token_union": 65.3792, "num_word_context": 201.9549, "num_word_doc": 49.8605, "num_word_query": 23.5338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 52510.0492, "preclip_grad_norm_avg": 0.0005, "q@queue_neg_score": 0.3909, "query_norm": 1.3784, "queue_k_norm": 1.452, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8822, "sent_len_1": 66.8557, "sent_len_max_0": 127.78, "sent_len_max_1": 189.22, "stdk": 0.0477, "stdq": 0.0426, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 63100 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.9821, "doc_norm": 1.4558, "encoder_q-embeddings": 4721.3091, "encoder_q-layer.0": 3222.1074, "encoder_q-layer.1": 3420.6909, "encoder_q-layer.10": 5676.624, "encoder_q-layer.11": 12740.1689, "encoder_q-layer.2": 3860.1372, "encoder_q-layer.3": 4189.8203, "encoder_q-layer.4": 4453.1309, "encoder_q-layer.5": 4409.9351, "encoder_q-layer.6": 4588.5498, "encoder_q-layer.7": 4515.8906, "encoder_q-layer.8": 5066.4121, "encoder_q-layer.9": 4832.6079, "epoch": 0.41, "inbatch_neg_score": 0.3928, "inbatch_pos_score": 0.9277, "learning_rate": 2.0444444444444446e-05, "loss": 3.9821, "norm_diff": 0.0758, "norm_loss": 0.0, "num_token_doc": 66.6518, "num_token_overlap": 11.7342, "num_token_query": 32.2038, "num_token_union": 65.427, "num_word_context": 202.4113, "num_word_doc": 49.77, "num_word_query": 23.8082, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8303.5894, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3923, "query_norm": 1.38, "queue_k_norm": 1.4541, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.2038, "sent_len_1": 66.6518, "sent_len_max_0": 127.6975, "sent_len_max_1": 187.17, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 63200 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 3.9622, "doc_norm": 1.458, "encoder_q-embeddings": 4987.4272, "encoder_q-layer.0": 3338.166, "encoder_q-layer.1": 3562.0818, "encoder_q-layer.10": 5559.5913, "encoder_q-layer.11": 12330.0391, "encoder_q-layer.2": 4040.1304, "encoder_q-layer.3": 3988.5884, "encoder_q-layer.4": 4124.4922, "encoder_q-layer.5": 4068.113, "encoder_q-layer.6": 4451.8457, "encoder_q-layer.7": 4597.5967, "encoder_q-layer.8": 5380.6304, "encoder_q-layer.9": 4932.4878, "epoch": 0.41, "inbatch_neg_score": 0.3977, "inbatch_pos_score": 0.9409, "learning_rate": 2.0388888888888892e-05, "loss": 3.9622, "norm_diff": 0.0799, "norm_loss": 0.0, "num_token_doc": 66.7592, "num_token_overlap": 11.6744, "num_token_query": 31.9271, "num_token_union": 65.3795, "num_word_context": 202.2, "num_word_doc": 49.8374, "num_word_query": 23.598, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8437.6311, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.3975, "query_norm": 1.3781, "queue_k_norm": 1.4548, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9271, "sent_len_1": 66.7592, "sent_len_max_0": 127.5588, "sent_len_max_1": 188.2675, "stdk": 0.0479, "stdq": 0.0425, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 63300 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9957, "doc_norm": 1.4531, "encoder_q-embeddings": 11775.9033, "encoder_q-layer.0": 7964.7441, "encoder_q-layer.1": 8800.2324, "encoder_q-layer.10": 4894.3779, "encoder_q-layer.11": 11854.6855, "encoder_q-layer.2": 10395.1426, "encoder_q-layer.3": 10786.6152, "encoder_q-layer.4": 11822.4463, "encoder_q-layer.5": 11471.8682, "encoder_q-layer.6": 11255.7852, "encoder_q-layer.7": 9010.6787, "encoder_q-layer.8": 8037.3164, "encoder_q-layer.9": 4851.0356, "epoch": 0.41, "inbatch_neg_score": 0.401, "inbatch_pos_score": 0.9351, "learning_rate": 2.0333333333333334e-05, "loss": 3.9957, "norm_diff": 0.094, "norm_loss": 0.0, "num_token_doc": 66.7292, "num_token_overlap": 11.681, "num_token_query": 31.9637, "num_token_union": 65.3411, "num_word_context": 202.099, "num_word_doc": 49.7817, "num_word_query": 23.611, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14760.3559, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4009, "query_norm": 1.3591, "queue_k_norm": 1.4545, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9637, "sent_len_1": 66.7292, "sent_len_max_0": 127.4513, "sent_len_max_1": 191.0637, "stdk": 0.0477, "stdq": 0.0418, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 63400 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.98, "doc_norm": 1.4567, "encoder_q-embeddings": 4343.3955, "encoder_q-layer.0": 2859.6338, "encoder_q-layer.1": 2984.9084, "encoder_q-layer.10": 4909.8828, "encoder_q-layer.11": 12061.9863, "encoder_q-layer.2": 3432.8352, "encoder_q-layer.3": 3500.9458, "encoder_q-layer.4": 3728.854, "encoder_q-layer.5": 3691.8411, "encoder_q-layer.6": 4075.802, "encoder_q-layer.7": 4557.3145, "encoder_q-layer.8": 4909.1313, "encoder_q-layer.9": 4551.5029, "epoch": 0.41, "inbatch_neg_score": 0.4047, "inbatch_pos_score": 0.9517, "learning_rate": 2.027777777777778e-05, "loss": 3.98, "norm_diff": 0.0854, "norm_loss": 0.0, "num_token_doc": 66.8607, "num_token_overlap": 11.69, "num_token_query": 31.8732, "num_token_union": 65.4247, "num_word_context": 202.2667, "num_word_doc": 49.9466, "num_word_query": 23.5423, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7849.1098, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4043, "query_norm": 1.3713, "queue_k_norm": 1.4588, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8732, "sent_len_1": 66.8607, "sent_len_max_0": 127.63, "sent_len_max_1": 190.0462, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 63500 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.9533, "doc_norm": 1.4616, "encoder_q-embeddings": 6392.8491, "encoder_q-layer.0": 4239.272, "encoder_q-layer.1": 4344.686, "encoder_q-layer.10": 4720.4141, "encoder_q-layer.11": 12709.5986, "encoder_q-layer.2": 4122.6655, "encoder_q-layer.3": 3959.145, "encoder_q-layer.4": 4168.4595, "encoder_q-layer.5": 4199.5615, "encoder_q-layer.6": 4376.0269, "encoder_q-layer.7": 4495.7964, "encoder_q-layer.8": 5272.416, "encoder_q-layer.9": 4792.7075, "epoch": 0.41, "inbatch_neg_score": 0.408, "inbatch_pos_score": 0.9688, "learning_rate": 2.0222222222222222e-05, "loss": 3.9533, "norm_diff": 0.0861, "norm_loss": 0.0, "num_token_doc": 66.6448, "num_token_overlap": 11.7186, "num_token_query": 31.9176, "num_token_union": 65.2658, "num_word_context": 202.075, "num_word_doc": 49.7231, "num_word_query": 23.559, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8753.2515, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.408, "query_norm": 1.3755, "queue_k_norm": 1.4599, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9176, "sent_len_1": 66.6448, "sent_len_max_0": 127.4188, "sent_len_max_1": 190.6937, "stdk": 0.048, "stdq": 0.0428, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 63600 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 3.9614, "doc_norm": 1.4656, "encoder_q-embeddings": 46132.4141, "encoder_q-layer.0": 30570.0625, "encoder_q-layer.1": 33731.4297, "encoder_q-layer.10": 5252.084, "encoder_q-layer.11": 12983.9561, "encoder_q-layer.2": 36742.6523, "encoder_q-layer.3": 37945.1445, "encoder_q-layer.4": 38934.2344, "encoder_q-layer.5": 45161.2812, "encoder_q-layer.6": 45133.125, "encoder_q-layer.7": 46432.6055, "encoder_q-layer.8": 31616.3613, "encoder_q-layer.9": 8003.4907, "epoch": 0.41, "inbatch_neg_score": 0.4107, "inbatch_pos_score": 0.9414, "learning_rate": 2.0166666666666668e-05, "loss": 3.9614, "norm_diff": 0.1108, "norm_loss": 0.0, "num_token_doc": 66.7688, "num_token_overlap": 11.7167, "num_token_query": 31.9414, "num_token_union": 65.3061, "num_word_context": 202.152, "num_word_doc": 49.7828, "num_word_query": 23.5892, "postclip_grad_norm": 1.0, "preclip_grad_norm": 54078.9011, "preclip_grad_norm_avg": 0.0005, "q@queue_neg_score": 0.4114, "query_norm": 1.3548, "queue_k_norm": 1.4607, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9414, "sent_len_1": 66.7688, "sent_len_max_0": 127.4062, "sent_len_max_1": 188.9338, "stdk": 0.0481, "stdq": 0.0418, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 63700 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.9923, "doc_norm": 1.4516, "encoder_q-embeddings": 5815.8779, "encoder_q-layer.0": 3936.866, "encoder_q-layer.1": 4442.9175, "encoder_q-layer.10": 4943.6763, "encoder_q-layer.11": 12745.7432, "encoder_q-layer.2": 5111.8794, "encoder_q-layer.3": 5663.8101, "encoder_q-layer.4": 6291.8345, "encoder_q-layer.5": 6453.8916, "encoder_q-layer.6": 6547.894, "encoder_q-layer.7": 6715.1133, "encoder_q-layer.8": 5573.4849, "encoder_q-layer.9": 4563.605, "epoch": 0.42, "inbatch_neg_score": 0.4125, "inbatch_pos_score": 0.9287, "learning_rate": 2.011111111111111e-05, "loss": 3.9923, "norm_diff": 0.1135, "norm_loss": 0.0, "num_token_doc": 66.583, "num_token_overlap": 11.7242, "num_token_query": 32.0963, "num_token_union": 65.3699, "num_word_context": 202.3597, "num_word_doc": 49.6708, "num_word_query": 23.7329, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9651.0012, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4119, "query_norm": 1.3381, "queue_k_norm": 1.4615, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0963, "sent_len_1": 66.583, "sent_len_max_0": 127.4737, "sent_len_max_1": 188.78, "stdk": 0.0475, "stdq": 0.0413, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 63800 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.9671, "doc_norm": 1.4605, "encoder_q-embeddings": 5191.2886, "encoder_q-layer.0": 3448.3142, "encoder_q-layer.1": 3525.9998, "encoder_q-layer.10": 4921.4658, "encoder_q-layer.11": 12281.998, "encoder_q-layer.2": 4152.5791, "encoder_q-layer.3": 4243.4854, "encoder_q-layer.4": 4276.4014, "encoder_q-layer.5": 4318.4375, "encoder_q-layer.6": 4692.0049, "encoder_q-layer.7": 4539.6167, "encoder_q-layer.8": 5191.6841, "encoder_q-layer.9": 4648.3916, "epoch": 0.42, "inbatch_neg_score": 0.4122, "inbatch_pos_score": 0.9712, "learning_rate": 2.0055555555555556e-05, "loss": 3.9671, "norm_diff": 0.0973, "norm_loss": 0.0, "num_token_doc": 66.7026, "num_token_overlap": 11.6863, "num_token_query": 31.951, "num_token_union": 65.3354, "num_word_context": 202.5457, "num_word_doc": 49.7521, "num_word_query": 23.5979, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8390.0547, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4133, "query_norm": 1.3632, "queue_k_norm": 1.4619, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.951, "sent_len_1": 66.7026, "sent_len_max_0": 127.4562, "sent_len_max_1": 190.5412, "stdk": 0.0478, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 63900 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9871, "doc_norm": 1.4627, "encoder_q-embeddings": 5957.2661, "encoder_q-layer.0": 3906.0142, "encoder_q-layer.1": 4548.6709, "encoder_q-layer.10": 5368.105, "encoder_q-layer.11": 13054.1396, "encoder_q-layer.2": 5163.8003, "encoder_q-layer.3": 5198.8711, "encoder_q-layer.4": 5463.7832, "encoder_q-layer.5": 5237.7583, "encoder_q-layer.6": 5164.0195, "encoder_q-layer.7": 5439.7266, "encoder_q-layer.8": 5796.2285, "encoder_q-layer.9": 5048.1006, "epoch": 0.42, "inbatch_neg_score": 0.4159, "inbatch_pos_score": 0.9521, "learning_rate": 2e-05, "loss": 3.9871, "norm_diff": 0.1212, "norm_loss": 0.0, "num_token_doc": 66.6954, "num_token_overlap": 11.6962, "num_token_query": 31.8775, "num_token_union": 65.2628, "num_word_context": 202.441, "num_word_doc": 49.7568, "num_word_query": 23.5388, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9318.0537, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.415, "query_norm": 1.3415, "queue_k_norm": 1.4625, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8775, "sent_len_1": 66.6954, "sent_len_max_0": 127.5775, "sent_len_max_1": 189.8088, "stdk": 0.0479, "stdq": 0.0415, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 64000 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9778, "doc_norm": 1.4588, "encoder_q-embeddings": 3543.1135, "encoder_q-layer.0": 2735.0342, "encoder_q-layer.1": 3138.4873, "encoder_q-layer.10": 2434.0845, "encoder_q-layer.11": 6520.9258, "encoder_q-layer.2": 3124.4839, "encoder_q-layer.3": 3248.1587, "encoder_q-layer.4": 3251.9148, "encoder_q-layer.5": 3172.4692, "encoder_q-layer.6": 3156.0444, "encoder_q-layer.7": 2902.4109, "encoder_q-layer.8": 2904.0601, "encoder_q-layer.9": 2459.6077, "epoch": 0.42, "inbatch_neg_score": 0.4143, "inbatch_pos_score": 0.96, "learning_rate": 1.9944444444444447e-05, "loss": 3.9778, "norm_diff": 0.1017, "norm_loss": 0.0, "num_token_doc": 66.8965, "num_token_overlap": 11.7146, "num_token_query": 31.9598, "num_token_union": 65.4362, "num_word_context": 202.3686, "num_word_doc": 49.9428, "num_word_query": 23.6226, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5179.8955, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4141, "query_norm": 1.3571, "queue_k_norm": 1.4656, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9598, "sent_len_1": 66.8965, "sent_len_max_0": 127.5525, "sent_len_max_1": 189.0, "stdk": 0.0477, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 64100 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9779, "doc_norm": 1.4611, "encoder_q-embeddings": 2482.0347, "encoder_q-layer.0": 1668.8734, "encoder_q-layer.1": 1805.0273, "encoder_q-layer.10": 2625.7817, "encoder_q-layer.11": 6782.5234, "encoder_q-layer.2": 2056.9705, "encoder_q-layer.3": 2131.8127, "encoder_q-layer.4": 2277.1394, "encoder_q-layer.5": 2337.6262, "encoder_q-layer.6": 2265.1304, "encoder_q-layer.7": 2611.4661, "encoder_q-layer.8": 2948.0747, "encoder_q-layer.9": 2513.1743, "epoch": 0.42, "inbatch_neg_score": 0.4131, "inbatch_pos_score": 0.9551, "learning_rate": 1.988888888888889e-05, "loss": 3.9779, "norm_diff": 0.117, "norm_loss": 0.0, "num_token_doc": 66.6841, "num_token_overlap": 11.6972, "num_token_query": 31.9663, "num_token_union": 65.3162, "num_word_context": 202.0959, "num_word_doc": 49.7545, "num_word_query": 23.5925, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4409.1193, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4141, "query_norm": 1.3441, "queue_k_norm": 1.4651, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9663, "sent_len_1": 66.6841, "sent_len_max_0": 127.5475, "sent_len_max_1": 188.3487, "stdk": 0.0477, "stdq": 0.0418, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 64200 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 3.9861, "doc_norm": 1.4647, "encoder_q-embeddings": 2464.8921, "encoder_q-layer.0": 1672.4808, "encoder_q-layer.1": 1819.0095, "encoder_q-layer.10": 2639.0886, "encoder_q-layer.11": 6504.5317, "encoder_q-layer.2": 2058.5947, "encoder_q-layer.3": 1980.0753, "encoder_q-layer.4": 2130.28, "encoder_q-layer.5": 2081.7634, "encoder_q-layer.6": 2266.0967, "encoder_q-layer.7": 2494.3713, "encoder_q-layer.8": 2797.9231, "encoder_q-layer.9": 2429.2285, "epoch": 0.42, "inbatch_neg_score": 0.4126, "inbatch_pos_score": 0.9429, "learning_rate": 1.9833333333333335e-05, "loss": 3.9861, "norm_diff": 0.1298, "norm_loss": 0.0, "num_token_doc": 66.7485, "num_token_overlap": 11.6869, "num_token_query": 31.9427, "num_token_union": 65.3725, "num_word_context": 202.5746, "num_word_doc": 49.8465, "num_word_query": 23.6102, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4333.7047, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4131, "query_norm": 1.3349, "queue_k_norm": 1.4626, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9427, "sent_len_1": 66.7485, "sent_len_max_0": 127.5, "sent_len_max_1": 187.6788, "stdk": 0.0478, "stdq": 0.0415, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 64300 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.9662, "doc_norm": 1.4722, "encoder_q-embeddings": 2156.0393, "encoder_q-layer.0": 1472.5898, "encoder_q-layer.1": 1611.4198, "encoder_q-layer.10": 2479.574, "encoder_q-layer.11": 6217.0439, "encoder_q-layer.2": 1761.2449, "encoder_q-layer.3": 1831.2004, "encoder_q-layer.4": 1782.4233, "encoder_q-layer.5": 1701.1733, "encoder_q-layer.6": 1938.6918, "encoder_q-layer.7": 2249.9719, "encoder_q-layer.8": 2582.1333, "encoder_q-layer.9": 2331.6667, "epoch": 0.42, "inbatch_neg_score": 0.4163, "inbatch_pos_score": 0.9702, "learning_rate": 1.9777777777777778e-05, "loss": 3.9662, "norm_diff": 0.1105, "norm_loss": 0.0, "num_token_doc": 66.7479, "num_token_overlap": 11.6635, "num_token_query": 31.8246, "num_token_union": 65.2964, "num_word_context": 202.2253, "num_word_doc": 49.8311, "num_word_query": 23.5055, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3965.0169, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4167, "query_norm": 1.3617, "queue_k_norm": 1.466, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8246, "sent_len_1": 66.7479, "sent_len_max_0": 127.435, "sent_len_max_1": 190.6962, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 64400 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 3.9809, "doc_norm": 1.4645, "encoder_q-embeddings": 2859.3706, "encoder_q-layer.0": 2018.8422, "encoder_q-layer.1": 2245.3484, "encoder_q-layer.10": 2452.9954, "encoder_q-layer.11": 6504.8271, "encoder_q-layer.2": 2626.074, "encoder_q-layer.3": 2606.5828, "encoder_q-layer.4": 2780.1433, "encoder_q-layer.5": 2924.2363, "encoder_q-layer.6": 2678.1907, "encoder_q-layer.7": 2391.4746, "encoder_q-layer.8": 2583.957, "encoder_q-layer.9": 2389.707, "epoch": 0.42, "inbatch_neg_score": 0.4179, "inbatch_pos_score": 0.9473, "learning_rate": 1.9722222222222224e-05, "loss": 3.9809, "norm_diff": 0.1086, "norm_loss": 0.0, "num_token_doc": 66.9596, "num_token_overlap": 11.6971, "num_token_query": 31.9656, "num_token_union": 65.4961, "num_word_context": 202.6483, "num_word_doc": 49.9252, "num_word_query": 23.6072, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4695.0187, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4185, "query_norm": 1.3559, "queue_k_norm": 1.4684, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9656, "sent_len_1": 66.9596, "sent_len_max_0": 127.5187, "sent_len_max_1": 191.0412, "stdk": 0.0478, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 64500 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.9769, "doc_norm": 1.4695, "encoder_q-embeddings": 27759.9473, "encoder_q-layer.0": 17823.1133, "encoder_q-layer.1": 22338.9883, "encoder_q-layer.10": 2638.1304, "encoder_q-layer.11": 6102.3647, "encoder_q-layer.2": 26167.041, "encoder_q-layer.3": 26106.3906, "encoder_q-layer.4": 31457.8027, "encoder_q-layer.5": 30804.1777, "encoder_q-layer.6": 24371.2578, "encoder_q-layer.7": 17338.7012, "encoder_q-layer.8": 9793.1963, "encoder_q-layer.9": 3018.1074, "epoch": 0.42, "inbatch_neg_score": 0.4229, "inbatch_pos_score": 0.98, "learning_rate": 1.9666666666666666e-05, "loss": 3.9769, "norm_diff": 0.099, "norm_loss": 0.0, "num_token_doc": 66.7451, "num_token_overlap": 11.6776, "num_token_query": 31.8182, "num_token_union": 65.3026, "num_word_context": 202.5905, "num_word_doc": 49.8349, "num_word_query": 23.5021, "postclip_grad_norm": 1.0, "preclip_grad_norm": 32415.9848, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.4221, "query_norm": 1.3705, "queue_k_norm": 1.4696, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8182, "sent_len_1": 66.7451, "sent_len_max_0": 127.575, "sent_len_max_1": 189.5888, "stdk": 0.0479, "stdq": 0.0427, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 64600 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 3.9952, "doc_norm": 1.4755, "encoder_q-embeddings": 2188.5808, "encoder_q-layer.0": 1483.834, "encoder_q-layer.1": 1529.3359, "encoder_q-layer.10": 2415.3142, "encoder_q-layer.11": 6411.4834, "encoder_q-layer.2": 1733.6597, "encoder_q-layer.3": 1744.0457, "encoder_q-layer.4": 1872.825, "encoder_q-layer.5": 1858.4109, "encoder_q-layer.6": 2050.4619, "encoder_q-layer.7": 2270.4465, "encoder_q-layer.8": 2542.3943, "encoder_q-layer.9": 2352.9036, "epoch": 0.42, "inbatch_neg_score": 0.4222, "inbatch_pos_score": 0.9775, "learning_rate": 1.9611111111111115e-05, "loss": 3.9952, "norm_diff": 0.1159, "norm_loss": 0.0, "num_token_doc": 66.843, "num_token_overlap": 11.6524, "num_token_query": 31.7441, "num_token_union": 65.3177, "num_word_context": 202.2748, "num_word_doc": 49.9088, "num_word_query": 23.458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4102.0846, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4229, "query_norm": 1.3596, "queue_k_norm": 1.4696, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7441, "sent_len_1": 66.843, "sent_len_max_0": 127.455, "sent_len_max_1": 188.9663, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 64700 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.9697, "doc_norm": 1.4709, "encoder_q-embeddings": 3217.3552, "encoder_q-layer.0": 2244.7656, "encoder_q-layer.1": 2536.1733, "encoder_q-layer.10": 2287.6936, "encoder_q-layer.11": 5901.1914, "encoder_q-layer.2": 2756.2605, "encoder_q-layer.3": 2734.1409, "encoder_q-layer.4": 2797.45, "encoder_q-layer.5": 2741.6338, "encoder_q-layer.6": 2737.0974, "encoder_q-layer.7": 2628.8081, "encoder_q-layer.8": 2531.7986, "encoder_q-layer.9": 2295.3367, "epoch": 0.42, "inbatch_neg_score": 0.4219, "inbatch_pos_score": 0.9897, "learning_rate": 1.9555555555555557e-05, "loss": 3.9697, "norm_diff": 0.1058, "norm_loss": 0.0, "num_token_doc": 66.9709, "num_token_overlap": 11.6508, "num_token_query": 31.7919, "num_token_union": 65.4426, "num_word_context": 202.5145, "num_word_doc": 49.9439, "num_word_query": 23.4848, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4656.3449, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4216, "query_norm": 1.3651, "queue_k_norm": 1.4703, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7919, "sent_len_1": 66.9709, "sent_len_max_0": 127.4275, "sent_len_max_1": 191.5687, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 64800 }, { "accuracy": 43.0664, "active_queue_size": 16384.0, "cl_loss": 3.9845, "doc_norm": 1.4609, "encoder_q-embeddings": 2399.8086, "encoder_q-layer.0": 1605.5366, "encoder_q-layer.1": 1667.0972, "encoder_q-layer.10": 2510.9387, "encoder_q-layer.11": 6445.1602, "encoder_q-layer.2": 1937.8013, "encoder_q-layer.3": 2090.0989, "encoder_q-layer.4": 2180.7927, "encoder_q-layer.5": 2133.0325, "encoder_q-layer.6": 2348.8755, "encoder_q-layer.7": 2333.5706, "encoder_q-layer.8": 2631.6694, "encoder_q-layer.9": 2517.6812, "epoch": 0.42, "inbatch_neg_score": 0.4249, "inbatch_pos_score": 0.9595, "learning_rate": 1.9500000000000003e-05, "loss": 3.9845, "norm_diff": 0.0989, "norm_loss": 0.0, "num_token_doc": 66.8643, "num_token_overlap": 11.6605, "num_token_query": 31.8775, "num_token_union": 65.4162, "num_word_context": 202.3514, "num_word_doc": 49.9031, "num_word_query": 23.5336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4281.744, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4233, "query_norm": 1.362, "queue_k_norm": 1.467, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8775, "sent_len_1": 66.8643, "sent_len_max_0": 127.575, "sent_len_max_1": 189.2837, "stdk": 0.0475, "stdq": 0.0423, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 64900 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.9747, "doc_norm": 1.4691, "encoder_q-embeddings": 3180.0452, "encoder_q-layer.0": 2224.9832, "encoder_q-layer.1": 2512.313, "encoder_q-layer.10": 2705.106, "encoder_q-layer.11": 6240.6582, "encoder_q-layer.2": 3037.4172, "encoder_q-layer.3": 3076.3567, "encoder_q-layer.4": 3042.9124, "encoder_q-layer.5": 3178.6797, "encoder_q-layer.6": 3128.8535, "encoder_q-layer.7": 2954.6528, "encoder_q-layer.8": 3176.1807, "encoder_q-layer.9": 2631.6699, "epoch": 0.42, "inbatch_neg_score": 0.4265, "inbatch_pos_score": 0.9492, "learning_rate": 1.9444444444444445e-05, "loss": 3.9747, "norm_diff": 0.1215, "norm_loss": 0.0, "num_token_doc": 66.8523, "num_token_overlap": 11.6707, "num_token_query": 31.8083, "num_token_union": 65.3341, "num_word_context": 202.4657, "num_word_doc": 49.8653, "num_word_query": 23.4966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4962.9769, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4258, "query_norm": 1.3475, "queue_k_norm": 1.4714, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8083, "sent_len_1": 66.8523, "sent_len_max_0": 127.4363, "sent_len_max_1": 189.4675, "stdk": 0.0478, "stdq": 0.0416, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 65000 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 3.9783, "doc_norm": 1.4708, "encoder_q-embeddings": 6706.0225, "encoder_q-layer.0": 5266.6118, "encoder_q-layer.1": 5944.9824, "encoder_q-layer.10": 1349.1455, "encoder_q-layer.11": 3176.1304, "encoder_q-layer.2": 6708.5024, "encoder_q-layer.3": 6626.2861, "encoder_q-layer.4": 6151.8672, "encoder_q-layer.5": 6877.3833, "encoder_q-layer.6": 6707.7959, "encoder_q-layer.7": 4689.5884, "encoder_q-layer.8": 2954.1494, "encoder_q-layer.9": 1511.6318, "epoch": 0.42, "inbatch_neg_score": 0.423, "inbatch_pos_score": 0.9668, "learning_rate": 1.938888888888889e-05, "loss": 3.9783, "norm_diff": 0.1073, "norm_loss": 0.0, "num_token_doc": 66.7966, "num_token_overlap": 11.6908, "num_token_query": 31.8857, "num_token_union": 65.3397, "num_word_context": 202.3823, "num_word_doc": 49.7923, "num_word_query": 23.5358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8299.57, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4224, "query_norm": 1.3635, "queue_k_norm": 1.4705, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8857, "sent_len_1": 66.7966, "sent_len_max_0": 127.5975, "sent_len_max_1": 191.6113, "stdk": 0.0479, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 65100 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.9816, "doc_norm": 1.4722, "encoder_q-embeddings": 2368.0615, "encoder_q-layer.0": 1576.7653, "encoder_q-layer.1": 1774.9802, "encoder_q-layer.10": 1260.7764, "encoder_q-layer.11": 3166.5156, "encoder_q-layer.2": 2107.5916, "encoder_q-layer.3": 2193.1184, "encoder_q-layer.4": 2115.2588, "encoder_q-layer.5": 1940.924, "encoder_q-layer.6": 2047.8141, "encoder_q-layer.7": 1888.0807, "encoder_q-layer.8": 1805.5576, "encoder_q-layer.9": 1401.9739, "epoch": 0.42, "inbatch_neg_score": 0.4181, "inbatch_pos_score": 0.9688, "learning_rate": 1.9333333333333333e-05, "loss": 3.9816, "norm_diff": 0.1113, "norm_loss": 0.0, "num_token_doc": 66.7597, "num_token_overlap": 11.6802, "num_token_query": 31.9514, "num_token_union": 65.3811, "num_word_context": 202.2716, "num_word_doc": 49.813, "num_word_query": 23.6048, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3113.2439, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4187, "query_norm": 1.3609, "queue_k_norm": 1.4725, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9514, "sent_len_1": 66.7597, "sent_len_max_0": 127.4988, "sent_len_max_1": 190.2075, "stdk": 0.0479, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 65200 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.9748, "doc_norm": 1.4671, "encoder_q-embeddings": 1909.8337, "encoder_q-layer.0": 1377.3599, "encoder_q-layer.1": 1530.8999, "encoder_q-layer.10": 1303.7542, "encoder_q-layer.11": 3046.1816, "encoder_q-layer.2": 1678.2957, "encoder_q-layer.3": 1759.5338, "encoder_q-layer.4": 1818.3826, "encoder_q-layer.5": 1780.3562, "encoder_q-layer.6": 1786.0646, "encoder_q-layer.7": 1711.2976, "encoder_q-layer.8": 1719.9528, "encoder_q-layer.9": 1269.5422, "epoch": 0.43, "inbatch_neg_score": 0.4204, "inbatch_pos_score": 0.9404, "learning_rate": 1.927777777777778e-05, "loss": 3.9748, "norm_diff": 0.1331, "norm_loss": 0.0, "num_token_doc": 66.7557, "num_token_overlap": 11.6697, "num_token_query": 31.9065, "num_token_union": 65.3246, "num_word_context": 202.1297, "num_word_doc": 49.8145, "num_word_query": 23.5783, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2721.4574, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4199, "query_norm": 1.334, "queue_k_norm": 1.4717, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9065, "sent_len_1": 66.7557, "sent_len_max_0": 127.5575, "sent_len_max_1": 189.1413, "stdk": 0.0477, "stdq": 0.0413, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 65300 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.9952, "doc_norm": 1.4641, "encoder_q-embeddings": 4246.5479, "encoder_q-layer.0": 2967.3359, "encoder_q-layer.1": 2938.4592, "encoder_q-layer.10": 619.9778, "encoder_q-layer.11": 1571.1835, "encoder_q-layer.2": 2929.7688, "encoder_q-layer.3": 2767.645, "encoder_q-layer.4": 2664.8049, "encoder_q-layer.5": 2339.5813, "encoder_q-layer.6": 1982.2974, "encoder_q-layer.7": 1572.286, "encoder_q-layer.8": 1093.4946, "encoder_q-layer.9": 633.6435, "epoch": 0.43, "inbatch_neg_score": 0.4218, "inbatch_pos_score": 0.9536, "learning_rate": 1.922222222222222e-05, "loss": 3.9952, "norm_diff": 0.1133, "norm_loss": 0.0, "num_token_doc": 66.5854, "num_token_overlap": 11.6405, "num_token_query": 31.8737, "num_token_union": 65.2229, "num_word_context": 202.2046, "num_word_doc": 49.6936, "num_word_query": 23.5486, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3793.5702, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4214, "query_norm": 1.3508, "queue_k_norm": 1.4717, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8737, "sent_len_1": 66.5854, "sent_len_max_0": 127.6238, "sent_len_max_1": 190.7287, "stdk": 0.0476, "stdq": 0.042, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 65400 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.9851, "doc_norm": 1.4707, "encoder_q-embeddings": 511.5005, "encoder_q-layer.0": 326.7552, "encoder_q-layer.1": 332.3773, "encoder_q-layer.10": 668.4126, "encoder_q-layer.11": 1602.1808, "encoder_q-layer.2": 374.5686, "encoder_q-layer.3": 391.8653, "encoder_q-layer.4": 423.2795, "encoder_q-layer.5": 414.8949, "encoder_q-layer.6": 457.3313, "encoder_q-layer.7": 512.7568, "encoder_q-layer.8": 648.3163, "encoder_q-layer.9": 573.8411, "epoch": 0.43, "inbatch_neg_score": 0.4216, "inbatch_pos_score": 0.9775, "learning_rate": 1.9166666666666667e-05, "loss": 3.9851, "norm_diff": 0.1169, "norm_loss": 0.0, "num_token_doc": 66.6776, "num_token_overlap": 11.6501, "num_token_query": 31.8181, "num_token_union": 65.308, "num_word_context": 202.3532, "num_word_doc": 49.8039, "num_word_query": 23.4921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 998.2115, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4211, "query_norm": 1.3538, "queue_k_norm": 1.4716, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8181, "sent_len_1": 66.6776, "sent_len_max_0": 127.485, "sent_len_max_1": 188.1562, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 65500 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9892, "doc_norm": 1.4678, "encoder_q-embeddings": 624.0338, "encoder_q-layer.0": 411.3408, "encoder_q-layer.1": 439.9348, "encoder_q-layer.10": 587.1807, "encoder_q-layer.11": 1472.0835, "encoder_q-layer.2": 493.6647, "encoder_q-layer.3": 513.3973, "encoder_q-layer.4": 540.5045, "encoder_q-layer.5": 534.2025, "encoder_q-layer.6": 557.6328, "encoder_q-layer.7": 615.7608, "encoder_q-layer.8": 651.91, "encoder_q-layer.9": 588.3286, "epoch": 0.43, "inbatch_neg_score": 0.4227, "inbatch_pos_score": 0.9624, "learning_rate": 1.9111111111111113e-05, "loss": 3.9892, "norm_diff": 0.1058, "norm_loss": 0.0, "num_token_doc": 66.7243, "num_token_overlap": 11.6653, "num_token_query": 31.9535, "num_token_union": 65.3754, "num_word_context": 202.1661, "num_word_doc": 49.8218, "num_word_query": 23.6091, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1022.4092, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4219, "query_norm": 1.362, "queue_k_norm": 1.4731, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9535, "sent_len_1": 66.7243, "sent_len_max_0": 127.4912, "sent_len_max_1": 187.4187, "stdk": 0.0477, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 65600 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.967, "doc_norm": 1.4739, "encoder_q-embeddings": 2267.5908, "encoder_q-layer.0": 1657.4683, "encoder_q-layer.1": 1895.743, "encoder_q-layer.10": 631.7891, "encoder_q-layer.11": 1496.796, "encoder_q-layer.2": 2064.6917, "encoder_q-layer.3": 2256.0444, "encoder_q-layer.4": 2057.5054, "encoder_q-layer.5": 1972.9695, "encoder_q-layer.6": 1841.5386, "encoder_q-layer.7": 1503.3491, "encoder_q-layer.8": 1287.0421, "encoder_q-layer.9": 780.4842, "epoch": 0.43, "inbatch_neg_score": 0.423, "inbatch_pos_score": 0.9912, "learning_rate": 1.905555555555556e-05, "loss": 3.967, "norm_diff": 0.1005, "norm_loss": 0.0, "num_token_doc": 66.9644, "num_token_overlap": 11.6807, "num_token_query": 31.8052, "num_token_union": 65.3941, "num_word_context": 202.4485, "num_word_doc": 49.9523, "num_word_query": 23.4921, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2683.5712, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4231, "query_norm": 1.3734, "queue_k_norm": 1.4728, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8052, "sent_len_1": 66.9644, "sent_len_max_0": 127.5175, "sent_len_max_1": 188.625, "stdk": 0.0479, "stdq": 0.0428, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 65700 }, { "accuracy": 42.6758, "active_queue_size": 16384.0, "cl_loss": 3.966, "doc_norm": 1.4824, "encoder_q-embeddings": 566.1747, "encoder_q-layer.0": 374.3916, "encoder_q-layer.1": 380.9471, "encoder_q-layer.10": 621.8271, "encoder_q-layer.11": 1627.1564, "encoder_q-layer.2": 418.5757, "encoder_q-layer.3": 420.1186, "encoder_q-layer.4": 439.3414, "encoder_q-layer.5": 440.3241, "encoder_q-layer.6": 517.0699, "encoder_q-layer.7": 571.4191, "encoder_q-layer.8": 679.7258, "encoder_q-layer.9": 614.0695, "epoch": 0.43, "inbatch_neg_score": 0.4232, "inbatch_pos_score": 0.9502, "learning_rate": 1.9e-05, "loss": 3.966, "norm_diff": 0.1233, "norm_loss": 0.0, "num_token_doc": 66.922, "num_token_overlap": 11.6448, "num_token_query": 31.822, "num_token_union": 65.4222, "num_word_context": 202.3002, "num_word_doc": 49.9103, "num_word_query": 23.4927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1036.4384, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4226, "query_norm": 1.3591, "queue_k_norm": 1.4738, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.822, "sent_len_1": 66.922, "sent_len_max_0": 127.3688, "sent_len_max_1": 190.92, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 65800 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.9752, "doc_norm": 1.4761, "encoder_q-embeddings": 663.5356, "encoder_q-layer.0": 468.0499, "encoder_q-layer.1": 486.9662, "encoder_q-layer.10": 699.8185, "encoder_q-layer.11": 1509.3336, "encoder_q-layer.2": 540.4383, "encoder_q-layer.3": 598.2526, "encoder_q-layer.4": 624.6935, "encoder_q-layer.5": 632.6479, "encoder_q-layer.6": 661.3576, "encoder_q-layer.7": 657.6935, "encoder_q-layer.8": 676.5272, "encoder_q-layer.9": 601.2824, "epoch": 0.43, "inbatch_neg_score": 0.4203, "inbatch_pos_score": 0.9668, "learning_rate": 1.8944444444444447e-05, "loss": 3.9752, "norm_diff": 0.133, "norm_loss": 0.0, "num_token_doc": 66.6389, "num_token_overlap": 11.6456, "num_token_query": 32.0158, "num_token_union": 65.3336, "num_word_context": 201.9576, "num_word_doc": 49.6669, "num_word_query": 23.6525, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1088.0311, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4207, "query_norm": 1.3431, "queue_k_norm": 1.4732, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0158, "sent_len_1": 66.6389, "sent_len_max_0": 127.4638, "sent_len_max_1": 190.2625, "stdk": 0.048, "stdq": 0.0417, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 65900 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9801, "doc_norm": 1.4809, "encoder_q-embeddings": 579.1796, "encoder_q-layer.0": 391.4758, "encoder_q-layer.1": 412.3487, "encoder_q-layer.10": 601.9504, "encoder_q-layer.11": 1494.8688, "encoder_q-layer.2": 486.1864, "encoder_q-layer.3": 525.1639, "encoder_q-layer.4": 579.301, "encoder_q-layer.5": 566.7651, "encoder_q-layer.6": 619.7456, "encoder_q-layer.7": 621.5895, "encoder_q-layer.8": 682.6698, "encoder_q-layer.9": 612.2492, "epoch": 0.43, "inbatch_neg_score": 0.4221, "inbatch_pos_score": 0.9663, "learning_rate": 1.888888888888889e-05, "loss": 3.9801, "norm_diff": 0.1246, "norm_loss": 0.0, "num_token_doc": 66.5355, "num_token_overlap": 11.6262, "num_token_query": 31.8844, "num_token_union": 65.2511, "num_word_context": 202.2533, "num_word_doc": 49.6441, "num_word_query": 23.5629, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1032.6936, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4221, "query_norm": 1.3562, "queue_k_norm": 1.473, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8844, "sent_len_1": 66.5355, "sent_len_max_0": 127.5037, "sent_len_max_1": 188.7363, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 66000 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.9628, "doc_norm": 1.4748, "encoder_q-embeddings": 1203.7391, "encoder_q-layer.0": 900.606, "encoder_q-layer.1": 953.0411, "encoder_q-layer.10": 595.2462, "encoder_q-layer.11": 1540.4089, "encoder_q-layer.2": 1125.3878, "encoder_q-layer.3": 1102.6292, "encoder_q-layer.4": 1231.0449, "encoder_q-layer.5": 1270.806, "encoder_q-layer.6": 1266.9484, "encoder_q-layer.7": 1500.1776, "encoder_q-layer.8": 1004.3154, "encoder_q-layer.9": 621.9078, "epoch": 0.43, "inbatch_neg_score": 0.427, "inbatch_pos_score": 0.9546, "learning_rate": 1.8833333333333335e-05, "loss": 3.9628, "norm_diff": 0.1139, "norm_loss": 0.0, "num_token_doc": 66.7344, "num_token_overlap": 11.6146, "num_token_query": 31.7868, "num_token_union": 65.2788, "num_word_context": 201.933, "num_word_doc": 49.7536, "num_word_query": 23.474, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1691.1237, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4255, "query_norm": 1.3609, "queue_k_norm": 1.4739, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7868, "sent_len_1": 66.7344, "sent_len_max_0": 127.515, "sent_len_max_1": 189.5513, "stdk": 0.0479, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 66100 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.9573, "doc_norm": 1.4689, "encoder_q-embeddings": 738.8561, "encoder_q-layer.0": 498.4727, "encoder_q-layer.1": 500.4504, "encoder_q-layer.10": 654.7184, "encoder_q-layer.11": 1583.6002, "encoder_q-layer.2": 538.4854, "encoder_q-layer.3": 554.2989, "encoder_q-layer.4": 602.9965, "encoder_q-layer.5": 559.444, "encoder_q-layer.6": 600.219, "encoder_q-layer.7": 643.3677, "encoder_q-layer.8": 702.3409, "encoder_q-layer.9": 609.256, "epoch": 0.43, "inbatch_neg_score": 0.4243, "inbatch_pos_score": 0.9639, "learning_rate": 1.8777777777777777e-05, "loss": 3.9573, "norm_diff": 0.1074, "norm_loss": 0.0, "num_token_doc": 66.8757, "num_token_overlap": 11.7201, "num_token_query": 32.0102, "num_token_union": 65.472, "num_word_context": 202.5595, "num_word_doc": 49.8989, "num_word_query": 23.6589, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1126.5664, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4238, "query_norm": 1.3615, "queue_k_norm": 1.4737, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0102, "sent_len_1": 66.8757, "sent_len_max_0": 127.5913, "sent_len_max_1": 189.0962, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 66200 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.959, "doc_norm": 1.4763, "encoder_q-embeddings": 1422.2378, "encoder_q-layer.0": 917.04, "encoder_q-layer.1": 978.8972, "encoder_q-layer.10": 617.9372, "encoder_q-layer.11": 1543.6167, "encoder_q-layer.2": 1092.5966, "encoder_q-layer.3": 1191.1244, "encoder_q-layer.4": 1226.6497, "encoder_q-layer.5": 1099.0901, "encoder_q-layer.6": 1086.8508, "encoder_q-layer.7": 1124.5367, "encoder_q-layer.8": 1113.0876, "encoder_q-layer.9": 761.0326, "epoch": 0.43, "inbatch_neg_score": 0.4263, "inbatch_pos_score": 0.9692, "learning_rate": 1.8722222222222223e-05, "loss": 3.959, "norm_diff": 0.1144, "norm_loss": 0.0, "num_token_doc": 67.0241, "num_token_overlap": 11.6546, "num_token_query": 31.8687, "num_token_union": 65.5376, "num_word_context": 202.6045, "num_word_doc": 50.0536, "num_word_query": 23.5458, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1736.6452, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4263, "query_norm": 1.3619, "queue_k_norm": 1.4748, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8687, "sent_len_1": 67.0241, "sent_len_max_0": 127.6038, "sent_len_max_1": 187.9, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 66300 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.9575, "doc_norm": 1.4815, "encoder_q-embeddings": 847.1161, "encoder_q-layer.0": 595.601, "encoder_q-layer.1": 647.1406, "encoder_q-layer.10": 638.4091, "encoder_q-layer.11": 1603.9637, "encoder_q-layer.2": 761.9695, "encoder_q-layer.3": 720.7752, "encoder_q-layer.4": 686.238, "encoder_q-layer.5": 692.8351, "encoder_q-layer.6": 724.2391, "encoder_q-layer.7": 667.0369, "encoder_q-layer.8": 676.3047, "encoder_q-layer.9": 624.2708, "epoch": 0.43, "inbatch_neg_score": 0.4293, "inbatch_pos_score": 0.9688, "learning_rate": 1.866666666666667e-05, "loss": 3.9575, "norm_diff": 0.1159, "norm_loss": 0.0, "num_token_doc": 66.8479, "num_token_overlap": 11.6419, "num_token_query": 31.7141, "num_token_union": 65.3131, "num_word_context": 202.1768, "num_word_doc": 49.8319, "num_word_query": 23.4156, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1224.4699, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4292, "query_norm": 1.3656, "queue_k_norm": 1.4756, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7141, "sent_len_1": 66.8479, "sent_len_max_0": 127.365, "sent_len_max_1": 189.585, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 66400 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.9707, "doc_norm": 1.4715, "encoder_q-embeddings": 647.6532, "encoder_q-layer.0": 434.4764, "encoder_q-layer.1": 453.6535, "encoder_q-layer.10": 640.1327, "encoder_q-layer.11": 1559.4237, "encoder_q-layer.2": 516.1666, "encoder_q-layer.3": 541.2253, "encoder_q-layer.4": 599.3945, "encoder_q-layer.5": 650.4261, "encoder_q-layer.6": 706.2904, "encoder_q-layer.7": 767.4692, "encoder_q-layer.8": 809.5767, "encoder_q-layer.9": 679.2241, "epoch": 0.43, "inbatch_neg_score": 0.4297, "inbatch_pos_score": 0.9688, "learning_rate": 1.861111111111111e-05, "loss": 3.9707, "norm_diff": 0.1138, "norm_loss": 0.0, "num_token_doc": 66.6152, "num_token_overlap": 11.6567, "num_token_query": 31.8393, "num_token_union": 65.2448, "num_word_context": 202.098, "num_word_doc": 49.7037, "num_word_query": 23.505, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1140.7277, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4297, "query_norm": 1.3577, "queue_k_norm": 1.472, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8393, "sent_len_1": 66.6152, "sent_len_max_0": 127.545, "sent_len_max_1": 189.5637, "stdk": 0.0478, "stdq": 0.042, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 66500 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.9662, "doc_norm": 1.4805, "encoder_q-embeddings": 3072.9878, "encoder_q-layer.0": 2258.4233, "encoder_q-layer.1": 2904.9268, "encoder_q-layer.10": 651.2697, "encoder_q-layer.11": 1469.2947, "encoder_q-layer.2": 3018.5703, "encoder_q-layer.3": 2948.1865, "encoder_q-layer.4": 2949.3958, "encoder_q-layer.5": 2644.2883, "encoder_q-layer.6": 2699.7703, "encoder_q-layer.7": 2447.0188, "encoder_q-layer.8": 2244.616, "encoder_q-layer.9": 1121.1377, "epoch": 0.43, "inbatch_neg_score": 0.435, "inbatch_pos_score": 0.9922, "learning_rate": 1.8555555555555557e-05, "loss": 3.9662, "norm_diff": 0.1067, "norm_loss": 0.0, "num_token_doc": 66.8995, "num_token_overlap": 11.737, "num_token_query": 32.1005, "num_token_union": 65.515, "num_word_context": 202.5658, "num_word_doc": 49.9159, "num_word_query": 23.7103, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3813.1196, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4346, "query_norm": 1.3737, "queue_k_norm": 1.4746, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.1005, "sent_len_1": 66.8995, "sent_len_max_0": 127.5075, "sent_len_max_1": 189.665, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 66600 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.98, "doc_norm": 1.4811, "encoder_q-embeddings": 1820.1387, "encoder_q-layer.0": 1216.8346, "encoder_q-layer.1": 1511.8472, "encoder_q-layer.10": 683.1402, "encoder_q-layer.11": 1611.6577, "encoder_q-layer.2": 1891.3588, "encoder_q-layer.3": 2012.0245, "encoder_q-layer.4": 2304.6804, "encoder_q-layer.5": 2569.5967, "encoder_q-layer.6": 2825.1663, "encoder_q-layer.7": 2265.8459, "encoder_q-layer.8": 1686.2747, "encoder_q-layer.9": 951.3593, "epoch": 0.43, "inbatch_neg_score": 0.4366, "inbatch_pos_score": 0.9873, "learning_rate": 1.85e-05, "loss": 3.98, "norm_diff": 0.116, "norm_loss": 0.0, "num_token_doc": 66.9725, "num_token_overlap": 11.6755, "num_token_query": 31.8733, "num_token_union": 65.4543, "num_word_context": 202.4141, "num_word_doc": 49.9295, "num_word_query": 23.5381, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2822.0593, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.438, "query_norm": 1.365, "queue_k_norm": 1.4771, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8733, "sent_len_1": 66.9725, "sent_len_max_0": 127.4813, "sent_len_max_1": 192.1662, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 66700 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.9617, "doc_norm": 1.4787, "encoder_q-embeddings": 547.9541, "encoder_q-layer.0": 383.8098, "encoder_q-layer.1": 394.865, "encoder_q-layer.10": 652.2968, "encoder_q-layer.11": 1455.4738, "encoder_q-layer.2": 453.6602, "encoder_q-layer.3": 455.5246, "encoder_q-layer.4": 482.4638, "encoder_q-layer.5": 477.9399, "encoder_q-layer.6": 523.3136, "encoder_q-layer.7": 565.2657, "encoder_q-layer.8": 620.2357, "encoder_q-layer.9": 564.9189, "epoch": 0.43, "inbatch_neg_score": 0.4357, "inbatch_pos_score": 0.9717, "learning_rate": 1.8444444444444445e-05, "loss": 3.9617, "norm_diff": 0.1147, "norm_loss": 0.0, "num_token_doc": 66.7875, "num_token_overlap": 11.6755, "num_token_query": 31.8781, "num_token_union": 65.3415, "num_word_context": 201.9175, "num_word_doc": 49.8157, "num_word_query": 23.5419, "postclip_grad_norm": 1.0, "preclip_grad_norm": 977.2483, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4365, "query_norm": 1.3639, "queue_k_norm": 1.4751, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8781, "sent_len_1": 66.7875, "sent_len_max_0": 127.5738, "sent_len_max_1": 188.3725, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 66800 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.9679, "doc_norm": 1.473, "encoder_q-embeddings": 1425.397, "encoder_q-layer.0": 1116.9176, "encoder_q-layer.1": 1101.551, "encoder_q-layer.10": 650.8125, "encoder_q-layer.11": 1581.0371, "encoder_q-layer.2": 1222.6051, "encoder_q-layer.3": 1449.052, "encoder_q-layer.4": 1453.578, "encoder_q-layer.5": 1458.1135, "encoder_q-layer.6": 1636.9568, "encoder_q-layer.7": 1782.5035, "encoder_q-layer.8": 1319.0707, "encoder_q-layer.9": 811.476, "epoch": 0.44, "inbatch_neg_score": 0.4436, "inbatch_pos_score": 0.9961, "learning_rate": 1.838888888888889e-05, "loss": 3.9679, "norm_diff": 0.1061, "norm_loss": 0.0, "num_token_doc": 66.5988, "num_token_overlap": 11.6446, "num_token_query": 31.9475, "num_token_union": 65.3278, "num_word_context": 202.3397, "num_word_doc": 49.6927, "num_word_query": 23.5933, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2051.4705, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4431, "query_norm": 1.3668, "queue_k_norm": 1.4774, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9475, "sent_len_1": 66.5988, "sent_len_max_0": 127.5888, "sent_len_max_1": 189.8762, "stdk": 0.0477, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 66900 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9647, "doc_norm": 1.477, "encoder_q-embeddings": 899.708, "encoder_q-layer.0": 632.1448, "encoder_q-layer.1": 769.1375, "encoder_q-layer.10": 599.0168, "encoder_q-layer.11": 1557.4528, "encoder_q-layer.2": 902.4692, "encoder_q-layer.3": 1024.2892, "encoder_q-layer.4": 1091.0457, "encoder_q-layer.5": 1090.6953, "encoder_q-layer.6": 1006.2181, "encoder_q-layer.7": 841.7645, "encoder_q-layer.8": 745.6578, "encoder_q-layer.9": 582.3199, "epoch": 0.44, "inbatch_neg_score": 0.4457, "inbatch_pos_score": 1.0059, "learning_rate": 1.8333333333333333e-05, "loss": 3.9647, "norm_diff": 0.1115, "norm_loss": 0.0, "num_token_doc": 66.7329, "num_token_overlap": 11.6796, "num_token_query": 31.8414, "num_token_union": 65.2801, "num_word_context": 202.2269, "num_word_doc": 49.8087, "num_word_query": 23.5266, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1408.0176, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.446, "query_norm": 1.3655, "queue_k_norm": 1.4768, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8414, "sent_len_1": 66.7329, "sent_len_max_0": 127.405, "sent_len_max_1": 189.895, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 67000 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.9725, "doc_norm": 1.4778, "encoder_q-embeddings": 529.7607, "encoder_q-layer.0": 339.5749, "encoder_q-layer.1": 349.8526, "encoder_q-layer.10": 597.2222, "encoder_q-layer.11": 1459.8939, "encoder_q-layer.2": 389.3997, "encoder_q-layer.3": 407.6148, "encoder_q-layer.4": 414.7544, "encoder_q-layer.5": 423.3091, "encoder_q-layer.6": 476.1324, "encoder_q-layer.7": 534.1877, "encoder_q-layer.8": 626.5521, "encoder_q-layer.9": 570.2216, "epoch": 0.44, "inbatch_neg_score": 0.4454, "inbatch_pos_score": 1.0195, "learning_rate": 1.827777777777778e-05, "loss": 3.9725, "norm_diff": 0.1115, "norm_loss": 0.0, "num_token_doc": 66.6607, "num_token_overlap": 11.7019, "num_token_query": 31.817, "num_token_union": 65.2167, "num_word_context": 201.831, "num_word_doc": 49.7471, "num_word_query": 23.4781, "postclip_grad_norm": 1.0, "preclip_grad_norm": 948.975, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4458, "query_norm": 1.3663, "queue_k_norm": 1.4762, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.817, "sent_len_1": 66.6607, "sent_len_max_0": 127.59, "sent_len_max_1": 188.8025, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 67100 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9753, "doc_norm": 1.4779, "encoder_q-embeddings": 581.9317, "encoder_q-layer.0": 382.5063, "encoder_q-layer.1": 405.7648, "encoder_q-layer.10": 624.7631, "encoder_q-layer.11": 1549.4934, "encoder_q-layer.2": 439.5004, "encoder_q-layer.3": 458.4677, "encoder_q-layer.4": 489.8198, "encoder_q-layer.5": 468.3713, "encoder_q-layer.6": 517.743, "encoder_q-layer.7": 563.3207, "encoder_q-layer.8": 709.0361, "encoder_q-layer.9": 606.1152, "epoch": 0.44, "inbatch_neg_score": 0.4464, "inbatch_pos_score": 0.9912, "learning_rate": 1.8222222222222224e-05, "loss": 3.9753, "norm_diff": 0.1154, "norm_loss": 0.0, "num_token_doc": 66.6303, "num_token_overlap": 11.6736, "num_token_query": 31.8586, "num_token_union": 65.2648, "num_word_context": 202.0605, "num_word_doc": 49.7413, "num_word_query": 23.5138, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1014.7053, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4465, "query_norm": 1.3625, "queue_k_norm": 1.4784, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8586, "sent_len_1": 66.6303, "sent_len_max_0": 127.4912, "sent_len_max_1": 187.7337, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 67200 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 3.9603, "doc_norm": 1.4802, "encoder_q-embeddings": 699.9708, "encoder_q-layer.0": 489.9531, "encoder_q-layer.1": 497.9008, "encoder_q-layer.10": 734.4086, "encoder_q-layer.11": 1735.2177, "encoder_q-layer.2": 594.6599, "encoder_q-layer.3": 598.7437, "encoder_q-layer.4": 659.13, "encoder_q-layer.5": 613.405, "encoder_q-layer.6": 683.5229, "encoder_q-layer.7": 705.1073, "encoder_q-layer.8": 819.5595, "encoder_q-layer.9": 715.1739, "epoch": 0.44, "inbatch_neg_score": 0.4459, "inbatch_pos_score": 0.9873, "learning_rate": 1.8166666666666667e-05, "loss": 3.9603, "norm_diff": 0.108, "norm_loss": 0.0, "num_token_doc": 66.6747, "num_token_overlap": 11.6615, "num_token_query": 31.8659, "num_token_union": 65.3116, "num_word_context": 202.2978, "num_word_doc": 49.7393, "num_word_query": 23.5373, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1192.5638, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4468, "query_norm": 1.3721, "queue_k_norm": 1.4819, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8659, "sent_len_1": 66.6747, "sent_len_max_0": 127.3325, "sent_len_max_1": 189.0362, "stdk": 0.0479, "stdq": 0.0427, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 67300 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9684, "doc_norm": 1.4799, "encoder_q-embeddings": 1176.7097, "encoder_q-layer.0": 766.1965, "encoder_q-layer.1": 813.1777, "encoder_q-layer.10": 1250.4335, "encoder_q-layer.11": 3284.4443, "encoder_q-layer.2": 922.1902, "encoder_q-layer.3": 960.1054, "encoder_q-layer.4": 1061.9644, "encoder_q-layer.5": 1042.9789, "encoder_q-layer.6": 1179.037, "encoder_q-layer.7": 1193.002, "encoder_q-layer.8": 1356.0875, "encoder_q-layer.9": 1210.7539, "epoch": 0.44, "inbatch_neg_score": 0.4467, "inbatch_pos_score": 1.002, "learning_rate": 1.8111111111111112e-05, "loss": 3.9684, "norm_diff": 0.1047, "norm_loss": 0.0, "num_token_doc": 66.7016, "num_token_overlap": 11.6708, "num_token_query": 31.8541, "num_token_union": 65.2445, "num_word_context": 202.1641, "num_word_doc": 49.7583, "num_word_query": 23.523, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2111.2924, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4473, "query_norm": 1.3753, "queue_k_norm": 1.4813, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8541, "sent_len_1": 66.7016, "sent_len_max_0": 127.5325, "sent_len_max_1": 189.8038, "stdk": 0.0479, "stdq": 0.0429, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 67400 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.9648, "doc_norm": 1.4818, "encoder_q-embeddings": 3807.3181, "encoder_q-layer.0": 2836.6877, "encoder_q-layer.1": 3246.9895, "encoder_q-layer.10": 1362.9698, "encoder_q-layer.11": 3228.2129, "encoder_q-layer.2": 3588.2397, "encoder_q-layer.3": 3449.9548, "encoder_q-layer.4": 3769.3906, "encoder_q-layer.5": 3155.9883, "encoder_q-layer.6": 2585.6072, "encoder_q-layer.7": 2362.1184, "encoder_q-layer.8": 2016.9707, "encoder_q-layer.9": 1409.3412, "epoch": 0.44, "inbatch_neg_score": 0.4468, "inbatch_pos_score": 1.0107, "learning_rate": 1.8055555555555555e-05, "loss": 3.9648, "norm_diff": 0.128, "norm_loss": 0.0, "num_token_doc": 67.101, "num_token_overlap": 11.7063, "num_token_query": 31.8863, "num_token_union": 65.4974, "num_word_context": 202.4011, "num_word_doc": 50.0859, "num_word_query": 23.5493, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4426.3231, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4473, "query_norm": 1.3538, "queue_k_norm": 1.4803, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8863, "sent_len_1": 67.101, "sent_len_max_0": 127.4725, "sent_len_max_1": 190.6463, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 67500 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.9723, "doc_norm": 1.4812, "encoder_q-embeddings": 1132.3932, "encoder_q-layer.0": 761.6711, "encoder_q-layer.1": 793.1304, "encoder_q-layer.10": 1235.646, "encoder_q-layer.11": 3164.4841, "encoder_q-layer.2": 865.8931, "encoder_q-layer.3": 870.434, "encoder_q-layer.4": 933.9359, "encoder_q-layer.5": 962.9271, "encoder_q-layer.6": 1051.3096, "encoder_q-layer.7": 1150.0756, "encoder_q-layer.8": 1292.7357, "encoder_q-layer.9": 1191.069, "epoch": 0.44, "inbatch_neg_score": 0.445, "inbatch_pos_score": 0.9805, "learning_rate": 1.8e-05, "loss": 3.9723, "norm_diff": 0.1317, "norm_loss": 0.0, "num_token_doc": 66.6912, "num_token_overlap": 11.6587, "num_token_query": 31.8401, "num_token_union": 65.2609, "num_word_context": 202.3414, "num_word_doc": 49.7447, "num_word_query": 23.4642, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2034.8796, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4463, "query_norm": 1.3495, "queue_k_norm": 1.4795, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8401, "sent_len_1": 66.6912, "sent_len_max_0": 127.4387, "sent_len_max_1": 190.68, "stdk": 0.0479, "stdq": 0.0418, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 67600 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.9447, "doc_norm": 1.4795, "encoder_q-embeddings": 1258.9631, "encoder_q-layer.0": 877.7394, "encoder_q-layer.1": 960.515, "encoder_q-layer.10": 1226.0588, "encoder_q-layer.11": 3196.0752, "encoder_q-layer.2": 1112.7793, "encoder_q-layer.3": 1131.2826, "encoder_q-layer.4": 1225.9976, "encoder_q-layer.5": 1140.3704, "encoder_q-layer.6": 1172.7223, "encoder_q-layer.7": 1220.3588, "encoder_q-layer.8": 1311.5792, "encoder_q-layer.9": 1185.3042, "epoch": 0.44, "inbatch_neg_score": 0.4474, "inbatch_pos_score": 0.9917, "learning_rate": 1.7944444444444443e-05, "loss": 3.9447, "norm_diff": 0.1165, "norm_loss": 0.0, "num_token_doc": 66.8465, "num_token_overlap": 11.6744, "num_token_query": 31.9326, "num_token_union": 65.421, "num_word_context": 202.5779, "num_word_doc": 49.9116, "num_word_query": 23.5975, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2183.711, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4478, "query_norm": 1.363, "queue_k_norm": 1.4815, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9326, "sent_len_1": 66.8465, "sent_len_max_0": 127.5, "sent_len_max_1": 188.58, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 67700 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.9653, "doc_norm": 1.4842, "encoder_q-embeddings": 2284.8916, "encoder_q-layer.0": 1519.0419, "encoder_q-layer.1": 1755.8933, "encoder_q-layer.10": 1261.1803, "encoder_q-layer.11": 3230.8054, "encoder_q-layer.2": 2197.7559, "encoder_q-layer.3": 2372.0378, "encoder_q-layer.4": 2623.1472, "encoder_q-layer.5": 2690.1282, "encoder_q-layer.6": 2657.6982, "encoder_q-layer.7": 2380.3857, "encoder_q-layer.8": 2000.0624, "encoder_q-layer.9": 1260.4512, "epoch": 0.44, "inbatch_neg_score": 0.4477, "inbatch_pos_score": 0.9863, "learning_rate": 1.788888888888889e-05, "loss": 3.9653, "norm_diff": 0.1231, "norm_loss": 0.0, "num_token_doc": 66.5863, "num_token_overlap": 11.6512, "num_token_query": 31.8026, "num_token_union": 65.1974, "num_word_context": 201.944, "num_word_doc": 49.6877, "num_word_query": 23.4723, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3364.8739, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.448, "query_norm": 1.3611, "queue_k_norm": 1.4808, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8026, "sent_len_1": 66.5863, "sent_len_max_0": 127.4638, "sent_len_max_1": 189.4925, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 67800 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 3.9567, "doc_norm": 1.4798, "encoder_q-embeddings": 1057.5433, "encoder_q-layer.0": 703.2373, "encoder_q-layer.1": 718.5941, "encoder_q-layer.10": 1235.9609, "encoder_q-layer.11": 3294.5205, "encoder_q-layer.2": 807.1569, "encoder_q-layer.3": 804.7433, "encoder_q-layer.4": 853.7383, "encoder_q-layer.5": 854.0465, "encoder_q-layer.6": 951.4813, "encoder_q-layer.7": 1090.0433, "encoder_q-layer.8": 1303.7565, "encoder_q-layer.9": 1185.4556, "epoch": 0.44, "inbatch_neg_score": 0.4457, "inbatch_pos_score": 0.9819, "learning_rate": 1.7833333333333334e-05, "loss": 3.9567, "norm_diff": 0.119, "norm_loss": 0.0, "num_token_doc": 67.0341, "num_token_overlap": 11.6681, "num_token_query": 31.8831, "num_token_union": 65.5076, "num_word_context": 202.3296, "num_word_doc": 49.9946, "num_word_query": 23.5454, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2028.7747, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4451, "query_norm": 1.3608, "queue_k_norm": 1.4836, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8831, "sent_len_1": 67.0341, "sent_len_max_0": 127.5337, "sent_len_max_1": 188.9462, "stdk": 0.0478, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 67900 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.948, "doc_norm": 1.4816, "encoder_q-embeddings": 2739.0159, "encoder_q-layer.0": 2024.106, "encoder_q-layer.1": 2218.2336, "encoder_q-layer.10": 1306.9401, "encoder_q-layer.11": 3175.9604, "encoder_q-layer.2": 2614.1877, "encoder_q-layer.3": 2702.793, "encoder_q-layer.4": 3022.6121, "encoder_q-layer.5": 3144.5417, "encoder_q-layer.6": 2734.4375, "encoder_q-layer.7": 2463.4753, "encoder_q-layer.8": 2164.2549, "encoder_q-layer.9": 1336.6241, "epoch": 0.44, "inbatch_neg_score": 0.4506, "inbatch_pos_score": 0.9692, "learning_rate": 1.777777777777778e-05, "loss": 3.948, "norm_diff": 0.1465, "norm_loss": 0.0, "num_token_doc": 67.0147, "num_token_overlap": 11.7402, "num_token_query": 32.0324, "num_token_union": 65.4988, "num_word_context": 202.2757, "num_word_doc": 49.9794, "num_word_query": 23.6803, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3727.497, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4507, "query_norm": 1.3351, "queue_k_norm": 1.4825, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0324, "sent_len_1": 67.0147, "sent_len_max_0": 127.4463, "sent_len_max_1": 192.6325, "stdk": 0.0479, "stdq": 0.0409, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68000 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 3.961, "doc_norm": 1.4754, "encoder_q-embeddings": 1929.4895, "encoder_q-layer.0": 1331.8967, "encoder_q-layer.1": 1478.9316, "encoder_q-layer.10": 1314.7316, "encoder_q-layer.11": 3109.0896, "encoder_q-layer.2": 1747.6647, "encoder_q-layer.3": 1934.7114, "encoder_q-layer.4": 1944.7181, "encoder_q-layer.5": 1899.976, "encoder_q-layer.6": 1732.616, "encoder_q-layer.7": 1650.9183, "encoder_q-layer.8": 1534.5381, "encoder_q-layer.9": 1267.5387, "epoch": 0.44, "inbatch_neg_score": 0.4498, "inbatch_pos_score": 0.9824, "learning_rate": 1.7722222222222222e-05, "loss": 3.961, "norm_diff": 0.1206, "norm_loss": 0.0, "num_token_doc": 66.7809, "num_token_overlap": 11.7099, "num_token_query": 31.973, "num_token_union": 65.3994, "num_word_context": 202.5604, "num_word_doc": 49.8221, "num_word_query": 23.624, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2743.984, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.45, "query_norm": 1.3548, "queue_k_norm": 1.4814, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.973, "sent_len_1": 66.7809, "sent_len_max_0": 127.56, "sent_len_max_1": 188.9387, "stdk": 0.0476, "stdq": 0.0418, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68100 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.958, "doc_norm": 1.4901, "encoder_q-embeddings": 1384.8975, "encoder_q-layer.0": 972.9461, "encoder_q-layer.1": 1074.2224, "encoder_q-layer.10": 1450.4055, "encoder_q-layer.11": 3284.6482, "encoder_q-layer.2": 1244.1335, "encoder_q-layer.3": 1393.0885, "encoder_q-layer.4": 1461.03, "encoder_q-layer.5": 1445.0795, "encoder_q-layer.6": 1435.2472, "encoder_q-layer.7": 1401.7249, "encoder_q-layer.8": 1506.2711, "encoder_q-layer.9": 1288.1261, "epoch": 0.44, "inbatch_neg_score": 0.452, "inbatch_pos_score": 1.0088, "learning_rate": 1.7666666666666668e-05, "loss": 3.958, "norm_diff": 0.1121, "norm_loss": 0.0, "num_token_doc": 66.9269, "num_token_overlap": 11.6644, "num_token_query": 31.9583, "num_token_union": 65.4652, "num_word_context": 202.8454, "num_word_doc": 49.9355, "num_word_query": 23.6199, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2352.6432, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4526, "query_norm": 1.378, "queue_k_norm": 1.4828, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9583, "sent_len_1": 66.9269, "sent_len_max_0": 127.6575, "sent_len_max_1": 190.5488, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68200 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.9496, "doc_norm": 1.4839, "encoder_q-embeddings": 1244.6025, "encoder_q-layer.0": 826.4421, "encoder_q-layer.1": 904.4427, "encoder_q-layer.10": 1240.4852, "encoder_q-layer.11": 3180.8052, "encoder_q-layer.2": 1043.8663, "encoder_q-layer.3": 1043.9628, "encoder_q-layer.4": 1093.8601, "encoder_q-layer.5": 1076.3049, "encoder_q-layer.6": 1175.975, "encoder_q-layer.7": 1245.2456, "encoder_q-layer.8": 1303.4745, "encoder_q-layer.9": 1232.8796, "epoch": 0.44, "inbatch_neg_score": 0.4536, "inbatch_pos_score": 0.9878, "learning_rate": 1.761111111111111e-05, "loss": 3.9496, "norm_diff": 0.1201, "norm_loss": 0.0, "num_token_doc": 66.8521, "num_token_overlap": 11.656, "num_token_query": 31.7539, "num_token_union": 65.3522, "num_word_context": 202.3969, "num_word_doc": 49.9067, "num_word_query": 23.4512, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2135.9828, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4531, "query_norm": 1.3639, "queue_k_norm": 1.4847, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7539, "sent_len_1": 66.8521, "sent_len_max_0": 127.3137, "sent_len_max_1": 188.745, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 68300 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.9681, "doc_norm": 1.4825, "encoder_q-embeddings": 1112.9469, "encoder_q-layer.0": 731.098, "encoder_q-layer.1": 769.8341, "encoder_q-layer.10": 1304.0764, "encoder_q-layer.11": 3132.6719, "encoder_q-layer.2": 870.8688, "encoder_q-layer.3": 882.4991, "encoder_q-layer.4": 961.2012, "encoder_q-layer.5": 972.7427, "encoder_q-layer.6": 1021.7392, "encoder_q-layer.7": 1164.8129, "encoder_q-layer.8": 1264.8027, "encoder_q-layer.9": 1202.0065, "epoch": 0.45, "inbatch_neg_score": 0.46, "inbatch_pos_score": 0.9937, "learning_rate": 1.7555555555555556e-05, "loss": 3.9681, "norm_diff": 0.103, "norm_loss": 0.0, "num_token_doc": 67.0908, "num_token_overlap": 11.6946, "num_token_query": 31.9185, "num_token_union": 65.5116, "num_word_context": 202.6191, "num_word_doc": 50.0461, "num_word_query": 23.5579, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2046.9245, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4592, "query_norm": 1.3795, "queue_k_norm": 1.4823, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9185, "sent_len_1": 67.0908, "sent_len_max_0": 127.6525, "sent_len_max_1": 191.645, "stdk": 0.0479, "stdq": 0.0425, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68400 }, { "accuracy": 48.6328, "active_queue_size": 16384.0, "cl_loss": 3.97, "doc_norm": 1.4847, "encoder_q-embeddings": 1824.386, "encoder_q-layer.0": 1280.3817, "encoder_q-layer.1": 1310.9043, "encoder_q-layer.10": 1322.989, "encoder_q-layer.11": 3070.4197, "encoder_q-layer.2": 1514.4609, "encoder_q-layer.3": 1421.5511, "encoder_q-layer.4": 1386.3093, "encoder_q-layer.5": 1317.7898, "encoder_q-layer.6": 1372.5958, "encoder_q-layer.7": 1316.5558, "encoder_q-layer.8": 1425.6692, "encoder_q-layer.9": 1252.0847, "epoch": 0.45, "inbatch_neg_score": 0.4578, "inbatch_pos_score": 1.0156, "learning_rate": 1.75e-05, "loss": 3.97, "norm_diff": 0.1181, "norm_loss": 0.0, "num_token_doc": 66.7085, "num_token_overlap": 11.6199, "num_token_query": 31.6502, "num_token_union": 65.2252, "num_word_context": 202.4668, "num_word_doc": 49.7977, "num_word_query": 23.3742, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2438.3919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.458, "query_norm": 1.3666, "queue_k_norm": 1.4843, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.6502, "sent_len_1": 66.7085, "sent_len_max_0": 127.4963, "sent_len_max_1": 187.3713, "stdk": 0.0479, "stdq": 0.042, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68500 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 3.9704, "doc_norm": 1.4802, "encoder_q-embeddings": 1015.0152, "encoder_q-layer.0": 679.5409, "encoder_q-layer.1": 702.1035, "encoder_q-layer.10": 1292.1667, "encoder_q-layer.11": 3240.0525, "encoder_q-layer.2": 777.7621, "encoder_q-layer.3": 812.7963, "encoder_q-layer.4": 853.0592, "encoder_q-layer.5": 870.2847, "encoder_q-layer.6": 965.1807, "encoder_q-layer.7": 1043.5187, "encoder_q-layer.8": 1279.0916, "encoder_q-layer.9": 1145.705, "epoch": 0.45, "inbatch_neg_score": 0.4653, "inbatch_pos_score": 0.9956, "learning_rate": 1.7444444444444448e-05, "loss": 3.9704, "norm_diff": 0.1119, "norm_loss": 0.0, "num_token_doc": 66.7503, "num_token_overlap": 11.6531, "num_token_query": 31.8293, "num_token_union": 65.3155, "num_word_context": 202.1736, "num_word_doc": 49.8329, "num_word_query": 23.5035, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2002.7103, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4651, "query_norm": 1.3682, "queue_k_norm": 1.4848, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8293, "sent_len_1": 66.7503, "sent_len_max_0": 127.415, "sent_len_max_1": 189.6012, "stdk": 0.0477, "stdq": 0.0419, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68600 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 3.9578, "doc_norm": 1.4859, "encoder_q-embeddings": 1136.8688, "encoder_q-layer.0": 766.9304, "encoder_q-layer.1": 807.1226, "encoder_q-layer.10": 1365.7106, "encoder_q-layer.11": 3241.134, "encoder_q-layer.2": 937.9229, "encoder_q-layer.3": 954.2078, "encoder_q-layer.4": 1049.5798, "encoder_q-layer.5": 1137.292, "encoder_q-layer.6": 1180.0378, "encoder_q-layer.7": 1254.6736, "encoder_q-layer.8": 1331.114, "encoder_q-layer.9": 1199.1804, "epoch": 0.45, "inbatch_neg_score": 0.4704, "inbatch_pos_score": 1.0127, "learning_rate": 1.738888888888889e-05, "loss": 3.9578, "norm_diff": 0.0975, "norm_loss": 0.0, "num_token_doc": 66.8417, "num_token_overlap": 11.6847, "num_token_query": 31.9662, "num_token_union": 65.4294, "num_word_context": 202.5037, "num_word_doc": 49.8989, "num_word_query": 23.619, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2149.6413, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4697, "query_norm": 1.3884, "queue_k_norm": 1.4842, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9662, "sent_len_1": 66.8417, "sent_len_max_0": 127.5125, "sent_len_max_1": 188.5838, "stdk": 0.0479, "stdq": 0.0427, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68700 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 3.9484, "doc_norm": 1.4939, "encoder_q-embeddings": 962.3801, "encoder_q-layer.0": 659.2934, "encoder_q-layer.1": 685.3376, "encoder_q-layer.10": 1223.7267, "encoder_q-layer.11": 3201.6978, "encoder_q-layer.2": 763.0073, "encoder_q-layer.3": 775.5051, "encoder_q-layer.4": 838.7178, "encoder_q-layer.5": 845.7906, "encoder_q-layer.6": 930.2422, "encoder_q-layer.7": 1040.8877, "encoder_q-layer.8": 1265.9813, "encoder_q-layer.9": 1177.1409, "epoch": 0.45, "inbatch_neg_score": 0.4721, "inbatch_pos_score": 1.0264, "learning_rate": 1.7333333333333336e-05, "loss": 3.9484, "norm_diff": 0.1126, "norm_loss": 0.0, "num_token_doc": 66.7836, "num_token_overlap": 11.6846, "num_token_query": 32.0035, "num_token_union": 65.4185, "num_word_context": 202.4377, "num_word_doc": 49.8712, "num_word_query": 23.6531, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1985.3262, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4722, "query_norm": 1.3813, "queue_k_norm": 1.4857, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0035, "sent_len_1": 66.7836, "sent_len_max_0": 127.515, "sent_len_max_1": 189.1463, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 68800 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.9683, "doc_norm": 1.4855, "encoder_q-embeddings": 1279.1964, "encoder_q-layer.0": 847.9677, "encoder_q-layer.1": 947.954, "encoder_q-layer.10": 1278.8101, "encoder_q-layer.11": 3191.7651, "encoder_q-layer.2": 1057.2108, "encoder_q-layer.3": 1084.9147, "encoder_q-layer.4": 1225.968, "encoder_q-layer.5": 1233.5229, "encoder_q-layer.6": 1242.377, "encoder_q-layer.7": 1347.3386, "encoder_q-layer.8": 1441.0565, "encoder_q-layer.9": 1223.5409, "epoch": 0.45, "inbatch_neg_score": 0.4786, "inbatch_pos_score": 1.0273, "learning_rate": 1.7277777777777778e-05, "loss": 3.9683, "norm_diff": 0.0972, "norm_loss": 0.0, "num_token_doc": 66.9073, "num_token_overlap": 11.685, "num_token_query": 31.8989, "num_token_union": 65.4004, "num_word_context": 202.4511, "num_word_doc": 49.8935, "num_word_query": 23.5578, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2216.2696, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.478, "query_norm": 1.3883, "queue_k_norm": 1.4876, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8989, "sent_len_1": 66.9073, "sent_len_max_0": 127.605, "sent_len_max_1": 191.2025, "stdk": 0.0478, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 68900 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 3.9436, "doc_norm": 1.4848, "encoder_q-embeddings": 1271.5697, "encoder_q-layer.0": 838.0881, "encoder_q-layer.1": 879.3472, "encoder_q-layer.10": 1264.0947, "encoder_q-layer.11": 3150.2488, "encoder_q-layer.2": 971.6279, "encoder_q-layer.3": 1004.6687, "encoder_q-layer.4": 1081.0306, "encoder_q-layer.5": 1139.1709, "encoder_q-layer.6": 1131.5559, "encoder_q-layer.7": 1199.7698, "encoder_q-layer.8": 1323.7605, "encoder_q-layer.9": 1205.2692, "epoch": 0.45, "inbatch_neg_score": 0.4824, "inbatch_pos_score": 1.0068, "learning_rate": 1.7222222222222224e-05, "loss": 3.9436, "norm_diff": 0.1041, "norm_loss": 0.0, "num_token_doc": 66.7797, "num_token_overlap": 11.6677, "num_token_query": 31.9654, "num_token_union": 65.3568, "num_word_context": 202.4665, "num_word_doc": 49.8458, "num_word_query": 23.5954, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2127.2854, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4812, "query_norm": 1.3807, "queue_k_norm": 1.4875, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9654, "sent_len_1": 66.7797, "sent_len_max_0": 127.6887, "sent_len_max_1": 188.3925, "stdk": 0.0478, "stdq": 0.042, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69000 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.9655, "doc_norm": 1.4927, "encoder_q-embeddings": 1435.179, "encoder_q-layer.0": 984.8748, "encoder_q-layer.1": 1100.4805, "encoder_q-layer.10": 1358.8519, "encoder_q-layer.11": 3275.1826, "encoder_q-layer.2": 1237.7576, "encoder_q-layer.3": 1267.7456, "encoder_q-layer.4": 1256.1987, "encoder_q-layer.5": 1212.014, "encoder_q-layer.6": 1293.7395, "encoder_q-layer.7": 1230.652, "encoder_q-layer.8": 1388.2852, "encoder_q-layer.9": 1254.9044, "epoch": 0.45, "inbatch_neg_score": 0.4839, "inbatch_pos_score": 1.0273, "learning_rate": 1.7166666666666666e-05, "loss": 3.9655, "norm_diff": 0.1079, "norm_loss": 0.0, "num_token_doc": 66.7565, "num_token_overlap": 11.678, "num_token_query": 31.8959, "num_token_union": 65.3068, "num_word_context": 202.1448, "num_word_doc": 49.8102, "num_word_query": 23.5539, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2299.437, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4836, "query_norm": 1.3848, "queue_k_norm": 1.4871, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8959, "sent_len_1": 66.7565, "sent_len_max_0": 127.585, "sent_len_max_1": 190.705, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69100 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 3.9662, "doc_norm": 1.4938, "encoder_q-embeddings": 1366.1444, "encoder_q-layer.0": 887.575, "encoder_q-layer.1": 982.0805, "encoder_q-layer.10": 1288.0024, "encoder_q-layer.11": 3118.1628, "encoder_q-layer.2": 1107.8856, "encoder_q-layer.3": 1174.3287, "encoder_q-layer.4": 1281.9495, "encoder_q-layer.5": 1282.4714, "encoder_q-layer.6": 1337.3278, "encoder_q-layer.7": 1488.1855, "encoder_q-layer.8": 1642.8048, "encoder_q-layer.9": 1293.1451, "epoch": 0.45, "inbatch_neg_score": 0.4875, "inbatch_pos_score": 1.0264, "learning_rate": 1.7111111111111112e-05, "loss": 3.9662, "norm_diff": 0.1054, "norm_loss": 0.0, "num_token_doc": 66.7486, "num_token_overlap": 11.6075, "num_token_query": 31.9101, "num_token_union": 65.4101, "num_word_context": 202.4524, "num_word_doc": 49.8207, "num_word_query": 23.5727, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2315.3331, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4878, "query_norm": 1.3884, "queue_k_norm": 1.4885, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9101, "sent_len_1": 66.7486, "sent_len_max_0": 127.5575, "sent_len_max_1": 190.3512, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69200 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9748, "doc_norm": 1.4838, "encoder_q-embeddings": 1100.2632, "encoder_q-layer.0": 771.4698, "encoder_q-layer.1": 804.5851, "encoder_q-layer.10": 1123.6808, "encoder_q-layer.11": 2933.4595, "encoder_q-layer.2": 909.3416, "encoder_q-layer.3": 933.2385, "encoder_q-layer.4": 1032.1476, "encoder_q-layer.5": 987.514, "encoder_q-layer.6": 1034.2826, "encoder_q-layer.7": 1162.5974, "encoder_q-layer.8": 1263.9707, "encoder_q-layer.9": 1178.5516, "epoch": 0.45, "inbatch_neg_score": 0.4892, "inbatch_pos_score": 1.0303, "learning_rate": 1.7055555555555554e-05, "loss": 3.9748, "norm_diff": 0.0922, "norm_loss": 0.0, "num_token_doc": 66.8979, "num_token_overlap": 11.6647, "num_token_query": 31.9015, "num_token_union": 65.4535, "num_word_context": 202.4694, "num_word_doc": 49.9355, "num_word_query": 23.5831, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1971.0012, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4897, "query_norm": 1.3916, "queue_k_norm": 1.4899, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9015, "sent_len_1": 66.8979, "sent_len_max_0": 127.52, "sent_len_max_1": 188.2912, "stdk": 0.0477, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69300 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9315, "doc_norm": 1.4949, "encoder_q-embeddings": 2376.5901, "encoder_q-layer.0": 1596.4437, "encoder_q-layer.1": 1618.793, "encoder_q-layer.10": 2681.3909, "encoder_q-layer.11": 6429.8589, "encoder_q-layer.2": 1844.8975, "encoder_q-layer.3": 1830.4169, "encoder_q-layer.4": 1842.1332, "encoder_q-layer.5": 1760.6567, "encoder_q-layer.6": 1927.2103, "encoder_q-layer.7": 2077.2239, "encoder_q-layer.8": 2524.8914, "encoder_q-layer.9": 2404.7898, "epoch": 0.45, "inbatch_neg_score": 0.4938, "inbatch_pos_score": 1.0303, "learning_rate": 1.7000000000000003e-05, "loss": 3.9315, "norm_diff": 0.1028, "norm_loss": 0.0, "num_token_doc": 66.8646, "num_token_overlap": 11.6751, "num_token_query": 31.925, "num_token_union": 65.3589, "num_word_context": 202.1551, "num_word_doc": 49.877, "num_word_query": 23.5506, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4127.4064, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.4934, "query_norm": 1.3921, "queue_k_norm": 1.4908, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.925, "sent_len_1": 66.8646, "sent_len_max_0": 127.5125, "sent_len_max_1": 188.3775, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69400 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.9567, "doc_norm": 1.4846, "encoder_q-embeddings": 6374.5928, "encoder_q-layer.0": 4566.9395, "encoder_q-layer.1": 4703.1782, "encoder_q-layer.10": 2716.5161, "encoder_q-layer.11": 6328.4971, "encoder_q-layer.2": 5334.1963, "encoder_q-layer.3": 5220.4648, "encoder_q-layer.4": 5470.8086, "encoder_q-layer.5": 5216.3286, "encoder_q-layer.6": 5216.1997, "encoder_q-layer.7": 4729.3789, "encoder_q-layer.8": 4284.8887, "encoder_q-layer.9": 2934.7556, "epoch": 0.45, "inbatch_neg_score": 0.5004, "inbatch_pos_score": 1.0479, "learning_rate": 1.6944444444444446e-05, "loss": 3.9567, "norm_diff": 0.0754, "norm_loss": 0.0, "num_token_doc": 66.6897, "num_token_overlap": 11.6918, "num_token_query": 31.955, "num_token_union": 65.2627, "num_word_context": 202.2983, "num_word_doc": 49.7451, "num_word_query": 23.5996, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7689.7703, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.4995, "query_norm": 1.4091, "queue_k_norm": 1.4894, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.955, "sent_len_1": 66.6897, "sent_len_max_0": 127.5713, "sent_len_max_1": 189.865, "stdk": 0.0476, "stdq": 0.0429, "stdqueue_k": 0.0479, "stdqueue_q": 0.0, "step": 69500 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.9598, "doc_norm": 1.4814, "encoder_q-embeddings": 2368.084, "encoder_q-layer.0": 1487.3043, "encoder_q-layer.1": 1645.0079, "encoder_q-layer.10": 2663.9834, "encoder_q-layer.11": 6765.1426, "encoder_q-layer.2": 1852.8033, "encoder_q-layer.3": 1930.35, "encoder_q-layer.4": 2047.184, "encoder_q-layer.5": 2118.6155, "encoder_q-layer.6": 2320.6958, "encoder_q-layer.7": 2654.3254, "encoder_q-layer.8": 2896.7083, "encoder_q-layer.9": 2549.9922, "epoch": 0.45, "inbatch_neg_score": 0.5028, "inbatch_pos_score": 1.0479, "learning_rate": 1.688888888888889e-05, "loss": 3.9598, "norm_diff": 0.0729, "norm_loss": 0.0, "num_token_doc": 66.8298, "num_token_overlap": 11.6625, "num_token_query": 31.8522, "num_token_union": 65.349, "num_word_context": 202.2734, "num_word_doc": 49.8673, "num_word_query": 23.5315, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4397.2976, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5024, "query_norm": 1.4085, "queue_k_norm": 1.493, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8522, "sent_len_1": 66.8298, "sent_len_max_0": 127.4075, "sent_len_max_1": 189.2612, "stdk": 0.0474, "stdq": 0.0428, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69600 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.9493, "doc_norm": 1.4902, "encoder_q-embeddings": 2432.2314, "encoder_q-layer.0": 1643.0957, "encoder_q-layer.1": 1732.7627, "encoder_q-layer.10": 2491.9521, "encoder_q-layer.11": 6009.0137, "encoder_q-layer.2": 2004.8917, "encoder_q-layer.3": 2010.9563, "encoder_q-layer.4": 2193.1541, "encoder_q-layer.5": 2198.1348, "encoder_q-layer.6": 2168.6707, "encoder_q-layer.7": 2281.4468, "encoder_q-layer.8": 2580.4182, "encoder_q-layer.9": 2319.6951, "epoch": 0.45, "inbatch_neg_score": 0.5081, "inbatch_pos_score": 1.0508, "learning_rate": 1.6833333333333334e-05, "loss": 3.9493, "norm_diff": 0.0854, "norm_loss": 0.0, "num_token_doc": 66.6497, "num_token_overlap": 11.6883, "num_token_query": 31.8974, "num_token_union": 65.2709, "num_word_context": 202.0879, "num_word_doc": 49.7356, "num_word_query": 23.5692, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4138.7278, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5083, "query_norm": 1.4048, "queue_k_norm": 1.4941, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8974, "sent_len_1": 66.6497, "sent_len_max_0": 127.4638, "sent_len_max_1": 190.2837, "stdk": 0.0478, "stdq": 0.0426, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69700 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.9424, "doc_norm": 1.4951, "encoder_q-embeddings": 2040.9984, "encoder_q-layer.0": 1313.655, "encoder_q-layer.1": 1374.9272, "encoder_q-layer.10": 2477.229, "encoder_q-layer.11": 6332.1772, "encoder_q-layer.2": 1526.6334, "encoder_q-layer.3": 1566.1239, "encoder_q-layer.4": 1657.8981, "encoder_q-layer.5": 1680.099, "encoder_q-layer.6": 1937.4373, "encoder_q-layer.7": 2116.3557, "encoder_q-layer.8": 2551.2852, "encoder_q-layer.9": 2369.7485, "epoch": 0.45, "inbatch_neg_score": 0.5105, "inbatch_pos_score": 1.0547, "learning_rate": 1.677777777777778e-05, "loss": 3.9424, "norm_diff": 0.0994, "norm_loss": 0.0, "num_token_doc": 66.9963, "num_token_overlap": 11.7236, "num_token_query": 31.9787, "num_token_union": 65.5116, "num_word_context": 202.5321, "num_word_doc": 49.9647, "num_word_query": 23.5946, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3949.5008, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5107, "query_norm": 1.3957, "queue_k_norm": 1.4941, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9787, "sent_len_1": 66.9963, "sent_len_max_0": 127.5162, "sent_len_max_1": 189.985, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69800 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9509, "doc_norm": 1.4954, "encoder_q-embeddings": 12226.2959, "encoder_q-layer.0": 8855.5586, "encoder_q-layer.1": 9229.3848, "encoder_q-layer.10": 2515.1472, "encoder_q-layer.11": 6441.5239, "encoder_q-layer.2": 10102.2002, "encoder_q-layer.3": 8558.3613, "encoder_q-layer.4": 8735.2178, "encoder_q-layer.5": 7895.4785, "encoder_q-layer.6": 8522.9385, "encoder_q-layer.7": 7000.4009, "encoder_q-layer.8": 5004.605, "encoder_q-layer.9": 2879.699, "epoch": 0.45, "inbatch_neg_score": 0.5115, "inbatch_pos_score": 1.0557, "learning_rate": 1.6722222222222222e-05, "loss": 3.9509, "norm_diff": 0.114, "norm_loss": 0.0, "num_token_doc": 66.5661, "num_token_overlap": 11.7107, "num_token_query": 31.8902, "num_token_union": 65.2096, "num_word_context": 202.1362, "num_word_doc": 49.687, "num_word_query": 23.5672, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12402.5615, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5122, "query_norm": 1.3813, "queue_k_norm": 1.4955, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8902, "sent_len_1": 66.5661, "sent_len_max_0": 127.6762, "sent_len_max_1": 188.41, "stdk": 0.0479, "stdq": 0.0416, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 69900 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 3.9591, "doc_norm": 1.4911, "encoder_q-embeddings": 2561.8792, "encoder_q-layer.0": 1708.4115, "encoder_q-layer.1": 1812.5056, "encoder_q-layer.10": 2515.7864, "encoder_q-layer.11": 6159.3857, "encoder_q-layer.2": 2070.1733, "encoder_q-layer.3": 2094.7751, "encoder_q-layer.4": 2135.4321, "encoder_q-layer.5": 2124.9771, "encoder_q-layer.6": 2292.5344, "encoder_q-layer.7": 2360.0967, "encoder_q-layer.8": 2573.6558, "encoder_q-layer.9": 2307.9385, "epoch": 0.46, "inbatch_neg_score": 0.5172, "inbatch_pos_score": 1.0635, "learning_rate": 1.6666666666666667e-05, "loss": 3.9591, "norm_diff": 0.095, "norm_loss": 0.0, "num_token_doc": 66.6893, "num_token_overlap": 11.695, "num_token_query": 31.9379, "num_token_union": 65.2708, "num_word_context": 202.358, "num_word_doc": 49.7314, "num_word_query": 23.5737, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4233.4226, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5171, "query_norm": 1.3961, "queue_k_norm": 1.4955, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9379, "sent_len_1": 66.6893, "sent_len_max_0": 127.6863, "sent_len_max_1": 191.4, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 70000 }, { "dev_runtime": 42.8912, "dev_samples_per_second": 1.492, "dev_steps_per_second": 0.023, "epoch": 0.46, "step": 70000, "test_accuracy": 93.017578125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.39231210947036743, "test_doc_norm": 1.4667843580245972, "test_inbatch_neg_score": 0.8625028133392334, "test_inbatch_pos_score": 1.7668371200561523, "test_loss": 0.39231210947036743, "test_loss_align": 0.9685328602790833, "test_loss_unif": 3.4850690364837646, "test_loss_unif_q@queue": 3.4850687980651855, "test_norm_diff": 0.057306356728076935, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5081378221511841, "test_query_norm": 1.5240906476974487, "test_queue_k_norm": 1.4951763153076172, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04143857955932617, "test_stdq": 0.04204752296209335, "test_stdqueue_k": 0.04794192314147949, "test_stdqueue_q": 0.0 }, { "dev_runtime": 42.8912, "dev_samples_per_second": 1.492, "dev_steps_per_second": 0.023, "epoch": 0.46, "eval_beir-arguana_ndcg@10": 0.37132, "eval_beir-arguana_recall@10": 0.63442, "eval_beir-arguana_recall@100": 0.92532, "eval_beir-arguana_recall@20": 0.766, "eval_beir-avg_ndcg@10": 0.3760895833333333, "eval_beir-avg_recall@10": 0.4461125, "eval_beir-avg_recall@100": 0.6273053333333334, "eval_beir-avg_recall@20": 0.5076769999999999, "eval_beir-cqadupstack_ndcg@10": 0.25081583333333335, "eval_beir-cqadupstack_recall@10": 0.34388499999999994, "eval_beir-cqadupstack_recall@100": 0.5788533333333333, "eval_beir-cqadupstack_recall@20": 0.41202, "eval_beir-fiqa_ndcg@10": 0.25201, "eval_beir-fiqa_recall@10": 0.30722, "eval_beir-fiqa_recall@100": 0.56472, "eval_beir-fiqa_recall@20": 0.37, "eval_beir-nfcorpus_ndcg@10": 0.29495, "eval_beir-nfcorpus_recall@10": 0.15101, "eval_beir-nfcorpus_recall@100": 0.27871, "eval_beir-nfcorpus_recall@20": 0.18505, "eval_beir-nq_ndcg@10": 0.26654, "eval_beir-nq_recall@10": 0.44414, "eval_beir-nq_recall@100": 0.79024, "eval_beir-nq_recall@20": 0.56965, "eval_beir-quora_ndcg@10": 0.78072, "eval_beir-quora_recall@10": 0.8901, "eval_beir-quora_recall@100": 0.97818, "eval_beir-quora_recall@20": 0.92915, "eval_beir-scidocs_ndcg@10": 0.14894, "eval_beir-scidocs_recall@10": 0.15783, "eval_beir-scidocs_recall@100": 0.3658, "eval_beir-scidocs_recall@20": 0.21458, "eval_beir-scifact_ndcg@10": 0.63662, "eval_beir-scifact_recall@10": 0.78633, "eval_beir-scifact_recall@100": 0.92656, "eval_beir-scifact_recall@20": 0.82244, "eval_beir-trec-covid_ndcg@10": 0.56941, "eval_beir-trec-covid_recall@10": 0.618, "eval_beir-trec-covid_recall@100": 0.4432, "eval_beir-trec-covid_recall@20": 0.594, "eval_beir-webis-touche2020_ndcg@10": 0.18957, "eval_beir-webis-touche2020_recall@10": 0.12819, "eval_beir-webis-touche2020_recall@100": 0.42147, "eval_beir-webis-touche2020_recall@20": 0.21388, "eval_senteval-avg_sts": 0.7507130518383345, "eval_senteval-sickr_spearman": 0.7150855012153752, "eval_senteval-stsb_spearman": 0.7863406024612937, "step": 70000, "test_accuracy": 93.017578125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.39231210947036743, "test_doc_norm": 1.4667843580245972, "test_inbatch_neg_score": 0.8625028133392334, "test_inbatch_pos_score": 1.7668371200561523, "test_loss": 0.39231210947036743, "test_loss_align": 0.9685328602790833, "test_loss_unif": 3.4850690364837646, "test_loss_unif_q@queue": 3.4850687980651855, "test_norm_diff": 0.057306356728076935, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.5081378221511841, "test_query_norm": 1.5240906476974487, "test_queue_k_norm": 1.4951763153076172, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04143857955932617, "test_stdq": 0.04204752296209335, "test_stdqueue_k": 0.04794192314147949, "test_stdqueue_q": 0.0 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9571, "doc_norm": 1.5052, "encoder_q-embeddings": 2261.0039, "encoder_q-layer.0": 1496.2823, "encoder_q-layer.1": 1501.9354, "encoder_q-layer.10": 2524.7039, "encoder_q-layer.11": 6663.5444, "encoder_q-layer.2": 1668.9373, "encoder_q-layer.3": 1719.3781, "encoder_q-layer.4": 1894.0311, "encoder_q-layer.5": 1925.1865, "encoder_q-layer.6": 2213.3325, "encoder_q-layer.7": 2365.3428, "encoder_q-layer.8": 2587.459, "encoder_q-layer.9": 2365.0117, "epoch": 0.46, "inbatch_neg_score": 0.5179, "inbatch_pos_score": 1.0527, "learning_rate": 1.661111111111111e-05, "loss": 3.9571, "norm_diff": 0.1226, "norm_loss": 0.0, "num_token_doc": 66.7831, "num_token_overlap": 11.7099, "num_token_query": 31.8883, "num_token_union": 65.338, "num_word_context": 202.0141, "num_word_doc": 49.8136, "num_word_query": 23.5593, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4273.9579, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5176, "query_norm": 1.3826, "queue_k_norm": 1.4974, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8883, "sent_len_1": 66.7831, "sent_len_max_0": 127.5837, "sent_len_max_1": 188.7788, "stdk": 0.0482, "stdq": 0.0417, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 70100 }, { "accuracy": 43.457, "active_queue_size": 16384.0, "cl_loss": 3.9388, "doc_norm": 1.498, "encoder_q-embeddings": 2259.7842, "encoder_q-layer.0": 1451.132, "encoder_q-layer.1": 1542.9602, "encoder_q-layer.10": 2737.3938, "encoder_q-layer.11": 6527.7876, "encoder_q-layer.2": 1774.2118, "encoder_q-layer.3": 1894.2648, "encoder_q-layer.4": 2072.8477, "encoder_q-layer.5": 2192.5425, "encoder_q-layer.6": 2591.5376, "encoder_q-layer.7": 2749.864, "encoder_q-layer.8": 3093.8145, "encoder_q-layer.9": 2686.0173, "epoch": 0.46, "inbatch_neg_score": 0.5135, "inbatch_pos_score": 1.0293, "learning_rate": 1.655555555555556e-05, "loss": 3.9388, "norm_diff": 0.1176, "norm_loss": 0.0, "num_token_doc": 66.8569, "num_token_overlap": 11.6313, "num_token_query": 31.8531, "num_token_union": 65.4369, "num_word_context": 202.2723, "num_word_doc": 49.9439, "num_word_query": 23.5216, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4405.3992, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5132, "query_norm": 1.3804, "queue_k_norm": 1.4992, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8531, "sent_len_1": 66.8569, "sent_len_max_0": 127.5438, "sent_len_max_1": 188.6575, "stdk": 0.0479, "stdq": 0.0418, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 70200 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.9378, "doc_norm": 1.5077, "encoder_q-embeddings": 2139.978, "encoder_q-layer.0": 1433.0297, "encoder_q-layer.1": 1521.7209, "encoder_q-layer.10": 2479.6572, "encoder_q-layer.11": 6053.1504, "encoder_q-layer.2": 1730.9877, "encoder_q-layer.3": 1830.4674, "encoder_q-layer.4": 1945.418, "encoder_q-layer.5": 1992.2036, "encoder_q-layer.6": 2100.9165, "encoder_q-layer.7": 2159.9148, "encoder_q-layer.8": 2586.5637, "encoder_q-layer.9": 2287.2834, "epoch": 0.46, "inbatch_neg_score": 0.5148, "inbatch_pos_score": 1.0723, "learning_rate": 1.65e-05, "loss": 3.9378, "norm_diff": 0.1121, "norm_loss": 0.0, "num_token_doc": 66.7794, "num_token_overlap": 11.6952, "num_token_query": 31.9045, "num_token_union": 65.3785, "num_word_context": 202.4513, "num_word_doc": 49.8582, "num_word_query": 23.5784, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3946.4029, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5156, "query_norm": 1.3956, "queue_k_norm": 1.5015, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9045, "sent_len_1": 66.7794, "sent_len_max_0": 127.4912, "sent_len_max_1": 188.8825, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 70300 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.9901, "doc_norm": 1.5053, "encoder_q-embeddings": 2713.5698, "encoder_q-layer.0": 1790.9651, "encoder_q-layer.1": 1913.4049, "encoder_q-layer.10": 2587.4128, "encoder_q-layer.11": 6150.7861, "encoder_q-layer.2": 2186.6079, "encoder_q-layer.3": 2264.8345, "encoder_q-layer.4": 2433.6821, "encoder_q-layer.5": 2429.9436, "encoder_q-layer.6": 2485.3694, "encoder_q-layer.7": 2613.4143, "encoder_q-layer.8": 2666.5532, "encoder_q-layer.9": 2428.71, "epoch": 0.46, "inbatch_neg_score": 0.5165, "inbatch_pos_score": 1.0752, "learning_rate": 1.6444444444444447e-05, "loss": 3.9901, "norm_diff": 0.1111, "norm_loss": 0.0, "num_token_doc": 66.4924, "num_token_overlap": 11.6074, "num_token_query": 31.7722, "num_token_union": 65.1325, "num_word_context": 202.2701, "num_word_doc": 49.6118, "num_word_query": 23.4762, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4353.7096, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5166, "query_norm": 1.3942, "queue_k_norm": 1.4994, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7722, "sent_len_1": 66.4924, "sent_len_max_0": 127.5275, "sent_len_max_1": 188.7425, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 70400 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 3.9297, "doc_norm": 1.4937, "encoder_q-embeddings": 6523.7817, "encoder_q-layer.0": 4600.5757, "encoder_q-layer.1": 4943.8926, "encoder_q-layer.10": 2621.3296, "encoder_q-layer.11": 6820.6533, "encoder_q-layer.2": 5650.335, "encoder_q-layer.3": 6590.7979, "encoder_q-layer.4": 7063.9922, "encoder_q-layer.5": 6969.1978, "encoder_q-layer.6": 7772.1284, "encoder_q-layer.7": 7600.0977, "encoder_q-layer.8": 7036.1641, "encoder_q-layer.9": 4252.9209, "epoch": 0.46, "inbatch_neg_score": 0.5175, "inbatch_pos_score": 1.0479, "learning_rate": 1.638888888888889e-05, "loss": 3.9297, "norm_diff": 0.1113, "norm_loss": 0.0, "num_token_doc": 67.0832, "num_token_overlap": 11.6795, "num_token_query": 31.9359, "num_token_union": 65.5481, "num_word_context": 202.6739, "num_word_doc": 49.951, "num_word_query": 23.5922, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9431.9252, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5176, "query_norm": 1.3824, "queue_k_norm": 1.5032, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9359, "sent_len_1": 67.0832, "sent_len_max_0": 127.6188, "sent_len_max_1": 193.0513, "stdk": 0.0476, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 70500 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.9485, "doc_norm": 1.5085, "encoder_q-embeddings": 2558.3911, "encoder_q-layer.0": 1825.9788, "encoder_q-layer.1": 1917.4277, "encoder_q-layer.10": 2630.6731, "encoder_q-layer.11": 6406.0674, "encoder_q-layer.2": 2206.1055, "encoder_q-layer.3": 2241.877, "encoder_q-layer.4": 2298.3386, "encoder_q-layer.5": 2215.2207, "encoder_q-layer.6": 2341.177, "encoder_q-layer.7": 2455.2788, "encoder_q-layer.8": 2809.3674, "encoder_q-layer.9": 2467.3469, "epoch": 0.46, "inbatch_neg_score": 0.5183, "inbatch_pos_score": 1.0654, "learning_rate": 1.6333333333333335e-05, "loss": 3.9485, "norm_diff": 0.1093, "norm_loss": 0.0, "num_token_doc": 66.6104, "num_token_overlap": 11.6701, "num_token_query": 31.8855, "num_token_union": 65.2668, "num_word_context": 202.3781, "num_word_doc": 49.7197, "num_word_query": 23.567, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4380.0896, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5181, "query_norm": 1.3991, "queue_k_norm": 1.5031, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8855, "sent_len_1": 66.6104, "sent_len_max_0": 127.7775, "sent_len_max_1": 188.4025, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 70600 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.9537, "doc_norm": 1.5031, "encoder_q-embeddings": 2322.9592, "encoder_q-layer.0": 1571.2073, "encoder_q-layer.1": 1584.2177, "encoder_q-layer.10": 2659.7207, "encoder_q-layer.11": 6159.0845, "encoder_q-layer.2": 1752.8273, "encoder_q-layer.3": 1844.0682, "encoder_q-layer.4": 1920.9326, "encoder_q-layer.5": 1995.8594, "encoder_q-layer.6": 2121.8267, "encoder_q-layer.7": 2318.4446, "encoder_q-layer.8": 2607.377, "encoder_q-layer.9": 2374.2722, "epoch": 0.46, "inbatch_neg_score": 0.5167, "inbatch_pos_score": 1.0781, "learning_rate": 1.6277777777777777e-05, "loss": 3.9537, "norm_diff": 0.1061, "norm_loss": 0.0, "num_token_doc": 66.5755, "num_token_overlap": 11.6834, "num_token_query": 31.9446, "num_token_union": 65.2469, "num_word_context": 202.4372, "num_word_doc": 49.6734, "num_word_query": 23.5936, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4070.0972, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5171, "query_norm": 1.3971, "queue_k_norm": 1.5027, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9446, "sent_len_1": 66.5755, "sent_len_max_0": 127.4912, "sent_len_max_1": 189.2612, "stdk": 0.048, "stdq": 0.0426, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 70700 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.9695, "doc_norm": 1.5123, "encoder_q-embeddings": 2203.3364, "encoder_q-layer.0": 1441.7128, "encoder_q-layer.1": 1468.519, "encoder_q-layer.10": 2543.0342, "encoder_q-layer.11": 6419.2798, "encoder_q-layer.2": 1699.1102, "encoder_q-layer.3": 1782.3639, "encoder_q-layer.4": 1922.2493, "encoder_q-layer.5": 1933.3613, "encoder_q-layer.6": 2085.4868, "encoder_q-layer.7": 2179.1687, "encoder_q-layer.8": 2840.9243, "encoder_q-layer.9": 2463.9402, "epoch": 0.46, "inbatch_neg_score": 0.5159, "inbatch_pos_score": 1.0547, "learning_rate": 1.6222222222222223e-05, "loss": 3.9695, "norm_diff": 0.1317, "norm_loss": 0.0, "num_token_doc": 66.8601, "num_token_overlap": 11.637, "num_token_query": 31.8285, "num_token_union": 65.3952, "num_word_context": 202.3748, "num_word_doc": 49.8987, "num_word_query": 23.5185, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4124.0045, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5166, "query_norm": 1.3806, "queue_k_norm": 1.5035, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8285, "sent_len_1": 66.8601, "sent_len_max_0": 127.3725, "sent_len_max_1": 189.9363, "stdk": 0.0483, "stdq": 0.042, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 70800 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9632, "doc_norm": 1.5132, "encoder_q-embeddings": 3471.6624, "encoder_q-layer.0": 2359.3972, "encoder_q-layer.1": 2876.0273, "encoder_q-layer.10": 2691.0256, "encoder_q-layer.11": 6720.0146, "encoder_q-layer.2": 3312.9209, "encoder_q-layer.3": 3373.4436, "encoder_q-layer.4": 3592.8716, "encoder_q-layer.5": 3440.7158, "encoder_q-layer.6": 3802.6978, "encoder_q-layer.7": 4004.8831, "encoder_q-layer.8": 3634.0002, "encoder_q-layer.9": 2703.0486, "epoch": 0.46, "inbatch_neg_score": 0.5165, "inbatch_pos_score": 1.0586, "learning_rate": 1.6166666666666665e-05, "loss": 3.9632, "norm_diff": 0.122, "norm_loss": 0.0, "num_token_doc": 66.7982, "num_token_overlap": 11.6468, "num_token_query": 31.8046, "num_token_union": 65.3117, "num_word_context": 202.3966, "num_word_doc": 49.8746, "num_word_query": 23.4735, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5598.3655, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5171, "query_norm": 1.3912, "queue_k_norm": 1.5047, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8046, "sent_len_1": 66.7982, "sent_len_max_0": 127.4925, "sent_len_max_1": 190.6275, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 70900 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.9477, "doc_norm": 1.5053, "encoder_q-embeddings": 5902.4868, "encoder_q-layer.0": 4399.812, "encoder_q-layer.1": 5217.5498, "encoder_q-layer.10": 2503.8894, "encoder_q-layer.11": 6490.8008, "encoder_q-layer.2": 5857.6304, "encoder_q-layer.3": 5836.002, "encoder_q-layer.4": 6059.979, "encoder_q-layer.5": 4825.7588, "encoder_q-layer.6": 3817.1912, "encoder_q-layer.7": 3300.9424, "encoder_q-layer.8": 2937.4336, "encoder_q-layer.9": 2294.7734, "epoch": 0.46, "inbatch_neg_score": 0.5176, "inbatch_pos_score": 1.0684, "learning_rate": 1.6111111111111115e-05, "loss": 3.9477, "norm_diff": 0.1173, "norm_loss": 0.0, "num_token_doc": 66.7027, "num_token_overlap": 11.6604, "num_token_query": 31.8615, "num_token_union": 65.268, "num_word_context": 202.1503, "num_word_doc": 49.7997, "num_word_query": 23.5256, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7312.8401, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5166, "query_norm": 1.3879, "queue_k_norm": 1.5048, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8615, "sent_len_1": 66.7027, "sent_len_max_0": 127.6462, "sent_len_max_1": 189.1337, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 71000 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.956, "doc_norm": 1.5062, "encoder_q-embeddings": 2212.9282, "encoder_q-layer.0": 1508.1184, "encoder_q-layer.1": 1548.7489, "encoder_q-layer.10": 2304.707, "encoder_q-layer.11": 6165.8691, "encoder_q-layer.2": 1708.9208, "encoder_q-layer.3": 1795.432, "encoder_q-layer.4": 1845.7288, "encoder_q-layer.5": 1826.5845, "encoder_q-layer.6": 2044.036, "encoder_q-layer.7": 2113.9636, "encoder_q-layer.8": 2359.6858, "encoder_q-layer.9": 2200.4373, "epoch": 0.46, "inbatch_neg_score": 0.5155, "inbatch_pos_score": 1.0469, "learning_rate": 1.6055555555555557e-05, "loss": 3.956, "norm_diff": 0.1306, "norm_loss": 0.0, "num_token_doc": 66.6482, "num_token_overlap": 11.6217, "num_token_query": 31.7219, "num_token_union": 65.2676, "num_word_context": 201.9322, "num_word_doc": 49.7689, "num_word_query": 23.3927, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3910.1936, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5146, "query_norm": 1.3757, "queue_k_norm": 1.5053, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7219, "sent_len_1": 66.6482, "sent_len_max_0": 127.4125, "sent_len_max_1": 187.2063, "stdk": 0.048, "stdq": 0.0419, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 71100 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.9366, "doc_norm": 1.5063, "encoder_q-embeddings": 2531.6318, "encoder_q-layer.0": 1786.7709, "encoder_q-layer.1": 1869.8262, "encoder_q-layer.10": 2803.095, "encoder_q-layer.11": 6423.5015, "encoder_q-layer.2": 2055.1582, "encoder_q-layer.3": 2180.6877, "encoder_q-layer.4": 2248.0396, "encoder_q-layer.5": 2358.3687, "encoder_q-layer.6": 2426.9199, "encoder_q-layer.7": 2525.1301, "encoder_q-layer.8": 2977.9983, "encoder_q-layer.9": 2669.4683, "epoch": 0.46, "inbatch_neg_score": 0.5139, "inbatch_pos_score": 1.0615, "learning_rate": 1.6000000000000003e-05, "loss": 3.9366, "norm_diff": 0.1338, "norm_loss": 0.0, "num_token_doc": 66.545, "num_token_overlap": 11.6894, "num_token_query": 31.9301, "num_token_union": 65.2276, "num_word_context": 201.9062, "num_word_doc": 49.6975, "num_word_query": 23.5749, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4299.6718, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5142, "query_norm": 1.3726, "queue_k_norm": 1.5069, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9301, "sent_len_1": 66.545, "sent_len_max_0": 127.6262, "sent_len_max_1": 186.52, "stdk": 0.048, "stdq": 0.0418, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 71200 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.952, "doc_norm": 1.5076, "encoder_q-embeddings": 2540.177, "encoder_q-layer.0": 1707.5417, "encoder_q-layer.1": 1880.7919, "encoder_q-layer.10": 2664.6047, "encoder_q-layer.11": 6426.5264, "encoder_q-layer.2": 2197.998, "encoder_q-layer.3": 2232.2542, "encoder_q-layer.4": 2173.9314, "encoder_q-layer.5": 2200.1868, "encoder_q-layer.6": 2257.1143, "encoder_q-layer.7": 2353.2634, "encoder_q-layer.8": 2680.0935, "encoder_q-layer.9": 2428.6626, "epoch": 0.46, "inbatch_neg_score": 0.5129, "inbatch_pos_score": 1.0596, "learning_rate": 1.5944444444444445e-05, "loss": 3.952, "norm_diff": 0.1258, "norm_loss": 0.0, "num_token_doc": 66.5859, "num_token_overlap": 11.6473, "num_token_query": 31.8559, "num_token_union": 65.2308, "num_word_context": 201.9968, "num_word_doc": 49.6658, "num_word_query": 23.5191, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4371.7535, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5127, "query_norm": 1.3818, "queue_k_norm": 1.5054, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8559, "sent_len_1": 66.5859, "sent_len_max_0": 127.5275, "sent_len_max_1": 191.4387, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 71300 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.9681, "doc_norm": 1.5105, "encoder_q-embeddings": 6463.833, "encoder_q-layer.0": 4366.0938, "encoder_q-layer.1": 4606.71, "encoder_q-layer.10": 4814.7607, "encoder_q-layer.11": 12257.9883, "encoder_q-layer.2": 5263.5713, "encoder_q-layer.3": 5523.2524, "encoder_q-layer.4": 5614.0884, "encoder_q-layer.5": 5513.0874, "encoder_q-layer.6": 5714.6313, "encoder_q-layer.7": 5416.4263, "encoder_q-layer.8": 5458.5811, "encoder_q-layer.9": 4836.4053, "epoch": 0.46, "inbatch_neg_score": 0.511, "inbatch_pos_score": 1.0518, "learning_rate": 1.588888888888889e-05, "loss": 3.9681, "norm_diff": 0.1211, "norm_loss": 0.0, "num_token_doc": 66.7863, "num_token_overlap": 11.6606, "num_token_query": 31.8967, "num_token_union": 65.3195, "num_word_context": 202.1855, "num_word_doc": 49.7963, "num_word_query": 23.5325, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9485.4294, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5107, "query_norm": 1.3894, "queue_k_norm": 1.5048, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8967, "sent_len_1": 66.7863, "sent_len_max_0": 127.4825, "sent_len_max_1": 191.225, "stdk": 0.0482, "stdq": 0.0426, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 71400 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.9603, "doc_norm": 1.507, "encoder_q-embeddings": 4870.0693, "encoder_q-layer.0": 3302.2837, "encoder_q-layer.1": 3499.895, "encoder_q-layer.10": 5020.6084, "encoder_q-layer.11": 12464.7031, "encoder_q-layer.2": 3932.7415, "encoder_q-layer.3": 4022.0632, "encoder_q-layer.4": 4392.4502, "encoder_q-layer.5": 4575.335, "encoder_q-layer.6": 5199.8198, "encoder_q-layer.7": 5274.9966, "encoder_q-layer.8": 5709.5825, "encoder_q-layer.9": 4786.5801, "epoch": 0.47, "inbatch_neg_score": 0.5106, "inbatch_pos_score": 1.0332, "learning_rate": 1.5833333333333333e-05, "loss": 3.9603, "norm_diff": 0.1398, "norm_loss": 0.0, "num_token_doc": 66.8644, "num_token_overlap": 11.683, "num_token_query": 31.9408, "num_token_union": 65.4131, "num_word_context": 202.386, "num_word_doc": 49.8803, "num_word_query": 23.586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8650.3006, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5103, "query_norm": 1.3672, "queue_k_norm": 1.508, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9408, "sent_len_1": 66.8644, "sent_len_max_0": 127.585, "sent_len_max_1": 191.7562, "stdk": 0.048, "stdq": 0.0416, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 71500 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.9547, "doc_norm": 1.512, "encoder_q-embeddings": 5202.54, "encoder_q-layer.0": 3369.9299, "encoder_q-layer.1": 3552.6125, "encoder_q-layer.10": 5474.9766, "encoder_q-layer.11": 13101.9648, "encoder_q-layer.2": 4162.8706, "encoder_q-layer.3": 4308.3838, "encoder_q-layer.4": 4580.5396, "encoder_q-layer.5": 4322.7065, "encoder_q-layer.6": 4691.6377, "encoder_q-layer.7": 4858.9121, "encoder_q-layer.8": 5500.0054, "encoder_q-layer.9": 4903.0747, "epoch": 0.47, "inbatch_neg_score": 0.5108, "inbatch_pos_score": 1.0488, "learning_rate": 1.577777777777778e-05, "loss": 3.9547, "norm_diff": 0.1311, "norm_loss": 0.0, "num_token_doc": 66.7021, "num_token_overlap": 11.6476, "num_token_query": 31.8088, "num_token_union": 65.2788, "num_word_context": 202.2684, "num_word_doc": 49.7568, "num_word_query": 23.4986, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8878.5533, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5098, "query_norm": 1.3809, "queue_k_norm": 1.5062, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8088, "sent_len_1": 66.7021, "sent_len_max_0": 127.6175, "sent_len_max_1": 188.6488, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 71600 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9484, "doc_norm": 1.5046, "encoder_q-embeddings": 5655.9434, "encoder_q-layer.0": 3847.3335, "encoder_q-layer.1": 4113.1445, "encoder_q-layer.10": 5172.5908, "encoder_q-layer.11": 12785.458, "encoder_q-layer.2": 5022.0308, "encoder_q-layer.3": 4884.2642, "encoder_q-layer.4": 4886.7285, "encoder_q-layer.5": 4693.8838, "encoder_q-layer.6": 4706.7534, "encoder_q-layer.7": 5150.4067, "encoder_q-layer.8": 5540.2554, "encoder_q-layer.9": 4795.2192, "epoch": 0.47, "inbatch_neg_score": 0.5113, "inbatch_pos_score": 1.0527, "learning_rate": 1.5722222222222225e-05, "loss": 3.9484, "norm_diff": 0.1163, "norm_loss": 0.0, "num_token_doc": 66.7352, "num_token_overlap": 11.7061, "num_token_query": 31.915, "num_token_union": 65.2932, "num_word_context": 202.0746, "num_word_doc": 49.7453, "num_word_query": 23.5345, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9130.8359, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5117, "query_norm": 1.3883, "queue_k_norm": 1.5079, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.915, "sent_len_1": 66.7352, "sent_len_max_0": 127.3425, "sent_len_max_1": 191.0213, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 71700 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9407, "doc_norm": 1.5129, "encoder_q-embeddings": 4448.5708, "encoder_q-layer.0": 3022.918, "encoder_q-layer.1": 3062.6941, "encoder_q-layer.10": 4903.062, "encoder_q-layer.11": 12142.8857, "encoder_q-layer.2": 3464.3223, "encoder_q-layer.3": 3708.8967, "encoder_q-layer.4": 3876.6641, "encoder_q-layer.5": 4186.3628, "encoder_q-layer.6": 4679.103, "encoder_q-layer.7": 4635.9248, "encoder_q-layer.8": 5625.6392, "encoder_q-layer.9": 4872.6079, "epoch": 0.47, "inbatch_neg_score": 0.511, "inbatch_pos_score": 1.0742, "learning_rate": 1.5666666666666667e-05, "loss": 3.9407, "norm_diff": 0.1123, "norm_loss": 0.0, "num_token_doc": 67.0096, "num_token_overlap": 11.6991, "num_token_query": 31.8314, "num_token_union": 65.4038, "num_word_context": 202.6374, "num_word_doc": 49.9903, "num_word_query": 23.5045, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8084.5336, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5122, "query_norm": 1.4006, "queue_k_norm": 1.5087, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8314, "sent_len_1": 67.0096, "sent_len_max_0": 127.4387, "sent_len_max_1": 189.7925, "stdk": 0.0483, "stdq": 0.0429, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 71800 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9661, "doc_norm": 1.5108, "encoder_q-embeddings": 5985.5508, "encoder_q-layer.0": 4242.1587, "encoder_q-layer.1": 4415.959, "encoder_q-layer.10": 4791.3955, "encoder_q-layer.11": 11877.6299, "encoder_q-layer.2": 5036.8579, "encoder_q-layer.3": 5634.23, "encoder_q-layer.4": 5664.9243, "encoder_q-layer.5": 5307.8291, "encoder_q-layer.6": 5137.687, "encoder_q-layer.7": 5243.0449, "encoder_q-layer.8": 5481.7427, "encoder_q-layer.9": 4728.6704, "epoch": 0.47, "inbatch_neg_score": 0.5083, "inbatch_pos_score": 1.0645, "learning_rate": 1.5611111111111113e-05, "loss": 3.9661, "norm_diff": 0.1194, "norm_loss": 0.0, "num_token_doc": 66.6949, "num_token_overlap": 11.5792, "num_token_query": 31.8588, "num_token_union": 65.3658, "num_word_context": 202.3235, "num_word_doc": 49.7526, "num_word_query": 23.5406, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9172.8227, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5103, "query_norm": 1.3914, "queue_k_norm": 1.5083, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8588, "sent_len_1": 66.6949, "sent_len_max_0": 127.5238, "sent_len_max_1": 190.89, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 71900 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.9504, "doc_norm": 1.5017, "encoder_q-embeddings": 22331.9785, "encoder_q-layer.0": 16281.6172, "encoder_q-layer.1": 16868.7734, "encoder_q-layer.10": 5020.0371, "encoder_q-layer.11": 12750.8125, "encoder_q-layer.2": 18798.9727, "encoder_q-layer.3": 19216.2129, "encoder_q-layer.4": 20789.5293, "encoder_q-layer.5": 21195.8457, "encoder_q-layer.6": 22681.3105, "encoder_q-layer.7": 24130.4336, "encoder_q-layer.8": 18113.3926, "encoder_q-layer.9": 6687.2651, "epoch": 0.47, "inbatch_neg_score": 0.5197, "inbatch_pos_score": 1.0498, "learning_rate": 1.5555555555555555e-05, "loss": 3.9504, "norm_diff": 0.115, "norm_loss": 0.0, "num_token_doc": 66.9698, "num_token_overlap": 11.6942, "num_token_query": 32.0126, "num_token_union": 65.4996, "num_word_context": 202.6351, "num_word_doc": 49.9825, "num_word_query": 23.6613, "postclip_grad_norm": 1.0, "preclip_grad_norm": 27902.036, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.5186, "query_norm": 1.3867, "queue_k_norm": 1.5076, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0126, "sent_len_1": 66.9698, "sent_len_max_0": 127.4862, "sent_len_max_1": 188.1238, "stdk": 0.0477, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 72000 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.9498, "doc_norm": 1.5089, "encoder_q-embeddings": 9421.4648, "encoder_q-layer.0": 6078.1836, "encoder_q-layer.1": 6754.6763, "encoder_q-layer.10": 5434.3594, "encoder_q-layer.11": 12941.6025, "encoder_q-layer.2": 7938.4897, "encoder_q-layer.3": 8681.8018, "encoder_q-layer.4": 9752.0508, "encoder_q-layer.5": 10900.291, "encoder_q-layer.6": 10966.7871, "encoder_q-layer.7": 10977.8008, "encoder_q-layer.8": 9072.9004, "encoder_q-layer.9": 6022.3564, "epoch": 0.47, "inbatch_neg_score": 0.5161, "inbatch_pos_score": 1.0537, "learning_rate": 1.55e-05, "loss": 3.9498, "norm_diff": 0.1096, "norm_loss": 0.0, "num_token_doc": 66.71, "num_token_overlap": 11.6806, "num_token_query": 31.9468, "num_token_union": 65.3724, "num_word_context": 202.1638, "num_word_doc": 49.7807, "num_word_query": 23.6042, "postclip_grad_norm": 1.0, "preclip_grad_norm": 13806.8961, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5166, "query_norm": 1.3993, "queue_k_norm": 1.5079, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9468, "sent_len_1": 66.71, "sent_len_max_0": 127.4862, "sent_len_max_1": 188.945, "stdk": 0.0481, "stdq": 0.0426, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 72100 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9382, "doc_norm": 1.5086, "encoder_q-embeddings": 4543.3418, "encoder_q-layer.0": 2942.7087, "encoder_q-layer.1": 2931.2317, "encoder_q-layer.10": 5362.2759, "encoder_q-layer.11": 13340.5859, "encoder_q-layer.2": 3327.2605, "encoder_q-layer.3": 3302.3662, "encoder_q-layer.4": 3367.1663, "encoder_q-layer.5": 3364.3779, "encoder_q-layer.6": 3792.3376, "encoder_q-layer.7": 4211.4326, "encoder_q-layer.8": 5148.2822, "encoder_q-layer.9": 4974.4448, "epoch": 0.47, "inbatch_neg_score": 0.5171, "inbatch_pos_score": 1.0547, "learning_rate": 1.5444444444444446e-05, "loss": 3.9382, "norm_diff": 0.1123, "norm_loss": 0.0, "num_token_doc": 66.817, "num_token_overlap": 11.6711, "num_token_query": 31.9367, "num_token_union": 65.441, "num_word_context": 202.1612, "num_word_doc": 49.8659, "num_word_query": 23.5968, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8139.9196, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5176, "query_norm": 1.3963, "queue_k_norm": 1.5086, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9367, "sent_len_1": 66.817, "sent_len_max_0": 127.4013, "sent_len_max_1": 190.115, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 72200 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.9491, "doc_norm": 1.509, "encoder_q-embeddings": 3200.7625, "encoder_q-layer.0": 2145.5156, "encoder_q-layer.1": 2250.5093, "encoder_q-layer.10": 2755.0024, "encoder_q-layer.11": 6368.4336, "encoder_q-layer.2": 2626.7383, "encoder_q-layer.3": 2671.3911, "encoder_q-layer.4": 2815.4407, "encoder_q-layer.5": 2772.1887, "encoder_q-layer.6": 2995.8999, "encoder_q-layer.7": 3040.8535, "encoder_q-layer.8": 3236.291, "encoder_q-layer.9": 2660.7507, "epoch": 0.47, "inbatch_neg_score": 0.5174, "inbatch_pos_score": 1.0635, "learning_rate": 1.538888888888889e-05, "loss": 3.9491, "norm_diff": 0.1219, "norm_loss": 0.0, "num_token_doc": 66.5003, "num_token_overlap": 11.6759, "num_token_query": 31.845, "num_token_union": 65.1385, "num_word_context": 202.0187, "num_word_doc": 49.6639, "num_word_query": 23.5121, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4927.6206, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5181, "query_norm": 1.3871, "queue_k_norm": 1.5088, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.845, "sent_len_1": 66.5003, "sent_len_max_0": 127.5863, "sent_len_max_1": 187.0025, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 72300 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.9565, "doc_norm": 1.5082, "encoder_q-embeddings": 3499.8345, "encoder_q-layer.0": 2544.6379, "encoder_q-layer.1": 2821.0503, "encoder_q-layer.10": 2691.6125, "encoder_q-layer.11": 6042.0947, "encoder_q-layer.2": 3427.4951, "encoder_q-layer.3": 3529.5896, "encoder_q-layer.4": 3409.417, "encoder_q-layer.5": 2772.8091, "encoder_q-layer.6": 2514.2837, "encoder_q-layer.7": 2338.9465, "encoder_q-layer.8": 2802.6494, "encoder_q-layer.9": 2373.5967, "epoch": 0.47, "inbatch_neg_score": 0.5186, "inbatch_pos_score": 1.0615, "learning_rate": 1.5333333333333334e-05, "loss": 3.9565, "norm_diff": 0.1253, "norm_loss": 0.0, "num_token_doc": 66.9336, "num_token_overlap": 11.694, "num_token_query": 31.995, "num_token_union": 65.5078, "num_word_context": 202.3582, "num_word_doc": 49.9477, "num_word_query": 23.6537, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4996.8981, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5186, "query_norm": 1.3828, "queue_k_norm": 1.508, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.995, "sent_len_1": 66.9336, "sent_len_max_0": 127.6825, "sent_len_max_1": 190.3063, "stdk": 0.048, "stdq": 0.0417, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 72400 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.9566, "doc_norm": 1.5055, "encoder_q-embeddings": 2486.1475, "encoder_q-layer.0": 1689.8524, "encoder_q-layer.1": 1793.3778, "encoder_q-layer.10": 2546.7373, "encoder_q-layer.11": 6528.4854, "encoder_q-layer.2": 2075.2302, "encoder_q-layer.3": 2153.9768, "encoder_q-layer.4": 2269.5012, "encoder_q-layer.5": 2207.5405, "encoder_q-layer.6": 2485.072, "encoder_q-layer.7": 2482.2014, "encoder_q-layer.8": 2718.6667, "encoder_q-layer.9": 2411.6782, "epoch": 0.47, "inbatch_neg_score": 0.5195, "inbatch_pos_score": 1.084, "learning_rate": 1.527777777777778e-05, "loss": 3.9566, "norm_diff": 0.0992, "norm_loss": 0.0, "num_token_doc": 66.6689, "num_token_overlap": 11.6892, "num_token_query": 31.9378, "num_token_union": 65.3271, "num_word_context": 202.3477, "num_word_doc": 49.7481, "num_word_query": 23.579, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4375.9102, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5195, "query_norm": 1.4063, "queue_k_norm": 1.5077, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9378, "sent_len_1": 66.6689, "sent_len_max_0": 127.5362, "sent_len_max_1": 188.7375, "stdk": 0.0479, "stdq": 0.0427, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 72500 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.9472, "doc_norm": 1.5133, "encoder_q-embeddings": 2456.9863, "encoder_q-layer.0": 1616.3206, "encoder_q-layer.1": 1769.2365, "encoder_q-layer.10": 2706.0769, "encoder_q-layer.11": 6579.1748, "encoder_q-layer.2": 1987.4955, "encoder_q-layer.3": 2098.1804, "encoder_q-layer.4": 2296.2551, "encoder_q-layer.5": 2321.2444, "encoder_q-layer.6": 2672.6013, "encoder_q-layer.7": 3203.48, "encoder_q-layer.8": 3354.3552, "encoder_q-layer.9": 2478.5776, "epoch": 0.47, "inbatch_neg_score": 0.5192, "inbatch_pos_score": 1.0625, "learning_rate": 1.5222222222222224e-05, "loss": 3.9472, "norm_diff": 0.1227, "norm_loss": 0.0, "num_token_doc": 66.6044, "num_token_overlap": 11.7026, "num_token_query": 31.958, "num_token_union": 65.2779, "num_word_context": 202.0742, "num_word_doc": 49.6942, "num_word_query": 23.6108, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4596.5529, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.519, "query_norm": 1.3906, "queue_k_norm": 1.5077, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.958, "sent_len_1": 66.6044, "sent_len_max_0": 127.5325, "sent_len_max_1": 187.265, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 72600 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 3.9468, "doc_norm": 1.5046, "encoder_q-embeddings": 2850.7148, "encoder_q-layer.0": 2035.552, "encoder_q-layer.1": 2195.4778, "encoder_q-layer.10": 2629.3274, "encoder_q-layer.11": 6421.3286, "encoder_q-layer.2": 2359.9761, "encoder_q-layer.3": 2560.5735, "encoder_q-layer.4": 2627.4277, "encoder_q-layer.5": 2669.1309, "encoder_q-layer.6": 2893.1794, "encoder_q-layer.7": 3201.0447, "encoder_q-layer.8": 3150.4868, "encoder_q-layer.9": 2408.9102, "epoch": 0.47, "inbatch_neg_score": 0.5224, "inbatch_pos_score": 1.0664, "learning_rate": 1.5166666666666668e-05, "loss": 3.9468, "norm_diff": 0.1046, "norm_loss": 0.0, "num_token_doc": 66.5892, "num_token_overlap": 11.6481, "num_token_query": 31.7991, "num_token_union": 65.2023, "num_word_context": 201.763, "num_word_doc": 49.7047, "num_word_query": 23.4766, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4768.3731, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5234, "query_norm": 1.4001, "queue_k_norm": 1.5081, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7991, "sent_len_1": 66.5892, "sent_len_max_0": 127.3875, "sent_len_max_1": 187.9062, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 72700 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.952, "doc_norm": 1.5085, "encoder_q-embeddings": 2950.6382, "encoder_q-layer.0": 2033.0533, "encoder_q-layer.1": 2190.7712, "encoder_q-layer.10": 2670.7336, "encoder_q-layer.11": 6327.7163, "encoder_q-layer.2": 2507.7476, "encoder_q-layer.3": 2619.3557, "encoder_q-layer.4": 2707.2319, "encoder_q-layer.5": 2598.2424, "encoder_q-layer.6": 2571.2754, "encoder_q-layer.7": 2530.1619, "encoder_q-layer.8": 2758.7056, "encoder_q-layer.9": 2461.9126, "epoch": 0.47, "inbatch_neg_score": 0.526, "inbatch_pos_score": 1.085, "learning_rate": 1.5111111111111112e-05, "loss": 3.952, "norm_diff": 0.1157, "norm_loss": 0.0, "num_token_doc": 66.7051, "num_token_overlap": 11.6233, "num_token_query": 31.8244, "num_token_union": 65.3477, "num_word_context": 202.4114, "num_word_doc": 49.7491, "num_word_query": 23.5025, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4701.0756, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5259, "query_norm": 1.3928, "queue_k_norm": 1.5087, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8244, "sent_len_1": 66.7051, "sent_len_max_0": 127.3838, "sent_len_max_1": 189.1912, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 72800 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.9385, "doc_norm": 1.51, "encoder_q-embeddings": 3281.2593, "encoder_q-layer.0": 2265.2075, "encoder_q-layer.1": 2599.2732, "encoder_q-layer.10": 2323.4099, "encoder_q-layer.11": 6036.3413, "encoder_q-layer.2": 2975.0806, "encoder_q-layer.3": 3140.5339, "encoder_q-layer.4": 3290.3171, "encoder_q-layer.5": 3313.7144, "encoder_q-layer.6": 2990.7317, "encoder_q-layer.7": 2644.3696, "encoder_q-layer.8": 2716.8604, "encoder_q-layer.9": 2249.9438, "epoch": 0.47, "inbatch_neg_score": 0.5295, "inbatch_pos_score": 1.0664, "learning_rate": 1.5055555555555556e-05, "loss": 3.9385, "norm_diff": 0.1164, "norm_loss": 0.0, "num_token_doc": 66.7733, "num_token_overlap": 11.6838, "num_token_query": 31.9486, "num_token_union": 65.3831, "num_word_context": 202.3504, "num_word_doc": 49.8388, "num_word_query": 23.5843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4921.3387, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5288, "query_norm": 1.3935, "queue_k_norm": 1.5083, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9486, "sent_len_1": 66.7733, "sent_len_max_0": 127.61, "sent_len_max_1": 190.6887, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 72900 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9249, "doc_norm": 1.5096, "encoder_q-embeddings": 2436.4714, "encoder_q-layer.0": 1705.5977, "encoder_q-layer.1": 1897.3896, "encoder_q-layer.10": 2544.7708, "encoder_q-layer.11": 6378.5215, "encoder_q-layer.2": 2285.5728, "encoder_q-layer.3": 2351.4119, "encoder_q-layer.4": 2599.1484, "encoder_q-layer.5": 2683.6748, "encoder_q-layer.6": 2664.0903, "encoder_q-layer.7": 2586.5383, "encoder_q-layer.8": 2850.6382, "encoder_q-layer.9": 2451.1118, "epoch": 0.48, "inbatch_neg_score": 0.5272, "inbatch_pos_score": 1.085, "learning_rate": 1.5e-05, "loss": 3.9249, "norm_diff": 0.0921, "norm_loss": 0.0, "num_token_doc": 67.0766, "num_token_overlap": 11.7094, "num_token_query": 31.9062, "num_token_union": 65.4968, "num_word_context": 202.4045, "num_word_doc": 50.038, "num_word_query": 23.5614, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4489.512, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5278, "query_norm": 1.4175, "queue_k_norm": 1.5106, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9062, "sent_len_1": 67.0766, "sent_len_max_0": 127.7262, "sent_len_max_1": 189.425, "stdk": 0.048, "stdq": 0.0431, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73000 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 3.9488, "doc_norm": 1.5053, "encoder_q-embeddings": 2446.7305, "encoder_q-layer.0": 1643.7765, "encoder_q-layer.1": 1884.3534, "encoder_q-layer.10": 2664.7869, "encoder_q-layer.11": 6732.6807, "encoder_q-layer.2": 2090.9504, "encoder_q-layer.3": 2032.4946, "encoder_q-layer.4": 1999.9457, "encoder_q-layer.5": 2046.058, "encoder_q-layer.6": 2185.1055, "encoder_q-layer.7": 2315.5354, "encoder_q-layer.8": 2662.4465, "encoder_q-layer.9": 2418.804, "epoch": 0.48, "inbatch_neg_score": 0.5334, "inbatch_pos_score": 1.0625, "learning_rate": 1.4944444444444444e-05, "loss": 3.9488, "norm_diff": 0.1072, "norm_loss": 0.0, "num_token_doc": 66.7412, "num_token_overlap": 11.7059, "num_token_query": 32.0089, "num_token_union": 65.373, "num_word_context": 202.4178, "num_word_doc": 49.8151, "num_word_query": 23.6346, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4361.2453, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5327, "query_norm": 1.3981, "queue_k_norm": 1.51, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0089, "sent_len_1": 66.7412, "sent_len_max_0": 127.415, "sent_len_max_1": 190.1425, "stdk": 0.0478, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73100 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9744, "doc_norm": 1.5091, "encoder_q-embeddings": 2694.0491, "encoder_q-layer.0": 1928.1075, "encoder_q-layer.1": 2083.0835, "encoder_q-layer.10": 2704.9587, "encoder_q-layer.11": 6217.8818, "encoder_q-layer.2": 2408.2966, "encoder_q-layer.3": 2480.1099, "encoder_q-layer.4": 2883.9314, "encoder_q-layer.5": 2768.51, "encoder_q-layer.6": 2654.3379, "encoder_q-layer.7": 2746.6997, "encoder_q-layer.8": 2788.8682, "encoder_q-layer.9": 2361.5977, "epoch": 0.48, "inbatch_neg_score": 0.5365, "inbatch_pos_score": 1.0869, "learning_rate": 1.4888888888888888e-05, "loss": 3.9744, "norm_diff": 0.0996, "norm_loss": 0.0, "num_token_doc": 66.4622, "num_token_overlap": 11.6129, "num_token_query": 31.6453, "num_token_union": 65.0327, "num_word_context": 201.8488, "num_word_doc": 49.6015, "num_word_query": 23.3748, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4613.7016, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5361, "query_norm": 1.4095, "queue_k_norm": 1.5092, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.6453, "sent_len_1": 66.4622, "sent_len_max_0": 127.4262, "sent_len_max_1": 188.8313, "stdk": 0.0479, "stdq": 0.0427, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 73200 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.9594, "doc_norm": 1.5086, "encoder_q-embeddings": 2515.2039, "encoder_q-layer.0": 1656.5129, "encoder_q-layer.1": 1911.9541, "encoder_q-layer.10": 2695.9136, "encoder_q-layer.11": 6538.7959, "encoder_q-layer.2": 2198.7112, "encoder_q-layer.3": 2422.8857, "encoder_q-layer.4": 2408.3516, "encoder_q-layer.5": 2507.7566, "encoder_q-layer.6": 2733.459, "encoder_q-layer.7": 3053.3813, "encoder_q-layer.8": 2761.1758, "encoder_q-layer.9": 2450.8577, "epoch": 0.48, "inbatch_neg_score": 0.5354, "inbatch_pos_score": 1.0908, "learning_rate": 1.4833333333333336e-05, "loss": 3.9594, "norm_diff": 0.1132, "norm_loss": 0.0, "num_token_doc": 66.8138, "num_token_overlap": 11.647, "num_token_query": 31.8025, "num_token_union": 65.3854, "num_word_context": 202.5478, "num_word_doc": 49.8695, "num_word_query": 23.4577, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4408.6194, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5361, "query_norm": 1.3954, "queue_k_norm": 1.5096, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8025, "sent_len_1": 66.8138, "sent_len_max_0": 127.3988, "sent_len_max_1": 185.8363, "stdk": 0.0479, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73300 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9476, "doc_norm": 1.5099, "encoder_q-embeddings": 2525.5493, "encoder_q-layer.0": 1670.9385, "encoder_q-layer.1": 1851.1063, "encoder_q-layer.10": 2416.7395, "encoder_q-layer.11": 6277.5874, "encoder_q-layer.2": 2153.1743, "encoder_q-layer.3": 2216.2786, "encoder_q-layer.4": 2334.425, "encoder_q-layer.5": 2419.7661, "encoder_q-layer.6": 2638.2737, "encoder_q-layer.7": 2844.0459, "encoder_q-layer.8": 2892.2512, "encoder_q-layer.9": 2372.9321, "epoch": 0.48, "inbatch_neg_score": 0.5392, "inbatch_pos_score": 1.0811, "learning_rate": 1.477777777777778e-05, "loss": 3.9476, "norm_diff": 0.1248, "norm_loss": 0.0, "num_token_doc": 66.7275, "num_token_overlap": 11.6519, "num_token_query": 31.7616, "num_token_union": 65.2609, "num_word_context": 201.8951, "num_word_doc": 49.7911, "num_word_query": 23.4577, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4396.6563, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.54, "query_norm": 1.3851, "queue_k_norm": 1.5101, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7616, "sent_len_1": 66.7275, "sent_len_max_0": 127.6112, "sent_len_max_1": 190.6612, "stdk": 0.048, "stdq": 0.0417, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73400 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.917, "doc_norm": 1.5148, "encoder_q-embeddings": 2715.1965, "encoder_q-layer.0": 1840.7119, "encoder_q-layer.1": 2083.1492, "encoder_q-layer.10": 2754.8535, "encoder_q-layer.11": 6406.9731, "encoder_q-layer.2": 2562.8948, "encoder_q-layer.3": 2771.7295, "encoder_q-layer.4": 2895.6218, "encoder_q-layer.5": 2966.2373, "encoder_q-layer.6": 2890.7717, "encoder_q-layer.7": 2943.5957, "encoder_q-layer.8": 2870.2085, "encoder_q-layer.9": 2351.2305, "epoch": 0.48, "inbatch_neg_score": 0.5421, "inbatch_pos_score": 1.0791, "learning_rate": 1.4722222222222224e-05, "loss": 3.917, "norm_diff": 0.1139, "norm_loss": 0.0, "num_token_doc": 66.866, "num_token_overlap": 11.6683, "num_token_query": 31.7699, "num_token_union": 65.3955, "num_word_context": 202.3363, "num_word_doc": 49.8909, "num_word_query": 23.4617, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4718.9674, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5415, "query_norm": 1.4009, "queue_k_norm": 1.5128, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7699, "sent_len_1": 66.866, "sent_len_max_0": 127.3738, "sent_len_max_1": 191.4688, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73500 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.946, "doc_norm": 1.5165, "encoder_q-embeddings": 5700.1001, "encoder_q-layer.0": 4073.5059, "encoder_q-layer.1": 4365.4863, "encoder_q-layer.10": 2609.4653, "encoder_q-layer.11": 6492.3604, "encoder_q-layer.2": 5109.9014, "encoder_q-layer.3": 5453.2744, "encoder_q-layer.4": 5441.832, "encoder_q-layer.5": 5819.3799, "encoder_q-layer.6": 5318.5786, "encoder_q-layer.7": 4913.168, "encoder_q-layer.8": 4555.0093, "encoder_q-layer.9": 2905.3025, "epoch": 0.48, "inbatch_neg_score": 0.5405, "inbatch_pos_score": 1.0811, "learning_rate": 1.4666666666666668e-05, "loss": 3.946, "norm_diff": 0.1194, "norm_loss": 0.0, "num_token_doc": 66.8822, "num_token_overlap": 11.6994, "num_token_query": 31.8866, "num_token_union": 65.4024, "num_word_context": 202.0043, "num_word_doc": 49.8913, "num_word_query": 23.5599, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7556.5758, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5405, "query_norm": 1.397, "queue_k_norm": 1.5127, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8866, "sent_len_1": 66.8822, "sent_len_max_0": 127.545, "sent_len_max_1": 190.9, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73600 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.9362, "doc_norm": 1.5067, "encoder_q-embeddings": 3645.386, "encoder_q-layer.0": 2597.1372, "encoder_q-layer.1": 2703.4873, "encoder_q-layer.10": 2459.1582, "encoder_q-layer.11": 6147.9536, "encoder_q-layer.2": 3065.1331, "encoder_q-layer.3": 3378.3455, "encoder_q-layer.4": 3661.55, "encoder_q-layer.5": 3586.6431, "encoder_q-layer.6": 3572.3867, "encoder_q-layer.7": 3177.8403, "encoder_q-layer.8": 3028.4524, "encoder_q-layer.9": 2412.9426, "epoch": 0.48, "inbatch_neg_score": 0.5419, "inbatch_pos_score": 1.1006, "learning_rate": 1.4611111111111112e-05, "loss": 3.9362, "norm_diff": 0.1043, "norm_loss": 0.0, "num_token_doc": 66.9854, "num_token_overlap": 11.7055, "num_token_query": 31.9726, "num_token_union": 65.4916, "num_word_context": 202.1283, "num_word_doc": 49.9839, "num_word_query": 23.6289, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5278.8326, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.542, "query_norm": 1.4024, "queue_k_norm": 1.5136, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9726, "sent_len_1": 66.9854, "sent_len_max_0": 127.6513, "sent_len_max_1": 191.22, "stdk": 0.0478, "stdq": 0.0426, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73700 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.9523, "doc_norm": 1.5155, "encoder_q-embeddings": 2400.3584, "encoder_q-layer.0": 1600.926, "encoder_q-layer.1": 1764.6854, "encoder_q-layer.10": 2400.3748, "encoder_q-layer.11": 6231.0215, "encoder_q-layer.2": 2105.6389, "encoder_q-layer.3": 2174.3057, "encoder_q-layer.4": 2283.6011, "encoder_q-layer.5": 2422.6074, "encoder_q-layer.6": 2541.4023, "encoder_q-layer.7": 2557.0811, "encoder_q-layer.8": 2899.7207, "encoder_q-layer.9": 2382.4243, "epoch": 0.48, "inbatch_neg_score": 0.5418, "inbatch_pos_score": 1.0752, "learning_rate": 1.4555555555555556e-05, "loss": 3.9523, "norm_diff": 0.1286, "norm_loss": 0.0, "num_token_doc": 66.6731, "num_token_overlap": 11.6932, "num_token_query": 31.9981, "num_token_union": 65.3556, "num_word_context": 202.6775, "num_word_doc": 49.7607, "num_word_query": 23.6265, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4339.9483, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.542, "query_norm": 1.3869, "queue_k_norm": 1.5139, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9981, "sent_len_1": 66.6731, "sent_len_max_0": 127.4788, "sent_len_max_1": 188.5375, "stdk": 0.0481, "stdq": 0.0419, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73800 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.9448, "doc_norm": 1.5144, "encoder_q-embeddings": 4876.8477, "encoder_q-layer.0": 3686.2236, "encoder_q-layer.1": 4020.7388, "encoder_q-layer.10": 2557.4058, "encoder_q-layer.11": 6563.8979, "encoder_q-layer.2": 4735.3496, "encoder_q-layer.3": 4971.7993, "encoder_q-layer.4": 5497.3726, "encoder_q-layer.5": 5329.6689, "encoder_q-layer.6": 4505.5474, "encoder_q-layer.7": 3788.2888, "encoder_q-layer.8": 3532.615, "encoder_q-layer.9": 2542.6111, "epoch": 0.48, "inbatch_neg_score": 0.5414, "inbatch_pos_score": 1.0938, "learning_rate": 1.45e-05, "loss": 3.9448, "norm_diff": 0.1219, "norm_loss": 0.0, "num_token_doc": 66.9168, "num_token_overlap": 11.6884, "num_token_query": 31.9661, "num_token_union": 65.4416, "num_word_context": 202.3633, "num_word_doc": 49.9151, "num_word_query": 23.5894, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6690.6687, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.542, "query_norm": 1.3925, "queue_k_norm": 1.5137, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9661, "sent_len_1": 66.9168, "sent_len_max_0": 127.645, "sent_len_max_1": 191.56, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 73900 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.9617, "doc_norm": 1.5142, "encoder_q-embeddings": 2041.5581, "encoder_q-layer.0": 1340.7323, "encoder_q-layer.1": 1382.8458, "encoder_q-layer.10": 2559.814, "encoder_q-layer.11": 6448.5518, "encoder_q-layer.2": 1542.2551, "encoder_q-layer.3": 1651.2788, "encoder_q-layer.4": 1817.4698, "encoder_q-layer.5": 1770.5521, "encoder_q-layer.6": 1990.2075, "encoder_q-layer.7": 2231.2795, "encoder_q-layer.8": 2594.9243, "encoder_q-layer.9": 2465.7778, "epoch": 0.48, "inbatch_neg_score": 0.5434, "inbatch_pos_score": 1.0977, "learning_rate": 1.4444444444444444e-05, "loss": 3.9617, "norm_diff": 0.122, "norm_loss": 0.0, "num_token_doc": 66.5795, "num_token_overlap": 11.6531, "num_token_query": 31.8747, "num_token_union": 65.2193, "num_word_context": 202.1613, "num_word_doc": 49.7169, "num_word_query": 23.5674, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4029.3563, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5425, "query_norm": 1.3921, "queue_k_norm": 1.5149, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8747, "sent_len_1": 66.5795, "sent_len_max_0": 127.5162, "sent_len_max_1": 188.8825, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74000 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.943, "doc_norm": 1.5149, "encoder_q-embeddings": 2198.0603, "encoder_q-layer.0": 1524.1886, "encoder_q-layer.1": 1565.8851, "encoder_q-layer.10": 2622.0366, "encoder_q-layer.11": 6713.5063, "encoder_q-layer.2": 1767.3124, "encoder_q-layer.3": 1835.854, "encoder_q-layer.4": 1896.1198, "encoder_q-layer.5": 1928.056, "encoder_q-layer.6": 2098.3291, "encoder_q-layer.7": 2378.8335, "encoder_q-layer.8": 2614.0774, "encoder_q-layer.9": 2367.5464, "epoch": 0.48, "inbatch_neg_score": 0.5451, "inbatch_pos_score": 1.0781, "learning_rate": 1.438888888888889e-05, "loss": 3.943, "norm_diff": 0.1302, "norm_loss": 0.0, "num_token_doc": 66.7935, "num_token_overlap": 11.6265, "num_token_query": 31.9109, "num_token_union": 65.4198, "num_word_context": 202.1567, "num_word_doc": 49.8064, "num_word_query": 23.5574, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4200.4415, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5454, "query_norm": 1.3847, "queue_k_norm": 1.5156, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9109, "sent_len_1": 66.7935, "sent_len_max_0": 127.515, "sent_len_max_1": 191.205, "stdk": 0.048, "stdq": 0.0417, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74100 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.929, "doc_norm": 1.5186, "encoder_q-embeddings": 2401.8621, "encoder_q-layer.0": 1567.875, "encoder_q-layer.1": 1689.5032, "encoder_q-layer.10": 2570.5112, "encoder_q-layer.11": 6274.3892, "encoder_q-layer.2": 1955.4961, "encoder_q-layer.3": 1959.3417, "encoder_q-layer.4": 2160.6294, "encoder_q-layer.5": 2367.7263, "encoder_q-layer.6": 2341.1912, "encoder_q-layer.7": 2416.6606, "encoder_q-layer.8": 2734.5762, "encoder_q-layer.9": 2392.3799, "epoch": 0.48, "inbatch_neg_score": 0.545, "inbatch_pos_score": 1.1064, "learning_rate": 1.4333333333333334e-05, "loss": 3.929, "norm_diff": 0.1114, "norm_loss": 0.0, "num_token_doc": 67.0282, "num_token_overlap": 11.6974, "num_token_query": 31.8773, "num_token_union": 65.4446, "num_word_context": 202.8505, "num_word_doc": 49.9712, "num_word_query": 23.5404, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4240.5391, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5449, "query_norm": 1.4073, "queue_k_norm": 1.5142, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8773, "sent_len_1": 67.0282, "sent_len_max_0": 127.4912, "sent_len_max_1": 192.5225, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74200 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.9455, "doc_norm": 1.5206, "encoder_q-embeddings": 4672.6133, "encoder_q-layer.0": 3159.6968, "encoder_q-layer.1": 3505.8528, "encoder_q-layer.10": 4861.9243, "encoder_q-layer.11": 12207.4131, "encoder_q-layer.2": 4034.5889, "encoder_q-layer.3": 4132.5693, "encoder_q-layer.4": 4351.3896, "encoder_q-layer.5": 4352.6875, "encoder_q-layer.6": 4556.5063, "encoder_q-layer.7": 4916.2842, "encoder_q-layer.8": 5092.3164, "encoder_q-layer.9": 4566.6226, "epoch": 0.48, "inbatch_neg_score": 0.5483, "inbatch_pos_score": 1.1006, "learning_rate": 1.427777777777778e-05, "loss": 3.9455, "norm_diff": 0.1232, "norm_loss": 0.0, "num_token_doc": 66.7244, "num_token_overlap": 11.635, "num_token_query": 31.893, "num_token_union": 65.3462, "num_word_context": 202.4495, "num_word_doc": 49.7811, "num_word_query": 23.5461, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8384.6116, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5488, "query_norm": 1.3974, "queue_k_norm": 1.5152, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.893, "sent_len_1": 66.7244, "sent_len_max_0": 127.64, "sent_len_max_1": 190.335, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74300 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9267, "doc_norm": 1.5096, "encoder_q-embeddings": 6100.1343, "encoder_q-layer.0": 4351.4897, "encoder_q-layer.1": 4526.7715, "encoder_q-layer.10": 5747.5249, "encoder_q-layer.11": 12995.5898, "encoder_q-layer.2": 4921.6133, "encoder_q-layer.3": 4874.6729, "encoder_q-layer.4": 5250.2739, "encoder_q-layer.5": 5271.7539, "encoder_q-layer.6": 5065.5581, "encoder_q-layer.7": 5324.875, "encoder_q-layer.8": 5492.2812, "encoder_q-layer.9": 5013.002, "epoch": 0.48, "inbatch_neg_score": 0.5504, "inbatch_pos_score": 1.0889, "learning_rate": 1.4222222222222224e-05, "loss": 3.9267, "norm_diff": 0.1099, "norm_loss": 0.0, "num_token_doc": 66.9128, "num_token_overlap": 11.6913, "num_token_query": 31.9741, "num_token_union": 65.4147, "num_word_context": 202.4347, "num_word_doc": 49.8872, "num_word_query": 23.6086, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9242.9427, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5503, "query_norm": 1.3997, "queue_k_norm": 1.5156, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9741, "sent_len_1": 66.9128, "sent_len_max_0": 127.6562, "sent_len_max_1": 193.1725, "stdk": 0.0478, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74400 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.9348, "doc_norm": 1.5122, "encoder_q-embeddings": 5285.4028, "encoder_q-layer.0": 3893.9746, "encoder_q-layer.1": 4723.6597, "encoder_q-layer.10": 2853.2566, "encoder_q-layer.11": 6702.2549, "encoder_q-layer.2": 5962.3042, "encoder_q-layer.3": 6181.5391, "encoder_q-layer.4": 5782.5679, "encoder_q-layer.5": 4610.7651, "encoder_q-layer.6": 3418.1768, "encoder_q-layer.7": 3126.7842, "encoder_q-layer.8": 3145.812, "encoder_q-layer.9": 2658.3486, "epoch": 0.48, "inbatch_neg_score": 0.5539, "inbatch_pos_score": 1.0781, "learning_rate": 1.4166666666666668e-05, "loss": 3.9348, "norm_diff": 0.1281, "norm_loss": 0.0, "num_token_doc": 66.5437, "num_token_overlap": 11.6406, "num_token_query": 31.8389, "num_token_union": 65.1812, "num_word_context": 202.0488, "num_word_doc": 49.6617, "num_word_query": 23.5087, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7155.0883, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5532, "query_norm": 1.3841, "queue_k_norm": 1.5155, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8389, "sent_len_1": 66.5437, "sent_len_max_0": 127.4813, "sent_len_max_1": 191.42, "stdk": 0.0478, "stdq": 0.0415, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74500 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.9097, "doc_norm": 1.5182, "encoder_q-embeddings": 2191.4351, "encoder_q-layer.0": 1510.6604, "encoder_q-layer.1": 1605.5468, "encoder_q-layer.10": 2614.7039, "encoder_q-layer.11": 6566.0122, "encoder_q-layer.2": 1779.5077, "encoder_q-layer.3": 1841.6102, "encoder_q-layer.4": 1938.5721, "encoder_q-layer.5": 1966.2212, "encoder_q-layer.6": 2038.8477, "encoder_q-layer.7": 2171.8931, "encoder_q-layer.8": 2623.4988, "encoder_q-layer.9": 2507.5034, "epoch": 0.49, "inbatch_neg_score": 0.5527, "inbatch_pos_score": 1.1113, "learning_rate": 1.4111111111111112e-05, "loss": 3.9097, "norm_diff": 0.1149, "norm_loss": 0.0, "num_token_doc": 66.8358, "num_token_overlap": 11.6731, "num_token_query": 31.8141, "num_token_union": 65.358, "num_word_context": 202.3039, "num_word_doc": 49.8991, "num_word_query": 23.5121, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4141.9095, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5522, "query_norm": 1.4033, "queue_k_norm": 1.5178, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8141, "sent_len_1": 66.8358, "sent_len_max_0": 127.2963, "sent_len_max_1": 189.6775, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 74600 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9616, "doc_norm": 1.5155, "encoder_q-embeddings": 2178.7102, "encoder_q-layer.0": 1425.083, "encoder_q-layer.1": 1572.6542, "encoder_q-layer.10": 2648.6775, "encoder_q-layer.11": 6308.3809, "encoder_q-layer.2": 1757.7144, "encoder_q-layer.3": 1852.1783, "encoder_q-layer.4": 1941.15, "encoder_q-layer.5": 1988.4167, "encoder_q-layer.6": 2248.696, "encoder_q-layer.7": 2331.5103, "encoder_q-layer.8": 2622.6829, "encoder_q-layer.9": 2316.3862, "epoch": 0.49, "inbatch_neg_score": 0.5521, "inbatch_pos_score": 1.1074, "learning_rate": 1.4055555555555556e-05, "loss": 3.9616, "norm_diff": 0.1113, "norm_loss": 0.0, "num_token_doc": 66.8645, "num_token_overlap": 11.6084, "num_token_query": 31.7126, "num_token_union": 65.3448, "num_word_context": 202.5988, "num_word_doc": 49.8656, "num_word_query": 23.4003, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4107.4218, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5527, "query_norm": 1.4042, "queue_k_norm": 1.5168, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7126, "sent_len_1": 66.8645, "sent_len_max_0": 127.5925, "sent_len_max_1": 190.7775, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74700 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.9402, "doc_norm": 1.5212, "encoder_q-embeddings": 2404.0801, "encoder_q-layer.0": 1609.729, "encoder_q-layer.1": 1697.2074, "encoder_q-layer.10": 2386.2849, "encoder_q-layer.11": 6174.4067, "encoder_q-layer.2": 1992.0466, "encoder_q-layer.3": 2063.4041, "encoder_q-layer.4": 2204.4575, "encoder_q-layer.5": 2238.4836, "encoder_q-layer.6": 2379.1675, "encoder_q-layer.7": 2425.9197, "encoder_q-layer.8": 2581.4685, "encoder_q-layer.9": 2303.301, "epoch": 0.49, "inbatch_neg_score": 0.5502, "inbatch_pos_score": 1.1055, "learning_rate": 1.4000000000000001e-05, "loss": 3.9402, "norm_diff": 0.1201, "norm_loss": 0.0, "num_token_doc": 66.6576, "num_token_overlap": 11.6173, "num_token_query": 31.6626, "num_token_union": 65.1563, "num_word_context": 202.0324, "num_word_doc": 49.734, "num_word_query": 23.3648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4208.2771, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5508, "query_norm": 1.4011, "queue_k_norm": 1.5176, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.6626, "sent_len_1": 66.6576, "sent_len_max_0": 127.4537, "sent_len_max_1": 189.2325, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 74800 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.9215, "doc_norm": 1.5156, "encoder_q-embeddings": 2497.5569, "encoder_q-layer.0": 1731.5851, "encoder_q-layer.1": 1951.5502, "encoder_q-layer.10": 2763.1099, "encoder_q-layer.11": 6495.0239, "encoder_q-layer.2": 2199.5281, "encoder_q-layer.3": 2367.7695, "encoder_q-layer.4": 2712.3992, "encoder_q-layer.5": 2845.7881, "encoder_q-layer.6": 2984.103, "encoder_q-layer.7": 2950.9614, "encoder_q-layer.8": 3240.0125, "encoder_q-layer.9": 2422.4875, "epoch": 0.49, "inbatch_neg_score": 0.5569, "inbatch_pos_score": 1.1123, "learning_rate": 1.3944444444444446e-05, "loss": 3.9215, "norm_diff": 0.1114, "norm_loss": 0.0, "num_token_doc": 66.7818, "num_token_overlap": 11.7252, "num_token_query": 32.0839, "num_token_union": 65.3757, "num_word_context": 202.5654, "num_word_doc": 49.8223, "num_word_query": 23.6796, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4602.9538, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5557, "query_norm": 1.4041, "queue_k_norm": 1.5191, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0839, "sent_len_1": 66.7818, "sent_len_max_0": 127.465, "sent_len_max_1": 189.565, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 74900 }, { "accuracy": 43.1641, "active_queue_size": 16384.0, "cl_loss": 3.9319, "doc_norm": 1.5164, "encoder_q-embeddings": 29370.6738, "encoder_q-layer.0": 21463.5098, "encoder_q-layer.1": 22166.0625, "encoder_q-layer.10": 1572.7509, "encoder_q-layer.11": 3323.1899, "encoder_q-layer.2": 26789.4609, "encoder_q-layer.3": 26984.1152, "encoder_q-layer.4": 26927.3027, "encoder_q-layer.5": 26351.0879, "encoder_q-layer.6": 25847.168, "encoder_q-layer.7": 24432.5488, "encoder_q-layer.8": 13179.4717, "encoder_q-layer.9": 4982.7529, "epoch": 0.49, "inbatch_neg_score": 0.5552, "inbatch_pos_score": 1.0879, "learning_rate": 1.388888888888889e-05, "loss": 3.9319, "norm_diff": 0.1109, "norm_loss": 0.0, "num_token_doc": 66.7424, "num_token_overlap": 11.6831, "num_token_query": 31.9783, "num_token_union": 65.3517, "num_word_context": 202.0019, "num_word_doc": 49.7486, "num_word_query": 23.6017, "postclip_grad_norm": 1.0, "preclip_grad_norm": 34707.1931, "preclip_grad_norm_avg": 0.0003, "q@queue_neg_score": 0.5552, "query_norm": 1.4055, "queue_k_norm": 1.5214, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9783, "sent_len_1": 66.7424, "sent_len_max_0": 127.6063, "sent_len_max_1": 189.92, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 75000 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.9354, "doc_norm": 1.5198, "encoder_q-embeddings": 1144.2389, "encoder_q-layer.0": 802.0822, "encoder_q-layer.1": 901.624, "encoder_q-layer.10": 1314.8035, "encoder_q-layer.11": 3345.9102, "encoder_q-layer.2": 1057.6692, "encoder_q-layer.3": 1139.7888, "encoder_q-layer.4": 1266.61, "encoder_q-layer.5": 1196.9615, "encoder_q-layer.6": 1389.0145, "encoder_q-layer.7": 1525.8348, "encoder_q-layer.8": 1546.4216, "encoder_q-layer.9": 1221.277, "epoch": 0.49, "inbatch_neg_score": 0.5562, "inbatch_pos_score": 1.0996, "learning_rate": 1.3833333333333334e-05, "loss": 3.9354, "norm_diff": 0.1161, "norm_loss": 0.0, "num_token_doc": 66.6025, "num_token_overlap": 11.646, "num_token_query": 31.9678, "num_token_union": 65.3571, "num_word_context": 202.358, "num_word_doc": 49.7035, "num_word_query": 23.6075, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2287.9858, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5557, "query_norm": 1.4037, "queue_k_norm": 1.5183, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9678, "sent_len_1": 66.6025, "sent_len_max_0": 127.46, "sent_len_max_1": 186.9338, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 75100 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.93, "doc_norm": 1.5177, "encoder_q-embeddings": 1015.444, "encoder_q-layer.0": 666.8125, "encoder_q-layer.1": 711.1831, "encoder_q-layer.10": 1301.0645, "encoder_q-layer.11": 3137.4836, "encoder_q-layer.2": 810.2615, "encoder_q-layer.3": 806.261, "encoder_q-layer.4": 838.5645, "encoder_q-layer.5": 849.854, "encoder_q-layer.6": 986.3974, "encoder_q-layer.7": 1098.8707, "encoder_q-layer.8": 1244.2296, "encoder_q-layer.9": 1166.6028, "epoch": 0.49, "inbatch_neg_score": 0.5563, "inbatch_pos_score": 1.125, "learning_rate": 1.3777777777777778e-05, "loss": 3.93, "norm_diff": 0.1, "norm_loss": 0.0, "num_token_doc": 66.7915, "num_token_overlap": 11.7067, "num_token_query": 32.0004, "num_token_union": 65.4371, "num_word_context": 202.1946, "num_word_doc": 49.831, "num_word_query": 23.6517, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1945.9027, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5562, "query_norm": 1.4177, "queue_k_norm": 1.5204, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0004, "sent_len_1": 66.7915, "sent_len_max_0": 127.5187, "sent_len_max_1": 191.365, "stdk": 0.048, "stdq": 0.0429, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 75200 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.9283, "doc_norm": 1.522, "encoder_q-embeddings": 980.8981, "encoder_q-layer.0": 664.4304, "encoder_q-layer.1": 687.7096, "encoder_q-layer.10": 1307.1548, "encoder_q-layer.11": 3166.2898, "encoder_q-layer.2": 763.6091, "encoder_q-layer.3": 782.4176, "encoder_q-layer.4": 813.3189, "encoder_q-layer.5": 843.7915, "encoder_q-layer.6": 938.6248, "encoder_q-layer.7": 1051.4529, "encoder_q-layer.8": 1323.8187, "encoder_q-layer.9": 1194.9185, "epoch": 0.49, "inbatch_neg_score": 0.5611, "inbatch_pos_score": 1.1123, "learning_rate": 1.3722222222222222e-05, "loss": 3.9283, "norm_diff": 0.1072, "norm_loss": 0.0, "num_token_doc": 66.9082, "num_token_overlap": 11.7006, "num_token_query": 31.9876, "num_token_union": 65.5011, "num_word_context": 202.6855, "num_word_doc": 49.9002, "num_word_query": 23.6314, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1992.3364, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5596, "query_norm": 1.4148, "queue_k_norm": 1.5208, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9876, "sent_len_1": 66.9082, "sent_len_max_0": 127.4963, "sent_len_max_1": 189.6788, "stdk": 0.0481, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 75300 }, { "accuracy": 42.9688, "active_queue_size": 16384.0, "cl_loss": 3.9481, "doc_norm": 1.5263, "encoder_q-embeddings": 1961.1047, "encoder_q-layer.0": 1456.3121, "encoder_q-layer.1": 1495.7543, "encoder_q-layer.10": 1378.2225, "encoder_q-layer.11": 3362.0234, "encoder_q-layer.2": 1814.1846, "encoder_q-layer.3": 1930.9586, "encoder_q-layer.4": 2538.1572, "encoder_q-layer.5": 2638.1631, "encoder_q-layer.6": 3129.2085, "encoder_q-layer.7": 3232.8313, "encoder_q-layer.8": 3301.7722, "encoder_q-layer.9": 1683.6985, "epoch": 0.49, "inbatch_neg_score": 0.5637, "inbatch_pos_score": 1.0762, "learning_rate": 1.3666666666666666e-05, "loss": 3.9481, "norm_diff": 0.1283, "norm_loss": 0.0, "num_token_doc": 66.6158, "num_token_overlap": 11.6925, "num_token_query": 31.8329, "num_token_union": 65.235, "num_word_context": 202.2306, "num_word_doc": 49.7281, "num_word_query": 23.521, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3711.9289, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.562, "query_norm": 1.398, "queue_k_norm": 1.5211, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8329, "sent_len_1": 66.6158, "sent_len_max_0": 127.6437, "sent_len_max_1": 188.675, "stdk": 0.0482, "stdq": 0.0418, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 75400 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.9357, "doc_norm": 1.52, "encoder_q-embeddings": 949.8995, "encoder_q-layer.0": 621.8509, "encoder_q-layer.1": 645.8781, "encoder_q-layer.10": 1165.8777, "encoder_q-layer.11": 3079.3438, "encoder_q-layer.2": 722.7325, "encoder_q-layer.3": 746.7469, "encoder_q-layer.4": 784.8551, "encoder_q-layer.5": 772.3692, "encoder_q-layer.6": 903.7635, "encoder_q-layer.7": 975.1098, "encoder_q-layer.8": 1205.1443, "encoder_q-layer.9": 1099.9945, "epoch": 0.49, "inbatch_neg_score": 0.5638, "inbatch_pos_score": 1.1357, "learning_rate": 1.3611111111111111e-05, "loss": 3.9357, "norm_diff": 0.1071, "norm_loss": 0.0, "num_token_doc": 66.6317, "num_token_overlap": 11.6389, "num_token_query": 31.8456, "num_token_union": 65.2315, "num_word_context": 202.2055, "num_word_doc": 49.6571, "num_word_query": 23.5046, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1886.6253, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5635, "query_norm": 1.4128, "queue_k_norm": 1.5192, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8456, "sent_len_1": 66.6317, "sent_len_max_0": 127.5312, "sent_len_max_1": 191.7225, "stdk": 0.048, "stdq": 0.0424, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 75500 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 3.9198, "doc_norm": 1.5251, "encoder_q-embeddings": 3769.2183, "encoder_q-layer.0": 2704.019, "encoder_q-layer.1": 2990.2507, "encoder_q-layer.10": 1481.0748, "encoder_q-layer.11": 3219.0662, "encoder_q-layer.2": 3777.541, "encoder_q-layer.3": 4065.8928, "encoder_q-layer.4": 3910.2798, "encoder_q-layer.5": 4376.0674, "encoder_q-layer.6": 3785.3423, "encoder_q-layer.7": 3406.5029, "encoder_q-layer.8": 2773.8499, "encoder_q-layer.9": 1444.1176, "epoch": 0.49, "inbatch_neg_score": 0.5659, "inbatch_pos_score": 1.0898, "learning_rate": 1.3555555555555557e-05, "loss": 3.9198, "norm_diff": 0.1122, "norm_loss": 0.0, "num_token_doc": 66.8131, "num_token_overlap": 11.6927, "num_token_query": 31.8622, "num_token_union": 65.308, "num_word_context": 202.3334, "num_word_doc": 49.8734, "num_word_query": 23.5258, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5081.963, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5659, "query_norm": 1.4129, "queue_k_norm": 1.5222, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8622, "sent_len_1": 66.8131, "sent_len_max_0": 127.62, "sent_len_max_1": 189.58, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 75600 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.915, "doc_norm": 1.5169, "encoder_q-embeddings": 691.0057, "encoder_q-layer.0": 461.8676, "encoder_q-layer.1": 484.059, "encoder_q-layer.10": 622.8885, "encoder_q-layer.11": 1622.5073, "encoder_q-layer.2": 568.8861, "encoder_q-layer.3": 610.3175, "encoder_q-layer.4": 662.2253, "encoder_q-layer.5": 662.8257, "encoder_q-layer.6": 736.3759, "encoder_q-layer.7": 706.5883, "encoder_q-layer.8": 681.4946, "encoder_q-layer.9": 587.3381, "epoch": 0.49, "inbatch_neg_score": 0.5696, "inbatch_pos_score": 1.1123, "learning_rate": 1.3500000000000001e-05, "loss": 3.915, "norm_diff": 0.1048, "norm_loss": 0.0, "num_token_doc": 66.6786, "num_token_overlap": 11.7037, "num_token_query": 32.0179, "num_token_union": 65.3589, "num_word_context": 202.3474, "num_word_doc": 49.7728, "num_word_query": 23.6693, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1157.4723, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5688, "query_norm": 1.4121, "queue_k_norm": 1.5205, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0179, "sent_len_1": 66.6786, "sent_len_max_0": 127.44, "sent_len_max_1": 188.95, "stdk": 0.0478, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 75700 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9271, "doc_norm": 1.522, "encoder_q-embeddings": 485.8285, "encoder_q-layer.0": 333.0759, "encoder_q-layer.1": 351.4067, "encoder_q-layer.10": 660.5865, "encoder_q-layer.11": 1586.5269, "encoder_q-layer.2": 383.7387, "encoder_q-layer.3": 390.1425, "encoder_q-layer.4": 420.8411, "encoder_q-layer.5": 429.5316, "encoder_q-layer.6": 497.335, "encoder_q-layer.7": 571.2239, "encoder_q-layer.8": 654.8862, "encoder_q-layer.9": 615.8256, "epoch": 0.49, "inbatch_neg_score": 0.5717, "inbatch_pos_score": 1.1045, "learning_rate": 1.3444444444444445e-05, "loss": 3.9271, "norm_diff": 0.1291, "norm_loss": 0.0, "num_token_doc": 66.7014, "num_token_overlap": 11.6948, "num_token_query": 31.9324, "num_token_union": 65.3545, "num_word_context": 202.4709, "num_word_doc": 49.7746, "num_word_query": 23.58, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1002.7537, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5718, "query_norm": 1.3929, "queue_k_norm": 1.5248, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9324, "sent_len_1": 66.7014, "sent_len_max_0": 127.4975, "sent_len_max_1": 189.14, "stdk": 0.048, "stdq": 0.0414, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 75800 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.9285, "doc_norm": 1.5208, "encoder_q-embeddings": 562.1812, "encoder_q-layer.0": 388.7016, "encoder_q-layer.1": 414.6861, "encoder_q-layer.10": 703.3354, "encoder_q-layer.11": 1612.0851, "encoder_q-layer.2": 446.8174, "encoder_q-layer.3": 462.6422, "encoder_q-layer.4": 478.7744, "encoder_q-layer.5": 481.286, "encoder_q-layer.6": 508.4935, "encoder_q-layer.7": 542.9767, "encoder_q-layer.8": 660.0343, "encoder_q-layer.9": 648.1393, "epoch": 0.49, "inbatch_neg_score": 0.577, "inbatch_pos_score": 1.1211, "learning_rate": 1.338888888888889e-05, "loss": 3.9285, "norm_diff": 0.1075, "norm_loss": 0.0, "num_token_doc": 66.7728, "num_token_overlap": 11.7102, "num_token_query": 32.051, "num_token_union": 65.4114, "num_word_context": 202.2203, "num_word_doc": 49.801, "num_word_query": 23.6834, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1026.0099, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5767, "query_norm": 1.4133, "queue_k_norm": 1.5217, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.051, "sent_len_1": 66.7728, "sent_len_max_0": 127.6175, "sent_len_max_1": 188.7088, "stdk": 0.0479, "stdq": 0.0421, "stdqueue_k": 0.048, "stdqueue_q": 0.0, "step": 75900 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.9194, "doc_norm": 1.5205, "encoder_q-embeddings": 490.7238, "encoder_q-layer.0": 320.8194, "encoder_q-layer.1": 338.7308, "encoder_q-layer.10": 634.1116, "encoder_q-layer.11": 1576.0277, "encoder_q-layer.2": 381.4813, "encoder_q-layer.3": 404.6491, "encoder_q-layer.4": 422.523, "encoder_q-layer.5": 436.3596, "encoder_q-layer.6": 489.6472, "encoder_q-layer.7": 539.2053, "encoder_q-layer.8": 627.0718, "encoder_q-layer.9": 608.1066, "epoch": 0.49, "inbatch_neg_score": 0.5792, "inbatch_pos_score": 1.1113, "learning_rate": 1.3333333333333333e-05, "loss": 3.9194, "norm_diff": 0.1083, "norm_loss": 0.0, "num_token_doc": 66.932, "num_token_overlap": 11.7015, "num_token_query": 31.9226, "num_token_union": 65.4575, "num_word_context": 202.5308, "num_word_doc": 49.9469, "num_word_query": 23.5682, "postclip_grad_norm": 1.0, "preclip_grad_norm": 999.4525, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5796, "query_norm": 1.4122, "queue_k_norm": 1.5248, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9226, "sent_len_1": 66.932, "sent_len_max_0": 127.5537, "sent_len_max_1": 190.2887, "stdk": 0.0479, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 76000 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.9397, "doc_norm": 1.5188, "encoder_q-embeddings": 555.1044, "encoder_q-layer.0": 390.2768, "encoder_q-layer.1": 415.4913, "encoder_q-layer.10": 657.2157, "encoder_q-layer.11": 1518.8224, "encoder_q-layer.2": 474.8342, "encoder_q-layer.3": 495.1311, "encoder_q-layer.4": 542.7333, "encoder_q-layer.5": 532.9418, "encoder_q-layer.6": 566.2289, "encoder_q-layer.7": 634.9714, "encoder_q-layer.8": 701.0983, "encoder_q-layer.9": 601.2828, "epoch": 0.5, "inbatch_neg_score": 0.5845, "inbatch_pos_score": 1.1191, "learning_rate": 1.3277777777777777e-05, "loss": 3.9397, "norm_diff": 0.1045, "norm_loss": 0.0, "num_token_doc": 66.8327, "num_token_overlap": 11.6789, "num_token_query": 31.9104, "num_token_union": 65.4015, "num_word_context": 202.3129, "num_word_doc": 49.8827, "num_word_query": 23.5657, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1045.1542, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5835, "query_norm": 1.4143, "queue_k_norm": 1.5253, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9104, "sent_len_1": 66.8327, "sent_len_max_0": 127.6762, "sent_len_max_1": 188.0263, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 76100 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.9456, "doc_norm": 1.5252, "encoder_q-embeddings": 649.98, "encoder_q-layer.0": 415.0853, "encoder_q-layer.1": 437.9818, "encoder_q-layer.10": 613.9793, "encoder_q-layer.11": 1565.7554, "encoder_q-layer.2": 506.4227, "encoder_q-layer.3": 523.5059, "encoder_q-layer.4": 548.4779, "encoder_q-layer.5": 538.6183, "encoder_q-layer.6": 584.3456, "encoder_q-layer.7": 617.3552, "encoder_q-layer.8": 654.0907, "encoder_q-layer.9": 599.2662, "epoch": 0.5, "inbatch_neg_score": 0.5818, "inbatch_pos_score": 1.1465, "learning_rate": 1.3222222222222221e-05, "loss": 3.9456, "norm_diff": 0.1028, "norm_loss": 0.0, "num_token_doc": 66.8034, "num_token_overlap": 11.6597, "num_token_query": 31.8785, "num_token_union": 65.3653, "num_word_context": 202.4746, "num_word_doc": 49.8179, "num_word_query": 23.5542, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1064.8834, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5825, "query_norm": 1.4223, "queue_k_norm": 1.5249, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8785, "sent_len_1": 66.8034, "sent_len_max_0": 127.4975, "sent_len_max_1": 190.5725, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 76200 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9397, "doc_norm": 1.5227, "encoder_q-embeddings": 740.1631, "encoder_q-layer.0": 525.517, "encoder_q-layer.1": 575.5629, "encoder_q-layer.10": 648.9794, "encoder_q-layer.11": 1555.8646, "encoder_q-layer.2": 694.0931, "encoder_q-layer.3": 719.5366, "encoder_q-layer.4": 776.3768, "encoder_q-layer.5": 746.584, "encoder_q-layer.6": 862.3734, "encoder_q-layer.7": 810.6576, "encoder_q-layer.8": 770.6535, "encoder_q-layer.9": 623.5752, "epoch": 0.5, "inbatch_neg_score": 0.5863, "inbatch_pos_score": 1.1289, "learning_rate": 1.3166666666666665e-05, "loss": 3.9397, "norm_diff": 0.1042, "norm_loss": 0.0, "num_token_doc": 66.7646, "num_token_overlap": 11.6439, "num_token_query": 31.7828, "num_token_union": 65.2549, "num_word_context": 202.2405, "num_word_doc": 49.8137, "num_word_query": 23.4615, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1222.6468, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5854, "query_norm": 1.4185, "queue_k_norm": 1.5262, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7828, "sent_len_1": 66.7646, "sent_len_max_0": 127.63, "sent_len_max_1": 188.6475, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 76300 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.9082, "doc_norm": 1.5184, "encoder_q-embeddings": 649.8712, "encoder_q-layer.0": 461.6476, "encoder_q-layer.1": 488.9068, "encoder_q-layer.10": 639.2577, "encoder_q-layer.11": 1611.9686, "encoder_q-layer.2": 584.1377, "encoder_q-layer.3": 604.1908, "encoder_q-layer.4": 636.7272, "encoder_q-layer.5": 620.4716, "encoder_q-layer.6": 597.2602, "encoder_q-layer.7": 597.6349, "encoder_q-layer.8": 640.0128, "encoder_q-layer.9": 593.3608, "epoch": 0.5, "inbatch_neg_score": 0.5893, "inbatch_pos_score": 1.1299, "learning_rate": 1.3111111111111113e-05, "loss": 3.9082, "norm_diff": 0.104, "norm_loss": 0.0, "num_token_doc": 66.7532, "num_token_overlap": 11.6958, "num_token_query": 32.0286, "num_token_union": 65.4103, "num_word_context": 202.6536, "num_word_doc": 49.8084, "num_word_query": 23.6577, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1116.7684, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5884, "query_norm": 1.4145, "queue_k_norm": 1.5274, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0286, "sent_len_1": 66.7532, "sent_len_max_0": 127.575, "sent_len_max_1": 189.9225, "stdk": 0.0477, "stdq": 0.0422, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 76400 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9417, "doc_norm": 1.5243, "encoder_q-embeddings": 495.3568, "encoder_q-layer.0": 342.743, "encoder_q-layer.1": 366.8252, "encoder_q-layer.10": 634.6923, "encoder_q-layer.11": 1602.1671, "encoder_q-layer.2": 411.5206, "encoder_q-layer.3": 408.4145, "encoder_q-layer.4": 431.5215, "encoder_q-layer.5": 435.7462, "encoder_q-layer.6": 479.3147, "encoder_q-layer.7": 528.8027, "encoder_q-layer.8": 622.0609, "encoder_q-layer.9": 578.2216, "epoch": 0.5, "inbatch_neg_score": 0.5883, "inbatch_pos_score": 1.1367, "learning_rate": 1.3055555555555557e-05, "loss": 3.9417, "norm_diff": 0.1195, "norm_loss": 0.0, "num_token_doc": 66.6288, "num_token_overlap": 11.6545, "num_token_query": 31.9056, "num_token_union": 65.299, "num_word_context": 202.0097, "num_word_doc": 49.7301, "num_word_query": 23.5701, "postclip_grad_norm": 1.0, "preclip_grad_norm": 993.4241, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5889, "query_norm": 1.4049, "queue_k_norm": 1.5281, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9056, "sent_len_1": 66.6288, "sent_len_max_0": 127.4112, "sent_len_max_1": 188.9675, "stdk": 0.0479, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 76500 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9414, "doc_norm": 1.5284, "encoder_q-embeddings": 597.3082, "encoder_q-layer.0": 412.5195, "encoder_q-layer.1": 444.3329, "encoder_q-layer.10": 633.3571, "encoder_q-layer.11": 1572.8215, "encoder_q-layer.2": 513.6888, "encoder_q-layer.3": 518.2203, "encoder_q-layer.4": 549.8013, "encoder_q-layer.5": 537.0192, "encoder_q-layer.6": 552.351, "encoder_q-layer.7": 567.4409, "encoder_q-layer.8": 650.8763, "encoder_q-layer.9": 573.3555, "epoch": 0.5, "inbatch_neg_score": 0.5907, "inbatch_pos_score": 1.1406, "learning_rate": 1.3000000000000001e-05, "loss": 3.9414, "norm_diff": 0.1211, "norm_loss": 0.0, "num_token_doc": 66.8282, "num_token_overlap": 11.6295, "num_token_query": 31.7609, "num_token_union": 65.299, "num_word_context": 202.2538, "num_word_doc": 49.8411, "num_word_query": 23.4421, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1062.0517, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5908, "query_norm": 1.4073, "queue_k_norm": 1.5282, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7609, "sent_len_1": 66.8282, "sent_len_max_0": 127.5, "sent_len_max_1": 189.2975, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 76600 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 3.9467, "doc_norm": 1.5256, "encoder_q-embeddings": 615.0676, "encoder_q-layer.0": 425.4705, "encoder_q-layer.1": 456.3028, "encoder_q-layer.10": 646.9463, "encoder_q-layer.11": 1658.2228, "encoder_q-layer.2": 541.1385, "encoder_q-layer.3": 569.7291, "encoder_q-layer.4": 609.5092, "encoder_q-layer.5": 633.2296, "encoder_q-layer.6": 650.3971, "encoder_q-layer.7": 646.6019, "encoder_q-layer.8": 695.5786, "encoder_q-layer.9": 607.9819, "epoch": 0.5, "inbatch_neg_score": 0.5931, "inbatch_pos_score": 1.1211, "learning_rate": 1.2944444444444445e-05, "loss": 3.9467, "norm_diff": 0.1073, "norm_loss": 0.0, "num_token_doc": 66.8462, "num_token_overlap": 11.659, "num_token_query": 32.0217, "num_token_union": 65.5014, "num_word_context": 202.3231, "num_word_doc": 49.83, "num_word_query": 23.6771, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1126.7996, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5918, "query_norm": 1.4183, "queue_k_norm": 1.5315, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0217, "sent_len_1": 66.8462, "sent_len_max_0": 127.6137, "sent_len_max_1": 189.8137, "stdk": 0.0479, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 76700 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.9323, "doc_norm": 1.5261, "encoder_q-embeddings": 780.2462, "encoder_q-layer.0": 564.1219, "encoder_q-layer.1": 611.8965, "encoder_q-layer.10": 620.8388, "encoder_q-layer.11": 1575.9169, "encoder_q-layer.2": 662.8498, "encoder_q-layer.3": 640.7921, "encoder_q-layer.4": 623.7521, "encoder_q-layer.5": 619.3811, "encoder_q-layer.6": 633.1288, "encoder_q-layer.7": 610.7958, "encoder_q-layer.8": 682.4246, "encoder_q-layer.9": 587.4855, "epoch": 0.5, "inbatch_neg_score": 0.5914, "inbatch_pos_score": 1.1484, "learning_rate": 1.2888888888888889e-05, "loss": 3.9323, "norm_diff": 0.1128, "norm_loss": 0.0, "num_token_doc": 66.6313, "num_token_overlap": 11.692, "num_token_query": 31.9743, "num_token_union": 65.2886, "num_word_context": 202.1151, "num_word_doc": 49.7483, "num_word_query": 23.6181, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1160.165, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5923, "query_norm": 1.4134, "queue_k_norm": 1.5283, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9743, "sent_len_1": 66.6313, "sent_len_max_0": 127.6975, "sent_len_max_1": 190.1813, "stdk": 0.0479, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 76800 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.9105, "doc_norm": 1.5363, "encoder_q-embeddings": 664.1522, "encoder_q-layer.0": 450.2565, "encoder_q-layer.1": 519.64, "encoder_q-layer.10": 608.7296, "encoder_q-layer.11": 1500.5427, "encoder_q-layer.2": 625.4094, "encoder_q-layer.3": 637.104, "encoder_q-layer.4": 646.3502, "encoder_q-layer.5": 666.1212, "encoder_q-layer.6": 698.3068, "encoder_q-layer.7": 609.0512, "encoder_q-layer.8": 626.7222, "encoder_q-layer.9": 563.9713, "epoch": 0.5, "inbatch_neg_score": 0.5917, "inbatch_pos_score": 1.1543, "learning_rate": 1.2833333333333333e-05, "loss": 3.9105, "norm_diff": 0.1262, "norm_loss": 0.0, "num_token_doc": 66.7567, "num_token_overlap": 11.6519, "num_token_query": 31.8395, "num_token_union": 65.3559, "num_word_context": 202.2229, "num_word_doc": 49.8336, "num_word_query": 23.5168, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1111.081, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5933, "query_norm": 1.4101, "queue_k_norm": 1.5306, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8395, "sent_len_1": 66.7567, "sent_len_max_0": 127.4313, "sent_len_max_1": 189.68, "stdk": 0.0483, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 76900 }, { "accuracy": 42.2852, "active_queue_size": 16384.0, "cl_loss": 3.9251, "doc_norm": 1.5317, "encoder_q-embeddings": 854.8786, "encoder_q-layer.0": 574.0427, "encoder_q-layer.1": 670.4078, "encoder_q-layer.10": 624.7202, "encoder_q-layer.11": 1650.9473, "encoder_q-layer.2": 767.0263, "encoder_q-layer.3": 800.6161, "encoder_q-layer.4": 955.1109, "encoder_q-layer.5": 871.6822, "encoder_q-layer.6": 879.2747, "encoder_q-layer.7": 819.798, "encoder_q-layer.8": 916.5817, "encoder_q-layer.9": 695.2864, "epoch": 0.5, "inbatch_neg_score": 0.5966, "inbatch_pos_score": 1.1221, "learning_rate": 1.2777777777777777e-05, "loss": 3.9251, "norm_diff": 0.1252, "norm_loss": 0.0, "num_token_doc": 66.6888, "num_token_overlap": 11.7433, "num_token_query": 32.1231, "num_token_union": 65.3747, "num_word_context": 202.1661, "num_word_doc": 49.7622, "num_word_query": 23.7107, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1377.7952, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5962, "query_norm": 1.4065, "queue_k_norm": 1.5318, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.1231, "sent_len_1": 66.6888, "sent_len_max_0": 127.5263, "sent_len_max_1": 190.54, "stdk": 0.0481, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 77000 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 3.9232, "doc_norm": 1.5314, "encoder_q-embeddings": 616.4267, "encoder_q-layer.0": 417.5731, "encoder_q-layer.1": 474.3357, "encoder_q-layer.10": 601.988, "encoder_q-layer.11": 1561.7179, "encoder_q-layer.2": 524.1064, "encoder_q-layer.3": 463.4321, "encoder_q-layer.4": 495.524, "encoder_q-layer.5": 506.2184, "encoder_q-layer.6": 555.2719, "encoder_q-layer.7": 581.1146, "encoder_q-layer.8": 663.2026, "encoder_q-layer.9": 576.2737, "epoch": 0.5, "inbatch_neg_score": 0.5988, "inbatch_pos_score": 1.126, "learning_rate": 1.2722222222222221e-05, "loss": 3.9232, "norm_diff": 0.1282, "norm_loss": 0.0, "num_token_doc": 66.8456, "num_token_overlap": 11.7427, "num_token_query": 32.0334, "num_token_union": 65.4804, "num_word_context": 202.5437, "num_word_doc": 49.9019, "num_word_query": 23.6567, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1051.6041, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5977, "query_norm": 1.4032, "queue_k_norm": 1.5305, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0334, "sent_len_1": 66.8456, "sent_len_max_0": 127.6437, "sent_len_max_1": 190.0525, "stdk": 0.048, "stdq": 0.0417, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77100 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.9358, "doc_norm": 1.5347, "encoder_q-embeddings": 4470.9614, "encoder_q-layer.0": 3466.6628, "encoder_q-layer.1": 3864.8613, "encoder_q-layer.10": 675.9412, "encoder_q-layer.11": 1613.6172, "encoder_q-layer.2": 4550.1978, "encoder_q-layer.3": 5034.4556, "encoder_q-layer.4": 5107.4199, "encoder_q-layer.5": 4338.0679, "encoder_q-layer.6": 4589.8799, "encoder_q-layer.7": 4159.2842, "encoder_q-layer.8": 2926.3623, "encoder_q-layer.9": 1370.7573, "epoch": 0.5, "inbatch_neg_score": 0.5942, "inbatch_pos_score": 1.1602, "learning_rate": 1.2666666666666668e-05, "loss": 3.9358, "norm_diff": 0.1062, "norm_loss": 0.0, "num_token_doc": 66.7035, "num_token_overlap": 11.6702, "num_token_query": 31.9764, "num_token_union": 65.3475, "num_word_context": 202.406, "num_word_doc": 49.7546, "num_word_query": 23.6016, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5849.8697, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.5947, "query_norm": 1.4285, "queue_k_norm": 1.531, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9764, "sent_len_1": 66.7035, "sent_len_max_0": 127.3612, "sent_len_max_1": 191.0712, "stdk": 0.0482, "stdq": 0.0429, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77200 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.9151, "doc_norm": 1.5322, "encoder_q-embeddings": 667.8505, "encoder_q-layer.0": 466.7827, "encoder_q-layer.1": 556.1041, "encoder_q-layer.10": 638.0227, "encoder_q-layer.11": 1713.3699, "encoder_q-layer.2": 655.6812, "encoder_q-layer.3": 623.6801, "encoder_q-layer.4": 540.2517, "encoder_q-layer.5": 544.6812, "encoder_q-layer.6": 630.5876, "encoder_q-layer.7": 648.8692, "encoder_q-layer.8": 672.2704, "encoder_q-layer.9": 580.2005, "epoch": 0.5, "inbatch_neg_score": 0.6001, "inbatch_pos_score": 1.1289, "learning_rate": 1.2611111111111113e-05, "loss": 3.9151, "norm_diff": 0.1156, "norm_loss": 0.0, "num_token_doc": 66.6434, "num_token_overlap": 11.7397, "num_token_query": 31.9739, "num_token_union": 65.2647, "num_word_context": 202.1225, "num_word_doc": 49.7419, "num_word_query": 23.621, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1138.6195, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5996, "query_norm": 1.4166, "queue_k_norm": 1.5332, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9739, "sent_len_1": 66.6434, "sent_len_max_0": 127.4712, "sent_len_max_1": 189.2038, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 77300 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.9213, "doc_norm": 1.5319, "encoder_q-embeddings": 769.1592, "encoder_q-layer.0": 544.8671, "encoder_q-layer.1": 623.714, "encoder_q-layer.10": 641.5283, "encoder_q-layer.11": 1676.0376, "encoder_q-layer.2": 717.4211, "encoder_q-layer.3": 746.4838, "encoder_q-layer.4": 849.2374, "encoder_q-layer.5": 826.4643, "encoder_q-layer.6": 812.9971, "encoder_q-layer.7": 777.3672, "encoder_q-layer.8": 743.2993, "encoder_q-layer.9": 606.2047, "epoch": 0.5, "inbatch_neg_score": 0.6002, "inbatch_pos_score": 1.1309, "learning_rate": 1.2555555555555557e-05, "loss": 3.9213, "norm_diff": 0.1135, "norm_loss": 0.0, "num_token_doc": 66.6184, "num_token_overlap": 11.71, "num_token_query": 31.9877, "num_token_union": 65.2748, "num_word_context": 202.1289, "num_word_doc": 49.7182, "num_word_query": 23.6071, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1279.8786, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.5996, "query_norm": 1.4185, "queue_k_norm": 1.5316, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9877, "sent_len_1": 66.6184, "sent_len_max_0": 127.5238, "sent_len_max_1": 190.06, "stdk": 0.048, "stdq": 0.0423, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77400 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.9136, "doc_norm": 1.529, "encoder_q-embeddings": 530.2402, "encoder_q-layer.0": 345.4333, "encoder_q-layer.1": 359.8213, "encoder_q-layer.10": 639.0723, "encoder_q-layer.11": 1575.6466, "encoder_q-layer.2": 405.2586, "encoder_q-layer.3": 412.4114, "encoder_q-layer.4": 456.0052, "encoder_q-layer.5": 469.8668, "encoder_q-layer.6": 525.2981, "encoder_q-layer.7": 564.3259, "encoder_q-layer.8": 666.0668, "encoder_q-layer.9": 625.9092, "epoch": 0.5, "inbatch_neg_score": 0.6031, "inbatch_pos_score": 1.1348, "learning_rate": 1.25e-05, "loss": 3.9136, "norm_diff": 0.1148, "norm_loss": 0.0, "num_token_doc": 66.616, "num_token_overlap": 11.7044, "num_token_query": 31.9785, "num_token_union": 65.2472, "num_word_context": 202.0441, "num_word_doc": 49.7042, "num_word_query": 23.6033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1022.4312, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.603, "query_norm": 1.4142, "queue_k_norm": 1.5329, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9785, "sent_len_1": 66.616, "sent_len_max_0": 127.5225, "sent_len_max_1": 189.6325, "stdk": 0.0479, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 77500 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.9224, "doc_norm": 1.5344, "encoder_q-embeddings": 563.45, "encoder_q-layer.0": 370.7353, "encoder_q-layer.1": 382.247, "encoder_q-layer.10": 625.5356, "encoder_q-layer.11": 1654.1458, "encoder_q-layer.2": 431.211, "encoder_q-layer.3": 425.2544, "encoder_q-layer.4": 452.4828, "encoder_q-layer.5": 450.3216, "encoder_q-layer.6": 496.0601, "encoder_q-layer.7": 557.0149, "encoder_q-layer.8": 626.8884, "encoder_q-layer.9": 590.1447, "epoch": 0.51, "inbatch_neg_score": 0.6052, "inbatch_pos_score": 1.1562, "learning_rate": 1.2444444444444445e-05, "loss": 3.9224, "norm_diff": 0.1179, "norm_loss": 0.0, "num_token_doc": 66.3916, "num_token_overlap": 11.6572, "num_token_query": 31.9476, "num_token_union": 65.1687, "num_word_context": 201.6279, "num_word_doc": 49.5652, "num_word_query": 23.6171, "postclip_grad_norm": 1.0, "preclip_grad_norm": 1042.6642, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.605, "query_norm": 1.4165, "queue_k_norm": 1.534, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9476, "sent_len_1": 66.3916, "sent_len_max_0": 127.5187, "sent_len_max_1": 188.3675, "stdk": 0.0481, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 77600 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.9091, "doc_norm": 1.5386, "encoder_q-embeddings": 3343.8259, "encoder_q-layer.0": 2605.3032, "encoder_q-layer.1": 2834.457, "encoder_q-layer.10": 1273.3477, "encoder_q-layer.11": 3199.7334, "encoder_q-layer.2": 3457.7861, "encoder_q-layer.3": 3955.3413, "encoder_q-layer.4": 4271.9702, "encoder_q-layer.5": 4029.6489, "encoder_q-layer.6": 3487.8423, "encoder_q-layer.7": 3105.5601, "encoder_q-layer.8": 2599.6514, "encoder_q-layer.9": 1535.0929, "epoch": 0.51, "inbatch_neg_score": 0.6039, "inbatch_pos_score": 1.1523, "learning_rate": 1.238888888888889e-05, "loss": 3.9091, "norm_diff": 0.1068, "norm_loss": 0.0, "num_token_doc": 66.7044, "num_token_overlap": 11.733, "num_token_query": 32.0457, "num_token_union": 65.3367, "num_word_context": 202.2014, "num_word_doc": 49.7704, "num_word_query": 23.7112, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4779.6569, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.605, "query_norm": 1.4318, "queue_k_norm": 1.5349, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0457, "sent_len_1": 66.7044, "sent_len_max_0": 127.5938, "sent_len_max_1": 190.0213, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 77700 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.9227, "doc_norm": 1.5293, "encoder_q-embeddings": 1047.8009, "encoder_q-layer.0": 704.8682, "encoder_q-layer.1": 734.1643, "encoder_q-layer.10": 1509.0745, "encoder_q-layer.11": 3400.3818, "encoder_q-layer.2": 864.7006, "encoder_q-layer.3": 907.3399, "encoder_q-layer.4": 1016.1116, "encoder_q-layer.5": 1043.9186, "encoder_q-layer.6": 1086.5848, "encoder_q-layer.7": 1158.4034, "encoder_q-layer.8": 1355.2159, "encoder_q-layer.9": 1270.507, "epoch": 0.51, "inbatch_neg_score": 0.6075, "inbatch_pos_score": 1.1367, "learning_rate": 1.2333333333333334e-05, "loss": 3.9227, "norm_diff": 0.1184, "norm_loss": 0.0, "num_token_doc": 66.9239, "num_token_overlap": 11.6313, "num_token_query": 31.8168, "num_token_union": 65.4652, "num_word_context": 202.5156, "num_word_doc": 49.9518, "num_word_query": 23.4945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2160.5642, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6064, "query_norm": 1.4109, "queue_k_norm": 1.5371, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8168, "sent_len_1": 66.9239, "sent_len_max_0": 127.485, "sent_len_max_1": 188.435, "stdk": 0.0478, "stdq": 0.0418, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 77800 }, { "accuracy": 42.3828, "active_queue_size": 16384.0, "cl_loss": 3.9055, "doc_norm": 1.5366, "encoder_q-embeddings": 1949.7297, "encoder_q-layer.0": 1448.4606, "encoder_q-layer.1": 1624.116, "encoder_q-layer.10": 1442.4773, "encoder_q-layer.11": 3308.1653, "encoder_q-layer.2": 1794.6346, "encoder_q-layer.3": 1769.6652, "encoder_q-layer.4": 2040.8505, "encoder_q-layer.5": 1898.4956, "encoder_q-layer.6": 1871.8555, "encoder_q-layer.7": 1769.6067, "encoder_q-layer.8": 1552.577, "encoder_q-layer.9": 1372.9122, "epoch": 0.51, "inbatch_neg_score": 0.6061, "inbatch_pos_score": 1.1436, "learning_rate": 1.2277777777777778e-05, "loss": 3.9055, "norm_diff": 0.1113, "norm_loss": 0.0, "num_token_doc": 66.8496, "num_token_overlap": 11.6974, "num_token_query": 31.9311, "num_token_union": 65.3728, "num_word_context": 201.9846, "num_word_doc": 49.9021, "num_word_query": 23.564, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2847.1952, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6074, "query_norm": 1.4253, "queue_k_norm": 1.5358, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9311, "sent_len_1": 66.8496, "sent_len_max_0": 127.3962, "sent_len_max_1": 188.8812, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 77900 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.9264, "doc_norm": 1.5424, "encoder_q-embeddings": 1505.5914, "encoder_q-layer.0": 973.0938, "encoder_q-layer.1": 1188.2849, "encoder_q-layer.10": 1328.467, "encoder_q-layer.11": 3248.0669, "encoder_q-layer.2": 1471.9595, "encoder_q-layer.3": 1634.0057, "encoder_q-layer.4": 1902.2234, "encoder_q-layer.5": 1817.0619, "encoder_q-layer.6": 1750.184, "encoder_q-layer.7": 1662.6747, "encoder_q-layer.8": 1673.2831, "encoder_q-layer.9": 1271.8639, "epoch": 0.51, "inbatch_neg_score": 0.6066, "inbatch_pos_score": 1.167, "learning_rate": 1.2222222222222222e-05, "loss": 3.9264, "norm_diff": 0.114, "norm_loss": 0.0, "num_token_doc": 66.8603, "num_token_overlap": 11.6841, "num_token_query": 31.8967, "num_token_union": 65.3931, "num_word_context": 202.2623, "num_word_doc": 49.9049, "num_word_query": 23.5468, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2607.3058, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6069, "query_norm": 1.4284, "queue_k_norm": 1.5372, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8967, "sent_len_1": 66.8603, "sent_len_max_0": 127.4537, "sent_len_max_1": 189.5325, "stdk": 0.0483, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 78000 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9351, "doc_norm": 1.5406, "encoder_q-embeddings": 1080.3524, "encoder_q-layer.0": 735.0583, "encoder_q-layer.1": 784.0455, "encoder_q-layer.10": 1468.4553, "encoder_q-layer.11": 3288.887, "encoder_q-layer.2": 878.8424, "encoder_q-layer.3": 892.8351, "encoder_q-layer.4": 939.8016, "encoder_q-layer.5": 944.8228, "encoder_q-layer.6": 1041.2931, "encoder_q-layer.7": 1126.7554, "encoder_q-layer.8": 1299.7961, "encoder_q-layer.9": 1314.864, "epoch": 0.51, "inbatch_neg_score": 0.6082, "inbatch_pos_score": 1.166, "learning_rate": 1.2166666666666668e-05, "loss": 3.9351, "norm_diff": 0.116, "norm_loss": 0.0, "num_token_doc": 66.6698, "num_token_overlap": 11.6647, "num_token_query": 31.89, "num_token_union": 65.2925, "num_word_context": 201.9937, "num_word_doc": 49.7899, "num_word_query": 23.5608, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2078.1894, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6084, "query_norm": 1.4246, "queue_k_norm": 1.5383, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.89, "sent_len_1": 66.6698, "sent_len_max_0": 127.5875, "sent_len_max_1": 187.1538, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 78100 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 3.9162, "doc_norm": 1.5396, "encoder_q-embeddings": 1022.3877, "encoder_q-layer.0": 697.2398, "encoder_q-layer.1": 737.2108, "encoder_q-layer.10": 1444.1869, "encoder_q-layer.11": 3388.1948, "encoder_q-layer.2": 831.9512, "encoder_q-layer.3": 867.1074, "encoder_q-layer.4": 942.7328, "encoder_q-layer.5": 949.8221, "encoder_q-layer.6": 1040.0222, "encoder_q-layer.7": 1155.9967, "encoder_q-layer.8": 1340.4633, "encoder_q-layer.9": 1282.7295, "epoch": 0.51, "inbatch_neg_score": 0.6095, "inbatch_pos_score": 1.1406, "learning_rate": 1.2111111111111112e-05, "loss": 3.9162, "norm_diff": 0.1252, "norm_loss": 0.0, "num_token_doc": 66.7507, "num_token_overlap": 11.6847, "num_token_query": 31.9355, "num_token_union": 65.3615, "num_word_context": 202.1828, "num_word_doc": 49.8003, "num_word_query": 23.586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2131.3434, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6094, "query_norm": 1.4144, "queue_k_norm": 1.5362, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9355, "sent_len_1": 66.7507, "sent_len_max_0": 127.6112, "sent_len_max_1": 191.28, "stdk": 0.0482, "stdq": 0.0419, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 78200 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 3.9137, "doc_norm": 1.5411, "encoder_q-embeddings": 1329.4967, "encoder_q-layer.0": 871.6969, "encoder_q-layer.1": 951.6574, "encoder_q-layer.10": 1205.7764, "encoder_q-layer.11": 3296.1438, "encoder_q-layer.2": 1143.7063, "encoder_q-layer.3": 1232.4812, "encoder_q-layer.4": 1328.8109, "encoder_q-layer.5": 1523.4368, "encoder_q-layer.6": 1629.4969, "encoder_q-layer.7": 1600.1426, "encoder_q-layer.8": 1540.6088, "encoder_q-layer.9": 1223.2032, "epoch": 0.51, "inbatch_neg_score": 0.6109, "inbatch_pos_score": 1.1602, "learning_rate": 1.2055555555555556e-05, "loss": 3.9137, "norm_diff": 0.125, "norm_loss": 0.0, "num_token_doc": 66.6754, "num_token_overlap": 11.6637, "num_token_query": 31.8452, "num_token_union": 65.2699, "num_word_context": 202.4206, "num_word_doc": 49.765, "num_word_query": 23.5152, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2384.3726, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6099, "query_norm": 1.4161, "queue_k_norm": 1.5385, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8452, "sent_len_1": 66.6754, "sent_len_max_0": 127.4313, "sent_len_max_1": 189.9938, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 78300 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.8906, "doc_norm": 1.5401, "encoder_q-embeddings": 1473.033, "encoder_q-layer.0": 1047.0228, "encoder_q-layer.1": 1254.8507, "encoder_q-layer.10": 1196.4971, "encoder_q-layer.11": 3053.6023, "encoder_q-layer.2": 1364.8987, "encoder_q-layer.3": 1380.3256, "encoder_q-layer.4": 1435.3412, "encoder_q-layer.5": 1515.6027, "encoder_q-layer.6": 1535.3423, "encoder_q-layer.7": 1379.6963, "encoder_q-layer.8": 1395.9315, "encoder_q-layer.9": 1148.7705, "epoch": 0.51, "inbatch_neg_score": 0.6108, "inbatch_pos_score": 1.1797, "learning_rate": 1.2e-05, "loss": 3.8906, "norm_diff": 0.1097, "norm_loss": 0.0, "num_token_doc": 66.7697, "num_token_overlap": 11.74, "num_token_query": 31.9844, "num_token_union": 65.3824, "num_word_context": 201.8932, "num_word_doc": 49.8218, "num_word_query": 23.6377, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2382.1676, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6108, "query_norm": 1.4304, "queue_k_norm": 1.5388, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9844, "sent_len_1": 66.7697, "sent_len_max_0": 127.4712, "sent_len_max_1": 186.705, "stdk": 0.0482, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 78400 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9156, "doc_norm": 1.5394, "encoder_q-embeddings": 1167.2795, "encoder_q-layer.0": 781.067, "encoder_q-layer.1": 828.7571, "encoder_q-layer.10": 1269.9836, "encoder_q-layer.11": 3321.175, "encoder_q-layer.2": 941.8644, "encoder_q-layer.3": 998.5862, "encoder_q-layer.4": 1115.2186, "encoder_q-layer.5": 1125.9983, "encoder_q-layer.6": 1168.907, "encoder_q-layer.7": 1296.05, "encoder_q-layer.8": 1373.4644, "encoder_q-layer.9": 1186.7556, "epoch": 0.51, "inbatch_neg_score": 0.6088, "inbatch_pos_score": 1.1787, "learning_rate": 1.1944444444444446e-05, "loss": 3.9156, "norm_diff": 0.1056, "norm_loss": 0.0, "num_token_doc": 66.6605, "num_token_overlap": 11.7394, "num_token_query": 32.1668, "num_token_union": 65.4472, "num_word_context": 202.1179, "num_word_doc": 49.7291, "num_word_query": 23.7788, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2204.2158, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6084, "query_norm": 1.4337, "queue_k_norm": 1.5371, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.1668, "sent_len_1": 66.6605, "sent_len_max_0": 127.5012, "sent_len_max_1": 187.2063, "stdk": 0.0481, "stdq": 0.0429, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 78500 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 3.9254, "doc_norm": 1.5473, "encoder_q-embeddings": 1267.1805, "encoder_q-layer.0": 842.897, "encoder_q-layer.1": 934.572, "encoder_q-layer.10": 1252.1198, "encoder_q-layer.11": 3282.2769, "encoder_q-layer.2": 1197.7935, "encoder_q-layer.3": 1196.9658, "encoder_q-layer.4": 1118.6389, "encoder_q-layer.5": 942.5474, "encoder_q-layer.6": 1024.4679, "encoder_q-layer.7": 1084.4592, "encoder_q-layer.8": 1250.0232, "encoder_q-layer.9": 1208.915, "epoch": 0.51, "inbatch_neg_score": 0.6125, "inbatch_pos_score": 1.1572, "learning_rate": 1.188888888888889e-05, "loss": 3.9254, "norm_diff": 0.1266, "norm_loss": 0.0, "num_token_doc": 66.6572, "num_token_overlap": 11.6472, "num_token_query": 31.8636, "num_token_union": 65.3017, "num_word_context": 202.4598, "num_word_doc": 49.78, "num_word_query": 23.5732, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2197.4586, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6118, "query_norm": 1.4206, "queue_k_norm": 1.5397, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8636, "sent_len_1": 66.6572, "sent_len_max_0": 127.655, "sent_len_max_1": 186.99, "stdk": 0.0484, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 78600 }, { "accuracy": 44.4336, "active_queue_size": 16384.0, "cl_loss": 3.9203, "doc_norm": 1.5372, "encoder_q-embeddings": 1821.0658, "encoder_q-layer.0": 1265.7405, "encoder_q-layer.1": 1567.3208, "encoder_q-layer.10": 1377.4443, "encoder_q-layer.11": 3313.2705, "encoder_q-layer.2": 2152.416, "encoder_q-layer.3": 2341.6895, "encoder_q-layer.4": 2325.6628, "encoder_q-layer.5": 2036.5851, "encoder_q-layer.6": 2563.4021, "encoder_q-layer.7": 2225.1672, "encoder_q-layer.8": 1767.5388, "encoder_q-layer.9": 1239.3118, "epoch": 0.51, "inbatch_neg_score": 0.6101, "inbatch_pos_score": 1.1533, "learning_rate": 1.1833333333333334e-05, "loss": 3.9203, "norm_diff": 0.109, "norm_loss": 0.0, "num_token_doc": 66.8928, "num_token_overlap": 11.6401, "num_token_query": 31.8981, "num_token_union": 65.5018, "num_word_context": 202.6272, "num_word_doc": 49.9014, "num_word_query": 23.5358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3124.0125, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6113, "query_norm": 1.4282, "queue_k_norm": 1.5387, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8981, "sent_len_1": 66.8928, "sent_len_max_0": 127.4925, "sent_len_max_1": 189.685, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 78700 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.9159, "doc_norm": 1.5416, "encoder_q-embeddings": 1745.936, "encoder_q-layer.0": 1236.3004, "encoder_q-layer.1": 1407.3333, "encoder_q-layer.10": 1342.5636, "encoder_q-layer.11": 3379.3027, "encoder_q-layer.2": 1605.027, "encoder_q-layer.3": 1613.583, "encoder_q-layer.4": 1618.6814, "encoder_q-layer.5": 1511.601, "encoder_q-layer.6": 1495.5564, "encoder_q-layer.7": 1479.8627, "encoder_q-layer.8": 1527.1887, "encoder_q-layer.9": 1308.8695, "epoch": 0.51, "inbatch_neg_score": 0.6158, "inbatch_pos_score": 1.1689, "learning_rate": 1.1777777777777778e-05, "loss": 3.9159, "norm_diff": 0.1278, "norm_loss": 0.0, "num_token_doc": 66.8024, "num_token_overlap": 11.6859, "num_token_query": 31.9459, "num_token_union": 65.3839, "num_word_context": 202.2997, "num_word_doc": 49.8359, "num_word_query": 23.5884, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2642.4527, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6152, "query_norm": 1.4138, "queue_k_norm": 1.5396, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9459, "sent_len_1": 66.8024, "sent_len_max_0": 127.4513, "sent_len_max_1": 191.7688, "stdk": 0.0482, "stdq": 0.0418, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 78800 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.8975, "doc_norm": 1.5438, "encoder_q-embeddings": 1580.6908, "encoder_q-layer.0": 1092.2161, "encoder_q-layer.1": 1133.9669, "encoder_q-layer.10": 1242.8774, "encoder_q-layer.11": 3047.428, "encoder_q-layer.2": 1332.1676, "encoder_q-layer.3": 1457.7352, "encoder_q-layer.4": 1591.9446, "encoder_q-layer.5": 1467.2169, "encoder_q-layer.6": 1418.8524, "encoder_q-layer.7": 1543.3506, "encoder_q-layer.8": 1318.0098, "encoder_q-layer.9": 1158.7145, "epoch": 0.51, "inbatch_neg_score": 0.6145, "inbatch_pos_score": 1.167, "learning_rate": 1.1722222222222224e-05, "loss": 3.8975, "norm_diff": 0.1143, "norm_loss": 0.0, "num_token_doc": 67.0215, "num_token_overlap": 11.7702, "num_token_query": 32.1104, "num_token_union": 65.565, "num_word_context": 202.8239, "num_word_doc": 50.0023, "num_word_query": 23.7269, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2396.9264, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6152, "query_norm": 1.4296, "queue_k_norm": 1.5421, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.1104, "sent_len_1": 67.0215, "sent_len_max_0": 127.54, "sent_len_max_1": 191.3025, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 78900 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.9111, "doc_norm": 1.5441, "encoder_q-embeddings": 1114.9271, "encoder_q-layer.0": 783.9758, "encoder_q-layer.1": 852.6271, "encoder_q-layer.10": 1217.4646, "encoder_q-layer.11": 3226.5964, "encoder_q-layer.2": 982.0603, "encoder_q-layer.3": 1001.0682, "encoder_q-layer.4": 1034.8597, "encoder_q-layer.5": 1082.599, "encoder_q-layer.6": 1134.5117, "encoder_q-layer.7": 1214.7472, "encoder_q-layer.8": 1288.811, "encoder_q-layer.9": 1186.4951, "epoch": 0.51, "inbatch_neg_score": 0.616, "inbatch_pos_score": 1.168, "learning_rate": 1.1666666666666668e-05, "loss": 3.9111, "norm_diff": 0.126, "norm_loss": 0.0, "num_token_doc": 66.6392, "num_token_overlap": 11.6776, "num_token_query": 31.9876, "num_token_union": 65.313, "num_word_context": 202.1018, "num_word_doc": 49.7534, "num_word_query": 23.6096, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2131.92, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6167, "query_norm": 1.4182, "queue_k_norm": 1.5399, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9876, "sent_len_1": 66.6392, "sent_len_max_0": 127.425, "sent_len_max_1": 188.6175, "stdk": 0.0482, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 79000 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.9277, "doc_norm": 1.5479, "encoder_q-embeddings": 1092.9435, "encoder_q-layer.0": 748.5654, "encoder_q-layer.1": 799.8151, "encoder_q-layer.10": 1297.5872, "encoder_q-layer.11": 3370.7598, "encoder_q-layer.2": 935.0024, "encoder_q-layer.3": 957.4097, "encoder_q-layer.4": 1060.8329, "encoder_q-layer.5": 1060.4427, "encoder_q-layer.6": 1150.1027, "encoder_q-layer.7": 1203.1506, "encoder_q-layer.8": 1395.5684, "encoder_q-layer.9": 1197.0325, "epoch": 0.51, "inbatch_neg_score": 0.6198, "inbatch_pos_score": 1.1719, "learning_rate": 1.1611111111111112e-05, "loss": 3.9277, "norm_diff": 0.1327, "norm_loss": 0.0, "num_token_doc": 66.6432, "num_token_overlap": 11.6459, "num_token_query": 31.9124, "num_token_union": 65.3602, "num_word_context": 202.5867, "num_word_doc": 49.7448, "num_word_query": 23.5977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2173.5765, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6206, "query_norm": 1.4152, "queue_k_norm": 1.5414, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9124, "sent_len_1": 66.6432, "sent_len_max_0": 127.6737, "sent_len_max_1": 188.785, "stdk": 0.0484, "stdq": 0.0418, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 79100 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.9126, "doc_norm": 1.5405, "encoder_q-embeddings": 1101.6512, "encoder_q-layer.0": 755.0414, "encoder_q-layer.1": 794.1685, "encoder_q-layer.10": 1342.2833, "encoder_q-layer.11": 3116.2429, "encoder_q-layer.2": 923.4451, "encoder_q-layer.3": 1002.7162, "encoder_q-layer.4": 1003.068, "encoder_q-layer.5": 1028.3182, "encoder_q-layer.6": 1109.0002, "encoder_q-layer.7": 1195.9515, "encoder_q-layer.8": 1277.8389, "encoder_q-layer.9": 1196.146, "epoch": 0.52, "inbatch_neg_score": 0.6205, "inbatch_pos_score": 1.1797, "learning_rate": 1.1555555555555556e-05, "loss": 3.9126, "norm_diff": 0.1114, "norm_loss": 0.0, "num_token_doc": 66.8803, "num_token_overlap": 11.6648, "num_token_query": 31.8538, "num_token_union": 65.3438, "num_word_context": 202.1579, "num_word_doc": 49.885, "num_word_query": 23.5124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2078.4235, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6216, "query_norm": 1.429, "queue_k_norm": 1.543, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8538, "sent_len_1": 66.8803, "sent_len_max_0": 127.6137, "sent_len_max_1": 189.7363, "stdk": 0.0481, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 79200 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9301, "doc_norm": 1.5396, "encoder_q-embeddings": 1152.5637, "encoder_q-layer.0": 801.0112, "encoder_q-layer.1": 890.6304, "encoder_q-layer.10": 1456.004, "encoder_q-layer.11": 3253.8557, "encoder_q-layer.2": 1060.0422, "encoder_q-layer.3": 1104.4384, "encoder_q-layer.4": 1173.514, "encoder_q-layer.5": 1221.8899, "encoder_q-layer.6": 1215.6259, "encoder_q-layer.7": 1291.8367, "encoder_q-layer.8": 1425.3619, "encoder_q-layer.9": 1216.9351, "epoch": 0.52, "inbatch_neg_score": 0.6242, "inbatch_pos_score": 1.1621, "learning_rate": 1.1500000000000002e-05, "loss": 3.9301, "norm_diff": 0.1242, "norm_loss": 0.0, "num_token_doc": 66.762, "num_token_overlap": 11.6551, "num_token_query": 31.8446, "num_token_union": 65.3043, "num_word_context": 201.8163, "num_word_doc": 49.786, "num_word_query": 23.5195, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2224.1889, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.623, "query_norm": 1.4154, "queue_k_norm": 1.5419, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8446, "sent_len_1": 66.762, "sent_len_max_0": 127.435, "sent_len_max_1": 190.3275, "stdk": 0.048, "stdq": 0.0417, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 79300 }, { "accuracy": 48.4375, "active_queue_size": 16384.0, "cl_loss": 3.9244, "doc_norm": 1.5431, "encoder_q-embeddings": 1578.1802, "encoder_q-layer.0": 1081.2783, "encoder_q-layer.1": 1208.4116, "encoder_q-layer.10": 1245.1819, "encoder_q-layer.11": 3204.4246, "encoder_q-layer.2": 1275.0707, "encoder_q-layer.3": 1207.4727, "encoder_q-layer.4": 1332.9608, "encoder_q-layer.5": 1239.0853, "encoder_q-layer.6": 1183.8042, "encoder_q-layer.7": 1233.0094, "encoder_q-layer.8": 1324.5687, "encoder_q-layer.9": 1162.1641, "epoch": 0.52, "inbatch_neg_score": 0.6238, "inbatch_pos_score": 1.1914, "learning_rate": 1.1444444444444446e-05, "loss": 3.9244, "norm_diff": 0.1097, "norm_loss": 0.0, "num_token_doc": 66.8283, "num_token_overlap": 11.6603, "num_token_query": 31.832, "num_token_union": 65.3648, "num_word_context": 202.5777, "num_word_doc": 49.8781, "num_word_query": 23.4856, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2338.1182, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.624, "query_norm": 1.4333, "queue_k_norm": 1.5442, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.832, "sent_len_1": 66.8283, "sent_len_max_0": 127.4613, "sent_len_max_1": 188.4938, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 79400 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9339, "doc_norm": 1.5395, "encoder_q-embeddings": 1327.5398, "encoder_q-layer.0": 896.032, "encoder_q-layer.1": 978.4297, "encoder_q-layer.10": 1321.8219, "encoder_q-layer.11": 3369.3784, "encoder_q-layer.2": 1142.7611, "encoder_q-layer.3": 1173.5828, "encoder_q-layer.4": 1242.6586, "encoder_q-layer.5": 1317.6737, "encoder_q-layer.6": 1284.3636, "encoder_q-layer.7": 1326.7909, "encoder_q-layer.8": 1476.1646, "encoder_q-layer.9": 1220.7568, "epoch": 0.52, "inbatch_neg_score": 0.6283, "inbatch_pos_score": 1.1719, "learning_rate": 1.138888888888889e-05, "loss": 3.9339, "norm_diff": 0.113, "norm_loss": 0.0, "num_token_doc": 66.7578, "num_token_overlap": 11.7189, "num_token_query": 31.9191, "num_token_union": 65.3165, "num_word_context": 202.4094, "num_word_doc": 49.8141, "num_word_query": 23.5556, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2332.1036, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6274, "query_norm": 1.4265, "queue_k_norm": 1.5434, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9191, "sent_len_1": 66.7578, "sent_len_max_0": 127.5175, "sent_len_max_1": 191.0962, "stdk": 0.048, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 79500 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.9017, "doc_norm": 1.5497, "encoder_q-embeddings": 993.6817, "encoder_q-layer.0": 669.7339, "encoder_q-layer.1": 682.7797, "encoder_q-layer.10": 1283.4927, "encoder_q-layer.11": 3194.3284, "encoder_q-layer.2": 772.2775, "encoder_q-layer.3": 799.5102, "encoder_q-layer.4": 857.2593, "encoder_q-layer.5": 870.3351, "encoder_q-layer.6": 985.8124, "encoder_q-layer.7": 1091.3787, "encoder_q-layer.8": 1253.4683, "encoder_q-layer.9": 1157.0017, "epoch": 0.52, "inbatch_neg_score": 0.6285, "inbatch_pos_score": 1.1816, "learning_rate": 1.1333333333333334e-05, "loss": 3.9017, "norm_diff": 0.1278, "norm_loss": 0.0, "num_token_doc": 66.9131, "num_token_overlap": 11.6086, "num_token_query": 31.8339, "num_token_union": 65.4596, "num_word_context": 202.6551, "num_word_doc": 49.9183, "num_word_query": 23.5073, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2017.5511, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6274, "query_norm": 1.4218, "queue_k_norm": 1.544, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8339, "sent_len_1": 66.9131, "sent_len_max_0": 127.515, "sent_len_max_1": 189.445, "stdk": 0.0483, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 79600 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9215, "doc_norm": 1.5375, "encoder_q-embeddings": 2480.0188, "encoder_q-layer.0": 1652.7491, "encoder_q-layer.1": 1622.1278, "encoder_q-layer.10": 2537.8088, "encoder_q-layer.11": 6566.292, "encoder_q-layer.2": 1895.6869, "encoder_q-layer.3": 1952.7329, "encoder_q-layer.4": 2121.332, "encoder_q-layer.5": 2061.8396, "encoder_q-layer.6": 2217.5962, "encoder_q-layer.7": 2312.144, "encoder_q-layer.8": 2515.1284, "encoder_q-layer.9": 2330.6714, "epoch": 0.52, "inbatch_neg_score": 0.6299, "inbatch_pos_score": 1.1924, "learning_rate": 1.127777777777778e-05, "loss": 3.9215, "norm_diff": 0.1056, "norm_loss": 0.0, "num_token_doc": 66.7796, "num_token_overlap": 11.6729, "num_token_query": 31.9475, "num_token_union": 65.3682, "num_word_context": 201.9261, "num_word_doc": 49.7708, "num_word_query": 23.576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4293.051, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6294, "query_norm": 1.4319, "queue_k_norm": 1.5464, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9475, "sent_len_1": 66.7796, "sent_len_max_0": 127.6412, "sent_len_max_1": 192.085, "stdk": 0.0478, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 79700 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.892, "doc_norm": 1.5416, "encoder_q-embeddings": 3171.7993, "encoder_q-layer.0": 2220.5049, "encoder_q-layer.1": 2358.8657, "encoder_q-layer.10": 2625.303, "encoder_q-layer.11": 6695.9443, "encoder_q-layer.2": 2753.2722, "encoder_q-layer.3": 2911.7761, "encoder_q-layer.4": 3025.6858, "encoder_q-layer.5": 3062.7253, "encoder_q-layer.6": 3239.5996, "encoder_q-layer.7": 3240.7888, "encoder_q-layer.8": 3111.0625, "encoder_q-layer.9": 2578.8884, "epoch": 0.52, "inbatch_neg_score": 0.6311, "inbatch_pos_score": 1.1924, "learning_rate": 1.1222222222222224e-05, "loss": 3.892, "norm_diff": 0.0994, "norm_loss": 0.0, "num_token_doc": 66.7672, "num_token_overlap": 11.7098, "num_token_query": 31.9653, "num_token_union": 65.3444, "num_word_context": 202.3046, "num_word_doc": 49.8381, "num_word_query": 23.5838, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4990.2342, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6313, "query_norm": 1.4422, "queue_k_norm": 1.5463, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9653, "sent_len_1": 66.7672, "sent_len_max_0": 127.5863, "sent_len_max_1": 189.8975, "stdk": 0.048, "stdq": 0.043, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 79800 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9295, "doc_norm": 1.5492, "encoder_q-embeddings": 2587.1316, "encoder_q-layer.0": 1753.3431, "encoder_q-layer.1": 1866.1453, "encoder_q-layer.10": 2454.9868, "encoder_q-layer.11": 6806.4614, "encoder_q-layer.2": 2201.5864, "encoder_q-layer.3": 2282.2678, "encoder_q-layer.4": 2541.9832, "encoder_q-layer.5": 2494.5957, "encoder_q-layer.6": 2804.5872, "encoder_q-layer.7": 2772.3433, "encoder_q-layer.8": 2878.5698, "encoder_q-layer.9": 2489.2971, "epoch": 0.52, "inbatch_neg_score": 0.6361, "inbatch_pos_score": 1.1953, "learning_rate": 1.1166666666666668e-05, "loss": 3.9295, "norm_diff": 0.1211, "norm_loss": 0.0, "num_token_doc": 66.7581, "num_token_overlap": 11.6918, "num_token_query": 31.9079, "num_token_union": 65.2989, "num_word_context": 202.1885, "num_word_doc": 49.8053, "num_word_query": 23.5757, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4703.3442, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6357, "query_norm": 1.4281, "queue_k_norm": 1.5439, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9079, "sent_len_1": 66.7581, "sent_len_max_0": 127.5463, "sent_len_max_1": 189.1813, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 79900 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.9201, "doc_norm": 1.5434, "encoder_q-embeddings": 2099.2654, "encoder_q-layer.0": 1392.4563, "encoder_q-layer.1": 1446.9753, "encoder_q-layer.10": 2715.6265, "encoder_q-layer.11": 6529.688, "encoder_q-layer.2": 1580.9425, "encoder_q-layer.3": 1636.1285, "encoder_q-layer.4": 1716.0349, "encoder_q-layer.5": 1754.6124, "encoder_q-layer.6": 2007.2513, "encoder_q-layer.7": 2107.9678, "encoder_q-layer.8": 2569.6218, "encoder_q-layer.9": 2420.7356, "epoch": 0.52, "inbatch_neg_score": 0.6354, "inbatch_pos_score": 1.1797, "learning_rate": 1.1111111111111112e-05, "loss": 3.9201, "norm_diff": 0.1213, "norm_loss": 0.0, "num_token_doc": 66.7681, "num_token_overlap": 11.6382, "num_token_query": 31.7646, "num_token_union": 65.307, "num_word_context": 201.7491, "num_word_doc": 49.8073, "num_word_query": 23.4773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4125.6783, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6353, "query_norm": 1.4221, "queue_k_norm": 1.5469, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7646, "sent_len_1": 66.7681, "sent_len_max_0": 127.565, "sent_len_max_1": 188.3525, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 80000 }, { "dev_runtime": 44.1823, "dev_samples_per_second": 1.449, "dev_steps_per_second": 0.023, "epoch": 0.52, "step": 80000, "test_accuracy": 93.17626953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3849891424179077, "test_doc_norm": 1.514592170715332, "test_inbatch_neg_score": 0.9753116965293884, "test_inbatch_pos_score": 1.8808138370513916, "test_loss": 0.3849891424179077, "test_loss_align": 0.9347415566444397, "test_loss_unif": 3.23838210105896, "test_loss_unif_q@queue": 3.23838210105896, "test_norm_diff": 0.02694034017622471, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.6218376755714417, "test_query_norm": 1.5415325164794922, "test_queue_k_norm": 1.546311855316162, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04155101627111435, "test_stdq": 0.04189368337392807, "test_stdqueue_k": 0.04823862761259079, "test_stdqueue_q": 0.0 }, { "dev_runtime": 44.1823, "dev_samples_per_second": 1.449, "dev_steps_per_second": 0.023, "epoch": 0.52, "eval_beir-arguana_ndcg@10": 0.38477, "eval_beir-arguana_recall@10": 0.65647, "eval_beir-arguana_recall@100": 0.94026, "eval_beir-arguana_recall@20": 0.79232, "eval_beir-avg_ndcg@10": 0.3812823333333333, "eval_beir-avg_recall@10": 0.4510923333333333, "eval_beir-avg_recall@100": 0.6342379166666666, "eval_beir-avg_recall@20": 0.5127360833333333, "eval_beir-cqadupstack_ndcg@10": 0.26383333333333336, "eval_beir-cqadupstack_recall@10": 0.3605933333333333, "eval_beir-cqadupstack_recall@100": 0.5940891666666667, "eval_beir-cqadupstack_recall@20": 0.4261208333333333, "eval_beir-fiqa_ndcg@10": 0.25405, "eval_beir-fiqa_recall@10": 0.31231, "eval_beir-fiqa_recall@100": 0.56985, "eval_beir-fiqa_recall@20": 0.37915, "eval_beir-nfcorpus_ndcg@10": 0.29835, "eval_beir-nfcorpus_recall@10": 0.15199, "eval_beir-nfcorpus_recall@100": 0.28551, "eval_beir-nfcorpus_recall@20": 0.18146, "eval_beir-nq_ndcg@10": 0.28462, "eval_beir-nq_recall@10": 0.46432, "eval_beir-nq_recall@100": 0.80579, "eval_beir-nq_recall@20": 0.58988, "eval_beir-quora_ndcg@10": 0.77888, "eval_beir-quora_recall@10": 0.8886, "eval_beir-quora_recall@100": 0.9771, "eval_beir-quora_recall@20": 0.93012, "eval_beir-scidocs_ndcg@10": 0.15369, "eval_beir-scidocs_recall@10": 0.16008, "eval_beir-scidocs_recall@100": 0.36938, "eval_beir-scidocs_recall@20": 0.21738, "eval_beir-scifact_ndcg@10": 0.64611, "eval_beir-scifact_recall@10": 0.78567, "eval_beir-scifact_recall@100": 0.92989, "eval_beir-scifact_recall@20": 0.83467, "eval_beir-trec-covid_ndcg@10": 0.56618, "eval_beir-trec-covid_recall@10": 0.602, "eval_beir-trec-covid_recall@100": 0.443, "eval_beir-trec-covid_recall@20": 0.576, "eval_beir-webis-touche2020_ndcg@10": 0.18234, "eval_beir-webis-touche2020_recall@10": 0.12889, "eval_beir-webis-touche2020_recall@100": 0.42751, "eval_beir-webis-touche2020_recall@20": 0.20026, "eval_senteval-avg_sts": 0.7437963735256761, "eval_senteval-sickr_spearman": 0.7092322871008225, "eval_senteval-stsb_spearman": 0.7783604599505295, "step": 80000, "test_accuracy": 93.17626953125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.3849891424179077, "test_doc_norm": 1.514592170715332, "test_inbatch_neg_score": 0.9753116965293884, "test_inbatch_pos_score": 1.8808138370513916, "test_loss": 0.3849891424179077, "test_loss_align": 0.9347415566444397, "test_loss_unif": 3.23838210105896, "test_loss_unif_q@queue": 3.23838210105896, "test_norm_diff": 0.02694034017622471, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.6218376755714417, "test_query_norm": 1.5415325164794922, "test_queue_k_norm": 1.546311855316162, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04155101627111435, "test_stdq": 0.04189368337392807, "test_stdqueue_k": 0.04823862761259079, "test_stdqueue_q": 0.0 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.8976, "doc_norm": 1.5514, "encoder_q-embeddings": 3886.7122, "encoder_q-layer.0": 2667.291, "encoder_q-layer.1": 3385.9714, "encoder_q-layer.10": 2617.1079, "encoder_q-layer.11": 6654.0225, "encoder_q-layer.2": 4222.603, "encoder_q-layer.3": 4445.6885, "encoder_q-layer.4": 4655.5376, "encoder_q-layer.5": 4635.0322, "encoder_q-layer.6": 4531.4917, "encoder_q-layer.7": 3954.8091, "encoder_q-layer.8": 3599.0608, "encoder_q-layer.9": 2565.8765, "epoch": 0.52, "inbatch_neg_score": 0.638, "inbatch_pos_score": 1.1924, "learning_rate": 1.1055555555555556e-05, "loss": 3.8976, "norm_diff": 0.1189, "norm_loss": 0.0, "num_token_doc": 66.8973, "num_token_overlap": 11.6533, "num_token_query": 31.8828, "num_token_union": 65.4066, "num_word_context": 202.5465, "num_word_doc": 49.8881, "num_word_query": 23.5486, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6205.8141, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6377, "query_norm": 1.4325, "queue_k_norm": 1.5479, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8828, "sent_len_1": 66.8973, "sent_len_max_0": 127.41, "sent_len_max_1": 190.7475, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 80100 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.9428, "doc_norm": 1.551, "encoder_q-embeddings": 2398.2878, "encoder_q-layer.0": 1630.8435, "encoder_q-layer.1": 1719.0802, "encoder_q-layer.10": 2825.8684, "encoder_q-layer.11": 6744.1938, "encoder_q-layer.2": 2106.6035, "encoder_q-layer.3": 2235.3232, "encoder_q-layer.4": 2189.2695, "encoder_q-layer.5": 2205.3755, "encoder_q-layer.6": 2418.7803, "encoder_q-layer.7": 2575.7993, "encoder_q-layer.8": 2642.9856, "encoder_q-layer.9": 2369.1804, "epoch": 0.52, "inbatch_neg_score": 0.6401, "inbatch_pos_score": 1.1807, "learning_rate": 1.1000000000000001e-05, "loss": 3.9428, "norm_diff": 0.1309, "norm_loss": 0.0, "num_token_doc": 66.6272, "num_token_overlap": 11.609, "num_token_query": 31.8288, "num_token_union": 65.24, "num_word_context": 202.0493, "num_word_doc": 49.6699, "num_word_query": 23.4851, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4500.8035, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6392, "query_norm": 1.4202, "queue_k_norm": 1.5458, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8288, "sent_len_1": 66.6272, "sent_len_max_0": 127.6863, "sent_len_max_1": 190.7738, "stdk": 0.0483, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 80200 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.9166, "doc_norm": 1.5503, "encoder_q-embeddings": 2053.2627, "encoder_q-layer.0": 1351.2465, "encoder_q-layer.1": 1414.5974, "encoder_q-layer.10": 2628.1208, "encoder_q-layer.11": 6277.4238, "encoder_q-layer.2": 1584.2, "encoder_q-layer.3": 1646.1646, "encoder_q-layer.4": 1725.8232, "encoder_q-layer.5": 1754.0728, "encoder_q-layer.6": 1913.67, "encoder_q-layer.7": 2135.2188, "encoder_q-layer.8": 2472.0596, "encoder_q-layer.9": 2330.0188, "epoch": 0.52, "inbatch_neg_score": 0.6393, "inbatch_pos_score": 1.207, "learning_rate": 1.0944444444444445e-05, "loss": 3.9166, "norm_diff": 0.1108, "norm_loss": 0.0, "num_token_doc": 66.785, "num_token_overlap": 11.6424, "num_token_query": 31.8499, "num_token_union": 65.3336, "num_word_context": 202.1954, "num_word_doc": 49.8074, "num_word_query": 23.5281, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3965.9347, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6392, "query_norm": 1.4395, "queue_k_norm": 1.5478, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8499, "sent_len_1": 66.785, "sent_len_max_0": 127.42, "sent_len_max_1": 190.46, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 80300 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.928, "doc_norm": 1.5459, "encoder_q-embeddings": 2403.2227, "encoder_q-layer.0": 1587.6176, "encoder_q-layer.1": 1777.9255, "encoder_q-layer.10": 2827.3689, "encoder_q-layer.11": 6531.0483, "encoder_q-layer.2": 2032.2241, "encoder_q-layer.3": 2124.2568, "encoder_q-layer.4": 2276.907, "encoder_q-layer.5": 2199.7834, "encoder_q-layer.6": 2354.1338, "encoder_q-layer.7": 2506.1448, "encoder_q-layer.8": 2747.4375, "encoder_q-layer.9": 2521.478, "epoch": 0.52, "inbatch_neg_score": 0.6411, "inbatch_pos_score": 1.2139, "learning_rate": 1.088888888888889e-05, "loss": 3.928, "norm_diff": 0.1081, "norm_loss": 0.0, "num_token_doc": 66.8004, "num_token_overlap": 11.6933, "num_token_query": 31.965, "num_token_union": 65.3874, "num_word_context": 202.0798, "num_word_doc": 49.8108, "num_word_query": 23.5924, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4426.5192, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6406, "query_norm": 1.4378, "queue_k_norm": 1.5492, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.965, "sent_len_1": 66.8004, "sent_len_max_0": 127.3525, "sent_len_max_1": 189.7713, "stdk": 0.048, "stdq": 0.0427, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 80400 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.9077, "doc_norm": 1.5474, "encoder_q-embeddings": 2066.5293, "encoder_q-layer.0": 1391.6136, "encoder_q-layer.1": 1427.2303, "encoder_q-layer.10": 2571.3132, "encoder_q-layer.11": 6536.584, "encoder_q-layer.2": 1600.9684, "encoder_q-layer.3": 1668.6608, "encoder_q-layer.4": 1756.3582, "encoder_q-layer.5": 1819.3365, "encoder_q-layer.6": 2094.8931, "encoder_q-layer.7": 2241.3496, "encoder_q-layer.8": 2565.7983, "encoder_q-layer.9": 2347.6987, "epoch": 0.52, "inbatch_neg_score": 0.6387, "inbatch_pos_score": 1.2129, "learning_rate": 1.0833333333333334e-05, "loss": 3.9077, "norm_diff": 0.1117, "norm_loss": 0.0, "num_token_doc": 66.8678, "num_token_overlap": 11.6532, "num_token_query": 31.7695, "num_token_union": 65.3217, "num_word_context": 202.0973, "num_word_doc": 49.8945, "num_word_query": 23.4498, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4096.01, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6396, "query_norm": 1.4357, "queue_k_norm": 1.548, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7695, "sent_len_1": 66.8678, "sent_len_max_0": 127.5837, "sent_len_max_1": 188.195, "stdk": 0.0481, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 80500 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9206, "doc_norm": 1.5537, "encoder_q-embeddings": 2291.3735, "encoder_q-layer.0": 1540.6946, "encoder_q-layer.1": 1615.8735, "encoder_q-layer.10": 2914.2075, "encoder_q-layer.11": 6592.6577, "encoder_q-layer.2": 1887.1094, "encoder_q-layer.3": 2011.3206, "encoder_q-layer.4": 2203.6887, "encoder_q-layer.5": 2205.9666, "encoder_q-layer.6": 2419.4727, "encoder_q-layer.7": 2446.9377, "encoder_q-layer.8": 2645.3562, "encoder_q-layer.9": 2387.6895, "epoch": 0.52, "inbatch_neg_score": 0.6431, "inbatch_pos_score": 1.1787, "learning_rate": 1.0777777777777778e-05, "loss": 3.9206, "norm_diff": 0.1364, "norm_loss": 0.0, "num_token_doc": 66.8267, "num_token_overlap": 11.6473, "num_token_query": 31.8967, "num_token_union": 65.421, "num_word_context": 202.1496, "num_word_doc": 49.8479, "num_word_query": 23.5601, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4373.0577, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6436, "query_norm": 1.4173, "queue_k_norm": 1.5485, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8967, "sent_len_1": 66.8267, "sent_len_max_0": 127.375, "sent_len_max_1": 188.9062, "stdk": 0.0483, "stdq": 0.0417, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 80600 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9289, "doc_norm": 1.5554, "encoder_q-embeddings": 2040.7544, "encoder_q-layer.0": 1362.3383, "encoder_q-layer.1": 1412.2249, "encoder_q-layer.10": 2466.0378, "encoder_q-layer.11": 6477.6953, "encoder_q-layer.2": 1631.3159, "encoder_q-layer.3": 1688.6681, "encoder_q-layer.4": 1842.3752, "encoder_q-layer.5": 1808.9993, "encoder_q-layer.6": 2090.6855, "encoder_q-layer.7": 2216.5293, "encoder_q-layer.8": 2662.5508, "encoder_q-layer.9": 2368.6348, "epoch": 0.53, "inbatch_neg_score": 0.6415, "inbatch_pos_score": 1.2021, "learning_rate": 1.0722222222222222e-05, "loss": 3.9289, "norm_diff": 0.127, "norm_loss": 0.0, "num_token_doc": 66.7697, "num_token_overlap": 11.6577, "num_token_query": 31.8682, "num_token_union": 65.3973, "num_word_context": 202.4882, "num_word_doc": 49.8379, "num_word_query": 23.5416, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4057.967, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6426, "query_norm": 1.4284, "queue_k_norm": 1.5489, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8682, "sent_len_1": 66.7697, "sent_len_max_0": 127.5312, "sent_len_max_1": 187.4812, "stdk": 0.0484, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 80700 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.9262, "doc_norm": 1.5503, "encoder_q-embeddings": 5185.4287, "encoder_q-layer.0": 3508.5632, "encoder_q-layer.1": 3808.0862, "encoder_q-layer.10": 2645.4365, "encoder_q-layer.11": 6759.916, "encoder_q-layer.2": 4551.2183, "encoder_q-layer.3": 4819.8892, "encoder_q-layer.4": 4750.7217, "encoder_q-layer.5": 5194.5522, "encoder_q-layer.6": 5661.2134, "encoder_q-layer.7": 4936.6162, "encoder_q-layer.8": 4442.4531, "encoder_q-layer.9": 2876.5088, "epoch": 0.53, "inbatch_neg_score": 0.6466, "inbatch_pos_score": 1.2012, "learning_rate": 1.0666666666666667e-05, "loss": 3.9262, "norm_diff": 0.1256, "norm_loss": 0.0, "num_token_doc": 66.6084, "num_token_overlap": 11.6891, "num_token_query": 32.0167, "num_token_union": 65.314, "num_word_context": 202.4264, "num_word_doc": 49.7351, "num_word_query": 23.6759, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7135.5307, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6475, "query_norm": 1.4247, "queue_k_norm": 1.5508, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0167, "sent_len_1": 66.6084, "sent_len_max_0": 127.49, "sent_len_max_1": 188.8862, "stdk": 0.0482, "stdq": 0.0419, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 80800 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9047, "doc_norm": 1.5537, "encoder_q-embeddings": 2377.063, "encoder_q-layer.0": 1602.6981, "encoder_q-layer.1": 1785.3995, "encoder_q-layer.10": 2611.5378, "encoder_q-layer.11": 6411.4307, "encoder_q-layer.2": 2009.8666, "encoder_q-layer.3": 2067.7234, "encoder_q-layer.4": 2073.9814, "encoder_q-layer.5": 2070.6399, "encoder_q-layer.6": 2280.165, "encoder_q-layer.7": 2344.8513, "encoder_q-layer.8": 2526.6641, "encoder_q-layer.9": 2361.6685, "epoch": 0.53, "inbatch_neg_score": 0.6478, "inbatch_pos_score": 1.1973, "learning_rate": 1.0611111111111111e-05, "loss": 3.9047, "norm_diff": 0.1122, "norm_loss": 0.0, "num_token_doc": 66.804, "num_token_overlap": 11.6796, "num_token_query": 31.9246, "num_token_union": 65.3671, "num_word_context": 202.2663, "num_word_doc": 49.8406, "num_word_query": 23.5887, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4283.6954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6479, "query_norm": 1.4415, "queue_k_norm": 1.5516, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9246, "sent_len_1": 66.804, "sent_len_max_0": 127.4938, "sent_len_max_1": 190.19, "stdk": 0.0483, "stdq": 0.0426, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 80900 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.9006, "doc_norm": 1.5433, "encoder_q-embeddings": 2838.2036, "encoder_q-layer.0": 1909.2324, "encoder_q-layer.1": 2067.6021, "encoder_q-layer.10": 2568.7688, "encoder_q-layer.11": 6743.2754, "encoder_q-layer.2": 2290.6755, "encoder_q-layer.3": 2413.3118, "encoder_q-layer.4": 2618.8491, "encoder_q-layer.5": 2582.9653, "encoder_q-layer.6": 2777.8079, "encoder_q-layer.7": 2774.7441, "encoder_q-layer.8": 2985.438, "encoder_q-layer.9": 2498.5181, "epoch": 0.53, "inbatch_neg_score": 0.6515, "inbatch_pos_score": 1.2051, "learning_rate": 1.0555555555555555e-05, "loss": 3.9006, "norm_diff": 0.1109, "norm_loss": 0.0, "num_token_doc": 66.6277, "num_token_overlap": 11.7106, "num_token_query": 31.945, "num_token_union": 65.2153, "num_word_context": 201.9508, "num_word_doc": 49.6981, "num_word_query": 23.5981, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4673.6315, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6514, "query_norm": 1.4324, "queue_k_norm": 1.5524, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.945, "sent_len_1": 66.6277, "sent_len_max_0": 127.3925, "sent_len_max_1": 190.965, "stdk": 0.0478, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 81000 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 3.9207, "doc_norm": 1.5534, "encoder_q-embeddings": 2919.4194, "encoder_q-layer.0": 2062.9121, "encoder_q-layer.1": 2202.2188, "encoder_q-layer.10": 2570.9651, "encoder_q-layer.11": 6683.3154, "encoder_q-layer.2": 2644.4355, "encoder_q-layer.3": 2864.9453, "encoder_q-layer.4": 3524.4202, "encoder_q-layer.5": 3332.5735, "encoder_q-layer.6": 3843.5957, "encoder_q-layer.7": 3225.4568, "encoder_q-layer.8": 3083.5466, "encoder_q-layer.9": 2531.4478, "epoch": 0.53, "inbatch_neg_score": 0.6515, "inbatch_pos_score": 1.1836, "learning_rate": 1.05e-05, "loss": 3.9207, "norm_diff": 0.1253, "norm_loss": 0.0, "num_token_doc": 66.8615, "num_token_overlap": 11.6868, "num_token_query": 31.9204, "num_token_union": 65.445, "num_word_context": 202.267, "num_word_doc": 49.9243, "num_word_query": 23.5863, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5155.2186, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6519, "query_norm": 1.4281, "queue_k_norm": 1.5524, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9204, "sent_len_1": 66.8615, "sent_len_max_0": 127.3688, "sent_len_max_1": 189.0737, "stdk": 0.0482, "stdq": 0.0419, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 81100 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.9283, "doc_norm": 1.5565, "encoder_q-embeddings": 2132.1096, "encoder_q-layer.0": 1404.9719, "encoder_q-layer.1": 1447.8572, "encoder_q-layer.10": 2687.0586, "encoder_q-layer.11": 6387.9336, "encoder_q-layer.2": 1664.6959, "encoder_q-layer.3": 1686.8181, "encoder_q-layer.4": 1793.4214, "encoder_q-layer.5": 1834.5, "encoder_q-layer.6": 1990.0494, "encoder_q-layer.7": 2186.2646, "encoder_q-layer.8": 2613.9333, "encoder_q-layer.9": 2364.6565, "epoch": 0.53, "inbatch_neg_score": 0.6538, "inbatch_pos_score": 1.2139, "learning_rate": 1.0444444444444445e-05, "loss": 3.9283, "norm_diff": 0.1157, "norm_loss": 0.0, "num_token_doc": 66.9272, "num_token_overlap": 11.7156, "num_token_query": 32.0433, "num_token_union": 65.4841, "num_word_context": 202.0756, "num_word_doc": 49.9251, "num_word_query": 23.6696, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4081.0541, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6528, "query_norm": 1.4408, "queue_k_norm": 1.552, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0433, "sent_len_1": 66.9272, "sent_len_max_0": 127.4862, "sent_len_max_1": 190.1287, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 81200 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.8875, "doc_norm": 1.5566, "encoder_q-embeddings": 2405.4023, "encoder_q-layer.0": 1621.2332, "encoder_q-layer.1": 1753.0795, "encoder_q-layer.10": 2414.2241, "encoder_q-layer.11": 6442.6328, "encoder_q-layer.2": 2255.2373, "encoder_q-layer.3": 2441.8171, "encoder_q-layer.4": 2559.011, "encoder_q-layer.5": 2339.3191, "encoder_q-layer.6": 2210.2407, "encoder_q-layer.7": 2441.0005, "encoder_q-layer.8": 2695.425, "encoder_q-layer.9": 2362.231, "epoch": 0.53, "inbatch_neg_score": 0.6561, "inbatch_pos_score": 1.209, "learning_rate": 1.038888888888889e-05, "loss": 3.8875, "norm_diff": 0.1262, "norm_loss": 0.0, "num_token_doc": 67.0022, "num_token_overlap": 11.7598, "num_token_query": 32.0752, "num_token_union": 65.4961, "num_word_context": 202.7946, "num_word_doc": 49.9747, "num_word_query": 23.7011, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4478.5321, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6558, "query_norm": 1.4304, "queue_k_norm": 1.5531, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0752, "sent_len_1": 67.0022, "sent_len_max_0": 127.5925, "sent_len_max_1": 188.4538, "stdk": 0.0483, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 81300 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.9087, "doc_norm": 1.5561, "encoder_q-embeddings": 2142.8467, "encoder_q-layer.0": 1483.7665, "encoder_q-layer.1": 1639.8063, "encoder_q-layer.10": 2451.4858, "encoder_q-layer.11": 6486.5977, "encoder_q-layer.2": 1979.6836, "encoder_q-layer.3": 2030.9713, "encoder_q-layer.4": 2100.8074, "encoder_q-layer.5": 2147.8752, "encoder_q-layer.6": 2314.1721, "encoder_q-layer.7": 2457.5894, "encoder_q-layer.8": 2666.2532, "encoder_q-layer.9": 2397.053, "epoch": 0.53, "inbatch_neg_score": 0.6564, "inbatch_pos_score": 1.2148, "learning_rate": 1.0333333333333333e-05, "loss": 3.9087, "norm_diff": 0.1258, "norm_loss": 0.0, "num_token_doc": 66.6958, "num_token_overlap": 11.6621, "num_token_query": 31.8428, "num_token_union": 65.3135, "num_word_context": 202.3202, "num_word_doc": 49.796, "num_word_query": 23.5078, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4243.2271, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6562, "query_norm": 1.4303, "queue_k_norm": 1.5531, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8428, "sent_len_1": 66.6958, "sent_len_max_0": 127.42, "sent_len_max_1": 189.815, "stdk": 0.0482, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 81400 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.9203, "doc_norm": 1.5516, "encoder_q-embeddings": 2216.1633, "encoder_q-layer.0": 1454.9709, "encoder_q-layer.1": 1581.9607, "encoder_q-layer.10": 2515.167, "encoder_q-layer.11": 6562.8843, "encoder_q-layer.2": 1763.6428, "encoder_q-layer.3": 1830.1987, "encoder_q-layer.4": 1951.1809, "encoder_q-layer.5": 2003.2231, "encoder_q-layer.6": 2199.4526, "encoder_q-layer.7": 2267.0127, "encoder_q-layer.8": 2603.314, "encoder_q-layer.9": 2383.0784, "epoch": 0.53, "inbatch_neg_score": 0.6569, "inbatch_pos_score": 1.1982, "learning_rate": 1.0277777777777777e-05, "loss": 3.9203, "norm_diff": 0.1169, "norm_loss": 0.0, "num_token_doc": 66.8315, "num_token_overlap": 11.6669, "num_token_query": 31.8605, "num_token_union": 65.3805, "num_word_context": 202.235, "num_word_doc": 49.8532, "num_word_query": 23.5257, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4200.1738, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6567, "query_norm": 1.4347, "queue_k_norm": 1.5555, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8605, "sent_len_1": 66.8315, "sent_len_max_0": 127.55, "sent_len_max_1": 191.3063, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 81500 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.8956, "doc_norm": 1.5564, "encoder_q-embeddings": 2240.8267, "encoder_q-layer.0": 1447.1499, "encoder_q-layer.1": 1531.2852, "encoder_q-layer.10": 2509.6523, "encoder_q-layer.11": 6543.3398, "encoder_q-layer.2": 1697.2695, "encoder_q-layer.3": 1786.6877, "encoder_q-layer.4": 1872.4811, "encoder_q-layer.5": 1949.3037, "encoder_q-layer.6": 2106.854, "encoder_q-layer.7": 2315.8796, "encoder_q-layer.8": 2605.8091, "encoder_q-layer.9": 2285.5889, "epoch": 0.53, "inbatch_neg_score": 0.657, "inbatch_pos_score": 1.1992, "learning_rate": 1.0222222222222223e-05, "loss": 3.8956, "norm_diff": 0.1243, "norm_loss": 0.0, "num_token_doc": 66.6698, "num_token_overlap": 11.7038, "num_token_query": 31.9961, "num_token_union": 65.3042, "num_word_context": 202.387, "num_word_doc": 49.7342, "num_word_query": 23.6286, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4159.585, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6567, "query_norm": 1.4321, "queue_k_norm": 1.5544, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9961, "sent_len_1": 66.6698, "sent_len_max_0": 127.5725, "sent_len_max_1": 188.2363, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 81600 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.927, "doc_norm": 1.552, "encoder_q-embeddings": 2296.3943, "encoder_q-layer.0": 1546.3195, "encoder_q-layer.1": 1685.1401, "encoder_q-layer.10": 2671.0925, "encoder_q-layer.11": 6177.1328, "encoder_q-layer.2": 1824.7688, "encoder_q-layer.3": 1937.5536, "encoder_q-layer.4": 2017.0005, "encoder_q-layer.5": 2100.5618, "encoder_q-layer.6": 2258.1018, "encoder_q-layer.7": 2400.4536, "encoder_q-layer.8": 2618.208, "encoder_q-layer.9": 2489.6143, "epoch": 0.53, "inbatch_neg_score": 0.6601, "inbatch_pos_score": 1.2168, "learning_rate": 1.0166666666666667e-05, "loss": 3.927, "norm_diff": 0.1076, "norm_loss": 0.0, "num_token_doc": 66.6484, "num_token_overlap": 11.6342, "num_token_query": 31.8584, "num_token_union": 65.2937, "num_word_context": 202.417, "num_word_doc": 49.7455, "num_word_query": 23.5475, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4121.8208, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6602, "query_norm": 1.4444, "queue_k_norm": 1.554, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8584, "sent_len_1": 66.6484, "sent_len_max_0": 127.485, "sent_len_max_1": 189.7612, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 81700 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.908, "doc_norm": 1.5497, "encoder_q-embeddings": 1943.4386, "encoder_q-layer.0": 1318.5261, "encoder_q-layer.1": 1405.3098, "encoder_q-layer.10": 2536.7629, "encoder_q-layer.11": 6659.4785, "encoder_q-layer.2": 1547.1312, "encoder_q-layer.3": 1596.0037, "encoder_q-layer.4": 1658.5315, "encoder_q-layer.5": 1705.0537, "encoder_q-layer.6": 2022.5898, "encoder_q-layer.7": 2230.7979, "encoder_q-layer.8": 2573.0271, "encoder_q-layer.9": 2403.5635, "epoch": 0.53, "inbatch_neg_score": 0.6632, "inbatch_pos_score": 1.2129, "learning_rate": 1.0111111111111111e-05, "loss": 3.908, "norm_diff": 0.1136, "norm_loss": 0.0, "num_token_doc": 66.7337, "num_token_overlap": 11.6547, "num_token_query": 31.9801, "num_token_union": 65.3832, "num_word_context": 202.2531, "num_word_doc": 49.7814, "num_word_query": 23.6041, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4096.1055, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6621, "query_norm": 1.4361, "queue_k_norm": 1.556, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9801, "sent_len_1": 66.7337, "sent_len_max_0": 127.5062, "sent_len_max_1": 190.1425, "stdk": 0.0479, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 81800 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.9187, "doc_norm": 1.5579, "encoder_q-embeddings": 9061.9766, "encoder_q-layer.0": 5945.0049, "encoder_q-layer.1": 6931.1587, "encoder_q-layer.10": 2575.8669, "encoder_q-layer.11": 6516.8818, "encoder_q-layer.2": 7431.9155, "encoder_q-layer.3": 7825.5356, "encoder_q-layer.4": 6460.082, "encoder_q-layer.5": 5070.1489, "encoder_q-layer.6": 4474.8271, "encoder_q-layer.7": 3891.241, "encoder_q-layer.8": 3052.0005, "encoder_q-layer.9": 2496.1936, "epoch": 0.53, "inbatch_neg_score": 0.6652, "inbatch_pos_score": 1.2188, "learning_rate": 1.0055555555555555e-05, "loss": 3.9187, "norm_diff": 0.1165, "norm_loss": 0.0, "num_token_doc": 66.8485, "num_token_overlap": 11.6713, "num_token_query": 31.8318, "num_token_union": 65.3356, "num_word_context": 202.282, "num_word_doc": 49.8578, "num_word_query": 23.4997, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9227.1625, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.665, "query_norm": 1.4414, "queue_k_norm": 1.557, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8318, "sent_len_1": 66.8485, "sent_len_max_0": 127.33, "sent_len_max_1": 190.7025, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 81900 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.9163, "doc_norm": 1.552, "encoder_q-embeddings": 2378.3125, "encoder_q-layer.0": 1584.5934, "encoder_q-layer.1": 1709.3259, "encoder_q-layer.10": 2605.7927, "encoder_q-layer.11": 6664.4619, "encoder_q-layer.2": 1921.0031, "encoder_q-layer.3": 1923.3407, "encoder_q-layer.4": 2130.0098, "encoder_q-layer.5": 2205.5674, "encoder_q-layer.6": 2293.2446, "encoder_q-layer.7": 2311.3613, "encoder_q-layer.8": 2583.6584, "encoder_q-layer.9": 2335.4873, "epoch": 0.53, "inbatch_neg_score": 0.6675, "inbatch_pos_score": 1.2109, "learning_rate": 1e-05, "loss": 3.9163, "norm_diff": 0.1173, "norm_loss": 0.0, "num_token_doc": 66.7245, "num_token_overlap": 11.6623, "num_token_query": 31.8929, "num_token_union": 65.357, "num_word_context": 202.2642, "num_word_doc": 49.8254, "num_word_query": 23.5547, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4431.5816, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.667, "query_norm": 1.4348, "queue_k_norm": 1.5559, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8929, "sent_len_1": 66.7245, "sent_len_max_0": 127.4725, "sent_len_max_1": 187.3137, "stdk": 0.0479, "stdq": 0.042, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 82000 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.9186, "doc_norm": 1.5546, "encoder_q-embeddings": 1930.3855, "encoder_q-layer.0": 1333.0446, "encoder_q-layer.1": 1362.5654, "encoder_q-layer.10": 2404.7112, "encoder_q-layer.11": 6363.0273, "encoder_q-layer.2": 1529.3004, "encoder_q-layer.3": 1550.1803, "encoder_q-layer.4": 1641.8507, "encoder_q-layer.5": 1692.9076, "encoder_q-layer.6": 1884.0552, "encoder_q-layer.7": 2081.1069, "encoder_q-layer.8": 2453.7322, "encoder_q-layer.9": 2311.3271, "epoch": 0.53, "inbatch_neg_score": 0.6695, "inbatch_pos_score": 1.2197, "learning_rate": 9.944444444444445e-06, "loss": 3.9186, "norm_diff": 0.1189, "norm_loss": 0.0, "num_token_doc": 66.7041, "num_token_overlap": 11.653, "num_token_query": 31.8112, "num_token_union": 65.2039, "num_word_context": 202.1885, "num_word_doc": 49.8071, "num_word_query": 23.4913, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3928.3708, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6694, "query_norm": 1.4356, "queue_k_norm": 1.5551, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8112, "sent_len_1": 66.7041, "sent_len_max_0": 127.6175, "sent_len_max_1": 187.3413, "stdk": 0.048, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 82100 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.902, "doc_norm": 1.5556, "encoder_q-embeddings": 2726.3179, "encoder_q-layer.0": 1928.7319, "encoder_q-layer.1": 2015.0138, "encoder_q-layer.10": 2517.5916, "encoder_q-layer.11": 6624.4346, "encoder_q-layer.2": 2445.1997, "encoder_q-layer.3": 2603.1353, "encoder_q-layer.4": 2846.0483, "encoder_q-layer.5": 3004.6887, "encoder_q-layer.6": 3018.5442, "encoder_q-layer.7": 3232.3916, "encoder_q-layer.8": 3751.5659, "encoder_q-layer.9": 2678.3235, "epoch": 0.54, "inbatch_neg_score": 0.671, "inbatch_pos_score": 1.2197, "learning_rate": 9.888888888888889e-06, "loss": 3.902, "norm_diff": 0.12, "norm_loss": 0.0, "num_token_doc": 66.7758, "num_token_overlap": 11.6784, "num_token_query": 31.845, "num_token_union": 65.3152, "num_word_context": 202.1693, "num_word_doc": 49.8596, "num_word_query": 23.5392, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4984.441, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6704, "query_norm": 1.4356, "queue_k_norm": 1.5573, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.845, "sent_len_1": 66.7758, "sent_len_max_0": 127.6388, "sent_len_max_1": 188.5263, "stdk": 0.0481, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 82200 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9336, "doc_norm": 1.5525, "encoder_q-embeddings": 2053.9573, "encoder_q-layer.0": 1389.972, "encoder_q-layer.1": 1471.7462, "encoder_q-layer.10": 2841.1699, "encoder_q-layer.11": 6505.6084, "encoder_q-layer.2": 1686.6821, "encoder_q-layer.3": 1734.1823, "encoder_q-layer.4": 1801.1295, "encoder_q-layer.5": 1907.444, "encoder_q-layer.6": 2090.8276, "encoder_q-layer.7": 2279.8728, "encoder_q-layer.8": 2786.1411, "encoder_q-layer.9": 2467.6514, "epoch": 0.54, "inbatch_neg_score": 0.6718, "inbatch_pos_score": 1.2256, "learning_rate": 9.833333333333333e-06, "loss": 3.9336, "norm_diff": 0.1064, "norm_loss": 0.0, "num_token_doc": 66.6223, "num_token_overlap": 11.6519, "num_token_query": 31.8483, "num_token_union": 65.2418, "num_word_context": 202.0234, "num_word_doc": 49.6985, "num_word_query": 23.4956, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4132.8159, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6724, "query_norm": 1.4461, "queue_k_norm": 1.5558, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8483, "sent_len_1": 66.6223, "sent_len_max_0": 127.5312, "sent_len_max_1": 190.31, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 82300 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.918, "doc_norm": 1.5546, "encoder_q-embeddings": 4358.8271, "encoder_q-layer.0": 2784.512, "encoder_q-layer.1": 3148.0513, "encoder_q-layer.10": 2515.3372, "encoder_q-layer.11": 6660.5679, "encoder_q-layer.2": 3731.0959, "encoder_q-layer.3": 3537.3901, "encoder_q-layer.4": 3603.5276, "encoder_q-layer.5": 3555.8521, "encoder_q-layer.6": 3180.5564, "encoder_q-layer.7": 2987.457, "encoder_q-layer.8": 2724.884, "encoder_q-layer.9": 2341.8718, "epoch": 0.54, "inbatch_neg_score": 0.6726, "inbatch_pos_score": 1.2344, "learning_rate": 9.777777777777779e-06, "loss": 3.918, "norm_diff": 0.0973, "norm_loss": 0.0, "num_token_doc": 66.7941, "num_token_overlap": 11.6692, "num_token_query": 31.9517, "num_token_union": 65.4123, "num_word_context": 202.5626, "num_word_doc": 49.8635, "num_word_query": 23.5947, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5669.0394, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6729, "query_norm": 1.4573, "queue_k_norm": 1.5579, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9517, "sent_len_1": 66.7941, "sent_len_max_0": 127.5263, "sent_len_max_1": 187.7912, "stdk": 0.048, "stdq": 0.0429, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 82400 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9144, "doc_norm": 1.558, "encoder_q-embeddings": 1946.9475, "encoder_q-layer.0": 1313.3656, "encoder_q-layer.1": 1397.8773, "encoder_q-layer.10": 2635.2146, "encoder_q-layer.11": 6731.5186, "encoder_q-layer.2": 1514.8105, "encoder_q-layer.3": 1570.7505, "encoder_q-layer.4": 1707.5088, "encoder_q-layer.5": 1732.7374, "encoder_q-layer.6": 1887.1517, "encoder_q-layer.7": 2087.8367, "encoder_q-layer.8": 2589.5537, "encoder_q-layer.9": 2482.6421, "epoch": 0.54, "inbatch_neg_score": 0.6765, "inbatch_pos_score": 1.2178, "learning_rate": 9.722222222222223e-06, "loss": 3.9144, "norm_diff": 0.1101, "norm_loss": 0.0, "num_token_doc": 66.7587, "num_token_overlap": 11.7189, "num_token_query": 31.9808, "num_token_union": 65.3963, "num_word_context": 202.1661, "num_word_doc": 49.8096, "num_word_query": 23.623, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4154.9988, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6768, "query_norm": 1.4479, "queue_k_norm": 1.5588, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9808, "sent_len_1": 66.7587, "sent_len_max_0": 127.5125, "sent_len_max_1": 190.6387, "stdk": 0.0481, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 82500 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.922, "doc_norm": 1.5603, "encoder_q-embeddings": 2784.488, "encoder_q-layer.0": 2058.0425, "encoder_q-layer.1": 2456.5105, "encoder_q-layer.10": 2437.6069, "encoder_q-layer.11": 6337.7329, "encoder_q-layer.2": 2757.1587, "encoder_q-layer.3": 2838.5647, "encoder_q-layer.4": 2939.6514, "encoder_q-layer.5": 2785.6897, "encoder_q-layer.6": 2858.157, "encoder_q-layer.7": 2694.3396, "encoder_q-layer.8": 2720.6665, "encoder_q-layer.9": 2404.8975, "epoch": 0.54, "inbatch_neg_score": 0.6777, "inbatch_pos_score": 1.2441, "learning_rate": 9.666666666666667e-06, "loss": 3.922, "norm_diff": 0.1205, "norm_loss": 0.0, "num_token_doc": 66.7066, "num_token_overlap": 11.6411, "num_token_query": 31.8152, "num_token_union": 65.2564, "num_word_context": 202.2478, "num_word_doc": 49.7463, "num_word_query": 23.4916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4821.6097, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6787, "query_norm": 1.4398, "queue_k_norm": 1.5571, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8152, "sent_len_1": 66.7066, "sent_len_max_0": 127.5362, "sent_len_max_1": 191.3988, "stdk": 0.0482, "stdq": 0.042, "stdqueue_k": 0.0481, "stdqueue_q": 0.0, "step": 82600 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9103, "doc_norm": 1.5549, "encoder_q-embeddings": 4035.5693, "encoder_q-layer.0": 2701.3899, "encoder_q-layer.1": 3193.9614, "encoder_q-layer.10": 2639.2004, "encoder_q-layer.11": 6643.3442, "encoder_q-layer.2": 3578.3701, "encoder_q-layer.3": 3661.9426, "encoder_q-layer.4": 4066.4963, "encoder_q-layer.5": 4038.8796, "encoder_q-layer.6": 3830.0083, "encoder_q-layer.7": 3881.6211, "encoder_q-layer.8": 3276.6438, "encoder_q-layer.9": 2476.2998, "epoch": 0.54, "inbatch_neg_score": 0.6799, "inbatch_pos_score": 1.2402, "learning_rate": 9.61111111111111e-06, "loss": 3.9103, "norm_diff": 0.1015, "norm_loss": 0.0, "num_token_doc": 66.8111, "num_token_overlap": 11.6971, "num_token_query": 31.9342, "num_token_union": 65.3371, "num_word_context": 202.53, "num_word_doc": 49.8842, "num_word_query": 23.5951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5862.5446, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6802, "query_norm": 1.4535, "queue_k_norm": 1.5591, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9342, "sent_len_1": 66.8111, "sent_len_max_0": 127.4437, "sent_len_max_1": 190.8875, "stdk": 0.0479, "stdq": 0.0427, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 82700 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.9054, "doc_norm": 1.5513, "encoder_q-embeddings": 4429.0566, "encoder_q-layer.0": 3054.4846, "encoder_q-layer.1": 3011.3584, "encoder_q-layer.10": 2631.0156, "encoder_q-layer.11": 6837.7817, "encoder_q-layer.2": 3070.9927, "encoder_q-layer.3": 2790.7761, "encoder_q-layer.4": 2798.1685, "encoder_q-layer.5": 2758.6406, "encoder_q-layer.6": 2721.4014, "encoder_q-layer.7": 2673.8633, "encoder_q-layer.8": 2904.2437, "encoder_q-layer.9": 2506.0942, "epoch": 0.54, "inbatch_neg_score": 0.6813, "inbatch_pos_score": 1.2305, "learning_rate": 9.555555555555556e-06, "loss": 3.9054, "norm_diff": 0.1062, "norm_loss": 0.0, "num_token_doc": 66.9677, "num_token_overlap": 11.707, "num_token_query": 31.8644, "num_token_union": 65.392, "num_word_context": 202.4532, "num_word_doc": 49.9756, "num_word_query": 23.5193, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5438.391, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6812, "query_norm": 1.4451, "queue_k_norm": 1.5606, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8644, "sent_len_1": 66.9677, "sent_len_max_0": 127.6188, "sent_len_max_1": 189.135, "stdk": 0.0477, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 82800 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.9155, "doc_norm": 1.5611, "encoder_q-embeddings": 3406.0454, "encoder_q-layer.0": 2516.0645, "encoder_q-layer.1": 2887.6528, "encoder_q-layer.10": 2842.2307, "encoder_q-layer.11": 6844.7026, "encoder_q-layer.2": 2972.8506, "encoder_q-layer.3": 2540.4397, "encoder_q-layer.4": 2521.1008, "encoder_q-layer.5": 2246.9021, "encoder_q-layer.6": 2312.4475, "encoder_q-layer.7": 2468.5442, "encoder_q-layer.8": 2806.8445, "encoder_q-layer.9": 2626.4468, "epoch": 0.54, "inbatch_neg_score": 0.6817, "inbatch_pos_score": 1.2246, "learning_rate": 9.5e-06, "loss": 3.9155, "norm_diff": 0.1187, "norm_loss": 0.0, "num_token_doc": 66.554, "num_token_overlap": 11.6101, "num_token_query": 31.7954, "num_token_union": 65.198, "num_word_context": 202.0555, "num_word_doc": 49.684, "num_word_query": 23.4665, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5038.6402, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6816, "query_norm": 1.4424, "queue_k_norm": 1.5621, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7954, "sent_len_1": 66.554, "sent_len_max_0": 127.4513, "sent_len_max_1": 188.9712, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 82900 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9111, "doc_norm": 1.5634, "encoder_q-embeddings": 2413.5085, "encoder_q-layer.0": 1733.7855, "encoder_q-layer.1": 1925.8236, "encoder_q-layer.10": 2956.7056, "encoder_q-layer.11": 6875.8643, "encoder_q-layer.2": 2258.5376, "encoder_q-layer.3": 2413.1367, "encoder_q-layer.4": 2819.9697, "encoder_q-layer.5": 2844.3896, "encoder_q-layer.6": 2931.7434, "encoder_q-layer.7": 2917.4819, "encoder_q-layer.8": 3363.1707, "encoder_q-layer.9": 2756.873, "epoch": 0.54, "inbatch_neg_score": 0.6868, "inbatch_pos_score": 1.2295, "learning_rate": 9.444444444444445e-06, "loss": 3.9111, "norm_diff": 0.1161, "norm_loss": 0.0, "num_token_doc": 66.5758, "num_token_overlap": 11.6308, "num_token_query": 31.8279, "num_token_union": 65.1776, "num_word_context": 202.2053, "num_word_doc": 49.6687, "num_word_query": 23.4879, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4786.446, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6855, "query_norm": 1.4473, "queue_k_norm": 1.5623, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8279, "sent_len_1": 66.5758, "sent_len_max_0": 127.4038, "sent_len_max_1": 190.6362, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 83000 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9027, "doc_norm": 1.5644, "encoder_q-embeddings": 3994.4221, "encoder_q-layer.0": 2923.324, "encoder_q-layer.1": 3074.28, "encoder_q-layer.10": 2507.856, "encoder_q-layer.11": 6620.4209, "encoder_q-layer.2": 3407.9272, "encoder_q-layer.3": 3560.5664, "encoder_q-layer.4": 3846.0874, "encoder_q-layer.5": 4006.5059, "encoder_q-layer.6": 3945.3809, "encoder_q-layer.7": 3584.7646, "encoder_q-layer.8": 3042.3972, "encoder_q-layer.9": 2428.7275, "epoch": 0.54, "inbatch_neg_score": 0.6856, "inbatch_pos_score": 1.2373, "learning_rate": 9.388888888888889e-06, "loss": 3.9027, "norm_diff": 0.1199, "norm_loss": 0.0, "num_token_doc": 66.6557, "num_token_overlap": 11.6809, "num_token_query": 31.9184, "num_token_union": 65.2865, "num_word_context": 202.0943, "num_word_doc": 49.7586, "num_word_query": 23.5869, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5653.9779, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6865, "query_norm": 1.4445, "queue_k_norm": 1.5629, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9184, "sent_len_1": 66.6557, "sent_len_max_0": 127.655, "sent_len_max_1": 187.4787, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 83100 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9027, "doc_norm": 1.5674, "encoder_q-embeddings": 10565.8633, "encoder_q-layer.0": 7610.9297, "encoder_q-layer.1": 7593.3667, "encoder_q-layer.10": 2569.1326, "encoder_q-layer.11": 6598.2705, "encoder_q-layer.2": 8239.2939, "encoder_q-layer.3": 8848.3027, "encoder_q-layer.4": 9968.9336, "encoder_q-layer.5": 10623.2529, "encoder_q-layer.6": 10376.541, "encoder_q-layer.7": 7883.0845, "encoder_q-layer.8": 4637.3408, "encoder_q-layer.9": 2535.2417, "epoch": 0.54, "inbatch_neg_score": 0.6881, "inbatch_pos_score": 1.2363, "learning_rate": 9.333333333333334e-06, "loss": 3.9027, "norm_diff": 0.1254, "norm_loss": 0.0, "num_token_doc": 66.6437, "num_token_overlap": 11.7147, "num_token_query": 31.9363, "num_token_union": 65.2935, "num_word_context": 202.0379, "num_word_doc": 49.7352, "num_word_query": 23.6069, "postclip_grad_norm": 1.0, "preclip_grad_norm": 12440.6255, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6875, "query_norm": 1.442, "queue_k_norm": 1.5629, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9363, "sent_len_1": 66.6437, "sent_len_max_0": 127.515, "sent_len_max_1": 189.6425, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 83200 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.9464, "doc_norm": 1.5581, "encoder_q-embeddings": 2230.5596, "encoder_q-layer.0": 1507.9043, "encoder_q-layer.1": 1677.6365, "encoder_q-layer.10": 2505.7056, "encoder_q-layer.11": 6874.1387, "encoder_q-layer.2": 1855.5327, "encoder_q-layer.3": 1952.7343, "encoder_q-layer.4": 2120.7056, "encoder_q-layer.5": 2173.4407, "encoder_q-layer.6": 2322.3882, "encoder_q-layer.7": 2386.0779, "encoder_q-layer.8": 2701.719, "encoder_q-layer.9": 2445.1514, "epoch": 0.54, "inbatch_neg_score": 0.6914, "inbatch_pos_score": 1.2188, "learning_rate": 9.277777777777778e-06, "loss": 3.9464, "norm_diff": 0.1203, "norm_loss": 0.0, "num_token_doc": 66.727, "num_token_overlap": 11.6115, "num_token_query": 31.7241, "num_token_union": 65.3073, "num_word_context": 202.3241, "num_word_doc": 49.7687, "num_word_query": 23.4358, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4417.7326, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6904, "query_norm": 1.4378, "queue_k_norm": 1.5642, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7241, "sent_len_1": 66.727, "sent_len_max_0": 127.565, "sent_len_max_1": 190.4125, "stdk": 0.0479, "stdq": 0.0419, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 83300 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9066, "doc_norm": 1.5676, "encoder_q-embeddings": 2448.4165, "encoder_q-layer.0": 1687.7358, "encoder_q-layer.1": 1814.2045, "encoder_q-layer.10": 2580.1594, "encoder_q-layer.11": 6558.4697, "encoder_q-layer.2": 2015.1505, "encoder_q-layer.3": 1977.9486, "encoder_q-layer.4": 2170.0225, "encoder_q-layer.5": 2184.2061, "encoder_q-layer.6": 2268.3828, "encoder_q-layer.7": 2237.0959, "encoder_q-layer.8": 2564.4453, "encoder_q-layer.9": 2356.9526, "epoch": 0.54, "inbatch_neg_score": 0.6912, "inbatch_pos_score": 1.2402, "learning_rate": 9.222222222222222e-06, "loss": 3.9066, "norm_diff": 0.1179, "norm_loss": 0.0, "num_token_doc": 66.8646, "num_token_overlap": 11.6483, "num_token_query": 31.7974, "num_token_union": 65.3947, "num_word_context": 202.5545, "num_word_doc": 49.9054, "num_word_query": 23.465, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4352.4096, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6909, "query_norm": 1.4496, "queue_k_norm": 1.5642, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7974, "sent_len_1": 66.8646, "sent_len_max_0": 127.3875, "sent_len_max_1": 187.9575, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 83400 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.9201, "doc_norm": 1.566, "encoder_q-embeddings": 2046.1921, "encoder_q-layer.0": 1338.8883, "encoder_q-layer.1": 1357.679, "encoder_q-layer.10": 2544.4756, "encoder_q-layer.11": 6618.1108, "encoder_q-layer.2": 1497.479, "encoder_q-layer.3": 1564.6062, "encoder_q-layer.4": 1713.0516, "encoder_q-layer.5": 1782.7729, "encoder_q-layer.6": 2031.5452, "encoder_q-layer.7": 2256.8098, "encoder_q-layer.8": 2449.2268, "encoder_q-layer.9": 2278.6306, "epoch": 0.54, "inbatch_neg_score": 0.6925, "inbatch_pos_score": 1.2422, "learning_rate": 9.166666666666666e-06, "loss": 3.9201, "norm_diff": 0.1321, "norm_loss": 0.0, "num_token_doc": 66.7891, "num_token_overlap": 11.6576, "num_token_query": 31.8552, "num_token_union": 65.3501, "num_word_context": 202.7644, "num_word_doc": 49.8646, "num_word_query": 23.5218, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4107.3867, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6938, "query_norm": 1.434, "queue_k_norm": 1.5638, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8552, "sent_len_1": 66.7891, "sent_len_max_0": 127.6425, "sent_len_max_1": 188.5062, "stdk": 0.0482, "stdq": 0.0417, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 83500 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 3.913, "doc_norm": 1.5524, "encoder_q-embeddings": 1977.2621, "encoder_q-layer.0": 1391.4744, "encoder_q-layer.1": 1428.157, "encoder_q-layer.10": 2615.9795, "encoder_q-layer.11": 6256.7231, "encoder_q-layer.2": 1683.9774, "encoder_q-layer.3": 1704.4133, "encoder_q-layer.4": 1783.561, "encoder_q-layer.5": 1856.4668, "encoder_q-layer.6": 1963.8782, "encoder_q-layer.7": 2210.3359, "encoder_q-layer.8": 2464.0278, "encoder_q-layer.9": 2414.6106, "epoch": 0.54, "inbatch_neg_score": 0.6954, "inbatch_pos_score": 1.2412, "learning_rate": 9.111111111111112e-06, "loss": 3.913, "norm_diff": 0.0998, "norm_loss": 0.0, "num_token_doc": 66.5584, "num_token_overlap": 11.7015, "num_token_query": 31.9792, "num_token_union": 65.2594, "num_word_context": 201.9265, "num_word_doc": 49.612, "num_word_query": 23.6101, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4020.2052, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6953, "query_norm": 1.4526, "queue_k_norm": 1.565, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9792, "sent_len_1": 66.5584, "sent_len_max_0": 127.6388, "sent_len_max_1": 190.7475, "stdk": 0.0476, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 83600 }, { "accuracy": 43.9453, "active_queue_size": 16384.0, "cl_loss": 3.9052, "doc_norm": 1.5591, "encoder_q-embeddings": 5347.7051, "encoder_q-layer.0": 3758.0046, "encoder_q-layer.1": 4055.0232, "encoder_q-layer.10": 4961.0923, "encoder_q-layer.11": 13549.4912, "encoder_q-layer.2": 4523.0181, "encoder_q-layer.3": 4708.4736, "encoder_q-layer.4": 5039.7827, "encoder_q-layer.5": 5531.3047, "encoder_q-layer.6": 5574.8726, "encoder_q-layer.7": 6289.1943, "encoder_q-layer.8": 5923.3389, "encoder_q-layer.9": 4724.1392, "epoch": 0.54, "inbatch_neg_score": 0.6993, "inbatch_pos_score": 1.2324, "learning_rate": 9.055555555555556e-06, "loss": 3.9052, "norm_diff": 0.1165, "norm_loss": 0.0, "num_token_doc": 66.7892, "num_token_overlap": 11.6456, "num_token_query": 31.795, "num_token_union": 65.293, "num_word_context": 202.3542, "num_word_doc": 49.7755, "num_word_query": 23.4742, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9544.8837, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6982, "query_norm": 1.4425, "queue_k_norm": 1.5639, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.795, "sent_len_1": 66.7892, "sent_len_max_0": 127.4437, "sent_len_max_1": 192.7425, "stdk": 0.0478, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 83700 }, { "accuracy": 44.7266, "active_queue_size": 16384.0, "cl_loss": 3.915, "doc_norm": 1.5718, "encoder_q-embeddings": 4444.7803, "encoder_q-layer.0": 3298.6379, "encoder_q-layer.1": 3371.3879, "encoder_q-layer.10": 5134.8887, "encoder_q-layer.11": 12886.5195, "encoder_q-layer.2": 3713.3667, "encoder_q-layer.3": 3619.6218, "encoder_q-layer.4": 3822.8875, "encoder_q-layer.5": 3794.4636, "encoder_q-layer.6": 3980.5579, "encoder_q-layer.7": 4373.7197, "encoder_q-layer.8": 5138.9365, "encoder_q-layer.9": 4834.373, "epoch": 0.55, "inbatch_neg_score": 0.6954, "inbatch_pos_score": 1.2344, "learning_rate": 9e-06, "loss": 3.915, "norm_diff": 0.1265, "norm_loss": 0.0, "num_token_doc": 66.6972, "num_token_overlap": 11.6703, "num_token_query": 31.9535, "num_token_union": 65.3886, "num_word_context": 202.3728, "num_word_doc": 49.7872, "num_word_query": 23.5835, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8341.1573, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6973, "query_norm": 1.4453, "queue_k_norm": 1.566, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9535, "sent_len_1": 66.6972, "sent_len_max_0": 127.4663, "sent_len_max_1": 188.2575, "stdk": 0.0484, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 83800 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9065, "doc_norm": 1.5695, "encoder_q-embeddings": 10914.207, "encoder_q-layer.0": 7617.9858, "encoder_q-layer.1": 9094.7588, "encoder_q-layer.10": 5055.4688, "encoder_q-layer.11": 12913.4824, "encoder_q-layer.2": 9797.3408, "encoder_q-layer.3": 10566.1377, "encoder_q-layer.4": 12699.2744, "encoder_q-layer.5": 13605.7295, "encoder_q-layer.6": 14657.6963, "encoder_q-layer.7": 11875.7227, "encoder_q-layer.8": 9757.2812, "encoder_q-layer.9": 5475.1226, "epoch": 0.55, "inbatch_neg_score": 0.6943, "inbatch_pos_score": 1.2598, "learning_rate": 8.944444444444444e-06, "loss": 3.9065, "norm_diff": 0.1177, "norm_loss": 0.0, "num_token_doc": 66.7025, "num_token_overlap": 11.6129, "num_token_query": 31.7549, "num_token_union": 65.2845, "num_word_context": 202.2723, "num_word_doc": 49.7893, "num_word_query": 23.4731, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15982.442, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6943, "query_norm": 1.4518, "queue_k_norm": 1.5669, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7549, "sent_len_1": 66.7025, "sent_len_max_0": 127.555, "sent_len_max_1": 188.2962, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 83900 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.9186, "doc_norm": 1.561, "encoder_q-embeddings": 2173.436, "encoder_q-layer.0": 1430.8004, "encoder_q-layer.1": 1549.7285, "encoder_q-layer.10": 2756.4487, "encoder_q-layer.11": 6403.3188, "encoder_q-layer.2": 1815.381, "encoder_q-layer.3": 1848.4666, "encoder_q-layer.4": 1976.3387, "encoder_q-layer.5": 2031.3063, "encoder_q-layer.6": 2170.0576, "encoder_q-layer.7": 2253.8596, "encoder_q-layer.8": 2619.6733, "encoder_q-layer.9": 2415.8833, "epoch": 0.55, "inbatch_neg_score": 0.6969, "inbatch_pos_score": 1.252, "learning_rate": 8.88888888888889e-06, "loss": 3.9186, "norm_diff": 0.1114, "norm_loss": 0.0, "num_token_doc": 66.8364, "num_token_overlap": 11.6258, "num_token_query": 31.6993, "num_token_union": 65.2903, "num_word_context": 202.4617, "num_word_doc": 49.8352, "num_word_query": 23.3961, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4155.9854, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6963, "query_norm": 1.4496, "queue_k_norm": 1.5677, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.6993, "sent_len_1": 66.8364, "sent_len_max_0": 127.4025, "sent_len_max_1": 190.8988, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 84000 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.8989, "doc_norm": 1.574, "encoder_q-embeddings": 1972.8641, "encoder_q-layer.0": 1317.1196, "encoder_q-layer.1": 1369.181, "encoder_q-layer.10": 2761.845, "encoder_q-layer.11": 6969.0537, "encoder_q-layer.2": 1548.0304, "encoder_q-layer.3": 1622.9327, "encoder_q-layer.4": 1799.2812, "encoder_q-layer.5": 1828.171, "encoder_q-layer.6": 2087.3069, "encoder_q-layer.7": 2219.2292, "encoder_q-layer.8": 2766.5532, "encoder_q-layer.9": 2618.479, "epoch": 0.55, "inbatch_neg_score": 0.6965, "inbatch_pos_score": 1.2686, "learning_rate": 8.833333333333334e-06, "loss": 3.8989, "norm_diff": 0.1217, "norm_loss": 0.0, "num_token_doc": 66.8373, "num_token_overlap": 11.7259, "num_token_query": 31.9871, "num_token_union": 65.4088, "num_word_context": 202.2378, "num_word_doc": 49.8728, "num_word_query": 23.626, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4200.191, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6968, "query_norm": 1.4523, "queue_k_norm": 1.5675, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9871, "sent_len_1": 66.8373, "sent_len_max_0": 127.4663, "sent_len_max_1": 188.675, "stdk": 0.0484, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 84100 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8955, "doc_norm": 1.5706, "encoder_q-embeddings": 2432.1016, "encoder_q-layer.0": 1569.8953, "encoder_q-layer.1": 1707.3342, "encoder_q-layer.10": 2560.833, "encoder_q-layer.11": 6612.7891, "encoder_q-layer.2": 2018.5051, "encoder_q-layer.3": 2066.6296, "encoder_q-layer.4": 2411.3816, "encoder_q-layer.5": 2526.094, "encoder_q-layer.6": 2511.696, "encoder_q-layer.7": 2520.4568, "encoder_q-layer.8": 2783.8508, "encoder_q-layer.9": 2495.7341, "epoch": 0.55, "inbatch_neg_score": 0.6959, "inbatch_pos_score": 1.2529, "learning_rate": 8.777777777777778e-06, "loss": 3.8955, "norm_diff": 0.1233, "norm_loss": 0.0, "num_token_doc": 66.5846, "num_token_overlap": 11.6991, "num_token_query": 31.9556, "num_token_union": 65.2416, "num_word_context": 202.1038, "num_word_doc": 49.6362, "num_word_query": 23.6133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4477.6724, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6963, "query_norm": 1.4473, "queue_k_norm": 1.5675, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9556, "sent_len_1": 66.5846, "sent_len_max_0": 127.63, "sent_len_max_1": 189.24, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 84200 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.9016, "doc_norm": 1.563, "encoder_q-embeddings": 2272.4199, "encoder_q-layer.0": 1567.011, "encoder_q-layer.1": 1645.8779, "encoder_q-layer.10": 2591.2336, "encoder_q-layer.11": 6683.1167, "encoder_q-layer.2": 1993.5994, "encoder_q-layer.3": 2025.3481, "encoder_q-layer.4": 2210.3481, "encoder_q-layer.5": 2137.8618, "encoder_q-layer.6": 2348.6255, "encoder_q-layer.7": 2464.7563, "encoder_q-layer.8": 2897.1943, "encoder_q-layer.9": 2382.3066, "epoch": 0.55, "inbatch_neg_score": 0.6968, "inbatch_pos_score": 1.2324, "learning_rate": 8.722222222222224e-06, "loss": 3.9016, "norm_diff": 0.1284, "norm_loss": 0.0, "num_token_doc": 66.8434, "num_token_overlap": 11.6836, "num_token_query": 31.9233, "num_token_union": 65.4122, "num_word_context": 202.1158, "num_word_doc": 49.9024, "num_word_query": 23.5773, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4470.721, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.6973, "query_norm": 1.4346, "queue_k_norm": 1.5663, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9233, "sent_len_1": 66.8434, "sent_len_max_0": 127.5138, "sent_len_max_1": 190.2312, "stdk": 0.0479, "stdq": 0.0417, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 84300 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.8813, "doc_norm": 1.5695, "encoder_q-embeddings": 7749.2627, "encoder_q-layer.0": 5015.6343, "encoder_q-layer.1": 5162.7495, "encoder_q-layer.10": 2610.6943, "encoder_q-layer.11": 6425.6167, "encoder_q-layer.2": 5999.2798, "encoder_q-layer.3": 6931.4531, "encoder_q-layer.4": 7244.9014, "encoder_q-layer.5": 7765.1758, "encoder_q-layer.6": 8374.5801, "encoder_q-layer.7": 7054.6035, "encoder_q-layer.8": 6206.7583, "encoder_q-layer.9": 3886.1133, "epoch": 0.55, "inbatch_neg_score": 0.6969, "inbatch_pos_score": 1.2656, "learning_rate": 8.666666666666668e-06, "loss": 3.8813, "norm_diff": 0.1094, "norm_loss": 0.0, "num_token_doc": 67.1091, "num_token_overlap": 11.7482, "num_token_query": 32.0886, "num_token_union": 65.6138, "num_word_context": 203.0957, "num_word_doc": 50.0719, "num_word_query": 23.7243, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9790.697, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.6978, "query_norm": 1.4601, "queue_k_norm": 1.5691, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0886, "sent_len_1": 67.1091, "sent_len_max_0": 127.605, "sent_len_max_1": 189.2175, "stdk": 0.0482, "stdq": 0.0428, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 84400 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.8917, "doc_norm": 1.5696, "encoder_q-embeddings": 2390.5513, "encoder_q-layer.0": 1675.5209, "encoder_q-layer.1": 1881.8374, "encoder_q-layer.10": 2413.7505, "encoder_q-layer.11": 6246.4204, "encoder_q-layer.2": 2144.0239, "encoder_q-layer.3": 2225.5466, "encoder_q-layer.4": 2251.3503, "encoder_q-layer.5": 2235.228, "encoder_q-layer.6": 2189.3081, "encoder_q-layer.7": 2539.2024, "encoder_q-layer.8": 2550.3635, "encoder_q-layer.9": 2359.5747, "epoch": 0.55, "inbatch_neg_score": 0.7003, "inbatch_pos_score": 1.252, "learning_rate": 8.611111111111112e-06, "loss": 3.8917, "norm_diff": 0.1165, "norm_loss": 0.0, "num_token_doc": 66.7767, "num_token_overlap": 11.6982, "num_token_query": 31.9228, "num_token_union": 65.3755, "num_word_context": 202.1584, "num_word_doc": 49.8495, "num_word_query": 23.5973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4289.1366, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7007, "query_norm": 1.4531, "queue_k_norm": 1.5687, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9228, "sent_len_1": 66.7767, "sent_len_max_0": 127.5387, "sent_len_max_1": 189.1775, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 84500 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.9131, "doc_norm": 1.5809, "encoder_q-embeddings": 3188.3442, "encoder_q-layer.0": 2222.8977, "encoder_q-layer.1": 2251.8171, "encoder_q-layer.10": 2840.5298, "encoder_q-layer.11": 6874.9243, "encoder_q-layer.2": 2528.1201, "encoder_q-layer.3": 2579.4924, "encoder_q-layer.4": 2601.6343, "encoder_q-layer.5": 2481.5815, "encoder_q-layer.6": 2721.4412, "encoder_q-layer.7": 2851.1531, "encoder_q-layer.8": 3139.8745, "encoder_q-layer.9": 2658.0642, "epoch": 0.55, "inbatch_neg_score": 0.7022, "inbatch_pos_score": 1.2578, "learning_rate": 8.555555555555556e-06, "loss": 3.9131, "norm_diff": 0.142, "norm_loss": 0.0, "num_token_doc": 66.9031, "num_token_overlap": 11.649, "num_token_query": 31.885, "num_token_union": 65.395, "num_word_context": 202.603, "num_word_doc": 49.8476, "num_word_query": 23.5459, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4945.4785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7021, "query_norm": 1.4388, "queue_k_norm": 1.5691, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.885, "sent_len_1": 66.9031, "sent_len_max_0": 127.5863, "sent_len_max_1": 193.515, "stdk": 0.0486, "stdq": 0.0418, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 84600 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8981, "doc_norm": 1.5651, "encoder_q-embeddings": 3116.0664, "encoder_q-layer.0": 2237.6455, "encoder_q-layer.1": 2641.8542, "encoder_q-layer.10": 2605.3589, "encoder_q-layer.11": 6570.5771, "encoder_q-layer.2": 2898.4351, "encoder_q-layer.3": 2988.0305, "encoder_q-layer.4": 3193.1235, "encoder_q-layer.5": 3117.0828, "encoder_q-layer.6": 3072.4133, "encoder_q-layer.7": 2638.9458, "encoder_q-layer.8": 2651.8101, "encoder_q-layer.9": 2366.3923, "epoch": 0.55, "inbatch_neg_score": 0.7037, "inbatch_pos_score": 1.2549, "learning_rate": 8.500000000000002e-06, "loss": 3.8981, "norm_diff": 0.119, "norm_loss": 0.0, "num_token_doc": 66.4945, "num_token_overlap": 11.6594, "num_token_query": 31.8287, "num_token_union": 65.1387, "num_word_context": 201.7771, "num_word_doc": 49.6161, "num_word_query": 23.4945, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4973.9483, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7036, "query_norm": 1.4462, "queue_k_norm": 1.5696, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8287, "sent_len_1": 66.4945, "sent_len_max_0": 127.5325, "sent_len_max_1": 189.5225, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 84700 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.8985, "doc_norm": 1.5741, "encoder_q-embeddings": 3520.1931, "encoder_q-layer.0": 2493.3914, "encoder_q-layer.1": 2958.6616, "encoder_q-layer.10": 2736.5923, "encoder_q-layer.11": 7299.9248, "encoder_q-layer.2": 3501.1257, "encoder_q-layer.3": 3293.573, "encoder_q-layer.4": 3702.4209, "encoder_q-layer.5": 3785.2212, "encoder_q-layer.6": 3631.125, "encoder_q-layer.7": 3866.1001, "encoder_q-layer.8": 4008.7964, "encoder_q-layer.9": 3092.6526, "epoch": 0.55, "inbatch_neg_score": 0.7045, "inbatch_pos_score": 1.2539, "learning_rate": 8.444444444444446e-06, "loss": 3.8985, "norm_diff": 0.1175, "norm_loss": 0.0, "num_token_doc": 66.6493, "num_token_overlap": 11.6835, "num_token_query": 31.8936, "num_token_union": 65.2623, "num_word_context": 202.3064, "num_word_doc": 49.7358, "num_word_query": 23.5627, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5894.792, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7046, "query_norm": 1.4565, "queue_k_norm": 1.5699, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8936, "sent_len_1": 66.6493, "sent_len_max_0": 127.5537, "sent_len_max_1": 189.195, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 84800 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8939, "doc_norm": 1.5594, "encoder_q-embeddings": 3318.6306, "encoder_q-layer.0": 2451.478, "encoder_q-layer.1": 2698.8215, "encoder_q-layer.10": 2550.0476, "encoder_q-layer.11": 6656.772, "encoder_q-layer.2": 3149.5881, "encoder_q-layer.3": 3340.0859, "encoder_q-layer.4": 3645.5693, "encoder_q-layer.5": 3660.6716, "encoder_q-layer.6": 3899.7246, "encoder_q-layer.7": 3692.4927, "encoder_q-layer.8": 3785.824, "encoder_q-layer.9": 2734.6611, "epoch": 0.55, "inbatch_neg_score": 0.7054, "inbatch_pos_score": 1.2559, "learning_rate": 8.38888888888889e-06, "loss": 3.8939, "norm_diff": 0.1185, "norm_loss": 0.0, "num_token_doc": 66.7365, "num_token_overlap": 11.6653, "num_token_query": 31.9936, "num_token_union": 65.4148, "num_word_context": 202.5886, "num_word_doc": 49.7897, "num_word_query": 23.6439, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5613.2083, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7061, "query_norm": 1.4409, "queue_k_norm": 1.5716, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9936, "sent_len_1": 66.7365, "sent_len_max_0": 127.4688, "sent_len_max_1": 189.8275, "stdk": 0.0477, "stdq": 0.0418, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 84900 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9008, "doc_norm": 1.5729, "encoder_q-embeddings": 4113.4512, "encoder_q-layer.0": 2947.2341, "encoder_q-layer.1": 3460.5249, "encoder_q-layer.10": 2559.4805, "encoder_q-layer.11": 6247.752, "encoder_q-layer.2": 4259.7266, "encoder_q-layer.3": 4782.6406, "encoder_q-layer.4": 5000.5854, "encoder_q-layer.5": 4911.3496, "encoder_q-layer.6": 5283.4565, "encoder_q-layer.7": 5750.0039, "encoder_q-layer.8": 4479.8755, "encoder_q-layer.9": 2531.8499, "epoch": 0.55, "inbatch_neg_score": 0.7062, "inbatch_pos_score": 1.2432, "learning_rate": 8.333333333333334e-06, "loss": 3.9008, "norm_diff": 0.131, "norm_loss": 0.0, "num_token_doc": 66.782, "num_token_overlap": 11.6817, "num_token_query": 31.8741, "num_token_union": 65.3354, "num_word_context": 202.339, "num_word_doc": 49.8977, "num_word_query": 23.5479, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6703.4798, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7051, "query_norm": 1.4419, "queue_k_norm": 1.5728, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8741, "sent_len_1": 66.782, "sent_len_max_0": 127.5012, "sent_len_max_1": 186.8325, "stdk": 0.0482, "stdq": 0.0419, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 85000 }, { "accuracy": 44.043, "active_queue_size": 16384.0, "cl_loss": 3.9049, "doc_norm": 1.5705, "encoder_q-embeddings": 2350.8411, "encoder_q-layer.0": 1647.7898, "encoder_q-layer.1": 1768.1862, "encoder_q-layer.10": 2629.0574, "encoder_q-layer.11": 6576.2939, "encoder_q-layer.2": 2132.4138, "encoder_q-layer.3": 2337.0552, "encoder_q-layer.4": 2450.9387, "encoder_q-layer.5": 2630.9993, "encoder_q-layer.6": 2701.3877, "encoder_q-layer.7": 2637.5454, "encoder_q-layer.8": 2928.6084, "encoder_q-layer.9": 2391.4429, "epoch": 0.55, "inbatch_neg_score": 0.7065, "inbatch_pos_score": 1.249, "learning_rate": 8.27777777777778e-06, "loss": 3.9049, "norm_diff": 0.1296, "norm_loss": 0.0, "num_token_doc": 66.9832, "num_token_overlap": 11.7217, "num_token_query": 32.0117, "num_token_union": 65.5375, "num_word_context": 202.7577, "num_word_doc": 49.9963, "num_word_query": 23.6591, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4487.1308, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7061, "query_norm": 1.441, "queue_k_norm": 1.5722, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0117, "sent_len_1": 66.9832, "sent_len_max_0": 127.495, "sent_len_max_1": 188.995, "stdk": 0.0481, "stdq": 0.0419, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 85100 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8895, "doc_norm": 1.5779, "encoder_q-embeddings": 2877.2449, "encoder_q-layer.0": 2010.2456, "encoder_q-layer.1": 2185.9365, "encoder_q-layer.10": 2657.313, "encoder_q-layer.11": 6478.3491, "encoder_q-layer.2": 2547.8408, "encoder_q-layer.3": 2607.0583, "encoder_q-layer.4": 2918.6306, "encoder_q-layer.5": 3157.6101, "encoder_q-layer.6": 3419.0049, "encoder_q-layer.7": 3111.7871, "encoder_q-layer.8": 2955.5962, "encoder_q-layer.9": 2575.3521, "epoch": 0.55, "inbatch_neg_score": 0.7053, "inbatch_pos_score": 1.2598, "learning_rate": 8.222222222222223e-06, "loss": 3.8895, "norm_diff": 0.1241, "norm_loss": 0.0, "num_token_doc": 66.7373, "num_token_overlap": 11.7086, "num_token_query": 31.99, "num_token_union": 65.3378, "num_word_context": 201.8974, "num_word_doc": 49.8011, "num_word_query": 23.6007, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4937.1796, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7065, "query_norm": 1.4538, "queue_k_norm": 1.571, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.99, "sent_len_1": 66.7373, "sent_len_max_0": 127.5875, "sent_len_max_1": 189.3038, "stdk": 0.0484, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 85200 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.8964, "doc_norm": 1.5718, "encoder_q-embeddings": 2310.2507, "encoder_q-layer.0": 1601.1597, "encoder_q-layer.1": 1651.2563, "encoder_q-layer.10": 2474.5476, "encoder_q-layer.11": 6551.6318, "encoder_q-layer.2": 1975.5378, "encoder_q-layer.3": 2026.4749, "encoder_q-layer.4": 2252.6387, "encoder_q-layer.5": 2292.0447, "encoder_q-layer.6": 2518.7625, "encoder_q-layer.7": 2544.9158, "encoder_q-layer.8": 2774.7717, "encoder_q-layer.9": 2549.186, "epoch": 0.56, "inbatch_neg_score": 0.7084, "inbatch_pos_score": 1.2363, "learning_rate": 8.166666666666668e-06, "loss": 3.8964, "norm_diff": 0.1302, "norm_loss": 0.0, "num_token_doc": 66.6926, "num_token_overlap": 11.6688, "num_token_query": 31.8758, "num_token_union": 65.351, "num_word_context": 202.32, "num_word_doc": 49.7929, "num_word_query": 23.5339, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4394.3952, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.708, "query_norm": 1.4416, "queue_k_norm": 1.5706, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8758, "sent_len_1": 66.6926, "sent_len_max_0": 127.4613, "sent_len_max_1": 187.5987, "stdk": 0.0481, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 85300 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.9139, "doc_norm": 1.5749, "encoder_q-embeddings": 2448.8281, "encoder_q-layer.0": 1643.2794, "encoder_q-layer.1": 1688.3479, "encoder_q-layer.10": 2925.812, "encoder_q-layer.11": 6513.6079, "encoder_q-layer.2": 1900.1429, "encoder_q-layer.3": 2059.7563, "encoder_q-layer.4": 2056.8936, "encoder_q-layer.5": 2093.5476, "encoder_q-layer.6": 2439.4751, "encoder_q-layer.7": 2580.7407, "encoder_q-layer.8": 2743.8008, "encoder_q-layer.9": 2351.9756, "epoch": 0.56, "inbatch_neg_score": 0.7106, "inbatch_pos_score": 1.2666, "learning_rate": 8.111111111111112e-06, "loss": 3.9139, "norm_diff": 0.1223, "norm_loss": 0.0, "num_token_doc": 66.6789, "num_token_overlap": 11.6611, "num_token_query": 31.8355, "num_token_union": 65.236, "num_word_context": 202.6294, "num_word_doc": 49.7777, "num_word_query": 23.4977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4437.0972, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.71, "query_norm": 1.4526, "queue_k_norm": 1.573, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8355, "sent_len_1": 66.6789, "sent_len_max_0": 127.4375, "sent_len_max_1": 189.6875, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 85400 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.9123, "doc_norm": 1.5751, "encoder_q-embeddings": 2020.554, "encoder_q-layer.0": 1362.3241, "encoder_q-layer.1": 1465.1758, "encoder_q-layer.10": 2476.4148, "encoder_q-layer.11": 6469.4243, "encoder_q-layer.2": 1718.858, "encoder_q-layer.3": 1743.562, "encoder_q-layer.4": 1939.3098, "encoder_q-layer.5": 1938.6443, "encoder_q-layer.6": 2156.0134, "encoder_q-layer.7": 2381.7917, "encoder_q-layer.8": 2662.5334, "encoder_q-layer.9": 2316.9324, "epoch": 0.56, "inbatch_neg_score": 0.7108, "inbatch_pos_score": 1.2676, "learning_rate": 8.055555555555557e-06, "loss": 3.9123, "norm_diff": 0.1223, "norm_loss": 0.0, "num_token_doc": 66.8218, "num_token_overlap": 11.7034, "num_token_query": 32.0182, "num_token_union": 65.4396, "num_word_context": 202.376, "num_word_doc": 49.8387, "num_word_query": 23.6666, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4148.5866, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7114, "query_norm": 1.4527, "queue_k_norm": 1.5732, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0182, "sent_len_1": 66.8218, "sent_len_max_0": 127.5088, "sent_len_max_1": 189.5112, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 85500 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8968, "doc_norm": 1.5764, "encoder_q-embeddings": 11955.749, "encoder_q-layer.0": 8839.6582, "encoder_q-layer.1": 10064.3682, "encoder_q-layer.10": 2856.6377, "encoder_q-layer.11": 6473.0254, "encoder_q-layer.2": 12106.0674, "encoder_q-layer.3": 12808.1006, "encoder_q-layer.4": 11854.918, "encoder_q-layer.5": 10519.7891, "encoder_q-layer.6": 10220.7188, "encoder_q-layer.7": 8634.0205, "encoder_q-layer.8": 8452.1016, "encoder_q-layer.9": 4356.6504, "epoch": 0.56, "inbatch_neg_score": 0.7123, "inbatch_pos_score": 1.292, "learning_rate": 8.000000000000001e-06, "loss": 3.8968, "norm_diff": 0.1133, "norm_loss": 0.0, "num_token_doc": 67.0374, "num_token_overlap": 11.7405, "num_token_query": 32.0882, "num_token_union": 65.5347, "num_word_context": 202.4278, "num_word_doc": 49.9832, "num_word_query": 23.7096, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14529.301, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7129, "query_norm": 1.463, "queue_k_norm": 1.5732, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0882, "sent_len_1": 67.0374, "sent_len_max_0": 127.595, "sent_len_max_1": 191.1887, "stdk": 0.0483, "stdq": 0.0428, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 85600 }, { "accuracy": 43.75, "active_queue_size": 16384.0, "cl_loss": 3.8889, "doc_norm": 1.5769, "encoder_q-embeddings": 1911.9398, "encoder_q-layer.0": 1269.5492, "encoder_q-layer.1": 1339.0781, "encoder_q-layer.10": 2532.6399, "encoder_q-layer.11": 6515.2388, "encoder_q-layer.2": 1479.5081, "encoder_q-layer.3": 1567.3885, "encoder_q-layer.4": 1659.864, "encoder_q-layer.5": 1615.4714, "encoder_q-layer.6": 1866.1846, "encoder_q-layer.7": 2254.47, "encoder_q-layer.8": 2586.6682, "encoder_q-layer.9": 2429.8101, "epoch": 0.56, "inbatch_neg_score": 0.7157, "inbatch_pos_score": 1.25, "learning_rate": 7.944444444444445e-06, "loss": 3.8889, "norm_diff": 0.1299, "norm_loss": 0.0, "num_token_doc": 66.8458, "num_token_overlap": 11.665, "num_token_query": 31.8973, "num_token_union": 65.405, "num_word_context": 202.3374, "num_word_doc": 49.8976, "num_word_query": 23.5791, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4096.8777, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7163, "query_norm": 1.447, "queue_k_norm": 1.5741, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8973, "sent_len_1": 66.8458, "sent_len_max_0": 127.6162, "sent_len_max_1": 189.94, "stdk": 0.0483, "stdq": 0.042, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 85700 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9113, "doc_norm": 1.5735, "encoder_q-embeddings": 2207.2996, "encoder_q-layer.0": 1477.4287, "encoder_q-layer.1": 1574.6184, "encoder_q-layer.10": 2612.4851, "encoder_q-layer.11": 6622.1899, "encoder_q-layer.2": 1767.3973, "encoder_q-layer.3": 1825.6447, "encoder_q-layer.4": 1884.439, "encoder_q-layer.5": 1957.7677, "encoder_q-layer.6": 2190.1089, "encoder_q-layer.7": 2353.8176, "encoder_q-layer.8": 2697.387, "encoder_q-layer.9": 2532.8069, "epoch": 0.56, "inbatch_neg_score": 0.7193, "inbatch_pos_score": 1.2666, "learning_rate": 7.88888888888889e-06, "loss": 3.9113, "norm_diff": 0.1185, "norm_loss": 0.0, "num_token_doc": 66.5759, "num_token_overlap": 11.6368, "num_token_query": 31.8658, "num_token_union": 65.2483, "num_word_context": 202.0398, "num_word_doc": 49.6634, "num_word_query": 23.5279, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4273.9869, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7188, "query_norm": 1.455, "queue_k_norm": 1.5736, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8658, "sent_len_1": 66.5759, "sent_len_max_0": 127.4688, "sent_len_max_1": 190.4712, "stdk": 0.0481, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 85800 }, { "accuracy": 46.1914, "active_queue_size": 16384.0, "cl_loss": 3.8947, "doc_norm": 1.5741, "encoder_q-embeddings": 1666.5684, "encoder_q-layer.0": 1176.9305, "encoder_q-layer.1": 1280.8198, "encoder_q-layer.10": 1216.2325, "encoder_q-layer.11": 3149.7441, "encoder_q-layer.2": 1482.0865, "encoder_q-layer.3": 1387.6801, "encoder_q-layer.4": 1465.7915, "encoder_q-layer.5": 1638.1156, "encoder_q-layer.6": 1681.9357, "encoder_q-layer.7": 1609.0111, "encoder_q-layer.8": 1473.3049, "encoder_q-layer.9": 1209.965, "epoch": 0.56, "inbatch_neg_score": 0.7193, "inbatch_pos_score": 1.2754, "learning_rate": 7.833333333333333e-06, "loss": 3.8947, "norm_diff": 0.1123, "norm_loss": 0.0, "num_token_doc": 66.7625, "num_token_overlap": 11.6466, "num_token_query": 31.7221, "num_token_union": 65.2833, "num_word_context": 202.216, "num_word_doc": 49.8224, "num_word_query": 23.4179, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2540.4905, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7197, "query_norm": 1.4619, "queue_k_norm": 1.5735, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7221, "sent_len_1": 66.7625, "sent_len_max_0": 127.5225, "sent_len_max_1": 189.895, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 85900 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.897, "doc_norm": 1.5725, "encoder_q-embeddings": 1608.3611, "encoder_q-layer.0": 1129.7021, "encoder_q-layer.1": 1312.8267, "encoder_q-layer.10": 1411.5021, "encoder_q-layer.11": 3149.0979, "encoder_q-layer.2": 1495.4497, "encoder_q-layer.3": 1564.0273, "encoder_q-layer.4": 1669.2277, "encoder_q-layer.5": 1696.2019, "encoder_q-layer.6": 1548.2057, "encoder_q-layer.7": 1638.5896, "encoder_q-layer.8": 1573.4249, "encoder_q-layer.9": 1276.7841, "epoch": 0.56, "inbatch_neg_score": 0.7191, "inbatch_pos_score": 1.2676, "learning_rate": 7.777777777777777e-06, "loss": 3.897, "norm_diff": 0.1098, "norm_loss": 0.0, "num_token_doc": 66.8021, "num_token_overlap": 11.6723, "num_token_query": 31.818, "num_token_union": 65.3537, "num_word_context": 202.5471, "num_word_doc": 49.8495, "num_word_query": 23.497, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2603.9876, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7188, "query_norm": 1.4627, "queue_k_norm": 1.5735, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.818, "sent_len_1": 66.8021, "sent_len_max_0": 127.5837, "sent_len_max_1": 189.8925, "stdk": 0.048, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86000 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8912, "doc_norm": 1.5741, "encoder_q-embeddings": 1416.4138, "encoder_q-layer.0": 951.5704, "encoder_q-layer.1": 1026.7396, "encoder_q-layer.10": 1244.3533, "encoder_q-layer.11": 3181.4082, "encoder_q-layer.2": 1219.6411, "encoder_q-layer.3": 1325.6277, "encoder_q-layer.4": 1485.5599, "encoder_q-layer.5": 1462.0012, "encoder_q-layer.6": 1503.0656, "encoder_q-layer.7": 1526.9872, "encoder_q-layer.8": 1549.3621, "encoder_q-layer.9": 1295.9594, "epoch": 0.56, "inbatch_neg_score": 0.7213, "inbatch_pos_score": 1.2646, "learning_rate": 7.722222222222223e-06, "loss": 3.8912, "norm_diff": 0.1228, "norm_loss": 0.0, "num_token_doc": 66.7168, "num_token_overlap": 11.6819, "num_token_query": 31.9704, "num_token_union": 65.3829, "num_word_context": 202.279, "num_word_doc": 49.77, "num_word_query": 23.6151, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2389.5006, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7207, "query_norm": 1.4513, "queue_k_norm": 1.576, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9704, "sent_len_1": 66.7168, "sent_len_max_0": 127.5062, "sent_len_max_1": 188.8288, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 86100 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.8841, "doc_norm": 1.5803, "encoder_q-embeddings": 2326.2668, "encoder_q-layer.0": 1796.3826, "encoder_q-layer.1": 2020.6074, "encoder_q-layer.10": 1245.1066, "encoder_q-layer.11": 3213.1304, "encoder_q-layer.2": 2466.6064, "encoder_q-layer.3": 2495.3745, "encoder_q-layer.4": 2504.665, "encoder_q-layer.5": 2279.7024, "encoder_q-layer.6": 2400.9736, "encoder_q-layer.7": 2481.4702, "encoder_q-layer.8": 2059.9937, "encoder_q-layer.9": 1293.6533, "epoch": 0.56, "inbatch_neg_score": 0.7219, "inbatch_pos_score": 1.2793, "learning_rate": 7.666666666666667e-06, "loss": 3.8841, "norm_diff": 0.1233, "norm_loss": 0.0, "num_token_doc": 66.7945, "num_token_overlap": 11.6942, "num_token_query": 32.0003, "num_token_union": 65.3835, "num_word_context": 202.6037, "num_word_doc": 49.8233, "num_word_query": 23.6486, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3501.2385, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7217, "query_norm": 1.457, "queue_k_norm": 1.577, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0003, "sent_len_1": 66.7945, "sent_len_max_0": 127.5162, "sent_len_max_1": 191.9675, "stdk": 0.0484, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 86200 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.8913, "doc_norm": 1.5739, "encoder_q-embeddings": 1132.8053, "encoder_q-layer.0": 766.7595, "encoder_q-layer.1": 802.5815, "encoder_q-layer.10": 1315.974, "encoder_q-layer.11": 3318.9836, "encoder_q-layer.2": 961.9411, "encoder_q-layer.3": 1007.524, "encoder_q-layer.4": 1084.5212, "encoder_q-layer.5": 1046.9158, "encoder_q-layer.6": 1164.3226, "encoder_q-layer.7": 1292.0295, "encoder_q-layer.8": 1474.4941, "encoder_q-layer.9": 1265.9841, "epoch": 0.56, "inbatch_neg_score": 0.7226, "inbatch_pos_score": 1.2891, "learning_rate": 7.611111111111112e-06, "loss": 3.8913, "norm_diff": 0.1134, "norm_loss": 0.0, "num_token_doc": 66.6211, "num_token_overlap": 11.6711, "num_token_query": 31.9234, "num_token_union": 65.2859, "num_word_context": 201.7313, "num_word_doc": 49.684, "num_word_query": 23.5712, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2209.8236, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7227, "query_norm": 1.4605, "queue_k_norm": 1.5741, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9234, "sent_len_1": 66.6211, "sent_len_max_0": 127.4575, "sent_len_max_1": 189.49, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86300 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.8959, "doc_norm": 1.5786, "encoder_q-embeddings": 1404.8785, "encoder_q-layer.0": 919.6583, "encoder_q-layer.1": 1037.9462, "encoder_q-layer.10": 1285.682, "encoder_q-layer.11": 3430.4407, "encoder_q-layer.2": 1215.705, "encoder_q-layer.3": 1284.4203, "encoder_q-layer.4": 1377.8124, "encoder_q-layer.5": 1229.9785, "encoder_q-layer.6": 1217.677, "encoder_q-layer.7": 1290.2858, "encoder_q-layer.8": 1414.7074, "encoder_q-layer.9": 1212.5549, "epoch": 0.56, "inbatch_neg_score": 0.7233, "inbatch_pos_score": 1.2656, "learning_rate": 7.555555555555556e-06, "loss": 3.8959, "norm_diff": 0.1286, "norm_loss": 0.0, "num_token_doc": 66.921, "num_token_overlap": 11.6764, "num_token_query": 31.923, "num_token_union": 65.4536, "num_word_context": 202.2403, "num_word_doc": 49.9269, "num_word_query": 23.5936, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2406.7438, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7246, "query_norm": 1.4499, "queue_k_norm": 1.577, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.923, "sent_len_1": 66.921, "sent_len_max_0": 127.4862, "sent_len_max_1": 189.9725, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 86400 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.8869, "doc_norm": 1.579, "encoder_q-embeddings": 1128.527, "encoder_q-layer.0": 729.956, "encoder_q-layer.1": 762.9922, "encoder_q-layer.10": 1289.9816, "encoder_q-layer.11": 3196.7488, "encoder_q-layer.2": 857.071, "encoder_q-layer.3": 859.1959, "encoder_q-layer.4": 923.7478, "encoder_q-layer.5": 948.5878, "encoder_q-layer.6": 1055.7404, "encoder_q-layer.7": 1124.3885, "encoder_q-layer.8": 1206.8042, "encoder_q-layer.9": 1116.7119, "epoch": 0.56, "inbatch_neg_score": 0.7241, "inbatch_pos_score": 1.2598, "learning_rate": 7.5e-06, "loss": 3.8869, "norm_diff": 0.134, "norm_loss": 0.0, "num_token_doc": 66.8944, "num_token_overlap": 11.7366, "num_token_query": 32.0148, "num_token_union": 65.4434, "num_word_context": 202.4073, "num_word_doc": 49.9207, "num_word_query": 23.6285, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2042.9127, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7246, "query_norm": 1.445, "queue_k_norm": 1.5765, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0148, "sent_len_1": 66.8944, "sent_len_max_0": 127.4437, "sent_len_max_1": 190.525, "stdk": 0.0482, "stdq": 0.0418, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86500 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.8925, "doc_norm": 1.5812, "encoder_q-embeddings": 3133.1926, "encoder_q-layer.0": 2143.793, "encoder_q-layer.1": 2304.1714, "encoder_q-layer.10": 1327.0485, "encoder_q-layer.11": 3255.5894, "encoder_q-layer.2": 2795.3757, "encoder_q-layer.3": 3125.6631, "encoder_q-layer.4": 3241.5525, "encoder_q-layer.5": 2899.7473, "encoder_q-layer.6": 2917.4478, "encoder_q-layer.7": 2965.3826, "encoder_q-layer.8": 2512.7505, "encoder_q-layer.9": 1256.4371, "epoch": 0.56, "inbatch_neg_score": 0.7266, "inbatch_pos_score": 1.2812, "learning_rate": 7.444444444444444e-06, "loss": 3.8925, "norm_diff": 0.1272, "norm_loss": 0.0, "num_token_doc": 66.918, "num_token_overlap": 11.6866, "num_token_query": 31.8204, "num_token_union": 65.3713, "num_word_context": 202.1225, "num_word_doc": 49.8888, "num_word_query": 23.5246, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4162.9082, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7261, "query_norm": 1.454, "queue_k_norm": 1.5775, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8204, "sent_len_1": 66.918, "sent_len_max_0": 127.5162, "sent_len_max_1": 191.4925, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 86600 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.8976, "doc_norm": 1.5852, "encoder_q-embeddings": 1048.7563, "encoder_q-layer.0": 709.3474, "encoder_q-layer.1": 751.5842, "encoder_q-layer.10": 1248.7209, "encoder_q-layer.11": 3257.2686, "encoder_q-layer.2": 839.2576, "encoder_q-layer.3": 858.6104, "encoder_q-layer.4": 895.2605, "encoder_q-layer.5": 928.5437, "encoder_q-layer.6": 1033.1274, "encoder_q-layer.7": 1143.0702, "encoder_q-layer.8": 1269.2264, "encoder_q-layer.9": 1176.3895, "epoch": 0.56, "inbatch_neg_score": 0.7265, "inbatch_pos_score": 1.2764, "learning_rate": 7.38888888888889e-06, "loss": 3.8976, "norm_diff": 0.1366, "norm_loss": 0.0, "num_token_doc": 66.7567, "num_token_overlap": 11.6784, "num_token_query": 31.9056, "num_token_union": 65.3744, "num_word_context": 202.2104, "num_word_doc": 49.8818, "num_word_query": 23.5551, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2074.1919, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7261, "query_norm": 1.4486, "queue_k_norm": 1.5783, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9056, "sent_len_1": 66.7567, "sent_len_max_0": 127.4188, "sent_len_max_1": 186.9162, "stdk": 0.0485, "stdq": 0.042, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 86700 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9107, "doc_norm": 1.5827, "encoder_q-embeddings": 4017.5625, "encoder_q-layer.0": 2661.1125, "encoder_q-layer.1": 3203.988, "encoder_q-layer.10": 1278.9873, "encoder_q-layer.11": 3369.4358, "encoder_q-layer.2": 4109.9512, "encoder_q-layer.3": 4457.1328, "encoder_q-layer.4": 4850.4248, "encoder_q-layer.5": 4543.4844, "encoder_q-layer.6": 4699.104, "encoder_q-layer.7": 4399.7681, "encoder_q-layer.8": 3534.3997, "encoder_q-layer.9": 1596.3937, "epoch": 0.56, "inbatch_neg_score": 0.7267, "inbatch_pos_score": 1.2715, "learning_rate": 7.333333333333334e-06, "loss": 3.9107, "norm_diff": 0.1298, "norm_loss": 0.0, "num_token_doc": 66.8385, "num_token_overlap": 11.6448, "num_token_query": 31.8863, "num_token_union": 65.3987, "num_word_context": 202.3407, "num_word_doc": 49.8416, "num_word_query": 23.5439, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5779.3292, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7256, "query_norm": 1.4529, "queue_k_norm": 1.5798, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8863, "sent_len_1": 66.8385, "sent_len_max_0": 127.4887, "sent_len_max_1": 190.0987, "stdk": 0.0484, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 86800 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.8974, "doc_norm": 1.5735, "encoder_q-embeddings": 1568.8365, "encoder_q-layer.0": 1035.4585, "encoder_q-layer.1": 1273.7283, "encoder_q-layer.10": 1379.2878, "encoder_q-layer.11": 3466.3074, "encoder_q-layer.2": 1470.6113, "encoder_q-layer.3": 1530.6384, "encoder_q-layer.4": 1596.5363, "encoder_q-layer.5": 1585.4827, "encoder_q-layer.6": 1503.5555, "encoder_q-layer.7": 1669.8175, "encoder_q-layer.8": 1641.3542, "encoder_q-layer.9": 1252.6929, "epoch": 0.57, "inbatch_neg_score": 0.7289, "inbatch_pos_score": 1.2764, "learning_rate": 7.277777777777778e-06, "loss": 3.8974, "norm_diff": 0.1116, "norm_loss": 0.0, "num_token_doc": 66.5, "num_token_overlap": 11.647, "num_token_query": 31.9384, "num_token_union": 65.2374, "num_word_context": 202.0944, "num_word_doc": 49.6049, "num_word_query": 23.599, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2601.7195, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7275, "query_norm": 1.4619, "queue_k_norm": 1.5774, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9384, "sent_len_1": 66.5, "sent_len_max_0": 127.5475, "sent_len_max_1": 189.6113, "stdk": 0.0479, "stdq": 0.0426, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 86900 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.9043, "doc_norm": 1.5793, "encoder_q-embeddings": 1174.5642, "encoder_q-layer.0": 825.8442, "encoder_q-layer.1": 864.2883, "encoder_q-layer.10": 1203.4995, "encoder_q-layer.11": 3263.7161, "encoder_q-layer.2": 965.989, "encoder_q-layer.3": 1008.8698, "encoder_q-layer.4": 1025.7343, "encoder_q-layer.5": 1040.4344, "encoder_q-layer.6": 1173.4855, "encoder_q-layer.7": 1297.4673, "encoder_q-layer.8": 1436.538, "encoder_q-layer.9": 1251.8333, "epoch": 0.57, "inbatch_neg_score": 0.7265, "inbatch_pos_score": 1.2832, "learning_rate": 7.222222222222222e-06, "loss": 3.9043, "norm_diff": 0.121, "norm_loss": 0.0, "num_token_doc": 67.0841, "num_token_overlap": 11.6547, "num_token_query": 31.7795, "num_token_union": 65.4563, "num_word_context": 202.5369, "num_word_doc": 50.0527, "num_word_query": 23.4476, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2193.5756, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7261, "query_norm": 1.4583, "queue_k_norm": 1.5786, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7795, "sent_len_1": 67.0841, "sent_len_max_0": 127.4688, "sent_len_max_1": 192.785, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 87000 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.9053, "doc_norm": 1.5808, "encoder_q-embeddings": 1884.1256, "encoder_q-layer.0": 1341.5109, "encoder_q-layer.1": 1525.9443, "encoder_q-layer.10": 1316.4545, "encoder_q-layer.11": 3349.6743, "encoder_q-layer.2": 1793.2555, "encoder_q-layer.3": 2012.4026, "encoder_q-layer.4": 2149.126, "encoder_q-layer.5": 2105.9329, "encoder_q-layer.6": 2127.9744, "encoder_q-layer.7": 1831.4868, "encoder_q-layer.8": 1698.354, "encoder_q-layer.9": 1311.4109, "epoch": 0.57, "inbatch_neg_score": 0.7276, "inbatch_pos_score": 1.2822, "learning_rate": 7.166666666666667e-06, "loss": 3.9053, "norm_diff": 0.1243, "norm_loss": 0.0, "num_token_doc": 66.8349, "num_token_overlap": 11.6809, "num_token_query": 31.9172, "num_token_union": 65.3718, "num_word_context": 202.2692, "num_word_doc": 49.8151, "num_word_query": 23.5732, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2969.5453, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.728, "query_norm": 1.4565, "queue_k_norm": 1.578, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9172, "sent_len_1": 66.8349, "sent_len_max_0": 127.5413, "sent_len_max_1": 191.2962, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 87100 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.9177, "doc_norm": 1.5811, "encoder_q-embeddings": 1116.3871, "encoder_q-layer.0": 737.0248, "encoder_q-layer.1": 786.8397, "encoder_q-layer.10": 1246.7504, "encoder_q-layer.11": 3268.6934, "encoder_q-layer.2": 921.8275, "encoder_q-layer.3": 959.756, "encoder_q-layer.4": 996.234, "encoder_q-layer.5": 1003.0042, "encoder_q-layer.6": 1074.1971, "encoder_q-layer.7": 1222.7162, "encoder_q-layer.8": 1327.9799, "encoder_q-layer.9": 1138.3669, "epoch": 0.57, "inbatch_neg_score": 0.728, "inbatch_pos_score": 1.2793, "learning_rate": 7.111111111111112e-06, "loss": 3.9177, "norm_diff": 0.1283, "norm_loss": 0.0, "num_token_doc": 66.7158, "num_token_overlap": 11.6523, "num_token_query": 31.8875, "num_token_union": 65.3861, "num_word_context": 202.3454, "num_word_doc": 49.8157, "num_word_query": 23.5585, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2113.6467, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7285, "query_norm": 1.4527, "queue_k_norm": 1.5791, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8875, "sent_len_1": 66.7158, "sent_len_max_0": 127.5625, "sent_len_max_1": 189.1362, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 87200 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8963, "doc_norm": 1.5804, "encoder_q-embeddings": 1057.5052, "encoder_q-layer.0": 717.5419, "encoder_q-layer.1": 786.8118, "encoder_q-layer.10": 1257.1819, "encoder_q-layer.11": 3286.0288, "encoder_q-layer.2": 886.457, "encoder_q-layer.3": 910.2563, "encoder_q-layer.4": 958.0387, "encoder_q-layer.5": 969.5115, "encoder_q-layer.6": 1056.2463, "encoder_q-layer.7": 1108.0488, "encoder_q-layer.8": 1267.2637, "encoder_q-layer.9": 1147.0114, "epoch": 0.57, "inbatch_neg_score": 0.7283, "inbatch_pos_score": 1.2715, "learning_rate": 7.055555555555556e-06, "loss": 3.8963, "norm_diff": 0.1284, "norm_loss": 0.0, "num_token_doc": 66.8825, "num_token_overlap": 11.6414, "num_token_query": 31.882, "num_token_union": 65.423, "num_word_context": 202.331, "num_word_doc": 49.8853, "num_word_query": 23.5579, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2082.6214, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.729, "query_norm": 1.452, "queue_k_norm": 1.5804, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.882, "sent_len_1": 66.8825, "sent_len_max_0": 127.4587, "sent_len_max_1": 187.6813, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 87300 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.8978, "doc_norm": 1.5791, "encoder_q-embeddings": 967.4779, "encoder_q-layer.0": 640.4988, "encoder_q-layer.1": 677.2916, "encoder_q-layer.10": 1255.6879, "encoder_q-layer.11": 3304.3081, "encoder_q-layer.2": 765.2315, "encoder_q-layer.3": 798.1284, "encoder_q-layer.4": 827.6454, "encoder_q-layer.5": 877.4807, "encoder_q-layer.6": 962.2126, "encoder_q-layer.7": 1100.9595, "encoder_q-layer.8": 1313.707, "encoder_q-layer.9": 1206.7236, "epoch": 0.57, "inbatch_neg_score": 0.7306, "inbatch_pos_score": 1.2754, "learning_rate": 7.000000000000001e-06, "loss": 3.8978, "norm_diff": 0.1199, "norm_loss": 0.0, "num_token_doc": 66.5943, "num_token_overlap": 11.6914, "num_token_query": 31.8675, "num_token_union": 65.2527, "num_word_context": 202.1262, "num_word_doc": 49.7356, "num_word_query": 23.5347, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2077.3444, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7295, "query_norm": 1.4592, "queue_k_norm": 1.5796, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8675, "sent_len_1": 66.5943, "sent_len_max_0": 127.5075, "sent_len_max_1": 187.7788, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 87400 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.897, "doc_norm": 1.5757, "encoder_q-embeddings": 1375.9373, "encoder_q-layer.0": 960.0629, "encoder_q-layer.1": 1113.7719, "encoder_q-layer.10": 1294.0853, "encoder_q-layer.11": 3401.2256, "encoder_q-layer.2": 1341.5034, "encoder_q-layer.3": 1433.645, "encoder_q-layer.4": 1252.9521, "encoder_q-layer.5": 1100.5624, "encoder_q-layer.6": 1086.8119, "encoder_q-layer.7": 1149.4529, "encoder_q-layer.8": 1290.5005, "encoder_q-layer.9": 1203.7086, "epoch": 0.57, "inbatch_neg_score": 0.7307, "inbatch_pos_score": 1.2559, "learning_rate": 6.944444444444445e-06, "loss": 3.897, "norm_diff": 0.1268, "norm_loss": 0.0, "num_token_doc": 66.8637, "num_token_overlap": 11.6806, "num_token_query": 31.9855, "num_token_union": 65.4933, "num_word_context": 202.6933, "num_word_doc": 49.8814, "num_word_query": 23.6319, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2316.4976, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7305, "query_norm": 1.4489, "queue_k_norm": 1.5824, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9855, "sent_len_1": 66.8637, "sent_len_max_0": 127.6425, "sent_len_max_1": 189.6875, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 87500 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 3.8882, "doc_norm": 1.5846, "encoder_q-embeddings": 980.3712, "encoder_q-layer.0": 653.1663, "encoder_q-layer.1": 672.2955, "encoder_q-layer.10": 1371.0894, "encoder_q-layer.11": 3459.7412, "encoder_q-layer.2": 750.8591, "encoder_q-layer.3": 786.744, "encoder_q-layer.4": 809.4287, "encoder_q-layer.5": 826.2032, "encoder_q-layer.6": 984.0858, "encoder_q-layer.7": 1090.1222, "encoder_q-layer.8": 1340.6394, "encoder_q-layer.9": 1249.2448, "epoch": 0.57, "inbatch_neg_score": 0.7318, "inbatch_pos_score": 1.2842, "learning_rate": 6.888888888888889e-06, "loss": 3.8882, "norm_diff": 0.1289, "norm_loss": 0.0, "num_token_doc": 66.8574, "num_token_overlap": 11.6756, "num_token_query": 31.8486, "num_token_union": 65.4091, "num_word_context": 202.1487, "num_word_doc": 49.8753, "num_word_query": 23.5158, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2118.1123, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7324, "query_norm": 1.4556, "queue_k_norm": 1.5808, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8486, "sent_len_1": 66.8574, "sent_len_max_0": 127.5687, "sent_len_max_1": 189.3088, "stdk": 0.0484, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 87600 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.8859, "doc_norm": 1.5864, "encoder_q-embeddings": 1014.8397, "encoder_q-layer.0": 680.9679, "encoder_q-layer.1": 700.6444, "encoder_q-layer.10": 1396.5171, "encoder_q-layer.11": 3352.1973, "encoder_q-layer.2": 814.7537, "encoder_q-layer.3": 846.2347, "encoder_q-layer.4": 916.5317, "encoder_q-layer.5": 947.3578, "encoder_q-layer.6": 1038.889, "encoder_q-layer.7": 1107.6685, "encoder_q-layer.8": 1359.0858, "encoder_q-layer.9": 1257.1224, "epoch": 0.57, "inbatch_neg_score": 0.7315, "inbatch_pos_score": 1.2812, "learning_rate": 6.833333333333333e-06, "loss": 3.8859, "norm_diff": 0.1204, "norm_loss": 0.0, "num_token_doc": 66.6891, "num_token_overlap": 11.6458, "num_token_query": 31.7725, "num_token_union": 65.2481, "num_word_context": 202.2026, "num_word_doc": 49.7382, "num_word_query": 23.4526, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2106.1401, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7305, "query_norm": 1.466, "queue_k_norm": 1.583, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7725, "sent_len_1": 66.6891, "sent_len_max_0": 127.56, "sent_len_max_1": 191.3925, "stdk": 0.0484, "stdq": 0.0427, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 87700 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.8934, "doc_norm": 1.5804, "encoder_q-embeddings": 1639.7271, "encoder_q-layer.0": 1197.4705, "encoder_q-layer.1": 1333.2336, "encoder_q-layer.10": 1288.9709, "encoder_q-layer.11": 3308.3528, "encoder_q-layer.2": 1530.2107, "encoder_q-layer.3": 1641.7886, "encoder_q-layer.4": 1767.7369, "encoder_q-layer.5": 1899.4816, "encoder_q-layer.6": 2212.9548, "encoder_q-layer.7": 2194.8608, "encoder_q-layer.8": 1963.0796, "encoder_q-layer.9": 1336.5265, "epoch": 0.57, "inbatch_neg_score": 0.7298, "inbatch_pos_score": 1.2637, "learning_rate": 6.777777777777779e-06, "loss": 3.8934, "norm_diff": 0.1301, "norm_loss": 0.0, "num_token_doc": 66.6899, "num_token_overlap": 11.7146, "num_token_query": 31.8708, "num_token_union": 65.2877, "num_word_context": 202.2419, "num_word_doc": 49.7879, "num_word_query": 23.5207, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2837.5278, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7314, "query_norm": 1.4503, "queue_k_norm": 1.5817, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8708, "sent_len_1": 66.6899, "sent_len_max_0": 127.3175, "sent_len_max_1": 188.3187, "stdk": 0.0482, "stdq": 0.042, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 87800 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.886, "doc_norm": 1.5841, "encoder_q-embeddings": 2714.2563, "encoder_q-layer.0": 1800.1641, "encoder_q-layer.1": 2033.1045, "encoder_q-layer.10": 2987.3943, "encoder_q-layer.11": 6805.5312, "encoder_q-layer.2": 2485.6421, "encoder_q-layer.3": 2573.0649, "encoder_q-layer.4": 2781.6318, "encoder_q-layer.5": 2757.6479, "encoder_q-layer.6": 2849.3794, "encoder_q-layer.7": 2781.2483, "encoder_q-layer.8": 2728.4321, "encoder_q-layer.9": 2445.1948, "epoch": 0.57, "inbatch_neg_score": 0.7316, "inbatch_pos_score": 1.3008, "learning_rate": 6.722222222222223e-06, "loss": 3.886, "norm_diff": 0.1256, "norm_loss": 0.0, "num_token_doc": 66.696, "num_token_overlap": 11.7081, "num_token_query": 31.9858, "num_token_union": 65.3213, "num_word_context": 202.5756, "num_word_doc": 49.7745, "num_word_query": 23.616, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4742.1053, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7324, "query_norm": 1.4585, "queue_k_norm": 1.5828, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9858, "sent_len_1": 66.696, "sent_len_max_0": 127.4038, "sent_len_max_1": 190.4375, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 87900 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.9133, "doc_norm": 1.5819, "encoder_q-embeddings": 2658.1228, "encoder_q-layer.0": 1767.7273, "encoder_q-layer.1": 1942.7037, "encoder_q-layer.10": 2687.5884, "encoder_q-layer.11": 6574.1709, "encoder_q-layer.2": 2188.2988, "encoder_q-layer.3": 2144.3398, "encoder_q-layer.4": 2176.3469, "encoder_q-layer.5": 2093.2832, "encoder_q-layer.6": 2409.8433, "encoder_q-layer.7": 2469.6333, "encoder_q-layer.8": 2677.1909, "encoder_q-layer.9": 2362.2957, "epoch": 0.57, "inbatch_neg_score": 0.7327, "inbatch_pos_score": 1.2881, "learning_rate": 6.666666666666667e-06, "loss": 3.9133, "norm_diff": 0.1314, "norm_loss": 0.0, "num_token_doc": 66.8732, "num_token_overlap": 11.6672, "num_token_query": 31.7918, "num_token_union": 65.4043, "num_word_context": 202.5792, "num_word_doc": 49.9405, "num_word_query": 23.4907, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4435.1667, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7329, "query_norm": 1.4506, "queue_k_norm": 1.5837, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7918, "sent_len_1": 66.8732, "sent_len_max_0": 127.5125, "sent_len_max_1": 187.6625, "stdk": 0.0482, "stdq": 0.042, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 88000 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.893, "doc_norm": 1.5827, "encoder_q-embeddings": 2656.8389, "encoder_q-layer.0": 1713.9674, "encoder_q-layer.1": 2003.1785, "encoder_q-layer.10": 2696.6658, "encoder_q-layer.11": 6733.0952, "encoder_q-layer.2": 2298.3228, "encoder_q-layer.3": 2250.4321, "encoder_q-layer.4": 2479.1709, "encoder_q-layer.5": 2357.009, "encoder_q-layer.6": 2636.489, "encoder_q-layer.7": 2879.9058, "encoder_q-layer.8": 3101.8748, "encoder_q-layer.9": 2696.9443, "epoch": 0.57, "inbatch_neg_score": 0.7355, "inbatch_pos_score": 1.292, "learning_rate": 6.611111111111111e-06, "loss": 3.893, "norm_diff": 0.1221, "norm_loss": 0.0, "num_token_doc": 66.785, "num_token_overlap": 11.696, "num_token_query": 31.8841, "num_token_union": 65.2972, "num_word_context": 202.2681, "num_word_doc": 49.8616, "num_word_query": 23.5612, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4729.5277, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7363, "query_norm": 1.4606, "queue_k_norm": 1.5827, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8841, "sent_len_1": 66.785, "sent_len_max_0": 127.4363, "sent_len_max_1": 188.715, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 88100 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8938, "doc_norm": 1.5822, "encoder_q-embeddings": 2793.5845, "encoder_q-layer.0": 1888.3419, "encoder_q-layer.1": 2104.74, "encoder_q-layer.10": 2563.3555, "encoder_q-layer.11": 6719.4712, "encoder_q-layer.2": 2481.3164, "encoder_q-layer.3": 2580.2664, "encoder_q-layer.4": 2882.1602, "encoder_q-layer.5": 2896.4146, "encoder_q-layer.6": 2994.0107, "encoder_q-layer.7": 2705.302, "encoder_q-layer.8": 2842.2517, "encoder_q-layer.9": 2426.5232, "epoch": 0.57, "inbatch_neg_score": 0.736, "inbatch_pos_score": 1.2881, "learning_rate": 6.555555555555556e-06, "loss": 3.8938, "norm_diff": 0.1213, "norm_loss": 0.0, "num_token_doc": 66.6826, "num_token_overlap": 11.6587, "num_token_query": 31.8118, "num_token_union": 65.2713, "num_word_context": 202.1431, "num_word_doc": 49.7647, "num_word_query": 23.5124, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4787.5542, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7358, "query_norm": 1.4609, "queue_k_norm": 1.5834, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8118, "sent_len_1": 66.6826, "sent_len_max_0": 127.53, "sent_len_max_1": 187.4988, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 88200 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9022, "doc_norm": 1.5868, "encoder_q-embeddings": 2519.1538, "encoder_q-layer.0": 1644.7074, "encoder_q-layer.1": 1796.6136, "encoder_q-layer.10": 2461.3936, "encoder_q-layer.11": 6511.8374, "encoder_q-layer.2": 1972.8016, "encoder_q-layer.3": 1935.7382, "encoder_q-layer.4": 2095.9419, "encoder_q-layer.5": 2035.3505, "encoder_q-layer.6": 2126.0232, "encoder_q-layer.7": 2307.2314, "encoder_q-layer.8": 2539.7729, "encoder_q-layer.9": 2290.5361, "epoch": 0.57, "inbatch_neg_score": 0.7375, "inbatch_pos_score": 1.2842, "learning_rate": 6.5000000000000004e-06, "loss": 3.9022, "norm_diff": 0.1368, "norm_loss": 0.0, "num_token_doc": 66.767, "num_token_overlap": 11.6467, "num_token_query": 31.8703, "num_token_union": 65.3714, "num_word_context": 202.2672, "num_word_doc": 49.7886, "num_word_query": 23.4999, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4318.3954, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7368, "query_norm": 1.45, "queue_k_norm": 1.5813, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8703, "sent_len_1": 66.767, "sent_len_max_0": 127.5062, "sent_len_max_1": 191.0062, "stdk": 0.0484, "stdq": 0.0419, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 88300 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.8959, "doc_norm": 1.588, "encoder_q-embeddings": 2463.0188, "encoder_q-layer.0": 1689.1974, "encoder_q-layer.1": 1798.7603, "encoder_q-layer.10": 2527.6982, "encoder_q-layer.11": 6463.7632, "encoder_q-layer.2": 2073.0625, "encoder_q-layer.3": 2049.2603, "encoder_q-layer.4": 2119.7244, "encoder_q-layer.5": 2075.3953, "encoder_q-layer.6": 2363.8313, "encoder_q-layer.7": 2520.6011, "encoder_q-layer.8": 2727.7861, "encoder_q-layer.9": 2444.374, "epoch": 0.58, "inbatch_neg_score": 0.7371, "inbatch_pos_score": 1.2969, "learning_rate": 6.4444444444444445e-06, "loss": 3.8959, "norm_diff": 0.1284, "norm_loss": 0.0, "num_token_doc": 67.0667, "num_token_overlap": 11.7443, "num_token_query": 32.0706, "num_token_union": 65.6253, "num_word_context": 202.2003, "num_word_doc": 50.058, "num_word_query": 23.6841, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4422.5381, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7368, "query_norm": 1.4596, "queue_k_norm": 1.5852, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0706, "sent_len_1": 67.0667, "sent_len_max_0": 127.6875, "sent_len_max_1": 190.8875, "stdk": 0.0484, "stdq": 0.0423, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 88400 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.9021, "doc_norm": 1.5863, "encoder_q-embeddings": 2325.137, "encoder_q-layer.0": 1603.7101, "encoder_q-layer.1": 1806.5173, "encoder_q-layer.10": 2718.1853, "encoder_q-layer.11": 6742.356, "encoder_q-layer.2": 2039.6191, "encoder_q-layer.3": 2127.0979, "encoder_q-layer.4": 2322.4971, "encoder_q-layer.5": 2459.9592, "encoder_q-layer.6": 2458.9983, "encoder_q-layer.7": 2618.3694, "encoder_q-layer.8": 2893.1389, "encoder_q-layer.9": 2448.9304, "epoch": 0.58, "inbatch_neg_score": 0.7359, "inbatch_pos_score": 1.291, "learning_rate": 6.3888888888888885e-06, "loss": 3.9021, "norm_diff": 0.1182, "norm_loss": 0.0, "num_token_doc": 66.8637, "num_token_overlap": 11.691, "num_token_query": 31.8046, "num_token_union": 65.311, "num_word_context": 202.3357, "num_word_doc": 49.8817, "num_word_query": 23.4944, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4444.8603, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7358, "query_norm": 1.4681, "queue_k_norm": 1.5834, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8046, "sent_len_1": 66.8637, "sent_len_max_0": 127.625, "sent_len_max_1": 189.5337, "stdk": 0.0483, "stdq": 0.0427, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 88500 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.8965, "doc_norm": 1.5857, "encoder_q-embeddings": 2273.4609, "encoder_q-layer.0": 1536.3401, "encoder_q-layer.1": 1575.5089, "encoder_q-layer.10": 2726.6523, "encoder_q-layer.11": 6735.3984, "encoder_q-layer.2": 1786.8594, "encoder_q-layer.3": 1885.4683, "encoder_q-layer.4": 2066.9277, "encoder_q-layer.5": 2062.0786, "encoder_q-layer.6": 2248.8774, "encoder_q-layer.7": 2517.5474, "encoder_q-layer.8": 2804.0818, "encoder_q-layer.9": 2461.8237, "epoch": 0.58, "inbatch_neg_score": 0.7403, "inbatch_pos_score": 1.2773, "learning_rate": 6.333333333333334e-06, "loss": 3.8965, "norm_diff": 0.1253, "norm_loss": 0.0, "num_token_doc": 66.7398, "num_token_overlap": 11.7282, "num_token_query": 31.8899, "num_token_union": 65.2791, "num_word_context": 201.9495, "num_word_doc": 49.8375, "num_word_query": 23.5749, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4432.7149, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7402, "query_norm": 1.4604, "queue_k_norm": 1.5826, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8899, "sent_len_1": 66.7398, "sent_len_max_0": 127.7288, "sent_len_max_1": 189.2637, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 88600 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.8898, "doc_norm": 1.5867, "encoder_q-embeddings": 5687.2754, "encoder_q-layer.0": 3955.7153, "encoder_q-layer.1": 4396.7124, "encoder_q-layer.10": 2870.688, "encoder_q-layer.11": 7133.5049, "encoder_q-layer.2": 5155.2886, "encoder_q-layer.3": 5642.4917, "encoder_q-layer.4": 6040.3823, "encoder_q-layer.5": 6830.5576, "encoder_q-layer.6": 6806.77, "encoder_q-layer.7": 6214.8384, "encoder_q-layer.8": 6078.416, "encoder_q-layer.9": 3909.7654, "epoch": 0.58, "inbatch_neg_score": 0.74, "inbatch_pos_score": 1.2881, "learning_rate": 6.277777777777778e-06, "loss": 3.8898, "norm_diff": 0.1283, "norm_loss": 0.0, "num_token_doc": 66.9018, "num_token_overlap": 11.6548, "num_token_query": 31.7465, "num_token_union": 65.3927, "num_word_context": 202.6797, "num_word_doc": 49.94, "num_word_query": 23.4378, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8362.6836, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7417, "query_norm": 1.4584, "queue_k_norm": 1.5846, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7465, "sent_len_1": 66.9018, "sent_len_max_0": 127.5438, "sent_len_max_1": 189.3275, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 88700 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.8862, "doc_norm": 1.5847, "encoder_q-embeddings": 3287.2456, "encoder_q-layer.0": 2232.4119, "encoder_q-layer.1": 2671.1909, "encoder_q-layer.10": 2837.9194, "encoder_q-layer.11": 6667.6562, "encoder_q-layer.2": 3138.2563, "encoder_q-layer.3": 3136.26, "encoder_q-layer.4": 3126.1526, "encoder_q-layer.5": 3066.4307, "encoder_q-layer.6": 3588.8208, "encoder_q-layer.7": 3947.2317, "encoder_q-layer.8": 3847.7388, "encoder_q-layer.9": 2647.0591, "epoch": 0.58, "inbatch_neg_score": 0.7411, "inbatch_pos_score": 1.2881, "learning_rate": 6.222222222222222e-06, "loss": 3.8862, "norm_diff": 0.1162, "norm_loss": 0.0, "num_token_doc": 66.8509, "num_token_overlap": 11.7004, "num_token_query": 31.9777, "num_token_union": 65.4519, "num_word_context": 202.6063, "num_word_doc": 49.9019, "num_word_query": 23.6324, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5474.9032, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7422, "query_norm": 1.4685, "queue_k_norm": 1.5832, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9777, "sent_len_1": 66.8509, "sent_len_max_0": 127.6, "sent_len_max_1": 189.1725, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 88800 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.9044, "doc_norm": 1.5806, "encoder_q-embeddings": 2277.1191, "encoder_q-layer.0": 1587.1775, "encoder_q-layer.1": 1834.5929, "encoder_q-layer.10": 2510.9211, "encoder_q-layer.11": 6401.2803, "encoder_q-layer.2": 2176.3748, "encoder_q-layer.3": 2360.0632, "encoder_q-layer.4": 2573.2227, "encoder_q-layer.5": 2477.9082, "encoder_q-layer.6": 2406.623, "encoder_q-layer.7": 2481.1758, "encoder_q-layer.8": 2770.6104, "encoder_q-layer.9": 2468.063, "epoch": 0.58, "inbatch_neg_score": 0.7441, "inbatch_pos_score": 1.2988, "learning_rate": 6.166666666666667e-06, "loss": 3.9044, "norm_diff": 0.1231, "norm_loss": 0.0, "num_token_doc": 66.7931, "num_token_overlap": 11.6908, "num_token_query": 31.8981, "num_token_union": 65.3379, "num_word_context": 202.1997, "num_word_doc": 49.8371, "num_word_query": 23.5518, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4444.7398, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7441, "query_norm": 1.4575, "queue_k_norm": 1.5848, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8981, "sent_len_1": 66.7931, "sent_len_max_0": 127.4188, "sent_len_max_1": 189.5112, "stdk": 0.048, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 88900 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.8959, "doc_norm": 1.5855, "encoder_q-embeddings": 13315.7393, "encoder_q-layer.0": 10354.1113, "encoder_q-layer.1": 14000.9023, "encoder_q-layer.10": 2672.2498, "encoder_q-layer.11": 6869.4927, "encoder_q-layer.2": 16187.0, "encoder_q-layer.3": 17943.2695, "encoder_q-layer.4": 16161.4951, "encoder_q-layer.5": 13263.5664, "encoder_q-layer.6": 14286.7158, "encoder_q-layer.7": 11255.6914, "encoder_q-layer.8": 7897.4448, "encoder_q-layer.9": 3228.8574, "epoch": 0.58, "inbatch_neg_score": 0.7445, "inbatch_pos_score": 1.2881, "learning_rate": 6.111111111111111e-06, "loss": 3.8959, "norm_diff": 0.128, "norm_loss": 0.0, "num_token_doc": 66.7884, "num_token_overlap": 11.7276, "num_token_query": 32.0325, "num_token_union": 65.4539, "num_word_context": 202.3269, "num_word_doc": 49.8569, "num_word_query": 23.6903, "postclip_grad_norm": 1.0, "preclip_grad_norm": 19273.3831, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.7441, "query_norm": 1.4575, "queue_k_norm": 1.5845, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0325, "sent_len_1": 66.7884, "sent_len_max_0": 127.5025, "sent_len_max_1": 190.1413, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 89000 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.9092, "doc_norm": 1.5882, "encoder_q-embeddings": 2149.1931, "encoder_q-layer.0": 1452.0409, "encoder_q-layer.1": 1472.4065, "encoder_q-layer.10": 2618.7949, "encoder_q-layer.11": 6221.5498, "encoder_q-layer.2": 1612.976, "encoder_q-layer.3": 1684.7, "encoder_q-layer.4": 1780.7922, "encoder_q-layer.5": 1759.0237, "encoder_q-layer.6": 2028.9075, "encoder_q-layer.7": 2172.8271, "encoder_q-layer.8": 2490.2893, "encoder_q-layer.9": 2260.0513, "epoch": 0.58, "inbatch_neg_score": 0.7469, "inbatch_pos_score": 1.3115, "learning_rate": 6.055555555555556e-06, "loss": 3.9092, "norm_diff": 0.1247, "norm_loss": 0.0, "num_token_doc": 66.6891, "num_token_overlap": 11.6325, "num_token_query": 31.7701, "num_token_union": 65.2298, "num_word_context": 202.1473, "num_word_doc": 49.7499, "num_word_query": 23.4718, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4013.4364, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7461, "query_norm": 1.4635, "queue_k_norm": 1.5855, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.7701, "sent_len_1": 66.6891, "sent_len_max_0": 127.5525, "sent_len_max_1": 190.7312, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 89100 }, { "accuracy": 44.6289, "active_queue_size": 16384.0, "cl_loss": 3.8999, "doc_norm": 1.5839, "encoder_q-embeddings": 4753.4888, "encoder_q-layer.0": 3262.8711, "encoder_q-layer.1": 4063.7073, "encoder_q-layer.10": 2782.1536, "encoder_q-layer.11": 6901.3589, "encoder_q-layer.2": 4771.6611, "encoder_q-layer.3": 5405.8799, "encoder_q-layer.4": 5776.5093, "encoder_q-layer.5": 5572.4146, "encoder_q-layer.6": 5452.3262, "encoder_q-layer.7": 5216.103, "encoder_q-layer.8": 4446.6587, "encoder_q-layer.9": 2587.9646, "epoch": 0.58, "inbatch_neg_score": 0.7489, "inbatch_pos_score": 1.2988, "learning_rate": 6e-06, "loss": 3.8999, "norm_diff": 0.124, "norm_loss": 0.0, "num_token_doc": 66.7928, "num_token_overlap": 11.6821, "num_token_query": 31.9265, "num_token_union": 65.4178, "num_word_context": 202.49, "num_word_doc": 49.8892, "num_word_query": 23.5966, "postclip_grad_norm": 1.0, "preclip_grad_norm": 7311.6487, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.748, "query_norm": 1.46, "queue_k_norm": 1.5864, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9265, "sent_len_1": 66.7928, "sent_len_max_0": 127.4325, "sent_len_max_1": 187.6438, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 89200 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.881, "doc_norm": 1.5864, "encoder_q-embeddings": 2393.8416, "encoder_q-layer.0": 1568.6649, "encoder_q-layer.1": 1681.5673, "encoder_q-layer.10": 2561.6609, "encoder_q-layer.11": 6436.3213, "encoder_q-layer.2": 1903.4948, "encoder_q-layer.3": 1982.0051, "encoder_q-layer.4": 2124.2371, "encoder_q-layer.5": 2153.8625, "encoder_q-layer.6": 2333.197, "encoder_q-layer.7": 2596.0562, "encoder_q-layer.8": 2858.0449, "encoder_q-layer.9": 2446.1143, "epoch": 0.58, "inbatch_neg_score": 0.7491, "inbatch_pos_score": 1.3086, "learning_rate": 5.944444444444445e-06, "loss": 3.881, "norm_diff": 0.1243, "norm_loss": 0.0, "num_token_doc": 66.6409, "num_token_overlap": 11.6942, "num_token_query": 31.9146, "num_token_union": 65.2798, "num_word_context": 201.5565, "num_word_doc": 49.684, "num_word_query": 23.56, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4380.2157, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7495, "query_norm": 1.4621, "queue_k_norm": 1.5862, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9146, "sent_len_1": 66.6409, "sent_len_max_0": 127.5037, "sent_len_max_1": 190.6012, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 89300 }, { "accuracy": 48.3398, "active_queue_size": 16384.0, "cl_loss": 3.9176, "doc_norm": 1.5869, "encoder_q-embeddings": 8968.0303, "encoder_q-layer.0": 6660.6133, "encoder_q-layer.1": 7126.6685, "encoder_q-layer.10": 2509.3823, "encoder_q-layer.11": 6417.2368, "encoder_q-layer.2": 7822.2476, "encoder_q-layer.3": 8315.0967, "encoder_q-layer.4": 8788.9121, "encoder_q-layer.5": 10263.751, "encoder_q-layer.6": 10771.3457, "encoder_q-layer.7": 10241.0918, "encoder_q-layer.8": 6484.2998, "encoder_q-layer.9": 3593.1008, "epoch": 0.58, "inbatch_neg_score": 0.7496, "inbatch_pos_score": 1.3135, "learning_rate": 5.888888888888889e-06, "loss": 3.9176, "norm_diff": 0.1194, "norm_loss": 0.0, "num_token_doc": 66.8675, "num_token_overlap": 11.6605, "num_token_query": 31.778, "num_token_union": 65.348, "num_word_context": 202.2667, "num_word_doc": 49.898, "num_word_query": 23.4528, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11904.0461, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.75, "query_norm": 1.4675, "queue_k_norm": 1.5842, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.778, "sent_len_1": 66.8675, "sent_len_max_0": 127.4062, "sent_len_max_1": 189.4688, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89400 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.8917, "doc_norm": 1.5835, "encoder_q-embeddings": 3359.6416, "encoder_q-layer.0": 2328.7046, "encoder_q-layer.1": 2416.9031, "encoder_q-layer.10": 2648.8125, "encoder_q-layer.11": 6494.4131, "encoder_q-layer.2": 2992.9736, "encoder_q-layer.3": 3168.1743, "encoder_q-layer.4": 3397.7847, "encoder_q-layer.5": 3211.2666, "encoder_q-layer.6": 3609.1699, "encoder_q-layer.7": 3467.2227, "encoder_q-layer.8": 2907.6189, "encoder_q-layer.9": 2478.4783, "epoch": 0.58, "inbatch_neg_score": 0.7527, "inbatch_pos_score": 1.2988, "learning_rate": 5.833333333333334e-06, "loss": 3.8917, "norm_diff": 0.122, "norm_loss": 0.0, "num_token_doc": 66.7943, "num_token_overlap": 11.6652, "num_token_query": 31.8381, "num_token_union": 65.3657, "num_word_context": 202.6999, "num_word_doc": 49.8788, "num_word_query": 23.507, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5327.2847, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.752, "query_norm": 1.4614, "queue_k_norm": 1.5855, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8381, "sent_len_1": 66.7943, "sent_len_max_0": 127.49, "sent_len_max_1": 187.1575, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89500 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.8975, "doc_norm": 1.5816, "encoder_q-embeddings": 2652.2085, "encoder_q-layer.0": 1713.6152, "encoder_q-layer.1": 1944.2046, "encoder_q-layer.10": 2638.8638, "encoder_q-layer.11": 6659.3154, "encoder_q-layer.2": 2157.8447, "encoder_q-layer.3": 2301.6587, "encoder_q-layer.4": 2614.8169, "encoder_q-layer.5": 2678.3433, "encoder_q-layer.6": 2902.4219, "encoder_q-layer.7": 3034.5671, "encoder_q-layer.8": 3303.5164, "encoder_q-layer.9": 2444.6626, "epoch": 0.58, "inbatch_neg_score": 0.752, "inbatch_pos_score": 1.3145, "learning_rate": 5.777777777777778e-06, "loss": 3.8975, "norm_diff": 0.1222, "norm_loss": 0.0, "num_token_doc": 66.5499, "num_token_overlap": 11.6582, "num_token_query": 31.8349, "num_token_union": 65.1923, "num_word_context": 202.003, "num_word_doc": 49.6554, "num_word_query": 23.4908, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4723.292, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.752, "query_norm": 1.4594, "queue_k_norm": 1.587, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8349, "sent_len_1": 66.5499, "sent_len_max_0": 127.4013, "sent_len_max_1": 189.5087, "stdk": 0.048, "stdq": 0.042, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 89600 }, { "accuracy": 48.1445, "active_queue_size": 16384.0, "cl_loss": 3.8798, "doc_norm": 1.5885, "encoder_q-embeddings": 3724.1895, "encoder_q-layer.0": 2605.0557, "encoder_q-layer.1": 2847.7417, "encoder_q-layer.10": 2518.802, "encoder_q-layer.11": 6291.9766, "encoder_q-layer.2": 3159.9634, "encoder_q-layer.3": 3625.2935, "encoder_q-layer.4": 3803.624, "encoder_q-layer.5": 4094.592, "encoder_q-layer.6": 3969.8811, "encoder_q-layer.7": 3747.5256, "encoder_q-layer.8": 3219.9941, "encoder_q-layer.9": 2392.7581, "epoch": 0.58, "inbatch_neg_score": 0.7534, "inbatch_pos_score": 1.3203, "learning_rate": 5.722222222222223e-06, "loss": 3.8798, "norm_diff": 0.1194, "norm_loss": 0.0, "num_token_doc": 66.6885, "num_token_overlap": 11.6501, "num_token_query": 31.848, "num_token_union": 65.3285, "num_word_context": 202.4868, "num_word_doc": 49.7643, "num_word_query": 23.5179, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5662.1934, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7539, "query_norm": 1.4691, "queue_k_norm": 1.5873, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.848, "sent_len_1": 66.6885, "sent_len_max_0": 127.6225, "sent_len_max_1": 187.9613, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 89700 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.8947, "doc_norm": 1.5822, "encoder_q-embeddings": 2288.7034, "encoder_q-layer.0": 1569.8856, "encoder_q-layer.1": 1728.075, "encoder_q-layer.10": 3099.9548, "encoder_q-layer.11": 6736.3965, "encoder_q-layer.2": 1964.0247, "encoder_q-layer.3": 1977.9652, "encoder_q-layer.4": 2064.6421, "encoder_q-layer.5": 2088.3162, "encoder_q-layer.6": 2249.7502, "encoder_q-layer.7": 2310.6565, "encoder_q-layer.8": 2760.1575, "encoder_q-layer.9": 2481.5649, "epoch": 0.58, "inbatch_neg_score": 0.7543, "inbatch_pos_score": 1.3125, "learning_rate": 5.666666666666667e-06, "loss": 3.8947, "norm_diff": 0.1116, "norm_loss": 0.0, "num_token_doc": 66.8131, "num_token_overlap": 11.7215, "num_token_query": 31.9473, "num_token_union": 65.3772, "num_word_context": 202.3998, "num_word_doc": 49.8434, "num_word_query": 23.5801, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4465.9204, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7549, "query_norm": 1.4706, "queue_k_norm": 1.5864, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9473, "sent_len_1": 66.8131, "sent_len_max_0": 127.4387, "sent_len_max_1": 188.7, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 89800 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.8927, "doc_norm": 1.5787, "encoder_q-embeddings": 10721.0547, "encoder_q-layer.0": 7479.8828, "encoder_q-layer.1": 9874.4951, "encoder_q-layer.10": 5588.0278, "encoder_q-layer.11": 12600.2812, "encoder_q-layer.2": 12319.5664, "encoder_q-layer.3": 9890.5244, "encoder_q-layer.4": 9929.1797, "encoder_q-layer.5": 8070.4087, "encoder_q-layer.6": 7881.6597, "encoder_q-layer.7": 8018.9868, "encoder_q-layer.8": 6034.2793, "encoder_q-layer.9": 4830.9287, "epoch": 0.59, "inbatch_neg_score": 0.756, "inbatch_pos_score": 1.3037, "learning_rate": 5.611111111111112e-06, "loss": 3.8927, "norm_diff": 0.1073, "norm_loss": 0.0, "num_token_doc": 66.7081, "num_token_overlap": 11.6362, "num_token_query": 31.8402, "num_token_union": 65.3244, "num_word_context": 202.4324, "num_word_doc": 49.8054, "num_word_query": 23.4941, "postclip_grad_norm": 1.0, "preclip_grad_norm": 14111.028, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7559, "query_norm": 1.4714, "queue_k_norm": 1.5883, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8402, "sent_len_1": 66.7081, "sent_len_max_0": 127.475, "sent_len_max_1": 189.4563, "stdk": 0.0478, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 89900 }, { "accuracy": 43.6523, "active_queue_size": 16384.0, "cl_loss": 3.8967, "doc_norm": 1.5918, "encoder_q-embeddings": 4233.3657, "encoder_q-layer.0": 2916.3115, "encoder_q-layer.1": 3283.3677, "encoder_q-layer.10": 4999.6997, "encoder_q-layer.11": 13263.9209, "encoder_q-layer.2": 3580.8979, "encoder_q-layer.3": 3817.3423, "encoder_q-layer.4": 3996.8665, "encoder_q-layer.5": 4322.3867, "encoder_q-layer.6": 4562.0469, "encoder_q-layer.7": 4806.396, "encoder_q-layer.8": 5383.9092, "encoder_q-layer.9": 4806.6113, "epoch": 0.59, "inbatch_neg_score": 0.7591, "inbatch_pos_score": 1.2852, "learning_rate": 5.555555555555556e-06, "loss": 3.8967, "norm_diff": 0.1383, "norm_loss": 0.0, "num_token_doc": 66.4318, "num_token_overlap": 11.6579, "num_token_query": 31.9763, "num_token_union": 65.2357, "num_word_context": 202.1421, "num_word_doc": 49.6209, "num_word_query": 23.6248, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8629.7308, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7598, "query_norm": 1.4535, "queue_k_norm": 1.5887, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9763, "sent_len_1": 66.4318, "sent_len_max_0": 127.4925, "sent_len_max_1": 188.8438, "stdk": 0.0484, "stdq": 0.0417, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 90000 }, { "dev_runtime": 44.0757, "dev_samples_per_second": 1.452, "dev_steps_per_second": 0.023, "epoch": 0.59, "step": 90000, "test_accuracy": 93.76220703125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.36957016587257385, "test_doc_norm": 1.5594813823699951, "test_inbatch_neg_score": 1.0945709943771362, "test_inbatch_pos_score": 2.0048184394836426, "test_loss": 0.36957016587257385, "test_loss_align": 0.9179236888885498, "test_loss_unif": 2.9137585163116455, "test_loss_unif_q@queue": 2.9137582778930664, "test_norm_diff": 0.013072298839688301, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.7452419996261597, "test_query_norm": 1.5721384286880493, "test_queue_k_norm": 1.5887845754623413, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04169077426195145, "test_stdq": 0.04178181290626526, "test_stdqueue_k": 0.0483221635222435, "test_stdqueue_q": 0.0 }, { "dev_runtime": 44.0757, "dev_samples_per_second": 1.452, "dev_steps_per_second": 0.023, "epoch": 0.59, "eval_beir-arguana_ndcg@10": 0.37491, "eval_beir-arguana_recall@10": 0.64225, "eval_beir-arguana_recall@100": 0.92745, "eval_beir-arguana_recall@20": 0.77383, "eval_beir-avg_ndcg@10": 0.38415058333333335, "eval_beir-avg_recall@10": 0.45304325, "eval_beir-avg_recall@100": 0.6365820833333332, "eval_beir-avg_recall@20": 0.51412575, "eval_beir-cqadupstack_ndcg@10": 0.26126583333333336, "eval_beir-cqadupstack_recall@10": 0.3561425, "eval_beir-cqadupstack_recall@100": 0.5962608333333334, "eval_beir-cqadupstack_recall@20": 0.42852749999999995, "eval_beir-fiqa_ndcg@10": 0.25058, "eval_beir-fiqa_recall@10": 0.3068, "eval_beir-fiqa_recall@100": 0.57336, "eval_beir-fiqa_recall@20": 0.37931, "eval_beir-nfcorpus_ndcg@10": 0.30078, "eval_beir-nfcorpus_recall@10": 0.14965, "eval_beir-nfcorpus_recall@100": 0.28597, "eval_beir-nfcorpus_recall@20": 0.18357, "eval_beir-nq_ndcg@10": 0.28472, "eval_beir-nq_recall@10": 0.47002, "eval_beir-nq_recall@100": 0.80864, "eval_beir-nq_recall@20": 0.592, "eval_beir-quora_ndcg@10": 0.78685, "eval_beir-quora_recall@10": 0.89391, "eval_beir-quora_recall@100": 0.97886, "eval_beir-quora_recall@20": 0.93456, "eval_beir-scidocs_ndcg@10": 0.15236, "eval_beir-scidocs_recall@10": 0.16178, "eval_beir-scidocs_recall@100": 0.36943, "eval_beir-scidocs_recall@20": 0.21628, "eval_beir-scifact_ndcg@10": 0.64748, "eval_beir-scifact_recall@10": 0.79233, "eval_beir-scifact_recall@100": 0.92989, "eval_beir-scifact_recall@20": 0.833, "eval_beir-trec-covid_ndcg@10": 0.58166, "eval_beir-trec-covid_recall@10": 0.618, "eval_beir-trec-covid_recall@100": 0.4574, "eval_beir-trec-covid_recall@20": 0.593, "eval_beir-webis-touche2020_ndcg@10": 0.2009, "eval_beir-webis-touche2020_recall@10": 0.13955, "eval_beir-webis-touche2020_recall@100": 0.43856, "eval_beir-webis-touche2020_recall@20": 0.20718, "eval_senteval-avg_sts": 0.7468602289121653, "eval_senteval-sickr_spearman": 0.7151367984317111, "eval_senteval-stsb_spearman": 0.7785836593926196, "step": 90000, "test_accuracy": 93.76220703125, "test_active_queue_size": 16384.0, "test_cl_loss": 0.36957016587257385, "test_doc_norm": 1.5594813823699951, "test_inbatch_neg_score": 1.0945709943771362, "test_inbatch_pos_score": 2.0048184394836426, "test_loss": 0.36957016587257385, "test_loss_align": 0.9179236888885498, "test_loss_unif": 2.9137585163116455, "test_loss_unif_q@queue": 2.9137582778930664, "test_norm_diff": 0.013072298839688301, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.7452419996261597, "test_query_norm": 1.5721384286880493, "test_queue_k_norm": 1.5887845754623413, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.04169077426195145, "test_stdq": 0.04178181290626526, "test_stdqueue_k": 0.0483221635222435, "test_stdqueue_q": 0.0 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.8828, "doc_norm": 1.5885, "encoder_q-embeddings": 11061.9121, "encoder_q-layer.0": 7324.3164, "encoder_q-layer.1": 9236.8857, "encoder_q-layer.10": 4965.2559, "encoder_q-layer.11": 13429.9541, "encoder_q-layer.2": 10696.3574, "encoder_q-layer.3": 10978.1738, "encoder_q-layer.4": 10486.9443, "encoder_q-layer.5": 11393.8359, "encoder_q-layer.6": 11571.2188, "encoder_q-layer.7": 11981.6562, "encoder_q-layer.8": 9110.5049, "encoder_q-layer.9": 5163.0142, "epoch": 0.59, "inbatch_neg_score": 0.7627, "inbatch_pos_score": 1.3086, "learning_rate": 5.500000000000001e-06, "loss": 3.8828, "norm_diff": 0.1252, "norm_loss": 0.0, "num_token_doc": 67.0763, "num_token_overlap": 11.6694, "num_token_query": 31.8822, "num_token_union": 65.5144, "num_word_context": 202.6197, "num_word_doc": 50.0127, "num_word_query": 23.5121, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15456.7519, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7627, "query_norm": 1.4633, "queue_k_norm": 1.5889, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8822, "sent_len_1": 67.0763, "sent_len_max_0": 127.2575, "sent_len_max_1": 191.095, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 90100 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 3.9166, "doc_norm": 1.5793, "encoder_q-embeddings": 19602.4238, "encoder_q-layer.0": 14297.791, "encoder_q-layer.1": 13795.8799, "encoder_q-layer.10": 5466.7896, "encoder_q-layer.11": 13648.7715, "encoder_q-layer.2": 15058.2109, "encoder_q-layer.3": 14871.6543, "encoder_q-layer.4": 16394.4473, "encoder_q-layer.5": 17613.3027, "encoder_q-layer.6": 16164.6504, "encoder_q-layer.7": 15572.7373, "encoder_q-layer.8": 13988.3906, "encoder_q-layer.9": 8035.9922, "epoch": 0.59, "inbatch_neg_score": 0.7606, "inbatch_pos_score": 1.2891, "learning_rate": 5.444444444444445e-06, "loss": 3.9166, "norm_diff": 0.1204, "norm_loss": 0.0, "num_token_doc": 66.7953, "num_token_overlap": 11.6496, "num_token_query": 31.8662, "num_token_union": 65.3628, "num_word_context": 202.4175, "num_word_doc": 49.8306, "num_word_query": 23.5255, "postclip_grad_norm": 1.0, "preclip_grad_norm": 22994.7065, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.7612, "query_norm": 1.459, "queue_k_norm": 1.5906, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8662, "sent_len_1": 66.7953, "sent_len_max_0": 127.4688, "sent_len_max_1": 189.6625, "stdk": 0.0478, "stdq": 0.0419, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 90200 }, { "accuracy": 43.3594, "active_queue_size": 16384.0, "cl_loss": 3.887, "doc_norm": 1.5898, "encoder_q-embeddings": 4330.3247, "encoder_q-layer.0": 2926.3708, "encoder_q-layer.1": 2997.2271, "encoder_q-layer.10": 5313.7393, "encoder_q-layer.11": 13439.0508, "encoder_q-layer.2": 3474.1731, "encoder_q-layer.3": 3575.1685, "encoder_q-layer.4": 3646.9929, "encoder_q-layer.5": 3767.6782, "encoder_q-layer.6": 4201.3403, "encoder_q-layer.7": 4691.2549, "encoder_q-layer.8": 5329.7837, "encoder_q-layer.9": 5002.7886, "epoch": 0.59, "inbatch_neg_score": 0.7625, "inbatch_pos_score": 1.3008, "learning_rate": 5.388888888888889e-06, "loss": 3.887, "norm_diff": 0.1326, "norm_loss": 0.0, "num_token_doc": 66.8877, "num_token_overlap": 11.7024, "num_token_query": 31.8756, "num_token_union": 65.3892, "num_word_context": 202.2926, "num_word_doc": 49.9412, "num_word_query": 23.5269, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8665.1637, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7627, "query_norm": 1.4572, "queue_k_norm": 1.5911, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8756, "sent_len_1": 66.8877, "sent_len_max_0": 127.5425, "sent_len_max_1": 188.5462, "stdk": 0.0482, "stdq": 0.0418, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 90300 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.9072, "doc_norm": 1.5825, "encoder_q-embeddings": 4390.127, "encoder_q-layer.0": 2870.739, "encoder_q-layer.1": 3136.8765, "encoder_q-layer.10": 5629.8931, "encoder_q-layer.11": 13760.3008, "encoder_q-layer.2": 3553.7463, "encoder_q-layer.3": 3745.8035, "encoder_q-layer.4": 4165.8345, "encoder_q-layer.5": 4177.7139, "encoder_q-layer.6": 4564.1855, "encoder_q-layer.7": 4728.1934, "encoder_q-layer.8": 5370.4062, "encoder_q-layer.9": 5043.5522, "epoch": 0.59, "inbatch_neg_score": 0.7638, "inbatch_pos_score": 1.2998, "learning_rate": 5.333333333333334e-06, "loss": 3.9072, "norm_diff": 0.1166, "norm_loss": 0.0, "num_token_doc": 66.6818, "num_token_overlap": 11.6312, "num_token_query": 31.8775, "num_token_union": 65.3569, "num_word_context": 202.2411, "num_word_doc": 49.7715, "num_word_query": 23.5649, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8770.3461, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7632, "query_norm": 1.466, "queue_k_norm": 1.5914, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8775, "sent_len_1": 66.6818, "sent_len_max_0": 127.5975, "sent_len_max_1": 189.605, "stdk": 0.0479, "stdq": 0.0422, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 90400 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.8771, "doc_norm": 1.5933, "encoder_q-embeddings": 4131.647, "encoder_q-layer.0": 2765.7505, "encoder_q-layer.1": 2861.7395, "encoder_q-layer.10": 5438.8438, "encoder_q-layer.11": 13328.6152, "encoder_q-layer.2": 3242.4951, "encoder_q-layer.3": 3417.3999, "encoder_q-layer.4": 3517.4897, "encoder_q-layer.5": 3468.3486, "encoder_q-layer.6": 3945.9517, "encoder_q-layer.7": 4309.3516, "encoder_q-layer.8": 5030.8184, "encoder_q-layer.9": 4778.8789, "epoch": 0.59, "inbatch_neg_score": 0.7637, "inbatch_pos_score": 1.3145, "learning_rate": 5.277777777777778e-06, "loss": 3.8771, "norm_diff": 0.1278, "norm_loss": 0.0, "num_token_doc": 66.6607, "num_token_overlap": 11.7273, "num_token_query": 31.9038, "num_token_union": 65.2432, "num_word_context": 202.4169, "num_word_doc": 49.7497, "num_word_query": 23.5971, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8328.7191, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7642, "query_norm": 1.4655, "queue_k_norm": 1.591, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9038, "sent_len_1": 66.6607, "sent_len_max_0": 127.6488, "sent_len_max_1": 189.6012, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 90500 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.8959, "doc_norm": 1.5942, "encoder_q-embeddings": 7909.0054, "encoder_q-layer.0": 5625.5396, "encoder_q-layer.1": 5668.188, "encoder_q-layer.10": 5451.3911, "encoder_q-layer.11": 13235.5664, "encoder_q-layer.2": 6577.8052, "encoder_q-layer.3": 6491.4805, "encoder_q-layer.4": 7010.4243, "encoder_q-layer.5": 6821.1294, "encoder_q-layer.6": 6223.5781, "encoder_q-layer.7": 6268.7241, "encoder_q-layer.8": 5695.4111, "encoder_q-layer.9": 4810.6396, "epoch": 0.59, "inbatch_neg_score": 0.7662, "inbatch_pos_score": 1.3271, "learning_rate": 5.2222222222222226e-06, "loss": 3.8959, "norm_diff": 0.1279, "norm_loss": 0.0, "num_token_doc": 67.0649, "num_token_overlap": 11.6617, "num_token_query": 31.7462, "num_token_union": 65.4396, "num_word_context": 202.7254, "num_word_doc": 49.9965, "num_word_query": 23.4432, "postclip_grad_norm": 1.0, "preclip_grad_norm": 11046.3893, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7666, "query_norm": 1.4662, "queue_k_norm": 1.5928, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7462, "sent_len_1": 67.0649, "sent_len_max_0": 127.5113, "sent_len_max_1": 191.6138, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 90600 }, { "accuracy": 50.0, "active_queue_size": 16384.0, "cl_loss": 3.9055, "doc_norm": 1.5991, "encoder_q-embeddings": 4284.3955, "encoder_q-layer.0": 2823.8135, "encoder_q-layer.1": 2962.2822, "encoder_q-layer.10": 5769.7905, "encoder_q-layer.11": 13301.8711, "encoder_q-layer.2": 3313.6104, "encoder_q-layer.3": 3539.6631, "encoder_q-layer.4": 3916.5188, "encoder_q-layer.5": 3677.6018, "encoder_q-layer.6": 4156.1631, "encoder_q-layer.7": 4423.1885, "encoder_q-layer.8": 5292.6763, "encoder_q-layer.9": 4980.9429, "epoch": 0.59, "inbatch_neg_score": 0.7642, "inbatch_pos_score": 1.332, "learning_rate": 5.166666666666667e-06, "loss": 3.9055, "norm_diff": 0.138, "norm_loss": 0.0, "num_token_doc": 66.6452, "num_token_overlap": 11.6294, "num_token_query": 31.769, "num_token_union": 65.2649, "num_word_context": 202.446, "num_word_doc": 49.7618, "num_word_query": 23.444, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8402.2749, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7656, "query_norm": 1.4611, "queue_k_norm": 1.5905, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.769, "sent_len_1": 66.6452, "sent_len_max_0": 127.4425, "sent_len_max_1": 187.6775, "stdk": 0.0486, "stdq": 0.042, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 90700 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.8968, "doc_norm": 1.5949, "encoder_q-embeddings": 4975.5093, "encoder_q-layer.0": 3413.8782, "encoder_q-layer.1": 3634.564, "encoder_q-layer.10": 5123.8169, "encoder_q-layer.11": 13346.8789, "encoder_q-layer.2": 4437.665, "encoder_q-layer.3": 4757.5459, "encoder_q-layer.4": 5303.2749, "encoder_q-layer.5": 4918.9976, "encoder_q-layer.6": 5166.0586, "encoder_q-layer.7": 5407.4712, "encoder_q-layer.8": 5622.4771, "encoder_q-layer.9": 4812.0249, "epoch": 0.59, "inbatch_neg_score": 0.767, "inbatch_pos_score": 1.3184, "learning_rate": 5.1111111111111115e-06, "loss": 3.8968, "norm_diff": 0.1262, "norm_loss": 0.0, "num_token_doc": 66.5796, "num_token_overlap": 11.6516, "num_token_query": 31.8386, "num_token_union": 65.2519, "num_word_context": 202.1636, "num_word_doc": 49.6807, "num_word_query": 23.5141, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9193.3156, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7656, "query_norm": 1.4687, "queue_k_norm": 1.5926, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8386, "sent_len_1": 66.5796, "sent_len_max_0": 127.445, "sent_len_max_1": 188.7463, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 90800 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.9067, "doc_norm": 1.5998, "encoder_q-embeddings": 4506.5049, "encoder_q-layer.0": 3006.1438, "encoder_q-layer.1": 3163.5378, "encoder_q-layer.10": 5493.5693, "encoder_q-layer.11": 13096.6533, "encoder_q-layer.2": 3561.6077, "encoder_q-layer.3": 3788.9072, "encoder_q-layer.4": 3987.9426, "encoder_q-layer.5": 4023.5984, "encoder_q-layer.6": 4491.1226, "encoder_q-layer.7": 4715.1797, "encoder_q-layer.8": 5146.3599, "encoder_q-layer.9": 4734.3408, "epoch": 0.59, "inbatch_neg_score": 0.7653, "inbatch_pos_score": 1.3477, "learning_rate": 5.0555555555555555e-06, "loss": 3.9067, "norm_diff": 0.1267, "norm_loss": 0.0, "num_token_doc": 66.6724, "num_token_overlap": 11.7004, "num_token_query": 32.0004, "num_token_union": 65.3662, "num_word_context": 202.3577, "num_word_doc": 49.7436, "num_word_query": 23.6427, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8625.5758, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7661, "query_norm": 1.4731, "queue_k_norm": 1.5924, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0004, "sent_len_1": 66.6724, "sent_len_max_0": 127.54, "sent_len_max_1": 189.225, "stdk": 0.0486, "stdq": 0.0426, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 90900 }, { "accuracy": 42.1875, "active_queue_size": 16384.0, "cl_loss": 3.8909, "doc_norm": 1.5942, "encoder_q-embeddings": 3714.3254, "encoder_q-layer.0": 2502.8762, "encoder_q-layer.1": 3069.1394, "encoder_q-layer.10": 2743.8088, "encoder_q-layer.11": 6779.9092, "encoder_q-layer.2": 3724.8845, "encoder_q-layer.3": 4238.7949, "encoder_q-layer.4": 4893.7422, "encoder_q-layer.5": 5632.6694, "encoder_q-layer.6": 5538.0576, "encoder_q-layer.7": 5185.2461, "encoder_q-layer.8": 3938.762, "encoder_q-layer.9": 2632.5327, "epoch": 0.59, "inbatch_neg_score": 0.7678, "inbatch_pos_score": 1.3193, "learning_rate": 5e-06, "loss": 3.8909, "norm_diff": 0.1182, "norm_loss": 0.0, "num_token_doc": 66.8687, "num_token_overlap": 11.682, "num_token_query": 31.9641, "num_token_union": 65.4451, "num_word_context": 202.6003, "num_word_doc": 49.9007, "num_word_query": 23.626, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6624.9524, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7671, "query_norm": 1.476, "queue_k_norm": 1.5949, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9641, "sent_len_1": 66.8687, "sent_len_max_0": 127.4275, "sent_len_max_1": 190.2812, "stdk": 0.0483, "stdq": 0.0427, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 91000 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9163, "doc_norm": 1.592, "encoder_q-embeddings": 2402.7168, "encoder_q-layer.0": 1603.6855, "encoder_q-layer.1": 1776.0264, "encoder_q-layer.10": 2687.5542, "encoder_q-layer.11": 6936.5298, "encoder_q-layer.2": 2050.0247, "encoder_q-layer.3": 2170.1926, "encoder_q-layer.4": 2385.9067, "encoder_q-layer.5": 2549.3098, "encoder_q-layer.6": 2510.5393, "encoder_q-layer.7": 2787.4387, "encoder_q-layer.8": 2826.8188, "encoder_q-layer.9": 2477.8162, "epoch": 0.59, "inbatch_neg_score": 0.7659, "inbatch_pos_score": 1.3359, "learning_rate": 4.9444444444444444e-06, "loss": 3.9163, "norm_diff": 0.121, "norm_loss": 0.0, "num_token_doc": 66.6979, "num_token_overlap": 11.6853, "num_token_query": 31.9297, "num_token_union": 65.3127, "num_word_context": 202.3833, "num_word_doc": 49.7871, "num_word_query": 23.5844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4602.38, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7661, "query_norm": 1.471, "queue_k_norm": 1.5931, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9297, "sent_len_1": 66.6979, "sent_len_max_0": 127.3462, "sent_len_max_1": 190.8587, "stdk": 0.0482, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 91100 }, { "accuracy": 46.6797, "active_queue_size": 16384.0, "cl_loss": 3.8711, "doc_norm": 1.6035, "encoder_q-embeddings": 2483.9895, "encoder_q-layer.0": 1832.6853, "encoder_q-layer.1": 1980.3179, "encoder_q-layer.10": 2662.4568, "encoder_q-layer.11": 6637.3364, "encoder_q-layer.2": 2070.6497, "encoder_q-layer.3": 2042.391, "encoder_q-layer.4": 2107.5986, "encoder_q-layer.5": 1985.4108, "encoder_q-layer.6": 2167.5908, "encoder_q-layer.7": 2371.416, "encoder_q-layer.8": 2687.8389, "encoder_q-layer.9": 2442.9473, "epoch": 0.59, "inbatch_neg_score": 0.7661, "inbatch_pos_score": 1.335, "learning_rate": 4.888888888888889e-06, "loss": 3.8711, "norm_diff": 0.1321, "norm_loss": 0.0, "num_token_doc": 66.8974, "num_token_overlap": 11.7321, "num_token_query": 32.0542, "num_token_union": 65.4776, "num_word_context": 202.4924, "num_word_doc": 49.9068, "num_word_query": 23.6828, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4458.4788, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7666, "query_norm": 1.4714, "queue_k_norm": 1.5939, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0542, "sent_len_1": 66.8974, "sent_len_max_0": 127.5713, "sent_len_max_1": 189.6925, "stdk": 0.0487, "stdq": 0.0425, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 91200 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.8916, "doc_norm": 1.5921, "encoder_q-embeddings": 3161.002, "encoder_q-layer.0": 2131.1516, "encoder_q-layer.1": 2576.5264, "encoder_q-layer.10": 2517.5061, "encoder_q-layer.11": 6763.9966, "encoder_q-layer.2": 3052.5696, "encoder_q-layer.3": 2991.1399, "encoder_q-layer.4": 3025.9495, "encoder_q-layer.5": 2981.4053, "encoder_q-layer.6": 2827.1443, "encoder_q-layer.7": 2822.8728, "encoder_q-layer.8": 3004.5898, "encoder_q-layer.9": 2489.8979, "epoch": 0.59, "inbatch_neg_score": 0.7695, "inbatch_pos_score": 1.3184, "learning_rate": 4.833333333333333e-06, "loss": 3.8916, "norm_diff": 0.1284, "norm_loss": 0.0, "num_token_doc": 66.5947, "num_token_overlap": 11.6535, "num_token_query": 31.8544, "num_token_union": 65.2471, "num_word_context": 202.4672, "num_word_doc": 49.7152, "num_word_query": 23.5292, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5073.192, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7681, "query_norm": 1.4637, "queue_k_norm": 1.5939, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8544, "sent_len_1": 66.5947, "sent_len_max_0": 127.3325, "sent_len_max_1": 189.5712, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 91300 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.8935, "doc_norm": 1.5927, "encoder_q-embeddings": 3832.301, "encoder_q-layer.0": 2659.7925, "encoder_q-layer.1": 3032.1523, "encoder_q-layer.10": 2684.2983, "encoder_q-layer.11": 6943.1729, "encoder_q-layer.2": 3495.1089, "encoder_q-layer.3": 3688.5444, "encoder_q-layer.4": 3753.6006, "encoder_q-layer.5": 3390.8423, "encoder_q-layer.6": 3301.2202, "encoder_q-layer.7": 3377.9568, "encoder_q-layer.8": 3198.5571, "encoder_q-layer.9": 2556.822, "epoch": 0.59, "inbatch_neg_score": 0.768, "inbatch_pos_score": 1.3105, "learning_rate": 4.777777777777778e-06, "loss": 3.8935, "norm_diff": 0.1266, "norm_loss": 0.0, "num_token_doc": 66.7288, "num_token_overlap": 11.6575, "num_token_query": 31.8174, "num_token_union": 65.2901, "num_word_context": 202.2455, "num_word_doc": 49.8323, "num_word_query": 23.5001, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5699.336, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7666, "query_norm": 1.4661, "queue_k_norm": 1.5936, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8174, "sent_len_1": 66.7288, "sent_len_max_0": 127.53, "sent_len_max_1": 188.5087, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 91400 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.8976, "doc_norm": 1.5931, "encoder_q-embeddings": 1945.4348, "encoder_q-layer.0": 1303.6119, "encoder_q-layer.1": 1369.7563, "encoder_q-layer.10": 2541.2854, "encoder_q-layer.11": 6608.5137, "encoder_q-layer.2": 1510.0057, "encoder_q-layer.3": 1524.3765, "encoder_q-layer.4": 1617.3445, "encoder_q-layer.5": 1677.4307, "encoder_q-layer.6": 1933.4218, "encoder_q-layer.7": 2116.2109, "encoder_q-layer.8": 2532.4585, "encoder_q-layer.9": 2370.0986, "epoch": 0.6, "inbatch_neg_score": 0.7654, "inbatch_pos_score": 1.3145, "learning_rate": 4.722222222222222e-06, "loss": 3.8976, "norm_diff": 0.1295, "norm_loss": 0.0, "num_token_doc": 66.7539, "num_token_overlap": 11.6981, "num_token_query": 32.0343, "num_token_union": 65.3971, "num_word_context": 202.2712, "num_word_doc": 49.8096, "num_word_query": 23.6536, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4111.7168, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7661, "query_norm": 1.4636, "queue_k_norm": 1.5946, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0343, "sent_len_1": 66.7539, "sent_len_max_0": 127.5812, "sent_len_max_1": 189.6213, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 91500 }, { "accuracy": 46.582, "active_queue_size": 16384.0, "cl_loss": 3.9019, "doc_norm": 1.5978, "encoder_q-embeddings": 2103.4873, "encoder_q-layer.0": 1423.8352, "encoder_q-layer.1": 1515.4506, "encoder_q-layer.10": 2503.4177, "encoder_q-layer.11": 6644.4814, "encoder_q-layer.2": 1660.4326, "encoder_q-layer.3": 1677.1886, "encoder_q-layer.4": 1740.8655, "encoder_q-layer.5": 1820.0402, "encoder_q-layer.6": 2012.2651, "encoder_q-layer.7": 2296.3125, "encoder_q-layer.8": 2556.6274, "encoder_q-layer.9": 2300.6047, "epoch": 0.6, "inbatch_neg_score": 0.767, "inbatch_pos_score": 1.3223, "learning_rate": 4.666666666666667e-06, "loss": 3.9019, "norm_diff": 0.1346, "norm_loss": 0.0, "num_token_doc": 66.7521, "num_token_overlap": 11.5939, "num_token_query": 31.7783, "num_token_union": 65.3421, "num_word_context": 202.3922, "num_word_doc": 49.8546, "num_word_query": 23.4695, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4200.546, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7686, "query_norm": 1.4632, "queue_k_norm": 1.5957, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7783, "sent_len_1": 66.7521, "sent_len_max_0": 127.6063, "sent_len_max_1": 187.95, "stdk": 0.0484, "stdq": 0.0421, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 91600 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.892, "doc_norm": 1.5918, "encoder_q-embeddings": 2141.8506, "encoder_q-layer.0": 1469.194, "encoder_q-layer.1": 1551.3802, "encoder_q-layer.10": 2779.8542, "encoder_q-layer.11": 6798.4702, "encoder_q-layer.2": 1857.4928, "encoder_q-layer.3": 1897.3505, "encoder_q-layer.4": 2093.0518, "encoder_q-layer.5": 2013.8884, "encoder_q-layer.6": 2199.0151, "encoder_q-layer.7": 2451.7747, "encoder_q-layer.8": 2669.853, "encoder_q-layer.9": 2385.1147, "epoch": 0.6, "inbatch_neg_score": 0.7667, "inbatch_pos_score": 1.3311, "learning_rate": 4.611111111111111e-06, "loss": 3.892, "norm_diff": 0.1295, "norm_loss": 0.0, "num_token_doc": 66.82, "num_token_overlap": 11.7148, "num_token_query": 31.9516, "num_token_union": 65.3625, "num_word_context": 202.6941, "num_word_doc": 49.874, "num_word_query": 23.5996, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4389.8362, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7676, "query_norm": 1.4623, "queue_k_norm": 1.5951, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9516, "sent_len_1": 66.82, "sent_len_max_0": 127.5475, "sent_len_max_1": 190.79, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 91700 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8744, "doc_norm": 1.5898, "encoder_q-embeddings": 22955.8184, "encoder_q-layer.0": 17800.0996, "encoder_q-layer.1": 17984.6035, "encoder_q-layer.10": 2545.5322, "encoder_q-layer.11": 6287.2783, "encoder_q-layer.2": 19222.9121, "encoder_q-layer.3": 18073.9043, "encoder_q-layer.4": 17981.4473, "encoder_q-layer.5": 18490.623, "encoder_q-layer.6": 18104.418, "encoder_q-layer.7": 17701.0078, "encoder_q-layer.8": 10362.833, "encoder_q-layer.9": 4582.0903, "epoch": 0.6, "inbatch_neg_score": 0.7685, "inbatch_pos_score": 1.3262, "learning_rate": 4.555555555555556e-06, "loss": 3.8744, "norm_diff": 0.1186, "norm_loss": 0.0, "num_token_doc": 66.8829, "num_token_overlap": 11.6945, "num_token_query": 31.941, "num_token_union": 65.3955, "num_word_context": 202.6212, "num_word_doc": 49.932, "num_word_query": 23.5844, "postclip_grad_norm": 1.0, "preclip_grad_norm": 25154.3214, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.7681, "query_norm": 1.4712, "queue_k_norm": 1.5968, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.941, "sent_len_1": 66.8829, "sent_len_max_0": 127.5387, "sent_len_max_1": 189.4238, "stdk": 0.048, "stdq": 0.0425, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 91800 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.8953, "doc_norm": 1.5906, "encoder_q-embeddings": 3198.8296, "encoder_q-layer.0": 2310.5315, "encoder_q-layer.1": 2323.7588, "encoder_q-layer.10": 2366.2087, "encoder_q-layer.11": 6160.8765, "encoder_q-layer.2": 2640.5688, "encoder_q-layer.3": 2790.6511, "encoder_q-layer.4": 3192.3423, "encoder_q-layer.5": 2759.1555, "encoder_q-layer.6": 2757.5977, "encoder_q-layer.7": 2661.8311, "encoder_q-layer.8": 2690.3494, "encoder_q-layer.9": 2206.5403, "epoch": 0.6, "inbatch_neg_score": 0.7665, "inbatch_pos_score": 1.3223, "learning_rate": 4.5e-06, "loss": 3.8953, "norm_diff": 0.1373, "norm_loss": 0.0, "num_token_doc": 66.7951, "num_token_overlap": 11.6619, "num_token_query": 31.8985, "num_token_union": 65.4253, "num_word_context": 202.4153, "num_word_doc": 49.8218, "num_word_query": 23.5519, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4792.1369, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7666, "query_norm": 1.4532, "queue_k_norm": 1.5953, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8985, "sent_len_1": 66.7951, "sent_len_max_0": 127.485, "sent_len_max_1": 190.6012, "stdk": 0.0481, "stdq": 0.0417, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 91900 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.9014, "doc_norm": 1.5933, "encoder_q-embeddings": 2245.3794, "encoder_q-layer.0": 1484.7354, "encoder_q-layer.1": 1603.1904, "encoder_q-layer.10": 2872.5437, "encoder_q-layer.11": 6823.0161, "encoder_q-layer.2": 1853.5153, "encoder_q-layer.3": 1895.1536, "encoder_q-layer.4": 2055.1143, "encoder_q-layer.5": 2091.5759, "encoder_q-layer.6": 2314.863, "encoder_q-layer.7": 2368.7234, "encoder_q-layer.8": 2751.4509, "encoder_q-layer.9": 2520.6204, "epoch": 0.6, "inbatch_neg_score": 0.766, "inbatch_pos_score": 1.3145, "learning_rate": 4.444444444444445e-06, "loss": 3.9014, "norm_diff": 0.1315, "norm_loss": 0.0, "num_token_doc": 66.7575, "num_token_overlap": 11.6892, "num_token_query": 31.939, "num_token_union": 65.3773, "num_word_context": 202.4778, "num_word_doc": 49.8035, "num_word_query": 23.6236, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4364.2547, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7666, "query_norm": 1.4619, "queue_k_norm": 1.5934, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.939, "sent_len_1": 66.7575, "sent_len_max_0": 127.3762, "sent_len_max_1": 191.0175, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92000 }, { "accuracy": 45.6055, "active_queue_size": 16384.0, "cl_loss": 3.9055, "doc_norm": 1.5977, "encoder_q-embeddings": 2658.0403, "encoder_q-layer.0": 1783.8745, "encoder_q-layer.1": 1900.2665, "encoder_q-layer.10": 2489.5984, "encoder_q-layer.11": 6568.9863, "encoder_q-layer.2": 2147.9595, "encoder_q-layer.3": 2125.3379, "encoder_q-layer.4": 2448.301, "encoder_q-layer.5": 2369.1785, "encoder_q-layer.6": 2664.5181, "encoder_q-layer.7": 2874.9624, "encoder_q-layer.8": 3236.98, "encoder_q-layer.9": 2386.4641, "epoch": 0.6, "inbatch_neg_score": 0.7668, "inbatch_pos_score": 1.3125, "learning_rate": 4.388888888888889e-06, "loss": 3.9055, "norm_diff": 0.1377, "norm_loss": 0.0, "num_token_doc": 66.737, "num_token_overlap": 11.6007, "num_token_query": 31.8076, "num_token_union": 65.3659, "num_word_context": 202.5365, "num_word_doc": 49.7731, "num_word_query": 23.4964, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4610.2758, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7681, "query_norm": 1.46, "queue_k_norm": 1.5947, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8076, "sent_len_1": 66.737, "sent_len_max_0": 127.6825, "sent_len_max_1": 190.705, "stdk": 0.0484, "stdq": 0.042, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92100 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.8903, "doc_norm": 1.5958, "encoder_q-embeddings": 2526.0261, "encoder_q-layer.0": 1806.9475, "encoder_q-layer.1": 1821.7354, "encoder_q-layer.10": 2606.717, "encoder_q-layer.11": 6616.9712, "encoder_q-layer.2": 2037.8693, "encoder_q-layer.3": 2023.1384, "encoder_q-layer.4": 2276.0417, "encoder_q-layer.5": 2217.55, "encoder_q-layer.6": 2331.0742, "encoder_q-layer.7": 2508.7441, "encoder_q-layer.8": 2739.3447, "encoder_q-layer.9": 2468.7593, "epoch": 0.6, "inbatch_neg_score": 0.7671, "inbatch_pos_score": 1.3252, "learning_rate": 4.333333333333334e-06, "loss": 3.8903, "norm_diff": 0.1356, "norm_loss": 0.0, "num_token_doc": 66.5544, "num_token_overlap": 11.6751, "num_token_query": 31.8388, "num_token_union": 65.1638, "num_word_context": 201.962, "num_word_doc": 49.7093, "num_word_query": 23.4951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4488.3982, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7676, "query_norm": 1.4602, "queue_k_norm": 1.5944, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8388, "sent_len_1": 66.5544, "sent_len_max_0": 127.6038, "sent_len_max_1": 188.1287, "stdk": 0.0483, "stdq": 0.042, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92200 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.9015, "doc_norm": 1.5961, "encoder_q-embeddings": 2312.47, "encoder_q-layer.0": 1615.876, "encoder_q-layer.1": 1905.618, "encoder_q-layer.10": 2512.0881, "encoder_q-layer.11": 6415.5718, "encoder_q-layer.2": 2067.1067, "encoder_q-layer.3": 2073.156, "encoder_q-layer.4": 2380.2661, "encoder_q-layer.5": 2567.1968, "encoder_q-layer.6": 2576.9314, "encoder_q-layer.7": 2635.804, "encoder_q-layer.8": 2877.0876, "encoder_q-layer.9": 2392.4829, "epoch": 0.6, "inbatch_neg_score": 0.7671, "inbatch_pos_score": 1.3281, "learning_rate": 4.277777777777778e-06, "loss": 3.9015, "norm_diff": 0.1276, "norm_loss": 0.0, "num_token_doc": 66.5923, "num_token_overlap": 11.6767, "num_token_query": 31.9452, "num_token_union": 65.2652, "num_word_context": 202.2148, "num_word_doc": 49.7008, "num_word_query": 23.6028, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4407.1996, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7671, "query_norm": 1.4686, "queue_k_norm": 1.5943, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9452, "sent_len_1": 66.5923, "sent_len_max_0": 127.6112, "sent_len_max_1": 189.42, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92300 }, { "accuracy": 49.0234, "active_queue_size": 16384.0, "cl_loss": 3.886, "doc_norm": 1.5991, "encoder_q-embeddings": 1887.839, "encoder_q-layer.0": 1280.4728, "encoder_q-layer.1": 1330.0336, "encoder_q-layer.10": 2560.9812, "encoder_q-layer.11": 6447.4092, "encoder_q-layer.2": 1486.5967, "encoder_q-layer.3": 1539.6348, "encoder_q-layer.4": 1595.9556, "encoder_q-layer.5": 1664.7917, "encoder_q-layer.6": 1907.2571, "encoder_q-layer.7": 2112.0078, "encoder_q-layer.8": 2469.7048, "encoder_q-layer.9": 2315.7825, "epoch": 0.6, "inbatch_neg_score": 0.7658, "inbatch_pos_score": 1.3379, "learning_rate": 4.222222222222223e-06, "loss": 3.886, "norm_diff": 0.1412, "norm_loss": 0.0, "num_token_doc": 66.6949, "num_token_overlap": 11.717, "num_token_query": 32.0466, "num_token_union": 65.3542, "num_word_context": 202.3721, "num_word_doc": 49.8073, "num_word_query": 23.6875, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3984.978, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7671, "query_norm": 1.4579, "queue_k_norm": 1.5935, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0466, "sent_len_1": 66.6949, "sent_len_max_0": 127.6038, "sent_len_max_1": 189.6138, "stdk": 0.0484, "stdq": 0.042, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92400 }, { "accuracy": 42.0898, "active_queue_size": 16384.0, "cl_loss": 3.8706, "doc_norm": 1.5889, "encoder_q-embeddings": 2566.4465, "encoder_q-layer.0": 1766.9968, "encoder_q-layer.1": 2053.187, "encoder_q-layer.10": 2897.2969, "encoder_q-layer.11": 7014.1333, "encoder_q-layer.2": 2405.5439, "encoder_q-layer.3": 2596.9438, "encoder_q-layer.4": 2587.8201, "encoder_q-layer.5": 2591.3376, "encoder_q-layer.6": 2661.5034, "encoder_q-layer.7": 2642.3931, "encoder_q-layer.8": 2909.594, "encoder_q-layer.9": 2552.5994, "epoch": 0.6, "inbatch_neg_score": 0.7683, "inbatch_pos_score": 1.2891, "learning_rate": 4.166666666666667e-06, "loss": 3.8706, "norm_diff": 0.1339, "norm_loss": 0.0, "num_token_doc": 66.7629, "num_token_overlap": 11.7329, "num_token_query": 32.0939, "num_token_union": 65.393, "num_word_context": 202.4242, "num_word_doc": 49.7979, "num_word_query": 23.7117, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4836.8104, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7686, "query_norm": 1.455, "queue_k_norm": 1.5942, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0939, "sent_len_1": 66.7629, "sent_len_max_0": 127.5913, "sent_len_max_1": 189.6, "stdk": 0.048, "stdq": 0.0418, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92500 }, { "accuracy": 45.1172, "active_queue_size": 16384.0, "cl_loss": 3.8893, "doc_norm": 1.5965, "encoder_q-embeddings": 2449.7148, "encoder_q-layer.0": 1629.8885, "encoder_q-layer.1": 1688.7129, "encoder_q-layer.10": 2855.0793, "encoder_q-layer.11": 6621.2178, "encoder_q-layer.2": 2005.8749, "encoder_q-layer.3": 1872.0583, "encoder_q-layer.4": 1952.4895, "encoder_q-layer.5": 1866.1016, "encoder_q-layer.6": 2063.1646, "encoder_q-layer.7": 2245.3481, "encoder_q-layer.8": 2658.6355, "encoder_q-layer.9": 2406.2761, "epoch": 0.6, "inbatch_neg_score": 0.7669, "inbatch_pos_score": 1.3281, "learning_rate": 4.111111111111112e-06, "loss": 3.8893, "norm_diff": 0.124, "norm_loss": 0.0, "num_token_doc": 66.8588, "num_token_overlap": 11.6653, "num_token_query": 31.8354, "num_token_union": 65.418, "num_word_context": 201.9814, "num_word_doc": 49.9155, "num_word_query": 23.5068, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4312.0693, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7676, "query_norm": 1.4725, "queue_k_norm": 1.5955, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8354, "sent_len_1": 66.8588, "sent_len_max_0": 127.5637, "sent_len_max_1": 188.7663, "stdk": 0.0483, "stdq": 0.0426, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92600 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.8892, "doc_norm": 1.5998, "encoder_q-embeddings": 2855.0557, "encoder_q-layer.0": 1981.207, "encoder_q-layer.1": 2334.0061, "encoder_q-layer.10": 2712.4153, "encoder_q-layer.11": 6675.9663, "encoder_q-layer.2": 2781.2051, "encoder_q-layer.3": 3127.8728, "encoder_q-layer.4": 3417.3396, "encoder_q-layer.5": 3409.2407, "encoder_q-layer.6": 3984.5256, "encoder_q-layer.7": 4199.2339, "encoder_q-layer.8": 4049.8953, "encoder_q-layer.9": 2499.5359, "epoch": 0.6, "inbatch_neg_score": 0.7678, "inbatch_pos_score": 1.3057, "learning_rate": 4.055555555555556e-06, "loss": 3.8892, "norm_diff": 0.1359, "norm_loss": 0.0, "num_token_doc": 66.8043, "num_token_overlap": 11.6933, "num_token_query": 31.9607, "num_token_union": 65.4193, "num_word_context": 202.1124, "num_word_doc": 49.8471, "num_word_query": 23.6108, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5444.0173, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7676, "query_norm": 1.464, "queue_k_norm": 1.5954, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9607, "sent_len_1": 66.8043, "sent_len_max_0": 127.55, "sent_len_max_1": 190.2063, "stdk": 0.0484, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92700 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.8687, "doc_norm": 1.5957, "encoder_q-embeddings": 1171.0404, "encoder_q-layer.0": 825.1335, "encoder_q-layer.1": 915.5566, "encoder_q-layer.10": 1371.9552, "encoder_q-layer.11": 3132.8372, "encoder_q-layer.2": 1064.7086, "encoder_q-layer.3": 1170.6581, "encoder_q-layer.4": 1208.799, "encoder_q-layer.5": 1273.74, "encoder_q-layer.6": 1392.5676, "encoder_q-layer.7": 1570.8835, "encoder_q-layer.8": 1372.1816, "encoder_q-layer.9": 1202.8362, "epoch": 0.6, "inbatch_neg_score": 0.7692, "inbatch_pos_score": 1.3457, "learning_rate": 4.000000000000001e-06, "loss": 3.8687, "norm_diff": 0.1153, "norm_loss": 0.0, "num_token_doc": 66.9598, "num_token_overlap": 11.7176, "num_token_query": 31.9282, "num_token_union": 65.4181, "num_word_context": 202.4086, "num_word_doc": 49.9865, "num_word_query": 23.5893, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2235.9296, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7681, "query_norm": 1.4804, "queue_k_norm": 1.5957, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9282, "sent_len_1": 66.9598, "sent_len_max_0": 127.5288, "sent_len_max_1": 191.0725, "stdk": 0.0482, "stdq": 0.043, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 92800 }, { "accuracy": 47.7539, "active_queue_size": 16384.0, "cl_loss": 3.874, "doc_norm": 1.5938, "encoder_q-embeddings": 5963.9658, "encoder_q-layer.0": 4314.6411, "encoder_q-layer.1": 5420.2529, "encoder_q-layer.10": 1453.9471, "encoder_q-layer.11": 3482.4688, "encoder_q-layer.2": 7100.854, "encoder_q-layer.3": 7751.0967, "encoder_q-layer.4": 7451.1533, "encoder_q-layer.5": 6472.1768, "encoder_q-layer.6": 7179.8271, "encoder_q-layer.7": 6340.4312, "encoder_q-layer.8": 4726.0815, "encoder_q-layer.9": 2363.1533, "epoch": 0.6, "inbatch_neg_score": 0.7657, "inbatch_pos_score": 1.3242, "learning_rate": 3.944444444444445e-06, "loss": 3.874, "norm_diff": 0.1198, "norm_loss": 0.0, "num_token_doc": 66.8842, "num_token_overlap": 11.7147, "num_token_query": 31.9888, "num_token_union": 65.465, "num_word_context": 202.5882, "num_word_doc": 49.9259, "num_word_query": 23.6325, "postclip_grad_norm": 1.0, "preclip_grad_norm": 8613.8882, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7666, "query_norm": 1.474, "queue_k_norm": 1.5965, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9888, "sent_len_1": 66.8842, "sent_len_max_0": 127.4375, "sent_len_max_1": 189.0662, "stdk": 0.0482, "stdq": 0.0427, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 92900 }, { "accuracy": 48.9258, "active_queue_size": 16384.0, "cl_loss": 3.8933, "doc_norm": 1.5982, "encoder_q-embeddings": 1104.8398, "encoder_q-layer.0": 752.2078, "encoder_q-layer.1": 813.876, "encoder_q-layer.10": 1251.7775, "encoder_q-layer.11": 3192.6938, "encoder_q-layer.2": 932.0073, "encoder_q-layer.3": 990.0553, "encoder_q-layer.4": 1019.5124, "encoder_q-layer.5": 1053.5684, "encoder_q-layer.6": 1130.1989, "encoder_q-layer.7": 1237.4183, "encoder_q-layer.8": 1374.2551, "encoder_q-layer.9": 1159.5729, "epoch": 0.61, "inbatch_neg_score": 0.7665, "inbatch_pos_score": 1.3359, "learning_rate": 3.888888888888889e-06, "loss": 3.8933, "norm_diff": 0.1324, "norm_loss": 0.0, "num_token_doc": 66.9093, "num_token_overlap": 11.661, "num_token_query": 31.7945, "num_token_union": 65.3708, "num_word_context": 202.596, "num_word_doc": 49.9511, "num_word_query": 23.4905, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2096.3957, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7661, "query_norm": 1.4658, "queue_k_norm": 1.5963, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7945, "sent_len_1": 66.9093, "sent_len_max_0": 127.5263, "sent_len_max_1": 190.2937, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 93000 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.8812, "doc_norm": 1.595, "encoder_q-embeddings": 1261.0159, "encoder_q-layer.0": 873.2936, "encoder_q-layer.1": 941.3362, "encoder_q-layer.10": 1216.238, "encoder_q-layer.11": 3247.2749, "encoder_q-layer.2": 989.0179, "encoder_q-layer.3": 1009.2734, "encoder_q-layer.4": 1153.5602, "encoder_q-layer.5": 1248.4758, "encoder_q-layer.6": 1256.5908, "encoder_q-layer.7": 1200.7203, "encoder_q-layer.8": 1259.325, "encoder_q-layer.9": 1116.8379, "epoch": 0.61, "inbatch_neg_score": 0.7672, "inbatch_pos_score": 1.3164, "learning_rate": 3.833333333333334e-06, "loss": 3.8812, "norm_diff": 0.1305, "norm_loss": 0.0, "num_token_doc": 66.7302, "num_token_overlap": 11.667, "num_token_query": 31.9621, "num_token_union": 65.4342, "num_word_context": 202.2248, "num_word_doc": 49.8358, "num_word_query": 23.6287, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2198.9949, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7681, "query_norm": 1.4645, "queue_k_norm": 1.5946, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9621, "sent_len_1": 66.7302, "sent_len_max_0": 127.5138, "sent_len_max_1": 189.9787, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93100 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.9108, "doc_norm": 1.5995, "encoder_q-embeddings": 1634.52, "encoder_q-layer.0": 1140.9231, "encoder_q-layer.1": 1291.389, "encoder_q-layer.10": 1235.7657, "encoder_q-layer.11": 3201.6323, "encoder_q-layer.2": 1627.4951, "encoder_q-layer.3": 1780.3362, "encoder_q-layer.4": 2031.244, "encoder_q-layer.5": 1944.2539, "encoder_q-layer.6": 2058.166, "encoder_q-layer.7": 1964.8374, "encoder_q-layer.8": 1822.6199, "encoder_q-layer.9": 1250.6931, "epoch": 0.61, "inbatch_neg_score": 0.7686, "inbatch_pos_score": 1.3252, "learning_rate": 3.777777777777778e-06, "loss": 3.9108, "norm_diff": 0.1391, "norm_loss": 0.0, "num_token_doc": 66.6674, "num_token_overlap": 11.6614, "num_token_query": 31.8, "num_token_union": 65.2569, "num_word_context": 202.4502, "num_word_doc": 49.7781, "num_word_query": 23.5133, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2808.4424, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7676, "query_norm": 1.4604, "queue_k_norm": 1.5957, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8, "sent_len_1": 66.6674, "sent_len_max_0": 127.3975, "sent_len_max_1": 190.3363, "stdk": 0.0484, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93200 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.8781, "doc_norm": 1.5941, "encoder_q-embeddings": 1109.6814, "encoder_q-layer.0": 715.6072, "encoder_q-layer.1": 775.6393, "encoder_q-layer.10": 1367.36, "encoder_q-layer.11": 3222.449, "encoder_q-layer.2": 870.1074, "encoder_q-layer.3": 923.4276, "encoder_q-layer.4": 1015.6065, "encoder_q-layer.5": 1057.781, "encoder_q-layer.6": 1193.1891, "encoder_q-layer.7": 1229.2385, "encoder_q-layer.8": 1377.849, "encoder_q-layer.9": 1160.0846, "epoch": 0.61, "inbatch_neg_score": 0.7676, "inbatch_pos_score": 1.3408, "learning_rate": 3.722222222222222e-06, "loss": 3.8781, "norm_diff": 0.1281, "norm_loss": 0.0, "num_token_doc": 66.684, "num_token_overlap": 11.6693, "num_token_query": 31.9934, "num_token_union": 65.3545, "num_word_context": 202.1243, "num_word_doc": 49.7508, "num_word_query": 23.6492, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2141.0595, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7681, "query_norm": 1.466, "queue_k_norm": 1.5954, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9934, "sent_len_1": 66.684, "sent_len_max_0": 127.6, "sent_len_max_1": 190.255, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93300 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.9036, "doc_norm": 1.5985, "encoder_q-embeddings": 1151.0601, "encoder_q-layer.0": 769.768, "encoder_q-layer.1": 843.0826, "encoder_q-layer.10": 1431.4303, "encoder_q-layer.11": 3453.4031, "encoder_q-layer.2": 1007.1891, "encoder_q-layer.3": 987.8526, "encoder_q-layer.4": 1018.6272, "encoder_q-layer.5": 1061.0552, "encoder_q-layer.6": 1136.0939, "encoder_q-layer.7": 1305.9126, "encoder_q-layer.8": 1418.8083, "encoder_q-layer.9": 1259.0071, "epoch": 0.61, "inbatch_neg_score": 0.7676, "inbatch_pos_score": 1.3105, "learning_rate": 3.666666666666667e-06, "loss": 3.9036, "norm_diff": 0.1321, "norm_loss": 0.0, "num_token_doc": 66.7882, "num_token_overlap": 11.6503, "num_token_query": 31.8174, "num_token_union": 65.3408, "num_word_context": 202.4079, "num_word_doc": 49.8053, "num_word_query": 23.4992, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2241.3492, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7681, "query_norm": 1.4664, "queue_k_norm": 1.5963, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8174, "sent_len_1": 66.7882, "sent_len_max_0": 127.4437, "sent_len_max_1": 187.965, "stdk": 0.0484, "stdq": 0.0423, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 93400 }, { "accuracy": 47.9492, "active_queue_size": 16384.0, "cl_loss": 3.8728, "doc_norm": 1.5886, "encoder_q-embeddings": 1039.3696, "encoder_q-layer.0": 683.3691, "encoder_q-layer.1": 730.411, "encoder_q-layer.10": 1344.5853, "encoder_q-layer.11": 3392.7122, "encoder_q-layer.2": 816.119, "encoder_q-layer.3": 858.502, "encoder_q-layer.4": 892.6502, "encoder_q-layer.5": 912.0853, "encoder_q-layer.6": 1037.8387, "encoder_q-layer.7": 1230.6307, "encoder_q-layer.8": 1302.3214, "encoder_q-layer.9": 1218.3875, "epoch": 0.61, "inbatch_neg_score": 0.7657, "inbatch_pos_score": 1.335, "learning_rate": 3.611111111111111e-06, "loss": 3.8728, "norm_diff": 0.1193, "norm_loss": 0.0, "num_token_doc": 66.789, "num_token_overlap": 11.7127, "num_token_query": 32.0082, "num_token_union": 65.413, "num_word_context": 202.5337, "num_word_doc": 49.8477, "num_word_query": 23.6586, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2105.824, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7661, "query_norm": 1.4694, "queue_k_norm": 1.5949, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.0082, "sent_len_1": 66.789, "sent_len_max_0": 127.6188, "sent_len_max_1": 189.0538, "stdk": 0.0479, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93500 }, { "accuracy": 44.3359, "active_queue_size": 16384.0, "cl_loss": 3.8867, "doc_norm": 1.5878, "encoder_q-embeddings": 1021.1569, "encoder_q-layer.0": 681.4307, "encoder_q-layer.1": 734.8621, "encoder_q-layer.10": 1536.1573, "encoder_q-layer.11": 3759.5967, "encoder_q-layer.2": 799.6823, "encoder_q-layer.3": 838.881, "encoder_q-layer.4": 885.8456, "encoder_q-layer.5": 888.8386, "encoder_q-layer.6": 1017.0416, "encoder_q-layer.7": 1182.8262, "encoder_q-layer.8": 1462.0592, "encoder_q-layer.9": 1328.2095, "epoch": 0.61, "inbatch_neg_score": 0.7681, "inbatch_pos_score": 1.3086, "learning_rate": 3.555555555555556e-06, "loss": 3.8867, "norm_diff": 0.12, "norm_loss": 0.0, "num_token_doc": 66.886, "num_token_overlap": 11.6381, "num_token_query": 31.7263, "num_token_union": 65.3215, "num_word_context": 202.145, "num_word_doc": 49.923, "num_word_query": 23.4338, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2286.1683, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7686, "query_norm": 1.4679, "queue_k_norm": 1.5941, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7263, "sent_len_1": 66.886, "sent_len_max_0": 127.42, "sent_len_max_1": 189.4087, "stdk": 0.0479, "stdq": 0.0424, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 93600 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.9118, "doc_norm": 1.6046, "encoder_q-embeddings": 1077.1505, "encoder_q-layer.0": 717.2845, "encoder_q-layer.1": 771.465, "encoder_q-layer.10": 1316.9468, "encoder_q-layer.11": 3312.2212, "encoder_q-layer.2": 887.5233, "encoder_q-layer.3": 903.0101, "encoder_q-layer.4": 955.6473, "encoder_q-layer.5": 999.3205, "encoder_q-layer.6": 1134.05, "encoder_q-layer.7": 1190.4915, "encoder_q-layer.8": 1418.5914, "encoder_q-layer.9": 1268.4629, "epoch": 0.61, "inbatch_neg_score": 0.7666, "inbatch_pos_score": 1.3291, "learning_rate": 3.5000000000000004e-06, "loss": 3.9118, "norm_diff": 0.1409, "norm_loss": 0.0, "num_token_doc": 66.942, "num_token_overlap": 11.6958, "num_token_query": 31.9096, "num_token_union": 65.4547, "num_word_context": 202.4686, "num_word_doc": 49.9763, "num_word_query": 23.5622, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2166.7034, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7666, "query_norm": 1.4637, "queue_k_norm": 1.5948, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9096, "sent_len_1": 66.942, "sent_len_max_0": 127.4488, "sent_len_max_1": 188.3587, "stdk": 0.0486, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 93700 }, { "accuracy": 45.5078, "active_queue_size": 16384.0, "cl_loss": 3.881, "doc_norm": 1.5952, "encoder_q-embeddings": 1081.0978, "encoder_q-layer.0": 741.4825, "encoder_q-layer.1": 799.3005, "encoder_q-layer.10": 1260.5844, "encoder_q-layer.11": 3314.947, "encoder_q-layer.2": 863.5047, "encoder_q-layer.3": 900.8452, "encoder_q-layer.4": 959.3539, "encoder_q-layer.5": 943.8088, "encoder_q-layer.6": 1028.1307, "encoder_q-layer.7": 1122.0647, "encoder_q-layer.8": 1293.2229, "encoder_q-layer.9": 1185.8813, "epoch": 0.61, "inbatch_neg_score": 0.7686, "inbatch_pos_score": 1.3291, "learning_rate": 3.4444444444444444e-06, "loss": 3.881, "norm_diff": 0.1294, "norm_loss": 0.0, "num_token_doc": 67.0105, "num_token_overlap": 11.719, "num_token_query": 31.9431, "num_token_union": 65.4347, "num_word_context": 202.5687, "num_word_doc": 50.0392, "num_word_query": 23.6033, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2116.8796, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7676, "query_norm": 1.4658, "queue_k_norm": 1.5965, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9431, "sent_len_1": 67.0105, "sent_len_max_0": 127.515, "sent_len_max_1": 190.4963, "stdk": 0.0482, "stdq": 0.0424, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 93800 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.8943, "doc_norm": 1.6043, "encoder_q-embeddings": 2655.1282, "encoder_q-layer.0": 1813.6328, "encoder_q-layer.1": 1892.8964, "encoder_q-layer.10": 1304.5964, "encoder_q-layer.11": 3349.0293, "encoder_q-layer.2": 2224.1177, "encoder_q-layer.3": 2391.0183, "encoder_q-layer.4": 2594.6655, "encoder_q-layer.5": 2921.3262, "encoder_q-layer.6": 3157.6672, "encoder_q-layer.7": 3045.5647, "encoder_q-layer.8": 2920.4399, "encoder_q-layer.9": 1978.7617, "epoch": 0.61, "inbatch_neg_score": 0.7683, "inbatch_pos_score": 1.3398, "learning_rate": 3.3888888888888893e-06, "loss": 3.8943, "norm_diff": 0.1314, "norm_loss": 0.0, "num_token_doc": 66.7313, "num_token_overlap": 11.6678, "num_token_query": 31.8883, "num_token_union": 65.345, "num_word_context": 202.1537, "num_word_doc": 49.7821, "num_word_query": 23.5134, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3933.1944, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7686, "query_norm": 1.4729, "queue_k_norm": 1.5967, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8883, "sent_len_1": 66.7313, "sent_len_max_0": 127.4663, "sent_len_max_1": 189.995, "stdk": 0.0486, "stdq": 0.0427, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 93900 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.886, "doc_norm": 1.5923, "encoder_q-embeddings": 927.4904, "encoder_q-layer.0": 611.2764, "encoder_q-layer.1": 640.1444, "encoder_q-layer.10": 1336.9443, "encoder_q-layer.11": 3307.427, "encoder_q-layer.2": 725.6271, "encoder_q-layer.3": 738.5249, "encoder_q-layer.4": 755.8811, "encoder_q-layer.5": 807.9496, "encoder_q-layer.6": 896.8409, "encoder_q-layer.7": 1072.796, "encoder_q-layer.8": 1210.3572, "encoder_q-layer.9": 1149.5006, "epoch": 0.61, "inbatch_neg_score": 0.7692, "inbatch_pos_score": 1.3193, "learning_rate": 3.3333333333333333e-06, "loss": 3.886, "norm_diff": 0.1348, "norm_loss": 0.0, "num_token_doc": 67.0634, "num_token_overlap": 11.654, "num_token_query": 31.8227, "num_token_union": 65.4892, "num_word_context": 202.6232, "num_word_doc": 50.0165, "num_word_query": 23.4942, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2030.6857, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7695, "query_norm": 1.4575, "queue_k_norm": 1.596, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8227, "sent_len_1": 67.0634, "sent_len_max_0": 127.3762, "sent_len_max_1": 190.5163, "stdk": 0.0481, "stdq": 0.0419, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 94000 }, { "accuracy": 47.5586, "active_queue_size": 16384.0, "cl_loss": 3.8945, "doc_norm": 1.6005, "encoder_q-embeddings": 1888.1185, "encoder_q-layer.0": 1301.4762, "encoder_q-layer.1": 1554.9907, "encoder_q-layer.10": 1399.6823, "encoder_q-layer.11": 3434.437, "encoder_q-layer.2": 1821.4865, "encoder_q-layer.3": 1994.0065, "encoder_q-layer.4": 2189.6223, "encoder_q-layer.5": 2313.5911, "encoder_q-layer.6": 2282.6235, "encoder_q-layer.7": 1745.6877, "encoder_q-layer.8": 1567.0272, "encoder_q-layer.9": 1316.6201, "epoch": 0.61, "inbatch_neg_score": 0.769, "inbatch_pos_score": 1.3271, "learning_rate": 3.277777777777778e-06, "loss": 3.8945, "norm_diff": 0.1421, "norm_loss": 0.0, "num_token_doc": 66.6842, "num_token_overlap": 11.6708, "num_token_query": 31.9391, "num_token_union": 65.3309, "num_word_context": 201.9912, "num_word_doc": 49.7653, "num_word_query": 23.5681, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2981.1556, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.769, "query_norm": 1.4583, "queue_k_norm": 1.5952, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9391, "sent_len_1": 66.6842, "sent_len_max_0": 127.4963, "sent_len_max_1": 189.69, "stdk": 0.0484, "stdq": 0.042, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 94100 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.9041, "doc_norm": 1.5948, "encoder_q-embeddings": 2344.1885, "encoder_q-layer.0": 1632.0201, "encoder_q-layer.1": 2050.2048, "encoder_q-layer.10": 1235.8219, "encoder_q-layer.11": 3180.2847, "encoder_q-layer.2": 2415.9163, "encoder_q-layer.3": 2664.0449, "encoder_q-layer.4": 2899.6682, "encoder_q-layer.5": 2744.8103, "encoder_q-layer.6": 2517.4084, "encoder_q-layer.7": 2271.3071, "encoder_q-layer.8": 1696.2682, "encoder_q-layer.9": 1119.1907, "epoch": 0.61, "inbatch_neg_score": 0.7697, "inbatch_pos_score": 1.3193, "learning_rate": 3.2222222222222222e-06, "loss": 3.9041, "norm_diff": 0.1335, "norm_loss": 0.0, "num_token_doc": 66.617, "num_token_overlap": 11.5523, "num_token_query": 31.6806, "num_token_union": 65.2545, "num_word_context": 202.2823, "num_word_doc": 49.7068, "num_word_query": 23.3738, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3454.0635, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.77, "query_norm": 1.4613, "queue_k_norm": 1.5956, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.6806, "sent_len_1": 66.617, "sent_len_max_0": 127.435, "sent_len_max_1": 189.6775, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 94200 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.9006, "doc_norm": 1.5951, "encoder_q-embeddings": 1030.9473, "encoder_q-layer.0": 679.543, "encoder_q-layer.1": 716.3154, "encoder_q-layer.10": 1281.2617, "encoder_q-layer.11": 3329.7527, "encoder_q-layer.2": 821.6008, "encoder_q-layer.3": 854.9486, "encoder_q-layer.4": 938.9042, "encoder_q-layer.5": 960.1129, "encoder_q-layer.6": 1053.7911, "encoder_q-layer.7": 1124.1925, "encoder_q-layer.8": 1336.3947, "encoder_q-layer.9": 1179.1533, "epoch": 0.61, "inbatch_neg_score": 0.7699, "inbatch_pos_score": 1.332, "learning_rate": 3.166666666666667e-06, "loss": 3.9006, "norm_diff": 0.1338, "norm_loss": 0.0, "num_token_doc": 66.5717, "num_token_overlap": 11.6689, "num_token_query": 31.8436, "num_token_union": 65.2189, "num_word_context": 201.992, "num_word_doc": 49.6398, "num_word_query": 23.5132, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2125.3338, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7695, "query_norm": 1.4612, "queue_k_norm": 1.5964, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8436, "sent_len_1": 66.5717, "sent_len_max_0": 127.6012, "sent_len_max_1": 189.3262, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 94300 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.8935, "doc_norm": 1.5955, "encoder_q-embeddings": 1074.7228, "encoder_q-layer.0": 747.7576, "encoder_q-layer.1": 799.0096, "encoder_q-layer.10": 1501.5791, "encoder_q-layer.11": 3341.8284, "encoder_q-layer.2": 929.5427, "encoder_q-layer.3": 934.4778, "encoder_q-layer.4": 1001.978, "encoder_q-layer.5": 1086.3613, "encoder_q-layer.6": 1231.1996, "encoder_q-layer.7": 1377.9886, "encoder_q-layer.8": 1547.1975, "encoder_q-layer.9": 1205.8879, "epoch": 0.61, "inbatch_neg_score": 0.77, "inbatch_pos_score": 1.3271, "learning_rate": 3.111111111111111e-06, "loss": 3.8935, "norm_diff": 0.1304, "norm_loss": 0.0, "num_token_doc": 66.7262, "num_token_overlap": 11.6283, "num_token_query": 31.823, "num_token_union": 65.3271, "num_word_context": 202.2003, "num_word_doc": 49.7723, "num_word_query": 23.4866, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2276.986, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.77, "query_norm": 1.4651, "queue_k_norm": 1.5945, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.823, "sent_len_1": 66.7262, "sent_len_max_0": 127.5175, "sent_len_max_1": 190.7262, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 94400 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.8717, "doc_norm": 1.5959, "encoder_q-embeddings": 982.1776, "encoder_q-layer.0": 655.8055, "encoder_q-layer.1": 685.397, "encoder_q-layer.10": 1264.1288, "encoder_q-layer.11": 3389.074, "encoder_q-layer.2": 769.0604, "encoder_q-layer.3": 798.3481, "encoder_q-layer.4": 861.7033, "encoder_q-layer.5": 885.0796, "encoder_q-layer.6": 963.7377, "encoder_q-layer.7": 1099.3462, "encoder_q-layer.8": 1263.0793, "encoder_q-layer.9": 1152.1025, "epoch": 0.62, "inbatch_neg_score": 0.7718, "inbatch_pos_score": 1.3145, "learning_rate": 3.0555555555555556e-06, "loss": 3.8717, "norm_diff": 0.1364, "norm_loss": 0.0, "num_token_doc": 66.7746, "num_token_overlap": 11.7254, "num_token_query": 31.8788, "num_token_union": 65.24, "num_word_context": 202.1476, "num_word_doc": 49.8303, "num_word_query": 23.5445, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2140.2504, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.771, "query_norm": 1.4595, "queue_k_norm": 1.5953, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8788, "sent_len_1": 66.7746, "sent_len_max_0": 127.5962, "sent_len_max_1": 188.925, "stdk": 0.0482, "stdq": 0.042, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 94500 }, { "accuracy": 44.8242, "active_queue_size": 16384.0, "cl_loss": 3.9031, "doc_norm": 1.6016, "encoder_q-embeddings": 1003.0481, "encoder_q-layer.0": 677.3924, "encoder_q-layer.1": 707.7581, "encoder_q-layer.10": 1361.0679, "encoder_q-layer.11": 3543.2468, "encoder_q-layer.2": 774.6243, "encoder_q-layer.3": 801.2474, "encoder_q-layer.4": 849.9459, "encoder_q-layer.5": 851.4855, "encoder_q-layer.6": 991.2582, "encoder_q-layer.7": 1142.0267, "encoder_q-layer.8": 1350.6141, "encoder_q-layer.9": 1229.626, "epoch": 0.62, "inbatch_neg_score": 0.7705, "inbatch_pos_score": 1.3076, "learning_rate": 3e-06, "loss": 3.9031, "norm_diff": 0.1383, "norm_loss": 0.0, "num_token_doc": 66.588, "num_token_overlap": 11.6507, "num_token_query": 31.937, "num_token_union": 65.3058, "num_word_context": 201.8718, "num_word_doc": 49.7001, "num_word_query": 23.5973, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2159.7172, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7705, "query_norm": 1.4633, "queue_k_norm": 1.5942, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.937, "sent_len_1": 66.588, "sent_len_max_0": 127.4325, "sent_len_max_1": 188.0312, "stdk": 0.0485, "stdq": 0.0422, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 94600 }, { "accuracy": 45.2148, "active_queue_size": 16384.0, "cl_loss": 3.9099, "doc_norm": 1.5966, "encoder_q-embeddings": 1390.6229, "encoder_q-layer.0": 1005.689, "encoder_q-layer.1": 1084.5662, "encoder_q-layer.10": 1347.475, "encoder_q-layer.11": 3482.3464, "encoder_q-layer.2": 1095.6344, "encoder_q-layer.3": 1182.0461, "encoder_q-layer.4": 1224.4794, "encoder_q-layer.5": 1137.8197, "encoder_q-layer.6": 1231.7239, "encoder_q-layer.7": 1316.201, "encoder_q-layer.8": 1398.3046, "encoder_q-layer.9": 1288.3423, "epoch": 0.62, "inbatch_neg_score": 0.7717, "inbatch_pos_score": 1.3311, "learning_rate": 2.9444444444444445e-06, "loss": 3.9099, "norm_diff": 0.1296, "norm_loss": 0.0, "num_token_doc": 66.7255, "num_token_overlap": 11.6666, "num_token_query": 31.9603, "num_token_union": 65.3645, "num_word_context": 202.166, "num_word_doc": 49.8255, "num_word_query": 23.5983, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2399.8785, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.771, "query_norm": 1.467, "queue_k_norm": 1.5949, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9603, "sent_len_1": 66.7255, "sent_len_max_0": 127.5687, "sent_len_max_1": 187.84, "stdk": 0.0482, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 94700 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.8784, "doc_norm": 1.6002, "encoder_q-embeddings": 2661.7292, "encoder_q-layer.0": 1822.049, "encoder_q-layer.1": 2034.2263, "encoder_q-layer.10": 2630.6221, "encoder_q-layer.11": 6457.1099, "encoder_q-layer.2": 2350.6548, "encoder_q-layer.3": 2388.3186, "encoder_q-layer.4": 2526.6541, "encoder_q-layer.5": 2516.2864, "encoder_q-layer.6": 2601.4375, "encoder_q-layer.7": 2683.1218, "encoder_q-layer.8": 2687.1816, "encoder_q-layer.9": 2463.8904, "epoch": 0.62, "inbatch_neg_score": 0.7733, "inbatch_pos_score": 1.334, "learning_rate": 2.888888888888889e-06, "loss": 3.8784, "norm_diff": 0.1316, "norm_loss": 0.0, "num_token_doc": 66.7604, "num_token_overlap": 11.634, "num_token_query": 31.9853, "num_token_union": 65.4173, "num_word_context": 202.0555, "num_word_doc": 49.7905, "num_word_query": 23.6262, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4548.8911, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7729, "query_norm": 1.4686, "queue_k_norm": 1.5955, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9853, "sent_len_1": 66.7604, "sent_len_max_0": 127.5375, "sent_len_max_1": 190.3988, "stdk": 0.0484, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 94800 }, { "accuracy": 44.1406, "active_queue_size": 16384.0, "cl_loss": 3.8802, "doc_norm": 1.5981, "encoder_q-embeddings": 1990.3612, "encoder_q-layer.0": 1333.4619, "encoder_q-layer.1": 1415.6158, "encoder_q-layer.10": 2501.1567, "encoder_q-layer.11": 6584.5376, "encoder_q-layer.2": 1602.7482, "encoder_q-layer.3": 1651.9795, "encoder_q-layer.4": 1695.2904, "encoder_q-layer.5": 1735.8822, "encoder_q-layer.6": 1965.1855, "encoder_q-layer.7": 2231.2664, "encoder_q-layer.8": 2537.6055, "encoder_q-layer.9": 2358.3567, "epoch": 0.62, "inbatch_neg_score": 0.7719, "inbatch_pos_score": 1.3203, "learning_rate": 2.8333333333333335e-06, "loss": 3.8802, "norm_diff": 0.1302, "norm_loss": 0.0, "num_token_doc": 66.795, "num_token_overlap": 11.7126, "num_token_query": 31.9661, "num_token_union": 65.3614, "num_word_context": 202.3137, "num_word_doc": 49.8204, "num_word_query": 23.605, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4144.8115, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.772, "query_norm": 1.4679, "queue_k_norm": 1.5955, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9661, "sent_len_1": 66.795, "sent_len_max_0": 127.41, "sent_len_max_1": 191.7975, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 94900 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8894, "doc_norm": 1.6019, "encoder_q-embeddings": 18009.7168, "encoder_q-layer.0": 13192.792, "encoder_q-layer.1": 13359.1572, "encoder_q-layer.10": 2756.1985, "encoder_q-layer.11": 6712.6504, "encoder_q-layer.2": 15906.1406, "encoder_q-layer.3": 16922.5391, "encoder_q-layer.4": 15738.1611, "encoder_q-layer.5": 16611.584, "encoder_q-layer.6": 15044.1299, "encoder_q-layer.7": 15557.292, "encoder_q-layer.8": 11218.4043, "encoder_q-layer.9": 4047.6802, "epoch": 0.62, "inbatch_neg_score": 0.7715, "inbatch_pos_score": 1.3379, "learning_rate": 2.777777777777778e-06, "loss": 3.8894, "norm_diff": 0.1333, "norm_loss": 0.0, "num_token_doc": 66.701, "num_token_overlap": 11.6695, "num_token_query": 31.9222, "num_token_union": 65.3271, "num_word_context": 202.0793, "num_word_doc": 49.7707, "num_word_query": 23.571, "postclip_grad_norm": 1.0, "preclip_grad_norm": 21067.5839, "preclip_grad_norm_avg": 0.0002, "q@queue_neg_score": 0.772, "query_norm": 1.4686, "queue_k_norm": 1.5965, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9222, "sent_len_1": 66.701, "sent_len_max_0": 127.5525, "sent_len_max_1": 187.9025, "stdk": 0.0485, "stdq": 0.0425, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 95000 }, { "accuracy": 43.8477, "active_queue_size": 16384.0, "cl_loss": 3.8638, "doc_norm": 1.5973, "encoder_q-embeddings": 2104.3303, "encoder_q-layer.0": 1418.9691, "encoder_q-layer.1": 1510.7247, "encoder_q-layer.10": 2843.0911, "encoder_q-layer.11": 6697.188, "encoder_q-layer.2": 1721.9996, "encoder_q-layer.3": 1826.9375, "encoder_q-layer.4": 1949.1747, "encoder_q-layer.5": 2086.3872, "encoder_q-layer.6": 2241.0583, "encoder_q-layer.7": 2463.3323, "encoder_q-layer.8": 2829.8711, "encoder_q-layer.9": 2607.958, "epoch": 0.62, "inbatch_neg_score": 0.7738, "inbatch_pos_score": 1.3154, "learning_rate": 2.7222222222222224e-06, "loss": 3.8638, "norm_diff": 0.1343, "norm_loss": 0.0, "num_token_doc": 66.7427, "num_token_overlap": 11.7159, "num_token_query": 31.8848, "num_token_union": 65.3331, "num_word_context": 202.2863, "num_word_doc": 49.8135, "num_word_query": 23.544, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4336.0147, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7729, "query_norm": 1.4631, "queue_k_norm": 1.5968, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8848, "sent_len_1": 66.7427, "sent_len_max_0": 127.595, "sent_len_max_1": 189.1337, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 95100 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8868, "doc_norm": 1.6033, "encoder_q-embeddings": 1248.4291, "encoder_q-layer.0": 855.6667, "encoder_q-layer.1": 924.62, "encoder_q-layer.10": 1279.2135, "encoder_q-layer.11": 3317.6631, "encoder_q-layer.2": 1040.4337, "encoder_q-layer.3": 1080.0903, "encoder_q-layer.4": 1156.1338, "encoder_q-layer.5": 1152.021, "encoder_q-layer.6": 1220.4314, "encoder_q-layer.7": 1272.4166, "encoder_q-layer.8": 1353.2271, "encoder_q-layer.9": 1220.4573, "epoch": 0.62, "inbatch_neg_score": 0.7741, "inbatch_pos_score": 1.3281, "learning_rate": 2.666666666666667e-06, "loss": 3.8868, "norm_diff": 0.1305, "norm_loss": 0.0, "num_token_doc": 67.0839, "num_token_overlap": 11.7024, "num_token_query": 32.0627, "num_token_union": 65.6614, "num_word_context": 202.5868, "num_word_doc": 50.1263, "num_word_query": 23.6841, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2247.83, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7734, "query_norm": 1.4729, "queue_k_norm": 1.5982, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0627, "sent_len_1": 67.0839, "sent_len_max_0": 127.4437, "sent_len_max_1": 188.665, "stdk": 0.0485, "stdq": 0.0426, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 95200 }, { "accuracy": 46.9727, "active_queue_size": 16384.0, "cl_loss": 3.878, "doc_norm": 1.6062, "encoder_q-embeddings": 1126.4103, "encoder_q-layer.0": 751.3322, "encoder_q-layer.1": 796.8398, "encoder_q-layer.10": 1327.6604, "encoder_q-layer.11": 3311.0771, "encoder_q-layer.2": 906.2025, "encoder_q-layer.3": 926.4934, "encoder_q-layer.4": 999.7603, "encoder_q-layer.5": 999.5793, "encoder_q-layer.6": 1061.5126, "encoder_q-layer.7": 1189.2146, "encoder_q-layer.8": 1333.6858, "encoder_q-layer.9": 1243.7151, "epoch": 0.62, "inbatch_neg_score": 0.7725, "inbatch_pos_score": 1.3301, "learning_rate": 2.6111111111111113e-06, "loss": 3.878, "norm_diff": 0.1373, "norm_loss": 0.0, "num_token_doc": 66.7561, "num_token_overlap": 11.7141, "num_token_query": 31.9987, "num_token_union": 65.3752, "num_word_context": 202.3859, "num_word_doc": 49.8271, "num_word_query": 23.6504, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2144.9172, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7734, "query_norm": 1.4689, "queue_k_norm": 1.5968, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9987, "sent_len_1": 66.7561, "sent_len_max_0": 127.5863, "sent_len_max_1": 187.805, "stdk": 0.0486, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 95300 }, { "accuracy": 47.3633, "active_queue_size": 16384.0, "cl_loss": 3.8893, "doc_norm": 1.6007, "encoder_q-embeddings": 1411.4449, "encoder_q-layer.0": 979.8757, "encoder_q-layer.1": 1095.7017, "encoder_q-layer.10": 1193.5281, "encoder_q-layer.11": 3228.0068, "encoder_q-layer.2": 1289.923, "encoder_q-layer.3": 1308.7368, "encoder_q-layer.4": 1436.7233, "encoder_q-layer.5": 1437.7914, "encoder_q-layer.6": 1555.4153, "encoder_q-layer.7": 1557.4839, "encoder_q-layer.8": 1518.6519, "encoder_q-layer.9": 1186.8224, "epoch": 0.62, "inbatch_neg_score": 0.7743, "inbatch_pos_score": 1.3301, "learning_rate": 2.5555555555555557e-06, "loss": 3.8893, "norm_diff": 0.1376, "norm_loss": 0.0, "num_token_doc": 66.4832, "num_token_overlap": 11.6801, "num_token_query": 31.9167, "num_token_union": 65.1879, "num_word_context": 201.8568, "num_word_doc": 49.5872, "num_word_query": 23.5783, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2430.1192, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7749, "query_norm": 1.4631, "queue_k_norm": 1.5948, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9167, "sent_len_1": 66.4832, "sent_len_max_0": 127.6, "sent_len_max_1": 190.4412, "stdk": 0.0484, "stdq": 0.0421, "stdqueue_k": 0.0482, "stdqueue_q": 0.0, "step": 95400 }, { "accuracy": 44.5312, "active_queue_size": 16384.0, "cl_loss": 3.8587, "doc_norm": 1.5985, "encoder_q-embeddings": 1526.2885, "encoder_q-layer.0": 1030.1287, "encoder_q-layer.1": 1213.0145, "encoder_q-layer.10": 1332.2703, "encoder_q-layer.11": 3513.9238, "encoder_q-layer.2": 1530.1614, "encoder_q-layer.3": 1618.1869, "encoder_q-layer.4": 1703.2449, "encoder_q-layer.5": 1776.9027, "encoder_q-layer.6": 1928.8375, "encoder_q-layer.7": 1725.1545, "encoder_q-layer.8": 1705.0883, "encoder_q-layer.9": 1383.0853, "epoch": 0.62, "inbatch_neg_score": 0.7743, "inbatch_pos_score": 1.3262, "learning_rate": 2.5e-06, "loss": 3.8587, "norm_diff": 0.1209, "norm_loss": 0.0, "num_token_doc": 66.9408, "num_token_overlap": 11.6762, "num_token_query": 31.8883, "num_token_union": 65.4308, "num_word_context": 202.2554, "num_word_doc": 49.928, "num_word_query": 23.5379, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2740.2356, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7749, "query_norm": 1.4776, "queue_k_norm": 1.5978, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8883, "sent_len_1": 66.9408, "sent_len_max_0": 127.5238, "sent_len_max_1": 189.6325, "stdk": 0.0483, "stdq": 0.0428, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 95500 }, { "accuracy": 45.8984, "active_queue_size": 16384.0, "cl_loss": 3.9008, "doc_norm": 1.5943, "encoder_q-embeddings": 2293.6323, "encoder_q-layer.0": 1523.647, "encoder_q-layer.1": 1960.1953, "encoder_q-layer.10": 1327.9962, "encoder_q-layer.11": 3366.46, "encoder_q-layer.2": 2557.74, "encoder_q-layer.3": 2675.5984, "encoder_q-layer.4": 2987.6631, "encoder_q-layer.5": 2865.8374, "encoder_q-layer.6": 2355.8335, "encoder_q-layer.7": 2285.4082, "encoder_q-layer.8": 2107.3179, "encoder_q-layer.9": 1256.3229, "epoch": 0.62, "inbatch_neg_score": 0.7747, "inbatch_pos_score": 1.3281, "learning_rate": 2.4444444444444447e-06, "loss": 3.9008, "norm_diff": 0.133, "norm_loss": 0.0, "num_token_doc": 66.6579, "num_token_overlap": 11.6899, "num_token_query": 31.8934, "num_token_union": 65.2344, "num_word_context": 201.8843, "num_word_doc": 49.7015, "num_word_query": 23.5373, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3554.3086, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7749, "query_norm": 1.4614, "queue_k_norm": 1.5974, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8934, "sent_len_1": 66.6579, "sent_len_max_0": 127.66, "sent_len_max_1": 189.68, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 95600 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.8845, "doc_norm": 1.6034, "encoder_q-embeddings": 1101.8911, "encoder_q-layer.0": 716.0344, "encoder_q-layer.1": 774.3631, "encoder_q-layer.10": 1279.2878, "encoder_q-layer.11": 3269.4824, "encoder_q-layer.2": 909.5735, "encoder_q-layer.3": 935.0732, "encoder_q-layer.4": 1006.3846, "encoder_q-layer.5": 1075.7329, "encoder_q-layer.6": 1175.5074, "encoder_q-layer.7": 1163.4827, "encoder_q-layer.8": 1316.7498, "encoder_q-layer.9": 1156.6455, "epoch": 0.62, "inbatch_neg_score": 0.7735, "inbatch_pos_score": 1.3408, "learning_rate": 2.388888888888889e-06, "loss": 3.8845, "norm_diff": 0.1363, "norm_loss": 0.0, "num_token_doc": 66.5902, "num_token_overlap": 11.6016, "num_token_query": 31.7803, "num_token_union": 65.2515, "num_word_context": 202.1834, "num_word_doc": 49.695, "num_word_query": 23.4651, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2116.207, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7749, "query_norm": 1.467, "queue_k_norm": 1.5976, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7803, "sent_len_1": 66.5902, "sent_len_max_0": 127.4737, "sent_len_max_1": 188.2537, "stdk": 0.0485, "stdq": 0.0424, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 95700 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.89, "doc_norm": 1.5963, "encoder_q-embeddings": 1157.7554, "encoder_q-layer.0": 762.7082, "encoder_q-layer.1": 845.978, "encoder_q-layer.10": 1287.4893, "encoder_q-layer.11": 3371.9841, "encoder_q-layer.2": 983.0679, "encoder_q-layer.3": 1078.1405, "encoder_q-layer.4": 1251.0037, "encoder_q-layer.5": 1230.5001, "encoder_q-layer.6": 1210.0011, "encoder_q-layer.7": 1278.8003, "encoder_q-layer.8": 1374.5942, "encoder_q-layer.9": 1194.6765, "epoch": 0.62, "inbatch_neg_score": 0.7752, "inbatch_pos_score": 1.3291, "learning_rate": 2.3333333333333336e-06, "loss": 3.89, "norm_diff": 0.1316, "norm_loss": 0.0, "num_token_doc": 66.6168, "num_token_overlap": 11.7398, "num_token_query": 32.1133, "num_token_union": 65.3075, "num_word_context": 202.2853, "num_word_doc": 49.7167, "num_word_query": 23.7213, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2253.6536, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7759, "query_norm": 1.4648, "queue_k_norm": 1.5977, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.1133, "sent_len_1": 66.6168, "sent_len_max_0": 127.575, "sent_len_max_1": 188.795, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 95800 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.8685, "doc_norm": 1.5985, "encoder_q-embeddings": 1389.6375, "encoder_q-layer.0": 962.4177, "encoder_q-layer.1": 1036.6454, "encoder_q-layer.10": 1445.0177, "encoder_q-layer.11": 3501.353, "encoder_q-layer.2": 1236.697, "encoder_q-layer.3": 1279.9916, "encoder_q-layer.4": 1285.5822, "encoder_q-layer.5": 1383.6827, "encoder_q-layer.6": 1649.6494, "encoder_q-layer.7": 1863.5248, "encoder_q-layer.8": 1833.5028, "encoder_q-layer.9": 1345.6302, "epoch": 0.62, "inbatch_neg_score": 0.7771, "inbatch_pos_score": 1.3301, "learning_rate": 2.277777777777778e-06, "loss": 3.8685, "norm_diff": 0.1284, "norm_loss": 0.0, "num_token_doc": 66.5103, "num_token_overlap": 11.6657, "num_token_query": 31.8439, "num_token_union": 65.1952, "num_word_context": 202.106, "num_word_doc": 49.6073, "num_word_query": 23.5335, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2599.7809, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7769, "query_norm": 1.4701, "queue_k_norm": 1.5985, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8439, "sent_len_1": 66.5103, "sent_len_max_0": 127.5837, "sent_len_max_1": 190.9588, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 95900 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8585, "doc_norm": 1.5963, "encoder_q-embeddings": 1093.608, "encoder_q-layer.0": 744.2073, "encoder_q-layer.1": 819.4103, "encoder_q-layer.10": 1361.8534, "encoder_q-layer.11": 3428.6589, "encoder_q-layer.2": 956.2693, "encoder_q-layer.3": 965.0816, "encoder_q-layer.4": 1054.2201, "encoder_q-layer.5": 1076.0776, "encoder_q-layer.6": 1140.0007, "encoder_q-layer.7": 1128.4391, "encoder_q-layer.8": 1387.8744, "encoder_q-layer.9": 1271.4615, "epoch": 0.62, "inbatch_neg_score": 0.7757, "inbatch_pos_score": 1.3291, "learning_rate": 2.2222222222222225e-06, "loss": 3.8585, "norm_diff": 0.133, "norm_loss": 0.0, "num_token_doc": 66.9688, "num_token_overlap": 11.7326, "num_token_query": 31.9226, "num_token_union": 65.3895, "num_word_context": 202.1449, "num_word_doc": 49.9023, "num_word_query": 23.5822, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2198.5717, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7764, "query_norm": 1.4633, "queue_k_norm": 1.6003, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9226, "sent_len_1": 66.9688, "sent_len_max_0": 127.5713, "sent_len_max_1": 192.2937, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 96000 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.8855, "doc_norm": 1.6008, "encoder_q-embeddings": 1066.9792, "encoder_q-layer.0": 739.7167, "encoder_q-layer.1": 792.0448, "encoder_q-layer.10": 1240.8672, "encoder_q-layer.11": 3351.5691, "encoder_q-layer.2": 906.9955, "encoder_q-layer.3": 933.4272, "encoder_q-layer.4": 980.6958, "encoder_q-layer.5": 1032.1929, "encoder_q-layer.6": 1132.1006, "encoder_q-layer.7": 1226.0634, "encoder_q-layer.8": 1283.9576, "encoder_q-layer.9": 1182.4845, "epoch": 0.63, "inbatch_neg_score": 0.7757, "inbatch_pos_score": 1.3555, "learning_rate": 2.166666666666667e-06, "loss": 3.8855, "norm_diff": 0.1377, "norm_loss": 0.0, "num_token_doc": 66.9055, "num_token_overlap": 11.7073, "num_token_query": 31.8717, "num_token_union": 65.4189, "num_word_context": 202.4154, "num_word_doc": 49.9701, "num_word_query": 23.5407, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2177.1102, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7764, "query_norm": 1.4631, "queue_k_norm": 1.6007, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.8717, "sent_len_1": 66.9055, "sent_len_max_0": 127.4825, "sent_len_max_1": 189.0062, "stdk": 0.0484, "stdq": 0.0421, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 96100 }, { "accuracy": 46.4844, "active_queue_size": 16384.0, "cl_loss": 3.8811, "doc_norm": 1.6079, "encoder_q-embeddings": 1451.7094, "encoder_q-layer.0": 986.518, "encoder_q-layer.1": 1057.5017, "encoder_q-layer.10": 1282.0092, "encoder_q-layer.11": 3423.0396, "encoder_q-layer.2": 1252.5143, "encoder_q-layer.3": 1340.4952, "encoder_q-layer.4": 1395.0333, "encoder_q-layer.5": 1463.9744, "encoder_q-layer.6": 1458.8175, "encoder_q-layer.7": 1433.6317, "encoder_q-layer.8": 1491.9622, "encoder_q-layer.9": 1211.826, "epoch": 0.63, "inbatch_neg_score": 0.7782, "inbatch_pos_score": 1.3369, "learning_rate": 2.1111111111111114e-06, "loss": 3.8811, "norm_diff": 0.1399, "norm_loss": 0.0, "num_token_doc": 67.0168, "num_token_overlap": 11.6571, "num_token_query": 31.8161, "num_token_union": 65.4728, "num_word_context": 202.3932, "num_word_doc": 49.9895, "num_word_query": 23.4846, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2496.8479, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7778, "query_norm": 1.468, "queue_k_norm": 1.5999, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8161, "sent_len_1": 67.0168, "sent_len_max_0": 127.3713, "sent_len_max_1": 189.4038, "stdk": 0.0486, "stdq": 0.0423, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 96200 }, { "accuracy": 41.6992, "active_queue_size": 16384.0, "cl_loss": 3.8792, "doc_norm": 1.5913, "encoder_q-embeddings": 1496.3984, "encoder_q-layer.0": 1013.2278, "encoder_q-layer.1": 1149.9327, "encoder_q-layer.10": 1453.8372, "encoder_q-layer.11": 3523.365, "encoder_q-layer.2": 1333.2604, "encoder_q-layer.3": 1409.0894, "encoder_q-layer.4": 1468.5983, "encoder_q-layer.5": 1575.8928, "encoder_q-layer.6": 1477.8696, "encoder_q-layer.7": 1551.6344, "encoder_q-layer.8": 1539.8929, "encoder_q-layer.9": 1306.7792, "epoch": 0.63, "inbatch_neg_score": 0.779, "inbatch_pos_score": 1.3096, "learning_rate": 2.055555555555556e-06, "loss": 3.8792, "norm_diff": 0.1312, "norm_loss": 0.0, "num_token_doc": 66.8267, "num_token_overlap": 11.7134, "num_token_query": 31.9385, "num_token_union": 65.3676, "num_word_context": 202.2133, "num_word_doc": 49.8464, "num_word_query": 23.5765, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2577.2648, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7783, "query_norm": 1.4601, "queue_k_norm": 1.5993, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9385, "sent_len_1": 66.8267, "sent_len_max_0": 127.645, "sent_len_max_1": 190.6738, "stdk": 0.0479, "stdq": 0.0419, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 96300 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8951, "doc_norm": 1.6082, "encoder_q-embeddings": 1008.9828, "encoder_q-layer.0": 683.078, "encoder_q-layer.1": 700.9421, "encoder_q-layer.10": 1273.6606, "encoder_q-layer.11": 3246.7622, "encoder_q-layer.2": 800.5411, "encoder_q-layer.3": 825.2476, "encoder_q-layer.4": 892.6656, "encoder_q-layer.5": 962.3618, "encoder_q-layer.6": 1054.0034, "encoder_q-layer.7": 1155.4352, "encoder_q-layer.8": 1275.1891, "encoder_q-layer.9": 1152.7938, "epoch": 0.63, "inbatch_neg_score": 0.7772, "inbatch_pos_score": 1.3447, "learning_rate": 2.0000000000000003e-06, "loss": 3.8951, "norm_diff": 0.1479, "norm_loss": 0.0, "num_token_doc": 66.8011, "num_token_overlap": 11.6544, "num_token_query": 31.853, "num_token_union": 65.3421, "num_word_context": 202.5089, "num_word_doc": 49.8143, "num_word_query": 23.5384, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2073.1152, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7773, "query_norm": 1.4602, "queue_k_norm": 1.5988, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.853, "sent_len_1": 66.8011, "sent_len_max_0": 127.5037, "sent_len_max_1": 189.5075, "stdk": 0.0487, "stdq": 0.042, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 96400 }, { "accuracy": 47.2656, "active_queue_size": 16384.0, "cl_loss": 3.8815, "doc_norm": 1.5989, "encoder_q-embeddings": 1874.7415, "encoder_q-layer.0": 1278.835, "encoder_q-layer.1": 1435.5135, "encoder_q-layer.10": 1331.2452, "encoder_q-layer.11": 3454.6272, "encoder_q-layer.2": 1721.2271, "encoder_q-layer.3": 1696.6759, "encoder_q-layer.4": 1807.3031, "encoder_q-layer.5": 1673.4917, "encoder_q-layer.6": 1798.537, "encoder_q-layer.7": 1633.5413, "encoder_q-layer.8": 1557.3301, "encoder_q-layer.9": 1240.1891, "epoch": 0.63, "inbatch_neg_score": 0.7773, "inbatch_pos_score": 1.3369, "learning_rate": 1.9444444444444444e-06, "loss": 3.8815, "norm_diff": 0.1311, "norm_loss": 0.0, "num_token_doc": 66.8816, "num_token_overlap": 11.6745, "num_token_query": 31.9976, "num_token_union": 65.4977, "num_word_context": 202.3493, "num_word_doc": 49.8779, "num_word_query": 23.6275, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2783.0421, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7778, "query_norm": 1.4678, "queue_k_norm": 1.5978, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9976, "sent_len_1": 66.8816, "sent_len_max_0": 127.4925, "sent_len_max_1": 188.7463, "stdk": 0.0483, "stdq": 0.0423, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 96500 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.8787, "doc_norm": 1.5955, "encoder_q-embeddings": 1312.0823, "encoder_q-layer.0": 885.2714, "encoder_q-layer.1": 970.7673, "encoder_q-layer.10": 1395.1857, "encoder_q-layer.11": 3339.4839, "encoder_q-layer.2": 1140.1882, "encoder_q-layer.3": 1174.8826, "encoder_q-layer.4": 1308.5449, "encoder_q-layer.5": 1385.079, "encoder_q-layer.6": 1514.3804, "encoder_q-layer.7": 1593.9592, "encoder_q-layer.8": 1590.4613, "encoder_q-layer.9": 1244.1886, "epoch": 0.63, "inbatch_neg_score": 0.7793, "inbatch_pos_score": 1.3164, "learning_rate": 1.888888888888889e-06, "loss": 3.8787, "norm_diff": 0.1339, "norm_loss": 0.0, "num_token_doc": 66.5524, "num_token_overlap": 11.7097, "num_token_query": 31.945, "num_token_union": 65.2374, "num_word_context": 201.926, "num_word_doc": 49.7046, "num_word_query": 23.6022, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2420.4601, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7793, "query_norm": 1.4615, "queue_k_norm": 1.6008, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.945, "sent_len_1": 66.5524, "sent_len_max_0": 127.4387, "sent_len_max_1": 187.8225, "stdk": 0.0481, "stdq": 0.042, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 96600 }, { "accuracy": 44.2383, "active_queue_size": 16384.0, "cl_loss": 3.8928, "doc_norm": 1.5956, "encoder_q-embeddings": 1015.0681, "encoder_q-layer.0": 669.6117, "encoder_q-layer.1": 705.0969, "encoder_q-layer.10": 1327.5864, "encoder_q-layer.11": 3243.5869, "encoder_q-layer.2": 800.6646, "encoder_q-layer.3": 816.0602, "encoder_q-layer.4": 897.551, "encoder_q-layer.5": 952.4857, "encoder_q-layer.6": 1042.5098, "encoder_q-layer.7": 1085.6293, "encoder_q-layer.8": 1265.389, "encoder_q-layer.9": 1195.6884, "epoch": 0.63, "inbatch_neg_score": 0.7779, "inbatch_pos_score": 1.3096, "learning_rate": 1.8333333333333335e-06, "loss": 3.8928, "norm_diff": 0.1341, "norm_loss": 0.0, "num_token_doc": 66.794, "num_token_overlap": 11.678, "num_token_query": 31.9972, "num_token_union": 65.4306, "num_word_context": 202.396, "num_word_doc": 49.8444, "num_word_query": 23.6274, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2048.3867, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7778, "query_norm": 1.4615, "queue_k_norm": 1.5991, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9972, "sent_len_1": 66.794, "sent_len_max_0": 127.3425, "sent_len_max_1": 189.9575, "stdk": 0.0481, "stdq": 0.042, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 96700 }, { "accuracy": 45.0195, "active_queue_size": 16384.0, "cl_loss": 3.8876, "doc_norm": 1.6016, "encoder_q-embeddings": 1409.0244, "encoder_q-layer.0": 936.243, "encoder_q-layer.1": 1016.9914, "encoder_q-layer.10": 1340.5312, "encoder_q-layer.11": 3502.7229, "encoder_q-layer.2": 1166.3771, "encoder_q-layer.3": 1285.2507, "encoder_q-layer.4": 1399.4401, "encoder_q-layer.5": 1595.726, "encoder_q-layer.6": 1867.3711, "encoder_q-layer.7": 1869.0775, "encoder_q-layer.8": 1674.7521, "encoder_q-layer.9": 1285.1895, "epoch": 0.63, "inbatch_neg_score": 0.7774, "inbatch_pos_score": 1.3262, "learning_rate": 1.777777777777778e-06, "loss": 3.8876, "norm_diff": 0.1342, "norm_loss": 0.0, "num_token_doc": 66.7435, "num_token_overlap": 11.5782, "num_token_query": 31.609, "num_token_union": 65.2079, "num_word_context": 202.1247, "num_word_doc": 49.8371, "num_word_query": 23.3336, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2544.5586, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7778, "query_norm": 1.4674, "queue_k_norm": 1.5991, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.609, "sent_len_1": 66.7435, "sent_len_max_0": 127.48, "sent_len_max_1": 190.1075, "stdk": 0.0484, "stdq": 0.0423, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 96800 }, { "accuracy": 47.8516, "active_queue_size": 16384.0, "cl_loss": 3.8639, "doc_norm": 1.5991, "encoder_q-embeddings": 1649.3179, "encoder_q-layer.0": 1165.0099, "encoder_q-layer.1": 1345.8932, "encoder_q-layer.10": 1522.4896, "encoder_q-layer.11": 3321.9978, "encoder_q-layer.2": 1479.4552, "encoder_q-layer.3": 1534.7078, "encoder_q-layer.4": 1598.3068, "encoder_q-layer.5": 1685.4789, "encoder_q-layer.6": 1870.3242, "encoder_q-layer.7": 1788.3508, "encoder_q-layer.8": 1759.3391, "encoder_q-layer.9": 1238.5387, "epoch": 0.63, "inbatch_neg_score": 0.7774, "inbatch_pos_score": 1.3477, "learning_rate": 1.7222222222222222e-06, "loss": 3.8639, "norm_diff": 0.1291, "norm_loss": 0.0, "num_token_doc": 67.0113, "num_token_overlap": 11.7063, "num_token_query": 31.9427, "num_token_union": 65.4884, "num_word_context": 202.9098, "num_word_doc": 49.952, "num_word_query": 23.5802, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2706.3868, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7773, "query_norm": 1.47, "queue_k_norm": 1.5994, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9427, "sent_len_1": 67.0113, "sent_len_max_0": 127.4587, "sent_len_max_1": 192.3613, "stdk": 0.0483, "stdq": 0.0424, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 96900 }, { "accuracy": 49.5117, "active_queue_size": 16384.0, "cl_loss": 3.8734, "doc_norm": 1.6042, "encoder_q-embeddings": 1090.2914, "encoder_q-layer.0": 773.8982, "encoder_q-layer.1": 832.4699, "encoder_q-layer.10": 1350.429, "encoder_q-layer.11": 3466.2319, "encoder_q-layer.2": 979.7515, "encoder_q-layer.3": 1035.9985, "encoder_q-layer.4": 1114.416, "encoder_q-layer.5": 1199.4589, "encoder_q-layer.6": 1348.0017, "encoder_q-layer.7": 1363.8676, "encoder_q-layer.8": 1380.5096, "encoder_q-layer.9": 1238.9503, "epoch": 0.63, "inbatch_neg_score": 0.7779, "inbatch_pos_score": 1.3477, "learning_rate": 1.6666666666666667e-06, "loss": 3.8734, "norm_diff": 0.1334, "norm_loss": 0.0, "num_token_doc": 66.9416, "num_token_overlap": 11.6951, "num_token_query": 31.869, "num_token_union": 65.434, "num_word_context": 202.51, "num_word_doc": 49.9606, "num_word_query": 23.5396, "postclip_grad_norm": 1.0, "preclip_grad_norm": 2273.1102, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7788, "query_norm": 1.4707, "queue_k_norm": 1.6005, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.869, "sent_len_1": 66.9416, "sent_len_max_0": 127.4712, "sent_len_max_1": 188.4, "stdk": 0.0485, "stdq": 0.0424, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 97000 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.8785, "doc_norm": 1.6017, "encoder_q-embeddings": 7059.3496, "encoder_q-layer.0": 5356.4502, "encoder_q-layer.1": 6246.6123, "encoder_q-layer.10": 1308.1287, "encoder_q-layer.11": 3381.8306, "encoder_q-layer.2": 7348.4624, "encoder_q-layer.3": 6903.377, "encoder_q-layer.4": 6611.2227, "encoder_q-layer.5": 5845.4102, "encoder_q-layer.6": 6575.604, "encoder_q-layer.7": 7919.1074, "encoder_q-layer.8": 5528.0547, "encoder_q-layer.9": 1979.5061, "epoch": 0.63, "inbatch_neg_score": 0.7794, "inbatch_pos_score": 1.3379, "learning_rate": 1.6111111111111111e-06, "loss": 3.8785, "norm_diff": 0.129, "norm_loss": 0.0, "num_token_doc": 66.9426, "num_token_overlap": 11.7245, "num_token_query": 32.038, "num_token_union": 65.5207, "num_word_context": 202.2801, "num_word_doc": 49.9459, "num_word_query": 23.6648, "postclip_grad_norm": 1.0, "preclip_grad_norm": 9221.6543, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7798, "query_norm": 1.4727, "queue_k_norm": 1.6004, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 32.038, "sent_len_1": 66.9426, "sent_len_max_0": 127.395, "sent_len_max_1": 189.6375, "stdk": 0.0484, "stdq": 0.0425, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 97100 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.9034, "doc_norm": 1.5986, "encoder_q-embeddings": 2420.8735, "encoder_q-layer.0": 1680.6172, "encoder_q-layer.1": 1838.0851, "encoder_q-layer.10": 2522.7454, "encoder_q-layer.11": 6568.4336, "encoder_q-layer.2": 2151.9514, "encoder_q-layer.3": 2334.3948, "encoder_q-layer.4": 2536.1135, "encoder_q-layer.5": 2874.4612, "encoder_q-layer.6": 3039.2324, "encoder_q-layer.7": 2963.4192, "encoder_q-layer.8": 2759.7244, "encoder_q-layer.9": 2346.5046, "epoch": 0.63, "inbatch_neg_score": 0.7796, "inbatch_pos_score": 1.3184, "learning_rate": 1.5555555555555556e-06, "loss": 3.9034, "norm_diff": 0.1427, "norm_loss": 0.0, "num_token_doc": 66.794, "num_token_overlap": 11.6502, "num_token_query": 31.8296, "num_token_union": 65.331, "num_word_context": 202.1561, "num_word_doc": 49.8011, "num_word_query": 23.518, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4616.5224, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7803, "query_norm": 1.4559, "queue_k_norm": 1.5982, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8296, "sent_len_1": 66.794, "sent_len_max_0": 127.525, "sent_len_max_1": 190.3925, "stdk": 0.0482, "stdq": 0.0417, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 97200 }, { "accuracy": 45.4102, "active_queue_size": 16384.0, "cl_loss": 3.8786, "doc_norm": 1.6047, "encoder_q-embeddings": 13435.9707, "encoder_q-layer.0": 9703.0518, "encoder_q-layer.1": 12117.6641, "encoder_q-layer.10": 2535.4431, "encoder_q-layer.11": 6850.9868, "encoder_q-layer.2": 14851.9229, "encoder_q-layer.3": 13410.0117, "encoder_q-layer.4": 13396.1123, "encoder_q-layer.5": 9752.1475, "encoder_q-layer.6": 8401.3418, "encoder_q-layer.7": 7979.2744, "encoder_q-layer.8": 5554.6694, "encoder_q-layer.9": 2720.6719, "epoch": 0.63, "inbatch_neg_score": 0.779, "inbatch_pos_score": 1.334, "learning_rate": 1.5e-06, "loss": 3.8786, "norm_diff": 0.1461, "norm_loss": 0.0, "num_token_doc": 66.7284, "num_token_overlap": 11.7306, "num_token_query": 31.9862, "num_token_union": 65.3714, "num_word_context": 202.5627, "num_word_doc": 49.8019, "num_word_query": 23.6457, "postclip_grad_norm": 1.0, "preclip_grad_norm": 15621.8859, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7803, "query_norm": 1.4586, "queue_k_norm": 1.6008, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9862, "sent_len_1": 66.7284, "sent_len_max_0": 127.4675, "sent_len_max_1": 190.0613, "stdk": 0.0485, "stdq": 0.0419, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 97300 }, { "accuracy": 43.2617, "active_queue_size": 16384.0, "cl_loss": 3.8994, "doc_norm": 1.5993, "encoder_q-embeddings": 2030.3667, "encoder_q-layer.0": 1366.4357, "encoder_q-layer.1": 1466.684, "encoder_q-layer.10": 2809.2014, "encoder_q-layer.11": 6902.498, "encoder_q-layer.2": 1600.5576, "encoder_q-layer.3": 1606.717, "encoder_q-layer.4": 1743.7634, "encoder_q-layer.5": 1762.6339, "encoder_q-layer.6": 2036.0535, "encoder_q-layer.7": 2252.6213, "encoder_q-layer.8": 2652.9875, "encoder_q-layer.9": 2429.5864, "epoch": 0.63, "inbatch_neg_score": 0.78, "inbatch_pos_score": 1.3203, "learning_rate": 1.4444444444444445e-06, "loss": 3.8994, "norm_diff": 0.1357, "norm_loss": 0.0, "num_token_doc": 66.7189, "num_token_overlap": 11.6421, "num_token_query": 31.7517, "num_token_union": 65.2518, "num_word_context": 202.2552, "num_word_doc": 49.778, "num_word_query": 23.4576, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4324.0363, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7808, "query_norm": 1.4636, "queue_k_norm": 1.6007, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7517, "sent_len_1": 66.7189, "sent_len_max_0": 127.6137, "sent_len_max_1": 189.305, "stdk": 0.0483, "stdq": 0.0421, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 97400 }, { "accuracy": 48.5352, "active_queue_size": 16384.0, "cl_loss": 3.8847, "doc_norm": 1.6012, "encoder_q-embeddings": 2046.2618, "encoder_q-layer.0": 1363.244, "encoder_q-layer.1": 1440.6318, "encoder_q-layer.10": 3054.0962, "encoder_q-layer.11": 6860.3784, "encoder_q-layer.2": 1611.528, "encoder_q-layer.3": 1656.2129, "encoder_q-layer.4": 1702.4922, "encoder_q-layer.5": 1722.967, "encoder_q-layer.6": 1996.6185, "encoder_q-layer.7": 2197.9443, "encoder_q-layer.8": 2560.0178, "encoder_q-layer.9": 2501.7366, "epoch": 0.63, "inbatch_neg_score": 0.7811, "inbatch_pos_score": 1.3467, "learning_rate": 1.388888888888889e-06, "loss": 3.8847, "norm_diff": 0.1369, "norm_loss": 0.0, "num_token_doc": 66.6654, "num_token_overlap": 11.7213, "num_token_query": 31.927, "num_token_union": 65.2921, "num_word_context": 202.012, "num_word_doc": 49.7217, "num_word_query": 23.5843, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4225.4681, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7808, "query_norm": 1.4642, "queue_k_norm": 1.5994, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.927, "sent_len_1": 66.6654, "sent_len_max_0": 127.465, "sent_len_max_1": 189.7113, "stdk": 0.0483, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 97500 }, { "accuracy": 47.0703, "active_queue_size": 16384.0, "cl_loss": 3.8949, "doc_norm": 1.602, "encoder_q-embeddings": 2552.5598, "encoder_q-layer.0": 1732.6177, "encoder_q-layer.1": 1839.7493, "encoder_q-layer.10": 2497.2239, "encoder_q-layer.11": 6696.9565, "encoder_q-layer.2": 2004.0048, "encoder_q-layer.3": 2128.6157, "encoder_q-layer.4": 2298.5078, "encoder_q-layer.5": 2342.5957, "encoder_q-layer.6": 2460.709, "encoder_q-layer.7": 2608.6326, "encoder_q-layer.8": 2754.5127, "encoder_q-layer.9": 2353.9861, "epoch": 0.64, "inbatch_neg_score": 0.7795, "inbatch_pos_score": 1.3398, "learning_rate": 1.3333333333333334e-06, "loss": 3.8949, "norm_diff": 0.1412, "norm_loss": 0.0, "num_token_doc": 66.7604, "num_token_overlap": 11.6878, "num_token_query": 31.9994, "num_token_union": 65.4176, "num_word_context": 202.3389, "num_word_doc": 49.829, "num_word_query": 23.6452, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4607.1057, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7803, "query_norm": 1.4609, "queue_k_norm": 1.6015, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9994, "sent_len_1": 66.7604, "sent_len_max_0": 127.585, "sent_len_max_1": 187.8187, "stdk": 0.0484, "stdq": 0.042, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 97600 }, { "accuracy": 45.8008, "active_queue_size": 16384.0, "cl_loss": 3.8748, "doc_norm": 1.603, "encoder_q-embeddings": 1901.2834, "encoder_q-layer.0": 1269.0286, "encoder_q-layer.1": 1274.5286, "encoder_q-layer.10": 2420.3733, "encoder_q-layer.11": 6306.1929, "encoder_q-layer.2": 1408.142, "encoder_q-layer.3": 1483.0027, "encoder_q-layer.4": 1542.0801, "encoder_q-layer.5": 1583.5605, "encoder_q-layer.6": 1774.8333, "encoder_q-layer.7": 2008.4069, "encoder_q-layer.8": 2465.7754, "encoder_q-layer.9": 2310.1111, "epoch": 0.64, "inbatch_neg_score": 0.78, "inbatch_pos_score": 1.335, "learning_rate": 1.2777777777777779e-06, "loss": 3.8748, "norm_diff": 0.139, "norm_loss": 0.0, "num_token_doc": 66.7275, "num_token_overlap": 11.6764, "num_token_query": 31.9826, "num_token_union": 65.3582, "num_word_context": 202.4287, "num_word_doc": 49.7824, "num_word_query": 23.6436, "postclip_grad_norm": 1.0, "preclip_grad_norm": 3932.9202, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7803, "query_norm": 1.464, "queue_k_norm": 1.6015, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9826, "sent_len_1": 66.7275, "sent_len_max_0": 127.5512, "sent_len_max_1": 189.1987, "stdk": 0.0484, "stdq": 0.0421, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 97700 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.9151, "doc_norm": 1.6048, "encoder_q-embeddings": 1956.0392, "encoder_q-layer.0": 1335.074, "encoder_q-layer.1": 1379.9376, "encoder_q-layer.10": 2645.7219, "encoder_q-layer.11": 6552.4077, "encoder_q-layer.2": 1508.6476, "encoder_q-layer.3": 1612.8771, "encoder_q-layer.4": 1751.5693, "encoder_q-layer.5": 1752.5535, "encoder_q-layer.6": 2046.8956, "encoder_q-layer.7": 2303.0986, "encoder_q-layer.8": 2667.8755, "encoder_q-layer.9": 2363.2043, "epoch": 0.64, "inbatch_neg_score": 0.7815, "inbatch_pos_score": 1.3408, "learning_rate": 1.2222222222222223e-06, "loss": 3.9151, "norm_diff": 0.1446, "norm_loss": 0.0, "num_token_doc": 66.6271, "num_token_overlap": 11.6795, "num_token_query": 31.806, "num_token_union": 65.164, "num_word_context": 202.4221, "num_word_doc": 49.7005, "num_word_query": 23.4916, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4174.2275, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7822, "query_norm": 1.4602, "queue_k_norm": 1.5996, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.806, "sent_len_1": 66.6271, "sent_len_max_0": 127.45, "sent_len_max_1": 188.7413, "stdk": 0.0485, "stdq": 0.0419, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 97800 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.8683, "doc_norm": 1.6049, "encoder_q-embeddings": 2348.8989, "encoder_q-layer.0": 1630.9148, "encoder_q-layer.1": 1702.5315, "encoder_q-layer.10": 2757.7124, "encoder_q-layer.11": 7042.7803, "encoder_q-layer.2": 1972.5956, "encoder_q-layer.3": 2091.4163, "encoder_q-layer.4": 2251.0664, "encoder_q-layer.5": 2158.0134, "encoder_q-layer.6": 2332.4382, "encoder_q-layer.7": 2510.3567, "encoder_q-layer.8": 2882.104, "encoder_q-layer.9": 2648.9233, "epoch": 0.64, "inbatch_neg_score": 0.7806, "inbatch_pos_score": 1.3467, "learning_rate": 1.1666666666666668e-06, "loss": 3.8683, "norm_diff": 0.1383, "norm_loss": 0.0, "num_token_doc": 66.9351, "num_token_overlap": 11.7065, "num_token_query": 31.8756, "num_token_union": 65.4154, "num_word_context": 202.0404, "num_word_doc": 49.9399, "num_word_query": 23.535, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4617.5193, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7812, "query_norm": 1.4666, "queue_k_norm": 1.6016, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8756, "sent_len_1": 66.9351, "sent_len_max_0": 127.6825, "sent_len_max_1": 187.8562, "stdk": 0.0485, "stdq": 0.0422, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 97900 }, { "accuracy": 44.9219, "active_queue_size": 16384.0, "cl_loss": 3.8648, "doc_norm": 1.6044, "encoder_q-embeddings": 2074.7576, "encoder_q-layer.0": 1387.1102, "encoder_q-layer.1": 1466.292, "encoder_q-layer.10": 2663.0696, "encoder_q-layer.11": 6806.6616, "encoder_q-layer.2": 1646.9413, "encoder_q-layer.3": 1676.1648, "encoder_q-layer.4": 1823.7947, "encoder_q-layer.5": 1886.1229, "encoder_q-layer.6": 2067.9021, "encoder_q-layer.7": 2267.5676, "encoder_q-layer.8": 2586.416, "encoder_q-layer.9": 2351.8525, "epoch": 0.64, "inbatch_neg_score": 0.7812, "inbatch_pos_score": 1.3311, "learning_rate": 1.1111111111111112e-06, "loss": 3.8648, "norm_diff": 0.1343, "norm_loss": 0.0, "num_token_doc": 66.7807, "num_token_overlap": 11.7042, "num_token_query": 31.8605, "num_token_union": 65.321, "num_word_context": 202.2292, "num_word_doc": 49.8257, "num_word_query": 23.5197, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4280.3422, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7812, "query_norm": 1.4701, "queue_k_norm": 1.5999, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.8605, "sent_len_1": 66.7807, "sent_len_max_0": 127.545, "sent_len_max_1": 189.2188, "stdk": 0.0484, "stdq": 0.0424, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 98000 }, { "accuracy": 48.0469, "active_queue_size": 16384.0, "cl_loss": 3.8994, "doc_norm": 1.6006, "encoder_q-embeddings": 2715.2732, "encoder_q-layer.0": 1884.7885, "encoder_q-layer.1": 2052.002, "encoder_q-layer.10": 2432.0686, "encoder_q-layer.11": 6604.4136, "encoder_q-layer.2": 2403.8992, "encoder_q-layer.3": 2636.7224, "encoder_q-layer.4": 2739.2192, "encoder_q-layer.5": 2745.9937, "encoder_q-layer.6": 3136.9004, "encoder_q-layer.7": 3535.3625, "encoder_q-layer.8": 3133.658, "encoder_q-layer.9": 2350.6064, "epoch": 0.64, "inbatch_neg_score": 0.7821, "inbatch_pos_score": 1.3613, "learning_rate": 1.0555555555555557e-06, "loss": 3.8994, "norm_diff": 0.1284, "norm_loss": 0.0, "num_token_doc": 66.877, "num_token_overlap": 11.6776, "num_token_query": 31.9342, "num_token_union": 65.4812, "num_word_context": 202.6598, "num_word_doc": 49.9254, "num_word_query": 23.6043, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4849.9445, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7817, "query_norm": 1.4722, "queue_k_norm": 1.6022, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9342, "sent_len_1": 66.877, "sent_len_max_0": 127.545, "sent_len_max_1": 189.885, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 98100 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8904, "doc_norm": 1.5988, "encoder_q-embeddings": 2289.3994, "encoder_q-layer.0": 1618.4598, "encoder_q-layer.1": 1839.3842, "encoder_q-layer.10": 2819.0852, "encoder_q-layer.11": 7014.1069, "encoder_q-layer.2": 2089.0967, "encoder_q-layer.3": 2225.7666, "encoder_q-layer.4": 2354.658, "encoder_q-layer.5": 2283.5425, "encoder_q-layer.6": 2568.1233, "encoder_q-layer.7": 2626.9431, "encoder_q-layer.8": 2754.522, "encoder_q-layer.9": 2479.9729, "epoch": 0.64, "inbatch_neg_score": 0.7833, "inbatch_pos_score": 1.3438, "learning_rate": 1.0000000000000002e-06, "loss": 3.8904, "norm_diff": 0.1345, "norm_loss": 0.0, "num_token_doc": 66.4964, "num_token_overlap": 11.6342, "num_token_query": 31.7264, "num_token_union": 65.0879, "num_word_context": 201.7881, "num_word_doc": 49.6395, "num_word_query": 23.4424, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4588.3626, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7827, "query_norm": 1.4643, "queue_k_norm": 1.6017, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.7264, "sent_len_1": 66.4964, "sent_len_max_0": 127.5113, "sent_len_max_1": 188.925, "stdk": 0.0482, "stdq": 0.0421, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 98200 }, { "accuracy": 46.0938, "active_queue_size": 16384.0, "cl_loss": 3.8853, "doc_norm": 1.598, "encoder_q-embeddings": 2915.1023, "encoder_q-layer.0": 2009.0365, "encoder_q-layer.1": 2259.6904, "encoder_q-layer.10": 2546.9751, "encoder_q-layer.11": 6670.9155, "encoder_q-layer.2": 2705.75, "encoder_q-layer.3": 2983.7954, "encoder_q-layer.4": 3368.2939, "encoder_q-layer.5": 3647.1704, "encoder_q-layer.6": 3859.8303, "encoder_q-layer.7": 4166.4697, "encoder_q-layer.8": 3590.0015, "encoder_q-layer.9": 2559.7336, "epoch": 0.64, "inbatch_neg_score": 0.7829, "inbatch_pos_score": 1.3379, "learning_rate": 9.444444444444445e-07, "loss": 3.8853, "norm_diff": 0.1238, "norm_loss": 0.0, "num_token_doc": 66.7179, "num_token_overlap": 11.6619, "num_token_query": 31.9693, "num_token_union": 65.4067, "num_word_context": 202.5144, "num_word_doc": 49.7742, "num_word_query": 23.6305, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5337.2242, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7832, "query_norm": 1.4741, "queue_k_norm": 1.6028, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9693, "sent_len_1": 66.7179, "sent_len_max_0": 127.375, "sent_len_max_1": 191.265, "stdk": 0.0481, "stdq": 0.0425, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 98300 }, { "accuracy": 45.7031, "active_queue_size": 16384.0, "cl_loss": 3.8865, "doc_norm": 1.6012, "encoder_q-embeddings": 2056.5637, "encoder_q-layer.0": 1342.5116, "encoder_q-layer.1": 1495.6669, "encoder_q-layer.10": 2562.0732, "encoder_q-layer.11": 6398.9019, "encoder_q-layer.2": 1648.1268, "encoder_q-layer.3": 1610.9908, "encoder_q-layer.4": 1726.3982, "encoder_q-layer.5": 1755.4012, "encoder_q-layer.6": 2050.3738, "encoder_q-layer.7": 2157.1021, "encoder_q-layer.8": 2458.0862, "encoder_q-layer.9": 2262.6121, "epoch": 0.64, "inbatch_neg_score": 0.7853, "inbatch_pos_score": 1.3271, "learning_rate": 8.88888888888889e-07, "loss": 3.8865, "norm_diff": 0.1401, "norm_loss": 0.0, "num_token_doc": 66.7398, "num_token_overlap": 11.6513, "num_token_query": 31.836, "num_token_union": 65.364, "num_word_context": 202.2048, "num_word_doc": 49.8327, "num_word_query": 23.4951, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4048.8606, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7842, "query_norm": 1.4611, "queue_k_norm": 1.603, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.836, "sent_len_1": 66.7398, "sent_len_max_0": 127.5413, "sent_len_max_1": 186.965, "stdk": 0.0483, "stdq": 0.0419, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 98400 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.8698, "doc_norm": 1.6024, "encoder_q-embeddings": 1947.244, "encoder_q-layer.0": 1330.9479, "encoder_q-layer.1": 1437.64, "encoder_q-layer.10": 2549.3982, "encoder_q-layer.11": 6658.4199, "encoder_q-layer.2": 1665.6863, "encoder_q-layer.3": 1637.0115, "encoder_q-layer.4": 1788.8124, "encoder_q-layer.5": 1828.8873, "encoder_q-layer.6": 2024.8759, "encoder_q-layer.7": 2287.145, "encoder_q-layer.8": 2599.0488, "encoder_q-layer.9": 2494.6338, "epoch": 0.64, "inbatch_neg_score": 0.7835, "inbatch_pos_score": 1.3467, "learning_rate": 8.333333333333333e-07, "loss": 3.8698, "norm_diff": 0.1388, "norm_loss": 0.0, "num_token_doc": 66.5999, "num_token_overlap": 11.6062, "num_token_query": 31.7307, "num_token_union": 65.1869, "num_word_context": 202.3418, "num_word_doc": 49.6971, "num_word_query": 23.4294, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4221.9022, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7842, "query_norm": 1.4636, "queue_k_norm": 1.6031, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.7307, "sent_len_1": 66.5999, "sent_len_max_0": 127.55, "sent_len_max_1": 190.2088, "stdk": 0.0483, "stdq": 0.0421, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 98500 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.8655, "doc_norm": 1.6052, "encoder_q-embeddings": 2144.2607, "encoder_q-layer.0": 1405.0872, "encoder_q-layer.1": 1468.2344, "encoder_q-layer.10": 2852.7432, "encoder_q-layer.11": 7215.3354, "encoder_q-layer.2": 1715.0098, "encoder_q-layer.3": 1761.754, "encoder_q-layer.4": 1820.7988, "encoder_q-layer.5": 1921.8719, "encoder_q-layer.6": 2147.8953, "encoder_q-layer.7": 2236.6423, "encoder_q-layer.8": 2625.6194, "encoder_q-layer.9": 2705.4075, "epoch": 0.64, "inbatch_neg_score": 0.7838, "inbatch_pos_score": 1.3193, "learning_rate": 7.777777777777778e-07, "loss": 3.8655, "norm_diff": 0.1414, "norm_loss": 0.0, "num_token_doc": 66.6879, "num_token_overlap": 11.7317, "num_token_query": 32.0276, "num_token_union": 65.357, "num_word_context": 202.3457, "num_word_doc": 49.7493, "num_word_query": 23.657, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4419.5036, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7832, "query_norm": 1.4638, "queue_k_norm": 1.6032, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 32.0276, "sent_len_1": 66.6879, "sent_len_max_0": 127.4862, "sent_len_max_1": 191.055, "stdk": 0.0484, "stdq": 0.0421, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 98600 }, { "accuracy": 48.8281, "active_queue_size": 16384.0, "cl_loss": 3.8956, "doc_norm": 1.6092, "encoder_q-embeddings": 3103.658, "encoder_q-layer.0": 2077.7471, "encoder_q-layer.1": 2385.9546, "encoder_q-layer.10": 3032.6855, "encoder_q-layer.11": 6749.5859, "encoder_q-layer.2": 3017.6047, "encoder_q-layer.3": 3309.2544, "encoder_q-layer.4": 3681.4312, "encoder_q-layer.5": 3474.6787, "encoder_q-layer.6": 3356.929, "encoder_q-layer.7": 3055.8914, "encoder_q-layer.8": 3082.2869, "encoder_q-layer.9": 2524.3154, "epoch": 0.64, "inbatch_neg_score": 0.7835, "inbatch_pos_score": 1.333, "learning_rate": 7.222222222222222e-07, "loss": 3.8956, "norm_diff": 0.1387, "norm_loss": 0.0, "num_token_doc": 66.8358, "num_token_overlap": 11.6657, "num_token_query": 31.8729, "num_token_union": 65.3728, "num_word_context": 202.5934, "num_word_doc": 49.8866, "num_word_query": 23.5317, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5283.3143, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7832, "query_norm": 1.4705, "queue_k_norm": 1.6026, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8729, "sent_len_1": 66.8358, "sent_len_max_0": 127.6663, "sent_len_max_1": 190.0913, "stdk": 0.0486, "stdq": 0.0424, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 98700 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.918, "doc_norm": 1.5972, "encoder_q-embeddings": 2037.0402, "encoder_q-layer.0": 1385.8779, "encoder_q-layer.1": 1506.7994, "encoder_q-layer.10": 2574.3691, "encoder_q-layer.11": 6528.0801, "encoder_q-layer.2": 1714.5657, "encoder_q-layer.3": 1732.667, "encoder_q-layer.4": 1869.2605, "encoder_q-layer.5": 1996.5181, "encoder_q-layer.6": 2140.126, "encoder_q-layer.7": 2452.5122, "encoder_q-layer.8": 2655.9541, "encoder_q-layer.9": 2418.1975, "epoch": 0.64, "inbatch_neg_score": 0.7848, "inbatch_pos_score": 1.3291, "learning_rate": 6.666666666666667e-07, "loss": 3.918, "norm_diff": 0.1364, "norm_loss": 0.0, "num_token_doc": 66.8149, "num_token_overlap": 11.6642, "num_token_query": 31.9525, "num_token_union": 65.4153, "num_word_context": 202.3049, "num_word_doc": 49.8731, "num_word_query": 23.5846, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4200.079, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7847, "query_norm": 1.4607, "queue_k_norm": 1.6017, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.9525, "sent_len_1": 66.8149, "sent_len_max_0": 127.6775, "sent_len_max_1": 190.2262, "stdk": 0.0481, "stdq": 0.0419, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 98800 }, { "accuracy": 46.7773, "active_queue_size": 16384.0, "cl_loss": 3.8635, "doc_norm": 1.598, "encoder_q-embeddings": 2558.6411, "encoder_q-layer.0": 1736.1559, "encoder_q-layer.1": 1930.3523, "encoder_q-layer.10": 2711.5415, "encoder_q-layer.11": 7081.2075, "encoder_q-layer.2": 2300.5576, "encoder_q-layer.3": 2674.0579, "encoder_q-layer.4": 3021.6377, "encoder_q-layer.5": 3036.3494, "encoder_q-layer.6": 3790.0913, "encoder_q-layer.7": 3766.2595, "encoder_q-layer.8": 3758.5266, "encoder_q-layer.9": 2424.6882, "epoch": 0.64, "inbatch_neg_score": 0.7826, "inbatch_pos_score": 1.332, "learning_rate": 6.111111111111112e-07, "loss": 3.8635, "norm_diff": 0.1357, "norm_loss": 0.0, "num_token_doc": 66.5554, "num_token_overlap": 11.6898, "num_token_query": 31.9245, "num_token_union": 65.2192, "num_word_context": 202.0275, "num_word_doc": 49.6528, "num_word_query": 23.5673, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5263.1632, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7837, "query_norm": 1.4623, "queue_k_norm": 1.6027, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9245, "sent_len_1": 66.5554, "sent_len_max_0": 127.4, "sent_len_max_1": 189.1587, "stdk": 0.0482, "stdq": 0.042, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 98900 }, { "accuracy": 43.5547, "active_queue_size": 16384.0, "cl_loss": 3.8768, "doc_norm": 1.6083, "encoder_q-embeddings": 2935.8311, "encoder_q-layer.0": 1937.2252, "encoder_q-layer.1": 2167.5757, "encoder_q-layer.10": 2921.6199, "encoder_q-layer.11": 7157.7222, "encoder_q-layer.2": 2455.8928, "encoder_q-layer.3": 2888.1963, "encoder_q-layer.4": 3166.9343, "encoder_q-layer.5": 3440.3101, "encoder_q-layer.6": 3549.9126, "encoder_q-layer.7": 3492.947, "encoder_q-layer.8": 3734.8174, "encoder_q-layer.9": 2913.8835, "epoch": 0.64, "inbatch_neg_score": 0.7849, "inbatch_pos_score": 1.3184, "learning_rate": 5.555555555555556e-07, "loss": 3.8768, "norm_diff": 0.1476, "norm_loss": 0.0, "num_token_doc": 66.845, "num_token_overlap": 11.6536, "num_token_query": 31.9118, "num_token_union": 65.4083, "num_word_context": 202.4033, "num_word_doc": 49.8598, "num_word_query": 23.5751, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5362.1469, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7847, "query_norm": 1.4607, "queue_k_norm": 1.6033, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.9118, "sent_len_1": 66.845, "sent_len_max_0": 127.6238, "sent_len_max_1": 190.7325, "stdk": 0.0486, "stdq": 0.0419, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 99000 }, { "accuracy": 47.168, "active_queue_size": 16384.0, "cl_loss": 3.8643, "doc_norm": 1.6052, "encoder_q-embeddings": 2238.6936, "encoder_q-layer.0": 1555.6364, "encoder_q-layer.1": 1727.9968, "encoder_q-layer.10": 3430.9617, "encoder_q-layer.11": 8393.1064, "encoder_q-layer.2": 1929.4777, "encoder_q-layer.3": 2021.9198, "encoder_q-layer.4": 2076.2656, "encoder_q-layer.5": 2178.1711, "encoder_q-layer.6": 2466.7871, "encoder_q-layer.7": 2552.7546, "encoder_q-layer.8": 2808.5969, "encoder_q-layer.9": 2500.0708, "epoch": 0.65, "inbatch_neg_score": 0.7851, "inbatch_pos_score": 1.3486, "learning_rate": 5.000000000000001e-07, "loss": 3.8643, "norm_diff": 0.1382, "norm_loss": 0.0, "num_token_doc": 66.9222, "num_token_overlap": 11.6828, "num_token_query": 31.8483, "num_token_union": 65.4104, "num_word_context": 202.2872, "num_word_doc": 49.9493, "num_word_query": 23.509, "postclip_grad_norm": 1.0, "preclip_grad_norm": 5239.913, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7847, "query_norm": 1.467, "queue_k_norm": 1.6035, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8483, "sent_len_1": 66.9222, "sent_len_max_0": 127.44, "sent_len_max_1": 189.6138, "stdk": 0.0484, "stdq": 0.0422, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 99100 }, { "accuracy": 46.2891, "active_queue_size": 16384.0, "cl_loss": 3.9134, "doc_norm": 1.6096, "encoder_q-embeddings": 4435.3818, "encoder_q-layer.0": 3199.4893, "encoder_q-layer.1": 3473.0129, "encoder_q-layer.10": 2688.9976, "encoder_q-layer.11": 6946.2227, "encoder_q-layer.2": 3958.5754, "encoder_q-layer.3": 4124.5234, "encoder_q-layer.4": 4654.791, "encoder_q-layer.5": 4362.7358, "encoder_q-layer.6": 4147.8677, "encoder_q-layer.7": 3894.7947, "encoder_q-layer.8": 3766.937, "encoder_q-layer.9": 2703.8042, "epoch": 0.65, "inbatch_neg_score": 0.7841, "inbatch_pos_score": 1.3516, "learning_rate": 4.444444444444445e-07, "loss": 3.9134, "norm_diff": 0.1453, "norm_loss": 0.0, "num_token_doc": 66.8604, "num_token_overlap": 11.6205, "num_token_query": 31.805, "num_token_union": 65.3966, "num_word_context": 202.3479, "num_word_doc": 49.8979, "num_word_query": 23.4987, "postclip_grad_norm": 1.0, "preclip_grad_norm": 6499.068, "preclip_grad_norm_avg": 0.0001, "q@queue_neg_score": 0.7847, "query_norm": 1.4642, "queue_k_norm": 1.6038, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.805, "sent_len_1": 66.8604, "sent_len_max_0": 127.5662, "sent_len_max_1": 189.3512, "stdk": 0.0486, "stdq": 0.0421, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 99200 }, { "accuracy": 46.3867, "active_queue_size": 16384.0, "cl_loss": 3.8919, "doc_norm": 1.5988, "encoder_q-embeddings": 2095.3726, "encoder_q-layer.0": 1478.4707, "encoder_q-layer.1": 1519.6444, "encoder_q-layer.10": 2733.8796, "encoder_q-layer.11": 6772.7588, "encoder_q-layer.2": 1760.7255, "encoder_q-layer.3": 1827.8617, "encoder_q-layer.4": 1847.6713, "encoder_q-layer.5": 1955.1255, "encoder_q-layer.6": 2133.5498, "encoder_q-layer.7": 2303.9744, "encoder_q-layer.8": 2648.501, "encoder_q-layer.9": 2489.0508, "epoch": 0.65, "inbatch_neg_score": 0.783, "inbatch_pos_score": 1.3447, "learning_rate": 3.888888888888889e-07, "loss": 3.8919, "norm_diff": 0.1329, "norm_loss": 0.0, "num_token_doc": 66.8826, "num_token_overlap": 11.705, "num_token_query": 32.0582, "num_token_union": 65.4835, "num_word_context": 202.3004, "num_word_doc": 49.877, "num_word_query": 23.68, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4327.6054, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7827, "query_norm": 1.466, "queue_k_norm": 1.604, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 32.0582, "sent_len_1": 66.8826, "sent_len_max_0": 127.5413, "sent_len_max_1": 189.7962, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0485, "stdqueue_q": 0.0, "step": 99300 }, { "accuracy": 45.9961, "active_queue_size": 16384.0, "cl_loss": 3.8742, "doc_norm": 1.5993, "encoder_q-embeddings": 2145.0125, "encoder_q-layer.0": 1466.8593, "encoder_q-layer.1": 1567.3807, "encoder_q-layer.10": 2507.9197, "encoder_q-layer.11": 6779.0244, "encoder_q-layer.2": 1753.7731, "encoder_q-layer.3": 1831.8192, "encoder_q-layer.4": 1955.0911, "encoder_q-layer.5": 1996.6211, "encoder_q-layer.6": 2199.7585, "encoder_q-layer.7": 2349.4712, "encoder_q-layer.8": 2571.5994, "encoder_q-layer.9": 2387.7817, "epoch": 0.65, "inbatch_neg_score": 0.7844, "inbatch_pos_score": 1.3301, "learning_rate": 3.3333333333333335e-07, "loss": 3.8742, "norm_diff": 0.1344, "norm_loss": 0.0, "num_token_doc": 66.6156, "num_token_overlap": 11.6535, "num_token_query": 31.8, "num_token_union": 65.2218, "num_word_context": 202.126, "num_word_doc": 49.701, "num_word_query": 23.4783, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4367.6214, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7847, "query_norm": 1.4649, "queue_k_norm": 1.603, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8, "sent_len_1": 66.6156, "sent_len_max_0": 127.5413, "sent_len_max_1": 189.7463, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 99400 }, { "accuracy": 47.4609, "active_queue_size": 16384.0, "cl_loss": 3.8985, "doc_norm": 1.6006, "encoder_q-embeddings": 1958.1676, "encoder_q-layer.0": 1287.647, "encoder_q-layer.1": 1328.8726, "encoder_q-layer.10": 3005.8281, "encoder_q-layer.11": 6591.5195, "encoder_q-layer.2": 1509.9871, "encoder_q-layer.3": 1563.2538, "encoder_q-layer.4": 1648.5526, "encoder_q-layer.5": 1720.1658, "encoder_q-layer.6": 2003.0457, "encoder_q-layer.7": 2221.4915, "encoder_q-layer.8": 2672.4023, "encoder_q-layer.9": 2434.2207, "epoch": 0.65, "inbatch_neg_score": 0.7846, "inbatch_pos_score": 1.3477, "learning_rate": 2.777777777777778e-07, "loss": 3.8985, "norm_diff": 0.1335, "norm_loss": 0.0, "num_token_doc": 66.8663, "num_token_overlap": 11.6808, "num_token_query": 31.8469, "num_token_union": 65.4003, "num_word_context": 202.4129, "num_word_doc": 49.8261, "num_word_query": 23.5121, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4163.8689, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7847, "query_norm": 1.4671, "queue_k_norm": 1.6022, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.8469, "sent_len_1": 66.8663, "sent_len_max_0": 127.4213, "sent_len_max_1": 188.9737, "stdk": 0.0482, "stdq": 0.0422, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 99500 }, { "accuracy": 46.875, "active_queue_size": 16384.0, "cl_loss": 3.8856, "doc_norm": 1.6072, "encoder_q-embeddings": 2466.5588, "encoder_q-layer.0": 1788.0814, "encoder_q-layer.1": 1925.4646, "encoder_q-layer.10": 2624.7512, "encoder_q-layer.11": 6907.7202, "encoder_q-layer.2": 2231.2671, "encoder_q-layer.3": 2419.3628, "encoder_q-layer.4": 2580.3406, "encoder_q-layer.5": 2661.4688, "encoder_q-layer.6": 2787.3252, "encoder_q-layer.7": 3038.7134, "encoder_q-layer.8": 3070.842, "encoder_q-layer.9": 2526.0342, "epoch": 0.65, "inbatch_neg_score": 0.7845, "inbatch_pos_score": 1.3477, "learning_rate": 2.2222222222222224e-07, "loss": 3.8856, "norm_diff": 0.1446, "norm_loss": 0.0, "num_token_doc": 66.7144, "num_token_overlap": 11.7458, "num_token_query": 32.0116, "num_token_union": 65.342, "num_word_context": 202.239, "num_word_doc": 49.7925, "num_word_query": 23.6628, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4832.8522, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7847, "query_norm": 1.4626, "queue_k_norm": 1.6032, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 32.0116, "sent_len_1": 66.7144, "sent_len_max_0": 127.535, "sent_len_max_1": 189.15, "stdk": 0.0485, "stdq": 0.042, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 99600 }, { "accuracy": 47.6562, "active_queue_size": 16384.0, "cl_loss": 3.8678, "doc_norm": 1.5978, "encoder_q-embeddings": 2308.2661, "encoder_q-layer.0": 1579.8948, "encoder_q-layer.1": 1652.8452, "encoder_q-layer.10": 2784.7471, "encoder_q-layer.11": 6881.8394, "encoder_q-layer.2": 1827.2471, "encoder_q-layer.3": 1873.6425, "encoder_q-layer.4": 2070.834, "encoder_q-layer.5": 2113.0884, "encoder_q-layer.6": 2440.6409, "encoder_q-layer.7": 2543.376, "encoder_q-layer.8": 2749.7339, "encoder_q-layer.9": 2521.0298, "epoch": 0.65, "inbatch_neg_score": 0.7857, "inbatch_pos_score": 1.3398, "learning_rate": 1.6666666666666668e-07, "loss": 3.8678, "norm_diff": 0.1335, "norm_loss": 0.0, "num_token_doc": 66.7307, "num_token_overlap": 11.6947, "num_token_query": 31.9084, "num_token_union": 65.3167, "num_word_context": 202.3541, "num_word_doc": 49.8121, "num_word_query": 23.5676, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4557.6968, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7852, "query_norm": 1.4644, "queue_k_norm": 1.6011, "queue_ptr": 4096.0, "queue_q_norm": 0.0, "sent_len_0": 31.9084, "sent_len_1": 66.7307, "sent_len_max_0": 127.6825, "sent_len_max_1": 188.9263, "stdk": 0.0481, "stdq": 0.0421, "stdqueue_k": 0.0483, "stdqueue_q": 0.0, "step": 99700 }, { "accuracy": 48.2422, "active_queue_size": 16384.0, "cl_loss": 3.8986, "doc_norm": 1.6024, "encoder_q-embeddings": 2420.751, "encoder_q-layer.0": 1585.2169, "encoder_q-layer.1": 1755.2018, "encoder_q-layer.10": 2794.9807, "encoder_q-layer.11": 6594.4141, "encoder_q-layer.2": 1987.0453, "encoder_q-layer.3": 2024.8167, "encoder_q-layer.4": 2205.6174, "encoder_q-layer.5": 2203.8206, "encoder_q-layer.6": 2286.8831, "encoder_q-layer.7": 2462.8313, "encoder_q-layer.8": 2793.1128, "encoder_q-layer.9": 2433.4597, "epoch": 0.65, "inbatch_neg_score": 0.7838, "inbatch_pos_score": 1.3584, "learning_rate": 1.1111111111111112e-07, "loss": 3.8986, "norm_diff": 0.1303, "norm_loss": 0.0, "num_token_doc": 66.6716, "num_token_overlap": 11.6469, "num_token_query": 31.8308, "num_token_union": 65.2787, "num_word_context": 201.9179, "num_word_doc": 49.7831, "num_word_query": 23.5102, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4483.0461, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7837, "query_norm": 1.472, "queue_k_norm": 1.602, "queue_ptr": 8192.0, "queue_q_norm": 0.0, "sent_len_0": 31.8308, "sent_len_1": 66.6716, "sent_len_max_0": 127.295, "sent_len_max_1": 188.5238, "stdk": 0.0483, "stdq": 0.0425, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 99800 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.8908, "doc_norm": 1.6068, "encoder_q-embeddings": 2464.4199, "encoder_q-layer.0": 1651.7192, "encoder_q-layer.1": 1682.8474, "encoder_q-layer.10": 2546.4102, "encoder_q-layer.11": 6868.8525, "encoder_q-layer.2": 1920.0251, "encoder_q-layer.3": 1925.817, "encoder_q-layer.4": 2099.387, "encoder_q-layer.5": 2006.5034, "encoder_q-layer.6": 2215.0063, "encoder_q-layer.7": 2418.5674, "encoder_q-layer.8": 2603.1936, "encoder_q-layer.9": 2435.4177, "epoch": 0.65, "inbatch_neg_score": 0.7857, "inbatch_pos_score": 1.3311, "learning_rate": 5.555555555555556e-08, "loss": 3.8908, "norm_diff": 0.1441, "norm_loss": 0.0, "num_token_doc": 66.8027, "num_token_overlap": 11.6667, "num_token_query": 31.9337, "num_token_union": 65.402, "num_word_context": 202.3702, "num_word_doc": 49.8102, "num_word_query": 23.5977, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4477.2237, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7856, "query_norm": 1.4627, "queue_k_norm": 1.6021, "queue_ptr": 12288.0, "queue_q_norm": 0.0, "sent_len_0": 31.9337, "sent_len_1": 66.8027, "sent_len_max_0": 127.5025, "sent_len_max_1": 191.3275, "stdk": 0.0485, "stdq": 0.042, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 99900 }, { "accuracy": 45.3125, "active_queue_size": 16384.0, "cl_loss": 3.8856, "doc_norm": 1.6042, "encoder_q-embeddings": 1978.3813, "encoder_q-layer.0": 1328.2655, "encoder_q-layer.1": 1429.391, "encoder_q-layer.10": 2599.437, "encoder_q-layer.11": 6779.5425, "encoder_q-layer.2": 1635.9724, "encoder_q-layer.3": 1647.1279, "encoder_q-layer.4": 1760.8834, "encoder_q-layer.5": 1812.024, "encoder_q-layer.6": 2167.1995, "encoder_q-layer.7": 2423.8809, "encoder_q-layer.8": 2681.1174, "encoder_q-layer.9": 2400.9573, "epoch": 0.65, "inbatch_neg_score": 0.7861, "inbatch_pos_score": 1.3369, "learning_rate": 0.0, "loss": 3.8856, "norm_diff": 0.1376, "norm_loss": 0.0, "num_token_doc": 66.5492, "num_token_overlap": 11.6546, "num_token_query": 31.7946, "num_token_union": 65.1495, "num_word_context": 202.0858, "num_word_doc": 49.6703, "num_word_query": 23.4882, "postclip_grad_norm": 1.0, "preclip_grad_norm": 4245.6731, "preclip_grad_norm_avg": 0.0, "q@queue_neg_score": 0.7852, "query_norm": 1.4666, "queue_k_norm": 1.6016, "queue_ptr": 0.0, "queue_q_norm": 0.0, "sent_len_0": 31.7946, "sent_len_1": 66.5492, "sent_len_max_0": 127.5962, "sent_len_max_1": 189.7287, "stdk": 0.0483, "stdq": 0.0422, "stdqueue_k": 0.0484, "stdqueue_q": 0.0, "step": 100000 }, { "dev_runtime": 43.2843, "dev_samples_per_second": 1.479, "dev_steps_per_second": 0.023, "epoch": 0.65, "step": 100000, "test_accuracy": 93.49365234375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.37501072883605957, "test_doc_norm": 1.5701944828033447, "test_inbatch_neg_score": 1.1212103366851807, "test_inbatch_pos_score": 2.0276026725769043, "test_loss": 0.37501072883605957, "test_loss_align": 0.8943744897842407, "test_loss_unif": 2.8349671363830566, "test_loss_unif_q@queue": 2.8349671363830566, "test_norm_diff": 0.0068693868815898895, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.7722526788711548, "test_query_norm": 1.5687135457992554, "test_queue_k_norm": 1.6023495197296143, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.041642919182777405, "test_stdq": 0.041556548327207565, "test_stdqueue_k": 0.04838546738028526, "test_stdqueue_q": 0.0 }, { "dev_runtime": 43.2843, "dev_samples_per_second": 1.479, "dev_steps_per_second": 0.023, "epoch": 0.65, "eval_beir-arguana_ndcg@10": 0.37955, "eval_beir-arguana_recall@10": 0.65149, "eval_beir-arguana_recall@100": 0.93457, "eval_beir-arguana_recall@20": 0.77738, "eval_beir-avg_ndcg@10": 0.3862135, "eval_beir-avg_recall@10": 0.45590125000000004, "eval_beir-avg_recall@100": 0.6404155833333334, "eval_beir-avg_recall@20": 0.5158790833333333, "eval_beir-cqadupstack_ndcg@10": 0.264705, "eval_beir-cqadupstack_recall@10": 0.3632225, "eval_beir-cqadupstack_recall@100": 0.6014158333333334, "eval_beir-cqadupstack_recall@20": 0.4335508333333334, "eval_beir-fiqa_ndcg@10": 0.25245, "eval_beir-fiqa_recall@10": 0.31073, "eval_beir-fiqa_recall@100": 0.57848, "eval_beir-fiqa_recall@20": 0.37975, "eval_beir-nfcorpus_ndcg@10": 0.30262, "eval_beir-nfcorpus_recall@10": 0.15211, "eval_beir-nfcorpus_recall@100": 0.28685, "eval_beir-nfcorpus_recall@20": 0.18187, "eval_beir-nq_ndcg@10": 0.28918, "eval_beir-nq_recall@10": 0.47301, "eval_beir-nq_recall@100": 0.81301, "eval_beir-nq_recall@20": 0.59311, "eval_beir-quora_ndcg@10": 0.78744, "eval_beir-quora_recall@10": 0.89623, "eval_beir-quora_recall@100": 0.97907, "eval_beir-quora_recall@20": 0.93488, "eval_beir-scidocs_ndcg@10": 0.15287, "eval_beir-scidocs_recall@10": 0.16118, "eval_beir-scidocs_recall@100": 0.37422, "eval_beir-scidocs_recall@20": 0.22043, "eval_beir-scifact_ndcg@10": 0.65164, "eval_beir-scifact_recall@10": 0.79467, "eval_beir-scifact_recall@100": 0.93156, "eval_beir-scifact_recall@20": 0.83133, "eval_beir-trec-covid_ndcg@10": 0.58524, "eval_beir-trec-covid_recall@10": 0.62, "eval_beir-trec-covid_recall@100": 0.4676, "eval_beir-trec-covid_recall@20": 0.6, "eval_beir-webis-touche2020_ndcg@10": 0.19644, "eval_beir-webis-touche2020_recall@10": 0.13637, "eval_beir-webis-touche2020_recall@100": 0.43738, "eval_beir-webis-touche2020_recall@20": 0.20649, "eval_senteval-avg_sts": 0.7476502848762407, "eval_senteval-sickr_spearman": 0.7166329672415084, "eval_senteval-stsb_spearman": 0.7786676025109732, "step": 100000, "test_accuracy": 93.49365234375, "test_active_queue_size": 16384.0, "test_cl_loss": 0.37501072883605957, "test_doc_norm": 1.5701944828033447, "test_inbatch_neg_score": 1.1212103366851807, "test_inbatch_pos_score": 2.0276026725769043, "test_loss": 0.37501072883605957, "test_loss_align": 0.8943744897842407, "test_loss_unif": 2.8349671363830566, "test_loss_unif_q@queue": 2.8349671363830566, "test_norm_diff": 0.0068693868815898895, "test_norm_loss": 0.0, "test_q@queue_neg_score": 0.7722526788711548, "test_query_norm": 1.5687135457992554, "test_queue_k_norm": 1.6023495197296143, "test_queue_ptr": 0.0, "test_queue_q_norm": 0.0, "test_stdk": 0.041642919182777405, "test_stdq": 0.041556548327207565, "test_stdqueue_k": 0.04838546738028526, "test_stdqueue_q": 0.0 }, { "epoch": 0.65, "step": 100000, "total_flos": 0, "train_runtime": 231882.319, "train_samples_per_second": 0.431 } ], "max_steps": 100000, "num_train_epochs": 1, "total_flos": 0, "trial_name": null, "trial_params": null }